• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python sequences.parse_fasta函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中skbio.parse.sequences.parse_fasta函数的典型用法代码示例。如果您正苦于以下问题：Python parse_fasta函数的具体用法？Python parse_fasta怎么用？Python parse_fasta使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了parse_fasta函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_split_fasta_diff_num_seqs_per_file_alt

    def test_split_fasta_diff_num_seqs_per_file_alt(self):
        """split_fasta funcs always catches all seqs

        Splits a 59-record collection with every seqs_per_file value from
        1 to 999 and checks that the lines read back from the split files
        parse to a collection equal to the one parsed from the input lines.
        """
        # start with 59 seqs (b/c it's prime, so should make more
        # confusing splits)
        in_seqs = SequenceCollection.from_fasta_records(
            [('seq%s' % k, 'AACCTTAA') for k in range(59)], DNA)
        infile = in_seqs.to_fasta().split('\n')

        # test seqs_per_file from 1 to 999 (range excludes its upper bound)
        for i in range(1, 1000):
            fd, filename_prefix = mkstemp(dir=get_qiime_temp_dir(),
                                         prefix='split_fasta_tests',
                                         suffix='')
            close(fd)
            # NOTE(review): the empty file mkstemp creates at filename_prefix
            # is not removed by this test -- confirm whether that is intended.

            actual = split_fasta(infile, i, filename_prefix)

            actual_seqs = []
            try:
                for fp in actual:
                    # close each split file as soon as its lines are read
                    with open(fp) as split_f:
                        actual_seqs.extend(split_f)
            finally:
                # remove the files now, so if reading or the assertion
                # below fails they still get cleaned up
                remove_files(actual)

            # building seq collections from infile and the split files result in
            # equivalent seq collections
            self.assertEqual(
                SequenceCollection.from_fasta_records(parse_fasta(infile), DNA),
                SequenceCollection.from_fasta_records(parse_fasta(actual_seqs), DNA))
开发者ID:ElDeveloper,项目名称:qiime,代码行数:30,代码来源:test_split.py


示例2: fast_denoiser

def fast_denoiser(sff_fps, fasta_fp, tmp_outdir, num_cpus, primer, verbose=True, titanium=False):
    """Wrapper function calling methods from the Denoiser package.

    Runs denoise_seqs (with cluster=True and num_cpus forwarded when
    num_cpus > 1) and then loads the output files from tmp_outdir.

    Returns a (seqs, mapping) tuple:
      seqs: the parse_fasta results for centroids.fasta followed by those
            for singletons.fasta, chained together.
      mapping: dict keyed by the 0-based line index of denoiser_mapping.txt;
               each value is a list [cluster_id, member_id, ...].
    """
    if num_cpus > 1:
        denoise_seqs(
            sff_fps,
            fasta_fp,
            tmp_outdir,
            primer=primer,
            cluster=True,
            num_cpus=num_cpus,
            verbose=verbose,
            titanium=titanium,
        )
    else:
        denoise_seqs(sff_fps, fasta_fp, tmp_outdir, primer=primer, verbose=verbose, titanium=titanium)

    # read centroids and singletons
    # NOTE(review): these two handles are deliberately left open because the
    # parsed records are handed back to the caller unconsumed; presumably
    # parse_fasta reads lazily -- confirm before closing them here.
    centroids = parse_fasta(open(tmp_outdir + "/centroids.fasta"))
    singletons = parse_fasta(open(tmp_outdir + "/singletons.fasta"))

    seqs = chain(centroids, singletons)

    # read mapping; the file is fully consumed here, so it can be closed
    mapping = {}
    with open(tmp_outdir + "/denoiser_mapping.txt") as cluster_mapping:
        for i, line in enumerate(cluster_mapping):
            # each line has the form "<cluster_id>:<whitespace-separated members>"
            cluster, members = line.split(":")
            mapping[i] = [cluster] + members.split()

    return seqs, mapping
开发者ID:Honglongwu,项目名称:qiime,代码行数:33,代码来源:denoise_wrapper.py


示例3: test_split_fasta_diff_num_seqs_per_file

    def test_split_fasta_diff_num_seqs_per_file(self):
        """split_fasta funcs as expected when diff num seqs go to each file

        Splits three records two-per-file, then checks both the returned
        file paths and that the split files parse back to the same
        collection as the input lines.
        """
        fd, filename_prefix = mkstemp(dir=get_qiime_temp_dir(),
                                     prefix='split_fasta_tests',
                                     suffix='')
        close(fd)
        infile = ['>seq1', 'AACCTTAA', '>seq2', 'TTAACC', 'AATTAA',
                  '>seq3', 'CCTT--AA']

        actual = split_fasta(infile, 2, filename_prefix)

        actual_seqs = []
        try:
            for fp in actual:
                # close each split file as soon as its lines are read
                with open(fp) as split_f:
                    actual_seqs.extend(split_f)
        finally:
            # clean up the split files even if reading them fails
            remove_files(actual)

        expected = ['%s.%d.fasta' % (filename_prefix, i) for i in range(2)]
        # list of file paths is as expected
        self.assertEqual(actual, expected)
        # building seq collections from infile and the split files result in
        # equivalent seq collections
        self.assertEqual(
            SequenceCollection.from_fasta_records(parse_fasta(infile), DNA),
            SequenceCollection.from_fasta_records(parse_fasta(actual_seqs), DNA))
开发者ID:ElDeveloper,项目名称:qiime,代码行数:25,代码来源:test_split.py


示例4: setUp

    def setUp(self):
        """Split the FASTA fixtures into lines and parse the expected records."""
        def to_lines(text):
            return text.split("\n")

        def parsed_records(text):
            return list(parse_fasta(to_lines(text)))

        # raw input lines handed to the code under test
        self.fasta_lines1 = to_lines(fasta_lines1)
        self.fasta_lines1_mixed_case = to_lines(fasta_lines1_mixed_case)

        # expected results, stored as lists of parse_fasta records
        self.fasta_lines1_exp = parsed_records(fasta_lines1_exp)
        self.fasta_lines1_mixed_case_exp = parsed_records(
            fasta_lines1_mixed_case_exp)
        self.fasta_lines1_exp_null_desc_mapper = parsed_records(
            fasta_lines1_exp_null_desc_mapper)
开发者ID:Honglongwu,项目名称:qiime,代码行数:8,代码来源:test_adjust_seq_orientation.py


示例5: __call__

    def __call__(self, seq_path, result_path=None, log_path=None,
                 failure_path=None):
        """Align the sequences in seq_path against the template alignment.

        seq_path: path to the FASTA file of candidate sequences.
        result_path: if given, aligned sequences are written there as FASTA
            and None is returned.
        log_path: passed to NastLogger.
        failure_path: if given, sequences that failed to align are written
            there as FASTA.

        When result_path is None, returns LoadSeqs(data=pynast_aligned,
        aligned=DenseAlignment), or {} if that raises ValueError.

        Raises KeyError if loading the template alignment raises KeyError.
        """
        # load candidate sequences
        seq_file = open(seq_path, 'U')
        try:
            candidate_sequences = parse_fasta(seq_file)

            # load template sequences
            template_alignment = []
            template_alignment_fp = self.Params['template_filepath']
            with open(template_alignment_fp) as template_file:
                for seq_id, seq in parse_fasta(template_file):
                    # replace '.' characters with '-' characters
                    template_alignment.append(
                        (seq_id, seq.replace('.', '-').upper()))
            try:
                template_alignment = LoadSeqs(data=template_alignment,
                                              moltype=DNA,
                                              aligned=DenseAlignment)
            except KeyError as e:
                raise KeyError('Only ACGT-. characters can be contained in template alignments.' +
                               ' The offending character was: %s' % e)

            # initialize_logger
            logger = NastLogger(log_path)

            # get function for pairwise alignment method
            pairwise_alignment_f = pairwise_alignment_methods[
                self.Params['pairwise_alignment_method']]

            pynast_aligned, pynast_failed = pynast_seqs(
                candidate_sequences,
                template_alignment,
                min_pct=self.Params['min_pct'],
                min_len=self.Params['min_len'],
                align_unaligned_seqs_f=pairwise_alignment_f,
                logger=logger,
                temp_dir=get_qiime_temp_dir())
        finally:
            # NOTE(review): assumes pynast_seqs has finished reading
            # candidate_sequences by the time it returns -- confirm.
            seq_file.close()

        logger.record(str(self))

        if failure_path is not None:
            with open(failure_path, 'w') as fail_file:
                for seq in pynast_failed:
                    fail_file.write(seq.toFasta())
                    fail_file.write('\n')

        if result_path is not None:
            with open(result_path, 'w') as result_file:
                for seq in pynast_aligned:
                    result_file.write(seq.toFasta())
                    result_file.write('\n')
            return None

        try:
            return LoadSeqs(data=pynast_aligned, aligned=DenseAlignment)
        except ValueError:
            return {}
开发者ID:MicrobiomeResearch,项目名称:qiime,代码行数:56,代码来源:align_seqs.py


示例6: __call__

    def __call__(self, seq_path, result_path=None, log_path=None,
                 failure_path=None):
        """Align the sequences in seq_path against the template alignment.

        seq_path: path to the FASTA file of candidate sequences.
        result_path: if given, the aligned sequences are written there as
            FASTA and None is returned.
        log_path: passed to NastLogger.
        failure_path: if given, sequences that failed to align are written
            there as FASTA.

        When result_path is None, returns the aligned sequences as an
        Alignment of DNASequence objects.
        """
        # load candidate sequences
        seq_file = open(seq_path, 'U')
        try:
            candidate_sequences = parse_fasta(seq_file)

            # load template sequences
            template_records = []
            template_alignment_fp = self.Params['template_filepath']
            with open(template_alignment_fp) as template_file:
                for seq_id, seq in parse_fasta(template_file):
                    # replace '.' characters with '-' characters
                    template_records.append(
                        (seq_id, seq.replace('.', '-').upper()))
            template_alignment = Alignment.from_fasta_records(
                template_records, DNASequence, validate=True)

            # initialize_logger
            logger = NastLogger(log_path)

            # get function for pairwise alignment method
            pairwise_alignment_f = pairwise_alignment_methods[
                self.Params['pairwise_alignment_method']]

            pynast_aligned, pynast_failed = pynast_seqs(
                candidate_sequences,
                template_alignment,
                min_pct=self.Params['min_pct'],
                min_len=self.Params['min_len'],
                align_unaligned_seqs_f=pairwise_alignment_f,
                logger=logger,
                temp_dir=get_qiime_temp_dir())
        finally:
            # NOTE(review): assumes pynast_seqs has finished reading
            # candidate_sequences by the time it returns -- confirm.
            seq_file.close()

        logger.record(str(self))

        # re-wrap the pynast result objects as DNASequence, keeping each
        # sequence's Name as its id
        pynast_failed = SequenceCollection(
            [DNASequence(str(seq), id=seq.Name) for seq in pynast_failed])
        pynast_aligned = Alignment(
            [DNASequence(str(seq), id=seq.Name) for seq in pynast_aligned])

        if failure_path is not None:
            with open(failure_path, 'w') as fail_file:
                fail_file.write(pynast_failed.to_fasta())

        if result_path is not None:
            with open(result_path, 'w') as result_file:
                result_file.write(pynast_aligned.to_fasta())
            return None

        return pynast_aligned
开发者ID:Kleptobismol,项目名称:qiime,代码行数:55,代码来源:align_seqs.py


示例7: setUp

    def setUp(self):
        """Create the temp input/template/output files and expected results.

        Every file is created with mkstemp under the "PyNastAlignerTests_"
        prefix and recorded in self._paths_to_clean_up.
        """
        def make_temp_file(suffix, content=""):
            # reserve a unique path, release the OS-level descriptor, then
            # (re)write the file through a regular file object
            fd, path = mkstemp(prefix="PyNastAlignerTests_", suffix=suffix)
            close(fd)
            with open(path, "w") as handle:
                handle.write(content)
            return path

        # candidate sequences
        self.pynast_test1_input_fp = make_temp_file(
            ".fasta", pynast_test1_input_fasta)

        # template alignment and its variants (dots for gaps, U for T,
        # lower case)
        self.pynast_test1_template_fp = make_temp_file(
            "template.fasta", pynast_test1_template_fasta)
        self.pynast_test_template_w_dots_fp = make_temp_file(
            "template.fasta", pynast_test1_template_fasta.replace("-", "."))
        self.pynast_test_template_w_u_fp = make_temp_file(
            "template.fasta", pynast_test1_template_fasta.replace("T", "U"))
        self.pynast_test_template_w_lower_fp = make_temp_file(
            "template.fasta", pynast_test1_template_fasta.lower())

        # create temp file names (and touch them so we can reliably
        # clean them up)
        self.result_fp = make_temp_file(".fasta")
        self.failure_fp = make_temp_file(".fasta")
        self.log_fp = make_temp_file(".log")

        self._paths_to_clean_up = [
            self.pynast_test1_input_fp,
            self.result_fp,
            self.failure_fp,
            self.log_fp,
            self.pynast_test1_template_fp,
            self.pynast_test_template_w_dots_fp,
            self.pynast_test_template_w_u_fp,
            self.pynast_test_template_w_lower_fp,
        ]

        aligner_params = {
            "template_filepath": self.pynast_test1_template_fp,
            "min_len": 15,
        }
        self.pynast_test1_aligner = PyNastAligner(aligner_params)

        self.pynast_test1_expected_aln = Alignment.from_fasta_records(
            parse_fasta(pynast_test1_expected_alignment), DNA)
        self.pynast_test1_expected_fail = SequenceCollection.from_fasta_records(
            parse_fasta(pynast_test1_expected_failure), DNA)
开发者ID:Honglongwu,项目名称:qiime,代码行数:55,代码来源:test_align_seqs.py


示例8: test_deblur_with_non_default_error_profile

    def test_deblur_with_non_default_error_profile(self):
        """deblur gives the expected result for a custom error_dist.

        The same error profile is supplied first as a plain list and then
        as a numpy array; both results are checked against exp.
        """
        error_dist = [
            1,
            0.05,
            0.000005,
            0.000005,
            0.000005,
            0.000005,
            0.0000025,
            0.0000025,
            0.0000025,
            0.0000025,
            0.0000025,
            0.0000005,
            0.0000005,
            0.0000005,
            0.0000005,
        ]
        exp = [
            Sequence(
                "E.Coli-999;size=720;",
                "tacggagggtgcaagcgttaatcggaattactgggcgtaaagcgcacgcaggcggt"
                "ttgttaagtcagatgtgaaatccccgggctcaacctgggaactgcatctgatactg"
                "gcaagcttgagtctcgtagaggggggcagaattccag",
            )
        ]

        # Trying with a plain list; assert here so this result is actually
        # checked instead of being overwritten by the numpy run below
        seqs_f = StringIO(TEST_SEQS_2)
        obs = deblur(parse_fasta(seqs_f), error_dist=error_dist)
        self.assertEqual(obs, exp)

        # Trying with a numpy array holding the same values
        seqs_f = StringIO(TEST_SEQS_2)
        obs = deblur(parse_fasta(seqs_f), error_dist=np.array(error_dist))
        self.assertEqual(obs, exp)
开发者ID:davideflo,项目名称:deblur,代码行数:54,代码来源:test_deblurring.py


示例9: combine_mappings

def combine_mappings(fasta_fh, mapping_fh, denoised_seqs_fh,
                     otu_picker_otu_map_fh, out_dir):
    """Combine denoiser and OTU picker mapping file, replace flowgram IDs.

    fasta_fh: a fasta file with labels as produced by Qiime's split_libraries.py
             used to replace flowgram id with the unique se_sample_id

    mapping_fh: The cluster mapping from the denoiser.py

    denoised_seqs_fh: the Fasta output files from denoiser.py

    otu_picker_otu_map_fh: cluster map from otu picker on denoised_seqs_fh

    out_dir: output directory

    Writes out_dir/denoised_otu_map.txt and out_dir/denoised_all.fasta.
    Prints a message and calls exit() if an id from the denoiser output has
    no entry in the mapping built from fasta_fh.
    """

    # read in mapping from split_library file
    labels = imap(lambda a_b: a_b[0], parse_fasta(fasta_fh))
    # mapping from seq_id to sample_id
    sample_id_mapping = extract_read_to_sample_mapping(labels)
    get_sample_id = sample_id_mapping.get

    denoiser_mapping = read_denoiser_mapping(mapping_fh)
    # read in cd_hit otu map
    # and write out combined otu_picker+denoiser map
    with open(out_dir + "/denoised_otu_map.txt", "w") as otu_fh:
        for otu_line in otu_picker_otu_map_fh:
            otu_split = otu_line.split()
            if not otu_split:
                # blank line: nothing to map
                continue

            otu = otu_split[0]
            ids = otu_split[1:]

            # concat lists
            # make sure the biggest one is first for pick_repr
            all_ids = sort_ids(ids, denoiser_mapping)
            # collect the members first, then extend, so the source list is
            # never grown while it is being iterated
            members = [member
                       for seq_id in ids
                       for member in denoiser_mapping[seq_id]]
            all_ids.extend(members)
            try:
                otu_fh.write("%s\t" % otu +
                             "\t".join(map(get_sample_id, all_ids)) + "\n")
            except TypeError:
                # get returns None if denoiser_mapping id not present in
                # sample_id_mapping, and join() rejects None
                print("Found id in denoiser output, which was not found in split_libraries "
                      "output FASTA file. Wrong file?")
                exit()

    with open(out_dir + "/denoised_all.fasta", "w") as fasta_out_fh:
        for label, seq in parse_fasta(denoised_seqs_fh):
            seq_id = label.split()[0]
            newlabel = "%s %s" % (sample_id_mapping[seq_id], seq_id)
            fasta_out_fh.write(BiologicalSequence(seq, id=newlabel).to_fasta())
开发者ID:Kleptobismol,项目名称:qiime,代码行数:51,代码来源:denoise_postprocess.py


示例10: test_call_write_to_file

 def test_call_write_to_file(self):
     """ReferenceRepSetPicker.__call__ otu map correctly written to file"""
     picker_params = {'Algorithm': 'first', 'ChoiceF': first_id}
     picker = ReferenceRepSetPicker(params=picker_params)
     picker(self.tmp_seq_filepath,
            self.tmp_otu_filepath,
            self.ref_seq_filepath,
            result_path=self.result_filepath)

     # sequences the picker wrote to the result file
     with open(self.result_filepath) as result_f:
         observed_records = parse_fasta(result_f)
         actual = SequenceCollection.from_fasta_records(observed_records, DNA)

     # sequences we expect, parsed from the module-level fixture
     expected_lines = rep_seqs_reference_result_file_exp.split('\n')
     expected = SequenceCollection.from_fasta_records(
         parse_fasta(expected_lines), DNA)

     # we don't care about order in the results
     self.assertEqual(set(actual), set(expected))
开发者ID:ElDeveloper,项目名称:qiime,代码行数:14,代码来源:test_pick_rep_set.py


示例11: seqs_from_file

def seqs_from_file(ids, file_lines):
    """Yield (label, seq) for each parsed record whose label-derived id is in ids."""
    for label, seq in parse_fasta(file_lines):
        # skip records whose id was not requested
        if id_from_fasta_label_line(label) not in ids:
            continue
        yield label, seq
开发者ID:ElDeveloper,项目名称:qiime,代码行数:7,代码来源:exclude_seqs_by_blast.py


示例12: check_fasta_seqs_lens

def check_fasta_seqs_lens(input_fasta_fp):
    """ Creates bins of sequence lens

    Useful for checking for valid aligned sequences.

    input_fasta_fp:  input fasta filepath

    Returns a list of (count, sequence_length) tuples sorted in descending
    order, so the most frequent length comes first.
    """
    seq_lens = defaultdict(int)

    # the context manager closes the file even if parsing raises
    with open(input_fasta_fp, "U") as input_fasta_f:
        for label, seq in parse_fasta(input_fasta_f):
            seq_lens[len(seq)] += 1

    return sorted(
        ((count, seq_len) for seq_len, count in seq_lens.items()),
        reverse=True)
开发者ID:Springbudder,项目名称:qiime,代码行数:25,代码来源:validate_demultiplexed_fasta.py


示例13: output_test

 def output_test(self, aligned_basename):
     """ Test results of test_load_zip() and test_load_gzip()

     Reads aligned_basename + ".log", "_otus.txt" and "_denovo.fasta" and
     checks the read/cluster counts reported in the log against fixed
     expected values and against the contents of the other two files.
     """
     # start as None so a missing log line fails the assertions below
     # instead of raising NameError
     num_hits = None
     num_failures_log = None
     num_clusters_log = None
     with open(aligned_basename + ".log", "U") as f_log:
         f_log_str = f_log.read()
         self.assertTrue("Total reads passing E-value threshold" in f_log_str)
         self.assertTrue("Total reads for de novo clustering" in f_log_str)
         self.assertTrue("Total OTUs" in f_log_str)
         # rewind and walk the log line by line to pull out the counts
         f_log.seek(0)
         for line in f_log:
             if line.startswith("    Total reads passing E-value threshold"):
                 num_hits = (re.split(r'Total reads passing E-value threshold = | \(', line)[1]).strip()
             elif line.startswith("    Total reads for de novo clustering"):
                 num_failures_log = (re.split(r'Total reads for de novo clustering = ',
                               line)[1]).strip()
             elif line.startswith(" Total OTUs"):
                 num_clusters_log = (re.split(r'Total OTUs = ', line)[1]).strip()
     # Correct number of reads mapped
     self.assertEqual("99999", num_hits)
     # Correct number of clusters recorded
     self.assertEqual("272", num_clusters_log)
     # Correct number of clusters in OTU-map
     with open(aligned_basename + "_otus.txt", 'U') as f_otumap:
         num_clusters_file = sum(1 for line in f_otumap)
     self.assertEqual(272, num_clusters_file)
     with open(aligned_basename + "_denovo.fasta", 'U') as f_denovo:
         num_failures_file = sum(1 for _ in parse_fasta(f_denovo))
     # Correct number of reads for de novo clustering
     self.assertEqual(num_failures_log, str(num_failures_file))
开发者ID:biocore,项目名称:sortmerna,代码行数:32,代码来源:test_sortmerna_zlib.py


示例14: align_two_alignments

def align_two_alignments(aln1, aln2, moltype, params=None):
    """Profile-align two alignments with mafft-profile; return an Alignment.

    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can be
    used to build them.
        - Mafft profile alignment only works with aligned sequences. Alignment
        object used to handle unaligned sequences.

    moltype: passed as MolType to every Alignment built here.

    params: dict of parameters to pass in to the Mafft app controller.
    """
    # Wrap the first input, get its abbreviated-ID map and the lookup from
    # abbreviated IDs back to full IDs, and rebuild it under the short IDs.
    first_aln = Alignment(aln1, MolType=moltype)
    first_short_map, short_to_full = first_aln.getIntMap()
    first_short_aln = Alignment(first_short_map, MolType=moltype)

    # Same for the second input, with a distinct ID prefix.
    second_aln = Alignment(aln2, MolType=moltype)
    second_short_map, second_short_to_full = second_aln.getIntMap(
        prefix='seqn_')
    second_short_aln = Alignment(second_short_map, MolType=moltype)

    # One combined lookup from abbreviated IDs to full IDs.
    short_to_full.update(second_short_to_full)

    # Configure the Mafft controller to run mafft-profile on file paths.
    mafft_app = Mafft(InputHandler='_input_as_paths',
                      params=params,
                      SuppressStderr=False)
    mafft_app._command = 'mafft-profile'

    # Write both abbreviated-ID alignments to temp files.
    first_path = mafft_app._tempfile_as_multiline_string(
        first_short_aln.toFasta())
    second_path = mafft_app._tempfile_as_multiline_string(
        second_short_aln.toFasta())

    # Run the app on the two temp files.
    run_result = mafft_app([first_path, second_path])

    # Parse stdout into {label: sequence}.
    short_id_alignment = dict(parse_fasta(run_result['StdOut']))

    # Strip the '_seed_' marker from each label and restore the full ID.
    full_id_alignment = dict(
        (short_to_full[label.replace('_seed_', '')], sequence)
        for label, sequence in short_id_alignment.items())
    new_alignment = Alignment(full_id_alignment, MolType=moltype)

    # Clean up the app result, the temp inputs and the 'pre'/'trace' files.
    run_result.cleanUp()
    for leftover in (first_path, second_path, 'pre', 'trace'):
        remove(leftover)
    del (first_aln, first_short_map, first_short_aln, short_to_full,
         second_aln, second_short_map, second_short_aln,
         second_short_to_full, mafft_app, run_result, short_id_alignment,
         full_id_alignment)

    return new_alignment
开发者ID:ElDeveloper,项目名称:brokit,代码行数:60,代码来源:mafft.py


示例15: test_main

    def test_main(self):
        """Denoiser should always give same result on test data"""

        expected = ">FS8APND01D3TW3 | cluster size: 94 \nCTCCCGTAGGAGTCTGGGCCGTATCTCAGTCCCAATGTGGCCGGTCACCCTCTCAGGCCGGCTACCCGTCAAAGCCTTGGTAAGCCACTACCCCACCAACAAGCTGATAAGCCGCGAGTCCATCCCCAACCGCCGAAACTTTCCAACCCCCACCATGCAGCAGGAGCTCCTATCCGGTATTAGCCCCAGTTTCCTGAAGTTATCCCAAAGTCAAGGGCAGGTTACTCACGTGTTACTCACCCGTTCGCC\n"

        expected_map = """FS8APND01EWRS4:
FS8APND01DXG45:
FS8APND01D3TW3:\tFS8APND01CSXFN\tFS8APND01DQ8MX\tFS8APND01DY7QW\tFS8APND01B5QNI\tFS8APND01CQ6OG\tFS8APND01C7IGN\tFS8APND01DHSGH\tFS8APND01DJ17E\tFS8APND01CUXOA\tFS8APND01EUTYG\tFS8APND01EKK7T\tFS8APND01D582W\tFS8APND01B5GWU\tFS8APND01D7N2A\tFS8APND01BJGHZ\tFS8APND01D6DYZ\tFS8APND01C6ZIM\tFS8APND01D2X6Y\tFS8APND01BUYCE\tFS8APND01BNUEY\tFS8APND01DKLOE\tFS8APND01C24PP\tFS8APND01EBWQX\tFS8APND01ELDYW\tFS8APND01B0GCS\tFS8APND01D4QXI\tFS8APND01EMYD9\tFS8APND01EA2SK\tFS8APND01DZOSO\tFS8APND01DHYAZ\tFS8APND01C7UD9\tFS8APND01BTZFV\tFS8APND01CR78R\tFS8APND01B39IE\tFS8APND01ECVC0\tFS8APND01DM3PL\tFS8APND01DELWS\tFS8APND01CIEK8\tFS8APND01D7ZOZ\tFS8APND01CZSAI\tFS8APND01DYOVR\tFS8APND01BX9XY\tFS8APND01DEWJA\tFS8APND01BEKIW\tFS8APND01DCKB9\tFS8APND01EEYIS\tFS8APND01DDKEA\tFS8APND01DSZLO\tFS8APND01C6EBC\tFS8APND01EE15M\tFS8APND01ELO9B\tFS8APND01C58QY\tFS8APND01DONCG\tFS8APND01DVXX2\tFS8APND01BL5YT\tFS8APND01BIL2V\tFS8APND01EBSYQ\tFS8APND01CCX8R\tFS8APND01B2YCJ\tFS8APND01B1JG4\tFS8APND01DJ024\tFS8APND01BIJY0\tFS8APND01CIA4G\tFS8APND01DV74M\tFS8APND01ECAX5\tFS8APND01DC3TZ\tFS8APND01EJVO6\tFS8APND01D4VFG\tFS8APND01DYYYO\tFS8APND01D1EDD\tFS8APND01DQUOT\tFS8APND01A2NSJ\tFS8APND01DDC8I\tFS8APND01BP1T2\tFS8APND01DPY6U\tFS8APND01CIQGV\tFS8APND01BPUT8\tFS8APND01BDNH4\tFS8APND01DOZDN\tFS8APND01DS866\tFS8APND01DGS2J\tFS8APND01EDK32\tFS8APND01EPA0T\tFS8APND01CK3JM\tFS8APND01BKLWW\tFS8APND01DV0BO\tFS8APND01DPNXE\tFS8APND01B7LUA\tFS8APND01BTTE2\tFS8APND01CKO4X\tFS8APND01DGGBY\tFS8APND01C4NHX\tFS8APND01DYPQN
FS8APND01BSTVP:
FS8APND01EFK0W:
FS8APND01DCIOO:
FS8APND01CKOMZ:
"""

        command = " ".join(["denoiser.py",
                            "--force", "-o", self.test_dir, "-i",
                            "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME])

        result = Popen(command, shell=True, universal_newlines=True,
                       stdout=PIPE, stderr=STDOUT).stdout.read()
        self.result_dir = self.test_dir

        observed = "".join(list(open(self.result_dir + "centroids.fasta")))
        self.assertEqual(observed, expected)

        self.assertEqual(
            len(list(parse_fasta(open(self.result_dir + "singletons.fasta")))),
            6)

        observed = "".join(
            list(open(self.result_dir + "denoiser_mapping.txt")))
        self.assertEqual(observed, expected_map)
开发者ID:ElDeveloper,项目名称:qiime,代码行数:32,代码来源:test_denoiser.py


示例16: test_dereplicate_seqs_remove_singletons

    def test_dereplicate_seqs_remove_singletons(self):
        """ Test dereplicate_seqs() method functionality with
            removing singletons
        """
        # seq1-3 share one sequence, seq6-7 another; seq4 and seq5 are unique
        seqs = [("seq1", "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCG"),
                ("seq2", "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCG"),
                ("seq3", "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCG"),
                ("seq4", "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAAAGCGTCCT"),
                ("seq5", "TACCAGCCCCTTAAGTGGTAGGGACGATTATTTGGCCTAAAGCGTCCG"),
                ("seq6", "CTGCAAGGCTAGGGGGCGGGAGAGGCGGGTGGTACTTGAGGGGAGAAT"),
                ("seq7", "CTGCAAGGCTAGGGGGCGGGAGAGGCGGGTGGTACTTGAGGGGAGAAT")]

        # file locations inside the per-test working directory
        seqs_fp = join(self.working_dir, "seqs.fasta")
        output_fp = join(self.working_dir, "seqs_derep.fasta")
        log_fp = join(self.working_dir, "seqs_derep.log")

        # write the input records as two-line FASTA entries
        with open(seqs_fp, 'w') as seqs_f:
            seqs_f.writelines(">%s\n%s\n" % record for record in seqs)

        dereplicate_seqs(seqs_fp=seqs_fp,
                         output_fp=output_fp)
        for produced_fp in (output_fp, log_fp):
            self.assertTrue(isfile(produced_fp))

        # only the sequences seen more than once are expected, each labelled
        # with the first id carrying it plus its size
        exp = [("seq1;size=3;", seqs[0][1]),
               ("seq6;size=2;", seqs[5][1])]

        with open(output_fp, 'U') as out_f:
            act = list(parse_fasta(out_f))

        self.assertEqual(act, exp)
开发者ID:RNAer,项目名称:deblur,代码行数:33,代码来源:test_workflow.py


示例17: getResult

    def getResult(self, aln_path, *args, **kwargs):
        """Returns a tree built from the alignment stored at aln_path.

        Currently does not allow parameter tuning of program and uses
        default parameters -- this is bad and should be fixed.

        aln_path: path to a FASTA alignment file.
        kwargs: only 'root_method' is read; 'midpoint' passes the tree
            through root_midpt, and any other value (or none) returns the
            tree as built.

        #TODO: allow command-line access to important aln params.
        """
        module = self.Params['Module']
        # standard qiime says we just consider the first word as the unique ID
        # the rest of the defline of the fasta alignment often doesn't match
        # the otu names in the otu table
        with open(aln_path) as aln_f:
            seqs = Alignment.from_fasta_records(
                parse_fasta(aln_f, label_to_name=lambda x: x.split()[0]),
                DNA)
        # This ugly little line of code lets us pass a skbio Alignment when a
        # a cogent alignment is expected.
        seqs.getIntMap = seqs.int_map
        result = module.build_tree_from_alignment(seqs, moltype=DNA_cogent)

        # Look the option up with .get() rather than inside try/except
        # KeyError, so a KeyError raised while rooting is not swallowed.
        if kwargs.get('root_method') == 'midpoint':
            result = root_midpt(result)
        return result
开发者ID:Bonder-MJ,项目名称:qiime,代码行数:30,代码来源:make_phylogeny.py


示例18: setUp

 def setUp(self):
     """Load the query alignment and write the reference FASTA and tree files."""
     # paths and directories to remove after the test
     self._paths_to_clean_up = []
     self._dirs_to_clean_up = []

     # query sequences as an Alignment
     query_lines = QUERY_SEQS.split()
     self.seqs = Alignment(parse_fasta(query_lines))

     # temp filename that the other file names are derived from
     tmp_dir = '/tmp'
     self.outfile = get_tmp_filename(tmp_dir)
     stem = splitext(self.outfile)[0]

     # reference sequence file
     self.outfasta = stem + '.fasta'
     ref_seqs_handle = open(self.outfasta, 'w')
     ref_seqs_handle.write(REF_SEQS)
     ref_seqs_handle.close()
     self._paths_to_clean_up.append(self.outfasta)

     # starting tree file
     self.outtree = stem + '.tree'
     ref_tree_handle = open(self.outtree, 'w')
     ref_tree_handle.write(REF_TREE)
     ref_tree_handle.close()
     self._paths_to_clean_up.append(self.outtree)
开发者ID:ElDeveloper,项目名称:brokit,代码行数:26,代码来源:test_parsinsert.py


示例19: _generate_training_files

    def _generate_training_files(self):
        """Returns a tuple of file objects suitable for passing to the
        RdpTrainer application controller.

        Builds an RdpTrainingSet from the files named by the
        'reference_sequences_fp' and 'id_to_taxonomy_fp' params, stores it
        on self._training_set, and returns (rdp_taxonomy_file,
        rdp_training_seqs_file): two NamedTemporaryFile objects created in
        the qiime temp dir and rewound to the start.
        """
        tmp_dir = get_qiime_temp_dir()
        training_set = RdpTrainingSet()

        # Open both inputs up front so a bad path fails before any parsing,
        # and close them once they have been read.
        with open(self.Params['reference_sequences_fp'], 'U') as \
                reference_seqs_file:
            with open(self.Params['id_to_taxonomy_fp'], 'U') as \
                    id_to_taxonomy_file:
                for seq_id, seq in parse_fasta(reference_seqs_file):
                    training_set.add_sequence(seq_id, seq)

                for line in id_to_taxonomy_file:
                    # each line is "<seq_id>\t<lineage>"
                    seq_id, lineage_str = map(strip, line.split('\t'))
                    training_set.add_lineage(seq_id, lineage_str)

        training_set.dereplicate_taxa()

        rdp_taxonomy_file = NamedTemporaryFile(
            prefix='RdpTaxonAssigner_taxonomy_', suffix='.txt', dir=tmp_dir)
        rdp_taxonomy_file.write(training_set.get_rdp_taxonomy())
        # rewind so the caller reads from the beginning
        rdp_taxonomy_file.seek(0)

        rdp_training_seqs_file = NamedTemporaryFile(
            prefix='RdpTaxonAssigner_training_seqs_', suffix='.fasta',
            dir=tmp_dir)
        for rdp_id, seq in training_set.get_training_seqs():
            rdp_training_seqs_file.write('>%s\n%s\n' % (rdp_id, seq))
        rdp_training_seqs_file.seek(0)

        self._training_set = training_set

        return rdp_taxonomy_file, rdp_training_seqs_file
开发者ID:elinekl,项目名称:qiime,代码行数:33,代码来源:assign_taxonomy.py


示例20: filter_fasta

def filter_fasta(input_seqs_f, output_seqs_f, seqs_to_keep, negate=False,
                 seqid_f=None):
    """ Write filtered input_seqs to output_seqs_f which contains only seqs_to_keep

        input_seqs_f: FASTA input; it is passed to parse_fasta here.
        output_seqs_f: writable file object; it is closed before returning,
         even if parsing or writing raises.
        seqs_to_keep: sequence ids to select; only the first
         whitespace-delimited word of each id is compared. Ignored when
         seqid_f is given.
        negate: if true, write the sequences that are NOT selected.
        seqid_f: optional callable taking a sequence id and returning a
         truthy value for ids to select.
    """
    if seqid_f is None:
        seqs_to_keep_lookup = set(seq_id.split()[0]
                                  for seq_id in seqs_to_keep)

        def is_selected(seq_id):
            return seq_id.split()[0] in seqs_to_keep_lookup
    else:
        is_selected = seqid_f

    try:
        for seq_id, seq in parse_fasta(input_seqs_f):
            selected = is_selected(seq_id)
            # negate flips the selection
            keep = (not selected) if negate else selected
            if keep:
                output_seqs_f.write('>%s\n%s\n' % (seq_id, seq))
    finally:
        output_seqs_f.close()
开发者ID:Springbudder,项目名称:qiime,代码行数:28,代码来源:filter.py



注：本文中的skbio.parse.sequences.parse_fasta函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python composition.ancom函数代码示例发布时间:2022-05-27
下一篇:
Python metadata.IntervalMetadata类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap