• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python parse.parse_rarefaction函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 qiime.parse.parse_rarefaction 函数的典型用法代码示例。如果您正苦于以下问题:Python parse_rarefaction 函数的具体用法?Python parse_rarefaction 怎么用?Python parse_rarefaction 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了 parse_rarefaction 函数的 17 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。

示例1: setUp

 def setUp(self):
     """Create the rarefaction and mapping fixtures shared by the tests.

     The raw lines are stored on the instance first, followed by the
     structure returned by parse_rarefaction and the first element of the
     result of parse_mapping_file_to_dict.
     """
     # setUp runs before every test, so this data is rebuilt each time; with
     # so few tests the repeated work is not a concern
     rarefaction_lines = [
         '\tsequences per sample\titeration\tSam1\tSam2\tSam3\tSam4\tSam5\tSam6',
         'rare480.txt\t480\t0\t2.52800404052\t2.3614611247\t2.59867416108\t3.56970811181\t3.44800265895\t1.9433560517',
         'rare480.txt\t480\t1\t2.06375457238\t3.32293450758\t3.4189896645\t3.35312890712\t3.10763472113\t2.78155253726',
         'rare480.txt\t480\t2\t2.44788730109\t3.42464996459\t2.24541787295\t2.491419231\t2.60106690099\t5.40828403581',
         'rare480.txt\t480\t3\t5.1846120153\t3.67022675065\t1.54879964908\t2.8055801405\t4.3086171269\t3.87761898868',
         'rare910.txt\t910\t0\t2.67580703282\t1.72405794627\t2.15312863498\t2.4300954476\t3.7753658185\t3.36198860355',
         'rare910.txt\t910\t1\t4.10226466956\t2.24587945345\t3.02932964779\t2.98218513619\t3.73316846484\t1.85879566537',
         'rare910.txt\t910\t2\t1.65800670063\t2.42281993323\t3.02400997565\t3.271608097\t2.99265263795\t3.68802382515',
         'rare910.txt\t910\t3\t2.50976021964\t2.43976761056\t3.32119905587\t2.47487750248\t1.901408525\t3.42883742207',
         'rare500.txt\t500\t0\t3.42225118215\tn/a\t4.03758268426\t2.35344629448\t2.26690085385\t1.80164570104',
         'rare850.txt\t850\t0\t4.2389858006\t4.97464230229\t1.53451087057\t3.35785261181\t1.91658777533\t2.32583475424',
         'rare850.txt\t850\t1\t2.81445883827\tn/a\t2.54767461948\t1.38835207925\t3.70018890199\t1.57359105209',
         'rare850.txt\t850\t2\t2.9340493412\t3.95897035158\tn/a\t2.07761860166\t3.42393336685\t2.6927305603',
     ]
     self.rarefaction_file = rarefaction_lines
     self.rarefaction_data = parse_rarefaction(self.rarefaction_file)

     # header line, one comment line, then one line per sample (Sam1-Sam6)
     mapping_lines = [
         '#SampleID\tDose\tLinkerPrimerSequence\tWeight\tTTD\tDescription',
         '#Comment Line',
         'Sam1\t1xDose\tATCG\tHigh\t31\ts1_desc',
         'Sam2\t1xDose\tACCG\tLow\t67\ts2_desc',
         'Sam3\t2xDose\tACGT\tMed\t21\ts3_desc',
         'Sam4\t2xDose\tAACG\tLow\t55\ts4_desc',
         'Sam5\tControl\tCGTC\tLow\t67\ts5_desc',
         'Sam6\t1xDose\tACCT\tLow\t55\ts6_desc',
     ]
     self.mapping_file = mapping_lines
     self.mapping_data = parse_mapping_file_to_dict(self.mapping_file)[0]
开发者ID:Jorge-C,项目名称:qiime,代码行数:29,代码来源:test_compare_alpha_diversity.py


示例2: generate_alpha_diversity_boxplots

def generate_alpha_diversity_boxplots(rarefaction_lines,
                                      mapping_lines,
                                      category,
                                      depth=None):
    """Build box plots of average alpha diversity, one box per category value.

    Inputs:
     rarefaction_lines - lines of a collated alpha diversity file, handed to
      parse_rarefaction.
     mapping_lines - mapping file lines, handed to
      get_category_value_to_sample_ids.
     category - name of the mapping category used to group the samples.
     depth - forwarded unchanged to get_per_sample_average_diversities
      (default: None).

    Returns the result of generate_box_plots for the per-category-value
    distributions. Boxes are ordered by sorting the (category value,
    diversities) items and labelled "<category value> (n=<count>)".
    """
    rarefaction_data = parse_rarefaction(rarefaction_lines)

    category_value_to_sample_ids = \
     get_category_value_to_sample_ids(mapping_lines,
                                      category)

    # NOTE(review): category is passed as the second argument here; confirm
    # against the signature of get_per_sample_average_diversities
    per_sample_average_diversities = \
     get_per_sample_average_diversities(rarefaction_data,
                                        category,
                                        depth)

    per_category_value_average_diversities = \
     collapse_sample_diversities_by_category_value(category_value_to_sample_ids,
                                                   per_sample_average_diversities)

    # sort the data alphabetically; sorted() is used instead of
    # items() + list.sort() because items() is only a list on Python 2
    sorted_per_category_value_average_diversities = \
     sorted(per_category_value_average_diversities.items())

    x_tick_labels = []
    distributions = []
    for cat, avg_diversities in sorted_per_category_value_average_diversities:
        x_tick_labels.append("%s (n=%d)" % (cat, len(avg_diversities)))
        distributions.append(avg_diversities)

    return generate_box_plots(distributions,
                              x_tick_labels=x_tick_labels)
开发者ID:icaro-henrique,项目名称:qiime,代码行数:32,代码来源:compare_alpha_diversity.py


示例3: compare_alpha_diversities

def compare_alpha_diversities(rarefaction_lines, mapping_lines,
                              category, depth):
    """Run t_two_sample on every pair of values found in a mapping category.

    inputs:
        rarefaction_lines - lines of a rarefaction file, handed to
        parse_rarefaction

        mapping_lines - lines of a mapping file, handed to
        parse_mapping_file_to_dict (only the first returned element is used)

        category - the category to be compared, used as the top level key
        of the result

        depth - the rarefaction depth handed to
        extract_rarefaction_scores_at_depth

    outputs:
        results - a dictionary with category as its only key; the value is
        a dictionary keyed by (str(value_a), str(value_b)) holding whatever
        t_two_sample returns for that pair of category values
    """
    parsed_rarefaction = parse_rarefaction(rarefaction_lines)
    parsed_mapping = parse_mapping_file_to_dict(mapping_lines)[0]

    value_pairs = make_value_pairs_from_category(parsed_mapping, category)
    ids_by_value = make_category_values_Id_dict(parsed_mapping, category)
    id_pairs = map_category_value_pairs_to_Ids(value_pairs, ids_by_value)

    id_to_column = make_SampleIds_rarefaction_columns_dict(parsed_rarefaction)
    scores_at_depth = extract_rarefaction_scores_at_depth(depth,
                                                          parsed_rarefaction)

    comparisons = {}
    results = {category: comparisons}

    # id_pairs and value_pairs are indexed in lockstep
    for position, id_pair in enumerate(id_pairs):
        first_scores = convert_SampleIds_to_rarefaction_mtx(
            id_pair[0], scores_at_depth, id_to_column)
        second_scores = convert_SampleIds_to_rarefaction_mtx(
            id_pair[1], scores_at_depth, id_to_column)

        outcome = t_two_sample(first_scores, second_scores)

        labels = value_pairs[position]
        results[category][(str(labels[0]), str(labels[1]))] = outcome

    return results
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:53,代码来源:compare_alpha_diversity.py


示例4: mean_alpha

def mean_alpha(alpha_dict, depth):
    """mean collated alpha diversity data at a given depth

    Input:
    alpha_dict: dictionary where the values are the lines of a collated alpha
    diversity data files and the keys are the names of each of these files with
    no extension, this name is usually the metric used to compute the alpha
    diversity.
    depth: selected depth to mean the computed alpha diversity values for the
    alpha_dict data.

    Output:
    metrics: list of metric names i. e. the name of each collated alpha div file
    with '_even_<depth>' appended
    sample_ids: list of sample identifiers represented
    data: a list of lists with the mean of alpha diversity data at a given
    depth for the different metrics, each column is a different metric.

    Raises:
    AssertionError: if alpha_dict is not a dict or depth is not a
    non-negative int.
    ValueError: if the collated files do not list the same sample ids in the
    same order.
    """

    assert type(alpha_dict) == dict, "Input data must be a dictionary"
    # the type is checked before the comparison so that a non-integer depth
    # fails this assertion instead of failing inside the comparison itself
    assert type(depth) == int and depth >= 0, "The specified depth must be a "+\
        "positive integer."

    metrics = []
    sample_ids = []
    data = []

    # items() behaves the same as iteritems() here and exists on every
    # Python version
    for key, value in alpha_dict.items():
        metrics.append('{0}_even_{1}'.format(key, depth))
        identifiers, _, _, rarefaction_data = parse_rarefaction(value)

        # check all the files have the same sample ids in the same order
        if sample_ids:
            if not sample_ids == identifiers[3:]:
                # instantiate the exception: raising a (class, message) tuple
                # drops the message on Python 2 and is a TypeError on Python 3
                raise ValueError("Non-matching sample ids were found in the "
                    "collated alpha diversity files. Make sure all the files "
                    "contain data for the same samples.")
        else:
            sample_ids = identifiers[3:]

        # find all the data at the desired depth and get the mean values, remove
        # the first two elements ([depth, iteration]) as those are not needed
        # NOTE(review): when no row matches depth the mean of an empty array
        # is taken; confirm whether that case should be rejected instead
        data.append(array([row[2:] for row in rarefaction_data if\
            row[0] == depth]).mean(axis=0))

    # transpose the data to match the formatting of non-collated alpha div data
    data = array(data).T.tolist()

    return metrics, sample_ids, data
开发者ID:cmhill,项目名称:qiime,代码行数:48,代码来源:add_alpha_to_mapping_file.py


示例5: _collect_alpha_diversity_boxplot_data

def _collect_alpha_diversity_boxplot_data(rarefaction_f, metadata_map,
                                          rarefaction_depth, split_category,
                                          comparison_category):
    """Group alpha diversity values at one depth by two metadata categories.

    Returns (x_tick_labels, dists): labels of the form
    '<split value> (<comparison value>)' in sorted order, and for each label
    the list of non-NaN alpha diversity values of the samples in that group.

    Raises ValueError when no row of the rarefaction data has the requested
    depth.
    """
    parsed = parse_rarefaction(rarefaction_f)

    # The first three header columns are not sample IDs.
    sample_ids = parsed[0][3:]

    # Keep only rows at the requested depth, dropping the leading depth and
    # iteration columns.
    rows_at_depth = []
    for row in parsed[3]:
        if row[0] == rarefaction_depth:
            rows_at_depth.append(row[2:])

    if len(rows_at_depth) == 0:
        raise ValueError("Rarefaction depth of %d could not be found in "
                         "collated alpha diversity file." % rarefaction_depth)

    # (split category value, comparison category value) -> distribution
    grouped = defaultdict(list)
    for values in rows_at_depth:
        assert len(sample_ids) == len(values)
        for sample_id, adiv_val in zip(sample_ids, values):
            if isnan(adiv_val):
                continue
            split_val = metadata_map.getCategoryValue(sample_id,
                                                      split_category)
            comparison_val = metadata_map.getCategoryValue(
                sample_id, comparison_category)
            grouped[split_val, comparison_val].append(adiv_val)

    # Turn each key into its tick label, then order the pairs.
    labelled = sorted([('%s (%s)' % (group[0], group[1]), dist)
                       for group, dist in grouped.items()])

    x_tick_labels = [label for label, _ in labelled]
    dists = [dist for _, dist in labelled]

    return x_tick_labels, dists
开发者ID:biocore,项目名称:my-microbes,代码行数:42,代码来源:util.py


示例6: setUp

    def setUp(self):
        """Populate the fixture attributes shared by the plotting tests."""
        # plotting inputs for a single sample at a single depth
        self.data = {}
        self.data['xaxis'] = [10.0]
        self.sample_dict = {'Sample1': {10.00: [1.3276140000000001]}}
        self.data['yvals'] = {'Sample1': [1.3276140000000001]}
        self.data['err'] = {'Sample1': [0.1]}
        self.xmax = 140
        self.ymax = 20
        self.std_type = 'stddev'
        self.ops = ['Sample1']
        self.mapping_category = 'SampleID'
        self.imagetype = 'png'
        self.resolution = 70
        self.mapping_lookup = {'SampleID-Sample1': 'col_0_row_0'}
        self.data['map'] = [['SampleID', 'Day'], ['Sample1', 'Day1']]
        self.color_prefs = {
            'SampleID': {
                'column': 'SampleID',
                'color': {'Sample1': '#ff0000'},
            },
        }
        self.groups = {'Sample1': ['Sample1']}
        self.background_color = 'black'
        self.label_color = 'white'
        self.labelname = 'SampleID'
        self.rare_data = {
            'color': {'Sample1': '#ff0000'},
            'series': {'Sample1': [2.0515300000000001]},
            'headers': ['test.txt', 'SampleID'],
            'xaxis': [10.0],
            'error': {'Sample1': [0.0]},
            'options': ['Sample1'],
        }
        self.fpath = '/tmp/'
        self.output_dir = '/tmp/'
        self.metric_name = 'test'
        self._paths_to_clean_up = []
        self._folders_to_cleanup = []
        self.rarefaction_file_data = [[10.0, 0.0, 1.0], [10.0, 1.0, 3.0]]

        # colors
        color_names = {'redtowhite3_0': '#7fff00', 'redtowhite3_1': '#7fff00'}
        self.data_colors = color_dict_to_objects(color_names)
        self.colors = {'Sample1': 'redtowhite3_0', 'Sample2': 'redtowhite3_1'}
        self.colors2 = {'Sample1': 'redtowhite3_0'}

        # mapping file lines: header plus samples 123, 234 and 345
        self.mappingfile = [
            '#SampleID\tSex\tAge',
            '123\tF\t32',
            '234\tM\t30',
            '345\tM\t32',
        ]

        # collated rarefaction lines: depths 10, 310 and 610, two iterations
        # each
        self.rarefactionfile = [
            '\tsequences per sample\titeration\t123\t234\t345',
            'rare10.txt\t10\t0\t1.99181\t0.42877\t2.13996',
            'rare10.txt\t10\t1\t2.07163\t0.42877\t2.37055',
            'rare310.txt\t310\t0\t8.83115\t0.42877\t11.00725',
            'rare310.txt\t310\t1\t10.05242\t0.42877\t8.24474',
            'rare610.txt\t610\t0\t12.03067\t0.42877\t11.58928',
            'rare610.txt\t610\t1\t12.9862\t0.42877\t11.58642',
        ]

        self.rares = {
            'test.txt': (
                ['', 'sequences per sample', 'iteration', 'Sample1'],
                [],
                ['rare1.txt', 'rare2.txt'],
                [[10.0, 2.0, 7.0, 7.0, 9.0], [10.0, 2.0, 7.0, 7.0, 9.0]],
            ),
        }

        # parsed forms of self.rarefactionfile
        parsed = parse_rarefaction(self.rarefactionfile)
        (self.col_headers, self.comments,
         self.rarefaction_fns, self.rarefaction_data) = parsed
        extracted = get_rarefaction_data(self.rarefaction_data,
                                         self.col_headers)
        self.matrix, self.seqs_per_samp, self.sampleIDs = extracted

        # expected values
        self.ave_seqs_per_sample1 = {
            'Sample1': [2.03172, 9.4417849999999994, 12.508435],
        }
        self.ave_seqs_per_sample = {
            '123': [2.03172, 9.4417849999999994, 12.508435],
            '234': [0.42876999999999998, 0.42876999999999998,
                    0.42876999999999998],
            '345': [2.255255, 9.625995, 11.58785],
        }
        self.collapsed_ser_sex = {
            'M': [1.3420125000000001, 5.0273824999999999, 6.0083099999999998],
            'F': [2.03172, 9.4417849999999994, 12.508435],
        }
        self.err_ser_sex = {
            'M': [0.91324250000000007, 4.5986124999999998, 5.5795399999999997],
            'F': [0.0, 0.0, 0.0],
        }
        self.rarefaction_legend_mat_init = {'test': {'SampleID': {}}}
        self.col_headers2 = ['', 'sequences per sample', 'iteration',
                             'Sample1', 'Sample2']

        self.rarefaction_data_mat = {
            'SampleID': {
                'Sample1': {
                    'test': {'ave': ['     7.000'], 'err': ['       nan']},
                },
            },
        }

        self.rarefaction_legend_mat = {
            'test': {
                'samples': {
                    'Sample1': {
                        'color': '#ff0000',
                        'link': 'html_plots/testcol_0_row_0.png',
                    },
                },
                'groups': {
                    'SampleID': {
                        'Sample1': {
                            'groupcolor': '#ff0000',
                            'groupsamples': ['Sample1'],
                        },
                    },
                },
            },
        }
        self.exp_err_series_ave = {'M': [1.571915, 6.49885, 8.1750183333333339]}
开发者ID:DDomogala3,项目名称:qiime,代码行数:76,代码来源:test_make_rarefaction_plots.py


示例7: test_parse_rarefaction

 def test_parse_rarefaction(self):
     """parse_rarefaction returns headers, comments, file names and data"""
     # input: header line plus two iterations at each of depths 10, 310, 610
     self.rarefactionfile = [
         '\tsequences per sample\titeration\t123\t234\t345',
         'rare10.txt\t10\t0\t1.99181\t0.42877\t2.13996',
         'rare10.txt\t10\t1\t2.07163\t0.42877\t2.37055',
         'rare310.txt\t310\t0\t8.83115\t0.42877\t11.00725',
         'rare310.txt\t310\t1\t10.05242\t0.42877\t8.24474',
         'rare610.txt\t610\t0\t12.03067\t0.42877\t11.58928',
         'rare610.txt\t610\t1\t12.9862\t0.42877\t11.58642',
     ]

     # expected values, in the order parse_rarefaction returns them
     self.col_headers = [
         '', 'sequences per sample', 'iteration', '123', '234', '345',
     ]
     self.comments = []
     self.rarefaction_fns = [
         'rare10.txt', 'rare10.txt',
         'rare310.txt', 'rare310.txt',
         'rare610.txt', 'rare610.txt',
     ]
     self.rarefaction_data = [
         [10.0, 0.0, 1.9918100000000001, 0.42876999999999998, 2.1399599999999999],
         [10.0, 1.0, 2.0716299999999999, 0.42876999999999998, 2.3705500000000002],
         [310.0, 0.0, 8.8311499999999992, 0.42876999999999998, 11.007250000000001],
         [310.0, 1.0, 10.05242, 0.42876999999999998, 8.2447400000000002],
         [610.0, 0.0, 12.030670000000001, 0.42876999999999998, 11.58928],
         [610.0, 1.0, 12.9862, 0.42876999999999998, 11.58642],
     ]

     parsed = parse_rarefaction(self.rarefactionfile)
     obs_headers, obs_comments, obs_fns, obs_data = parsed

     self.assertEqual(obs_headers, self.col_headers)
     self.assertEqual(obs_comments, self.comments)
     self.assertEqual(obs_fns, self.rarefaction_fns)
     self.assertEqual(obs_data, self.rarefaction_data)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:19,代码来源:test_parse.py


示例8: compare_alpha_diversities

def compare_alpha_diversities(rarefaction_lines, mapping_lines, category,
    depth=None, test_type='nonparametric', num_permutations=999):
    """Compares alpha diversity values for differences per category treatment.
    Inputs:
     rarefaction_lines - list of lines, result of multiple rarefactions.
     mapping_lines - list of lines, mapping file lines.
     category - str, the category to be compared, eg 'Treatment' or 'Age'.
     depth - int, depth of the rarefaction file to use. if None, then will use
     the deepest available in the file.
     test_type - str, the type of t-test to perform. Must be either
     'parametric' or 'nonparametric'.
     num_permutations - int, the number of Monte Carlo permutations to use if
     test_type is 'nonparametric'.
    Returns:
     (ttest_results, alphadiv_avgs)
     ttest_results - dict keyed by treatment pair. The value is
     (obs_t, p_val), or (None, None) when both treatments have a single
     sample, when either treatment has a nan value, or when the
     nonparametric test returns no p value.
     alphadiv_avgs - dict keyed by treatment. The value is (mean, std) of
     the per-sample average alpha diversities of that treatment.
    Raises:
     ValueError if test_type is 'nonparametric' and num_permutations < 1, or
     if an unknown test_type is reached while comparing a pair.
    """
    if test_type == 'nonparametric' and num_permutations < 1:
        raise ValueError("Invalid number of permutations: %d. Must be greater "
                         "than zero." % num_permutations)

    rarefaction_data = parse_rarefaction(rarefaction_lines)
    mapping_data = parse_mapping_file_to_dict(mapping_lines)[0]
    # samid_pairs, treatment_pairs are in the same order
    samid_pairs, treatment_pairs = sampleId_pairs(mapping_data,
        rarefaction_data, category)

    # extract only rows of the rarefaction data that are at the given depth
    # if depth is not given default to the deepest rarefaction available
    # rarefaction file is not guaranteed to be in order of rarefaction depth
    if depth is None:
        depth = array(rarefaction_data[3])[:,0].max()

    rare_mat = array([row for row in rarefaction_data[3] if row[0]==depth])

    # Average each col of the rarefaction mtx. Computing t test on averages over
    # all iterations. Avoids more comparisons, which kill significance.
    rare_mat = (rare_mat.sum(0)/rare_mat.shape[0])[2:] #remove depth,iter cols
    sids = rarefaction_data[0][3:] # 0-2 are header strings

    ttest_results = {}
    for sid_pair, treatment_pair in zip(samid_pairs, treatment_pairs):
        # if there is only 1 sample for each treatment in a comparison, the
        # mc method will error (e.g. mc_t_two_sample([1],[1])), so skip it.
        if len(sid_pair[0])==1 and len(sid_pair[1])==1:
            ttest_results[treatment_pair]= (None,None)
        else:
            pair0_indices = [sids.index(i) for i in sid_pair[0]]
            pair1_indices = [sids.index(i) for i in sid_pair[1]]
            i = rare_mat.take(pair0_indices)
            j = rare_mat.take(pair1_indices)
            # found discussion of how to quickly check an array for nan here:
            # http://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy
            if isnan(np_min(i)) or isnan(np_min(j)):
                ttest_results[treatment_pair]= (None,None)
                continue
            if test_type == 'parametric':
                obs_t, p_val = t_two_sample(i,j)
            elif test_type == 'nonparametric':
                obs_t, _, _, p_val = mc_t_two_sample(i,j,
                    permutations=num_permutations)
                if p_val is not None:
                    p_val = float(format_p_value_for_num_iters(p_val,
                        num_iters=num_permutations))
                else: #None will error in format_p_val
                    obs_t, p_val = None, None
            else:
                raise ValueError("Invalid test type '%s'." % test_type)
            ttest_results[treatment_pair]= (obs_t,p_val)
    # create dict of average alpha diversity values
    alphadiv_avgs = {}
    for sid_pair, treatment_pair in zip(samid_pairs, treatment_pairs):
        # calculate the alpha diversity average, std vals. choosing only the
        # first treatment of each pair doesn't guarantee full coverage, so
        # both are looked at
        for sid_list, treatment_str in zip(sid_pair, treatment_pair):
            # check if already computed and added
            if treatment_str not in alphadiv_avgs:
                alphadiv_vals = \
                    rare_mat.take([sids.index(i) for i in sid_list])
                ad_mean = alphadiv_vals.mean()
                ad_std = alphadiv_vals.std()
                alphadiv_avgs[treatment_str] = (ad_mean, ad_std)
    return ttest_results, alphadiv_avgs
开发者ID:icaro-henrique,项目名称:qiime,代码行数:84,代码来源:compare_alpha_diversity.py


示例9: setUp

    def setUp(self):
        """Define the fixture attributes shared by the tests.

        Only literals are assigned, apart from three calls whose results are
        stored on the instance: color_dict_to_objects, parse_rarefaction and
        get_rarefaction_data.
        """

        # plotting inputs for a single sample ("Sample1") at a single depth
        self.data = {}
        self.data["xaxis"] = [10.0]
        self.sample_dict = {"Sample1": {10.00: [1.3276140000000001]}}
        self.data["yvals"] = {"Sample1": [1.3276140000000001]}
        self.data["err"] = {"Sample1": [0.1]}
        self.xmax = 140
        self.ymax = 20
        self.std_type = "stddev"
        self.ops = ["Sample1"]
        self.mapping_category = "SampleID"
        self.imagetype = "png"
        self.resolution = 70
        self.mapping_lookup = {"SampleID-Sample1": "col_0_row_0"}
        self.data["map"] = [["SampleID", "Day"], ["Sample1", "Day1"]]
        self.color_prefs = {"SampleID": {"column": "SampleID", "color": {"Sample1": "#ff0000"}}}
        self.groups = {"Sample1": ["Sample1"]}
        self.background_color = "black"
        self.label_color = "white"
        self.labelname = "SampleID"
        self.rare_data = {
            "color": {"Sample1": "#ff0000"},
            "series": {"Sample1": [2.0515300000000001]},
            "headers": ["test.txt", "SampleID"],
            "xaxis": [10.0],
            "error": {"Sample1": [0.0]},
            "options": ["Sample1"],
        }
        self.fpath = "/tmp/"
        self.output_dir = "/tmp/"
        self.metric_name = "test"
        # presumably consumed by a tearDown outside this block; verify
        self._paths_to_clean_up = []
        self._folders_to_cleanup = []
        self.rarefaction_file_data = [[10.0, 0.0, 1.0], [10.0, 1.0, 3.0]]
        # color name -> hex string, converted by color_dict_to_objects
        d = {"redtowhite3_0": "#7fff00", "redtowhite3_1": "#7fff00"}
        self.data_colors = color_dict_to_objects(d)
        self.colors = {"Sample1": "redtowhite3_0", "Sample2": "redtowhite3_1"}
        self.colors2 = {"Sample1": "redtowhite3_0"}
        # mapping file lines: header plus samples 123 (F), 234 (M) and 345 (M)
        self.mappingfile = ["#SampleID\tSex\tAge", "123\tF\t32", "234\tM\t30", "345\tM\t32"]
        # self.p_mappingfile = parse_mapping_file(self.mappingfile,\
        #                                            strip_quotes=True)
        # collated rarefaction lines: header row, then two iterations at each
        # of the depths 10, 310 and 610 for samples 123, 234 and 345
        self.rarefactionfile = [
            "\tsequences per sample\titeration\t123\t234\t345",
            "rare10.txt\t10\t0\t1.99181\t0.42877\t2.13996",
            "rare10.txt\t10\t1\t2.07163\t0.42877\t2.37055",
            "rare310.txt\t310\t0\t8.83115\t0.42877\t11.00725",
            "rare310.txt\t310\t1\t10.05242\t0.42877\t8.24474",
            "rare610.txt\t610\t0\t12.03067\t0.42877\t11.58928",
            "rare610.txt\t610\t1\t12.9862\t0.42877\t11.58642",
        ]

        # file name -> 4-tuple with the same layout that parse_rarefaction
        # output is unpacked into below (headers, comments, file names, data)
        self.rares = {
            "test.txt": (
                ["", "sequences per sample", "iteration", "Sample1"],
                [],
                ["rare1.txt", "rare2.txt"],
                [[10.0, 2.0, 7.0, 7.0, 9.0], [10.0, 2.0, 7.0, 7.0, 9.0]],
            )
        }
        # parsed forms of self.rarefactionfile
        self.col_headers, self.comments, self.rarefaction_fns, self.rarefaction_data = parse_rarefaction(
            self.rarefactionfile
        )
        self.matrix, self.seqs_per_samp, self.sampleIDs = get_rarefaction_data(self.rarefaction_data, self.col_headers)
        self.ave_seqs_per_sample1 = {"Sample1": [2.03172, 9.4417849999999994, 12.508435]}
        # per sample, the mean of the two iterations at each depth in
        # self.rarefactionfile
        self.ave_seqs_per_sample = {
            "123": [2.03172, 9.4417849999999994, 12.508435],
            "234": [0.42876999999999998, 0.42876999999999998, 0.42876999999999998],
            "345": [2.255255, 9.625995, 11.58785],
        }
        # the per-sample means above averaged by Sex: M = samples 234 and
        # 345, F = sample 123
        self.collapsed_ser_sex = {
            "M": [1.3420125000000001, 5.0273824999999999, 6.0083099999999998],
            "F": [2.03172, 9.4417849999999994, 12.508435],
        }
        # presumably the expected error series matching collapsed_ser_sex;
        # verify against the test that reads it
        self.err_ser_sex = {"M": [0.91324250000000007, 4.5986124999999998, 5.5795399999999997], "F": [0.0, 0.0, 0.0]}
        self.rarefaction_legend_mat_init = {"test": {"SampleID": {}}}
        self.col_headers2 = ["", "sequences per sample", "iteration", "Sample1", "Sample2"]

        # the "ave"/"err" entries are pre-formatted, space-padded strings
        self.rarefaction_data_mat = {"SampleID": {"Sample1": {"test": {"ave": ["     7.000"], "err": ["       nan"]}}}}

        self.rarefaction_legend_mat = {
            "test": {
                "samples": {"Sample1": {"color": "#ff0000", "link": "html_plots/testcol_0_row_0.png"}},
                "groups": {"SampleID": {"Sample1": {"groupcolor": "#ff0000", "groupsamples": ["Sample1"]}}},
            }
        }
        self.exp_err_series_ave = {"M": [1.571915, 6.49885, 8.1750183333333339]}
开发者ID:rob-knight,项目名称:qiime,代码行数:88,代码来源:test_make_rarefaction_plots.py


示例10: setUp

 def setUp(self):
     """Create the rarefaction/mapping inputs and the expected values."""
     # collated rarefaction lines for samples 123, 234, 345 and 456
     self.rarefaction_file = [
         '\tsequences per sample\titeration\t123\t234\t345\t456',
         'rare10.txt\t10\t0\t1.99181\t5.42877\t2.13996\t0.002322',
         'rare10.txt\t10\t1\t2.07163\t1.42877\t2.37055\t0.01219',
         'rare310.txt\t310\t0\t8.83115\t6.42877\t11.00725\t0.18233',
         'rare310.txt\t310\t1\t10.05242\t9.42877\t8.24474\t0.99229',
         'rare810.txt\t810\t0\t12.03067\tn/a\t11.58928\t0.8993',
         'rare910.txt\t910\t1\t12.9862\t2.42877\t11.58642\t1.22563',
     ]
     self.rarefaction_data = parse_rarefaction(self.rarefaction_file)

     # mapping file lines: header, one comment line, one line per sample
     self.mapping_file = [
         '#SampleID\tTreatment\tLinkerPrimerSequence\tDose\tTTD\tDescription',
         '#Comment Line',
         '123\tAAAA\tBBBB\tHigh\t31\tM_ID_123',
         '234\tCCCC\tDDDD\tLow\t67\tM_ID_234',
         '345\tAAAA\tFFFF\tMed\t21\tM_ID_345',
         '456\tAAAA\tGGGG\tLow\t67\tM_ID_456',
     ]
     self.mapping_data = parse_mapping_file_to_dict(self.mapping_file)[0]

     # pairs of category values, one list per category
     self.value_pairs_Dose = [
         ('Low', 'Med'), ('Low', 'High'), ('Med', 'High'),
     ]
     self.value_pairs_TTD = [('67', '21'), ('67', '31'), ('21', '31')]
     self.value_pairs_Treatment = [('CCCC', 'AAAA')]

     # category value -> sample ids
     self.cat_val_Dose = {
         'High': ['123'], 'Low': ['234', '456'], 'Med': ['345'],
     }
     self.cat_val_TTD = {
         '21': ['345'], '31': ['123'], '67': ['234', '456'],
     }
     self.cat_val_Treatment = {
         'AAAA': ['345', '123', '456'], 'CCCC': ['234'],
     }

     # sample id lists for each pair of category values
     self.Id_pairs_Dose = [
         (['234', '456'], ['345']),
         (['234', '456'], ['123']),
         (['345'], ['123']),
     ]
     self.Id_pairs_TTD = [
         (['234', '456'], ['345']),
         (['234', '456'], ['123']),
         (['345'], ['123']),
     ]
     self.Id_pairs_Treatment = [(['234'], ['345', '123', '456'])]

     # sample id -> column index
     self.rarefaction_cols_dict = {'123': 0, '234': 1, '345': 2, '456': 3}

     # sample columns of the rarefaction lines at depths 10, 310 and 910
     self.extracted_mtx_10 = array([
         [1.99181, 5.42877, 2.13996, 0.002322],
         [2.07163, 1.42877, 2.37055, 0.01219],
     ])
     self.extracted_mtx_310 = array([
         [8.83115, 6.42877, 11.00725, 0.18233],
         [10.05242, 9.42877, 8.24474, 0.99229],
     ])
     self.extracted_mtx_910 = array([
         [12.9862, 2.42877, 11.58642, 1.22563],
     ])

     # one sample pair and the depth-10 columns for each of its two sides
     self.sample_pair1 = (['234'], ['345', '123'])
     self.rarefaction_mtx_for_sample_pair1_0 = array([
         [5.42877],
         [1.42877],
     ])
     self.rarefaction_mtx_for_sample_pair1_1 = array([
         [2.13996, 1.99181],
         [2.37055, 2.07163],
     ])

     self.compared_alpha_diversities_TTD = {
         'TTD': {
             ('67', '21'): (-0.27929839680103463, 0.79386220041241184),
             ('21', '31'): (1.8321466933860993, 0.20839398129924847),
             ('67', '31'): (-0.16318504125427058, 0.87828549279958279),
         },
     }
开发者ID:DDomogala3,项目名称:qiime,代码行数:76,代码来源:test_compare_alpha_diversity.py


示例11: mean_alpha

def mean_alpha(alpha_dict, depth):
    """mean collated alpha diversity data at a given depth

    Input:
    alpha_dict: dictionary where the values are the lines of a collated alpha
    diversity data files and the keys are the names of each of these files with
    no extension, this name is usually the metric used to compute the alpha
    diversity.
    depth: selected depth to mean the computed alpha diversity values for the
    alpha_dict data. If None is passed, the highest depth will be used.

    Output:
    metrics: list of metric names i. e. the name of each collated alpha div file
    with '_even_<depth>' appended
    sample_ids: list of sample identifiers represented
    data: a list of lists with the mean of alpha diversity data at a given
    depth for the different metrics, each column is a different metric.

    Raises:
    AssertionError: if alpha_dict is not a dict or depth is neither None nor
    a non-negative int.
    ValueError: if the depth is not present in one of the files, or the files
    do not list the same sample ids in the same order.
    """

    assert type(alpha_dict) == dict, "Input data must be a dictionary"
    # the type is checked before the comparison so that a non-integer depth
    # fails this assertion instead of failing inside the comparison itself
    assert depth is None or (type(depth) == int and depth >= 0), "The "+\
        "specified depth must be a positive integer."

    metrics = []
    sample_ids = []
    data = []

    # items() behaves the same as iteritems() here and exists on every
    # Python version
    for key, value in alpha_dict.items():
        identifiers, _, _, rarefaction_data = parse_rarefaction(value)

        # if depth is specified as None use the highest available, retrieve it
        # on a per file basis so you make sure the value exists for all files
        if depth is None:
            _depth = int(max(row[0] for row in rarefaction_data))
        else:
            _depth = depth
        metrics.append('{0}_even_{1}'.format(key, _depth))

        # check there are elements with the desired rarefaction depth
        if not any(row[0] == _depth for row in rarefaction_data):
            # sorted, comma separated list of the available rarefaction depths
            available_rarefaction_depths = ', '.join(
                str(found) for found in
                sorted(set(row[0] for row in rarefaction_data)))
            # call form of raise: "raise E, msg" only parses on Python 2
            raise ValueError("The depth %d does not exist in the collated "
                "alpha diversity file for the metric: %s. The available depths "
                "are: %s." % (_depth, key, available_rarefaction_depths))

        # check all the files have the same sample ids in the same order
        if sample_ids:
            if not sample_ids == identifiers[3:]:
                raise ValueError("Non-matching sample ids were found in the "
                    "collated alpha diversity files. Make sure all the files "
                    "contain data for the same samples.")
        else:
            sample_ids = identifiers[3:]

        # find all the data at the desired depth and get the mean values, remove
        # the first two elements ([depth, iteration]) as those are not needed
        data.append(array([row[2:] for row in rarefaction_data if\
            row[0] == _depth]).mean(axis=0))

    # transpose the data to match the formatting of non-collated alpha div data
    data = array(data).T.tolist()

    return metrics, sample_ids, data
开发者ID:EESI,项目名称:FizzyQIIME,代码行数:64,代码来源:add_alpha_to_mapping_file.py


示例12: main

def main():
    """Script entry point: build averaged rarefaction plots.

    Steps, in order:
      1. Parse the command line using ``script_info``.
      2. Load every rarefaction file. ``options.input_dir`` is either a
         directory (all entries whose name does not start with "." are
         read) or a comma-separated list of file paths.
      3. Validate the requested image type and resolution.
      4. Pick the output directory: the one supplied (created if it does
         not exist), or a randomly named one when none/"." was given.
      5. Call ``make_averages`` and, if it returns HTML, write it to
         ``rarefaction_plots.html`` inside the output directory.

    Any validation or I/O problem is reported through
    ``option_parser.error``, which prints the message and terminates the
    process with a non-zero status, so no explicit exit call is needed
    after it.
    """
    option_parser, options, args = parse_command_line_parameters(**script_info)

    input_dir = options.input_dir

    # maps file name -> parsed rarefaction data
    rares = {}
    if isdir(input_dir):
        # skip hidden files (e.g. .DS_Store)
        rarenames = [r for r in listdir(input_dir) if not r.startswith(".")]
        for r in rarenames:
            try:
                # context manager guarantees the handle is closed even if
                # reading fails part-way through
                with open(path.join(input_dir, r), "U") as rare_f:
                    rarefl = rare_f.readlines()
                rares[r] = parse_rarefaction(rarefl)
            except IOError as err:
                option_parser.error(
                    "Problem with rarefaction file. %s" % err)
    else:
        try:
            for input_fp in input_dir.split(","):
                # key the results by the file name only, not the full path
                input_path = split(input_fp)[-1]
                with open(input_fp, "U") as rare_f:
                    rarefl = rare_f.readlines()
                rares[input_path] = parse_rarefaction(rarefl)
        except IOError as err:
            option_parser.error("Problem with rarefaction file. %s" % err)

    if options.imagetype not in ["png", "svg", "pdf"]:
        option_parser.error("Supplied extension not supported.")
    imagetype = options.imagetype

    try:
        resolution = int(options.resolution)
    except (TypeError, ValueError):
        # TypeError covers a missing (None) value, ValueError a non-numeric
        # string
        option_parser.error("Invalid resolution.")

    # Get the command-line options.
    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(options)

    # output directory check
    if isinstance(options.output_dir, str) and options.output_dir != ".":
        output_dir = options.output_dir
        if not exists(output_dir):
            try:
                create_dir(output_dir, False)
            except ValueError:
                option_parser.error("Could not create output directory.")
    else:
        output_dir = get_random_directory_name()

    # Generate the plots and html text
    ymax = options.ymax
    suppress_webpage = options.suppress_html_output
    html_output = make_averages(
        prefs, data, background_color, label_color, rares, output_dir,
        resolution, imagetype, ymax, suppress_webpage
    )

    if html_output:
        # Write the html file.
        html_fp = path.join(output_dir, "rarefaction_plots.html")
        with open(html_fp, "w") as outfile:
            outfile.write(html_output)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:70,代码来源:make_rarefaction_plots.py


示例13: compare_alpha_diversities

def compare_alpha_diversities(rarefaction_lines, mapping_lines, category, depth,
    test_type='nonparametric', num_permutations=999):
    """Compare alpha diversity between the treatments of a mapping category.

    For every pair of treatments produced by ``sampleId_pairs`` a two-sample
    t-test is run on the per-sample alpha diversity values, where each
    sample's value is its average over all rarefaction iterations at the
    requested depth. Testing the averages (rather than every iteration)
    avoids extra comparisons that would erode significance.

    Inputs:
     rarefaction_lines - list of lines, result of multiple rarefactions.
     mapping_lines - list of lines, mapping file lines.
     category - str, the category to be compared, eg 'Treatment' or 'Age'.
     depth - int, depth of the rarefaction file to use.
     test_type - str, the type of t-test to perform. Must be either
      'parametric' or 'nonparametric'.
     num_permutations - int, the number of Monte Carlo permutations to use if
      test_type is 'nonparametric'.

    Returns:
     dict mapping 'treatmentA,treatmentB' (str) to an (obs_t, p_val) tuple.
     The tuple is (None, None) when both treatments contain a single sample,
     when either group contains a nan value, or when the nonparametric test
     yields no p-value.

    Raises:
     ValueError - if num_permutations < 1 for a nonparametric test, if no
      row of the rarefaction data has the requested depth, or if test_type
      is not recognized (checked when a test is actually about to be run).
    """
    if test_type == 'nonparametric' and num_permutations < 1:
        raise ValueError("Invalid number of permutations: %d. Must be greater "
                         "than zero." % num_permutations)

    rarefaction_data = parse_rarefaction(rarefaction_lines)
    mapping_data = parse_mapping_file_to_dict(mapping_lines)[0]
    # samid_pairs, treatment_pairs are in the same order
    samid_pairs, treatment_pairs = sampleId_pairs(mapping_data,
        rarefaction_data, category)

    # extract only rows of the rarefaction data that are at the given depth
    rare_mat = array([row for row in rarefaction_data[3] if row[0] == depth])
    if rare_mat.shape[0] == 0:
        # without this guard the averaging below divides by zero and then
        # fails with an unrelated-looking indexing error
        raise ValueError("The depth %s does not exist in the rarefaction "
                         "data." % depth)

    # Average each column over all iterations, then drop the leading
    # depth and iteration columns so only per-sample values remain.
    rare_mat = (rare_mat.sum(0) / rare_mat.shape[0])[2:]
    sids = rarefaction_data[0][3:]  # 0-2 are header strings

    results = {}
    for sid_pair, treatment_pair in zip(samid_pairs, treatment_pairs):
        t_key = '%s,%s' % (treatment_pair[0], treatment_pair[1])

        # with only 1 sample per treatment the Monte Carlo method errors
        # (e.g. mc_t_two_sample([1],[1])), so no test is attempted
        if len(sid_pair[0]) == 1 and len(sid_pair[1]) == 1:
            results[t_key] = (None, None)
            continue

        pair0_indices = [sids.index(sid) for sid in sid_pair[0]]
        pair1_indices = [sids.index(sid) for sid in sid_pair[1]]
        i = rare_mat.take(pair0_indices)
        j = rare_mat.take(pair1_indices)

        # min() propagates nan, so this is a quick "any nan?" check; see
        # http://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy
        if isnan(np_min(i)) or isnan(np_min(j)):
            results[t_key] = (None, None)
            continue

        if test_type == 'parametric':
            obs_t, p_val = t_two_sample(i, j)
        elif test_type == 'nonparametric':
            obs_t, _, _, p_val = mc_t_two_sample(i, j,
                permutations=num_permutations)
            if p_val is None:
                # None would error in format_p_value_for_num_iters
                obs_t = None
            else:
                p_val = float(format_p_value_for_num_iters(p_val,
                    num_iters=num_permutations))
        else:
            raise ValueError("Invalid test type '%s'." % test_type)
        results[t_key] = (obs_t, p_val)
    return results
开发者ID:binma,项目名称:qiime,代码行数:66,代码来源:compare_alpha_diversity.py


示例14: setUp

 def setUp(self):
     """Create the input lines and expected values shared by the tests."""
     # collated rarefaction lines: a header row followed by data rows
     self.rarefaction_file = [
         '\tsequences per sample\titeration\t123\t234\t345',
         'rare10.txt\t10\t0\t1.99181\t5.42877\t2.13996',
         'rare10.txt\t10\t1\t2.07163\t1.42877\t2.37055',
         'rare310.txt\t310\t0\t8.83115\t6.42877\t11.00725',
         'rare310.txt\t310\t1\t10.05242\t9.42877\t8.24474',
         'rare810.txt\t810\t0\t12.03067\tn/a\t11.58928',
         'rare910.txt\t910\t1\t12.9862\t2.42877\t11.58642',
     ]
     self.rarefaction_data = parse_rarefaction(self.rarefaction_file)

     # mapping file lines: header, a comment line, then one row per sample
     self.mapping_file = [
         '#SampleID\tTreatment\tLinker'
         'PrimerSequence\tDose\tTTD\tDescription',
         '#Comment Line',
         '123\tAAAA\tBBBB\tHigh\t31\tM_ID_123',
         '234\tCCCC\tDDDD\tLow\t67\tM_ID_234',
         '345\tAAAA\tFFFF\tMed\t21\tM_ID_345',
     ]
     self.mapping_data = parse_mapping_file_to_dict(self.mapping_file)[0]

     # expected pairs of category values
     self.value_pairs_Dose = [
         ('Low', 'Med'), ('Low', 'High'), ('Med', 'High')]
     self.value_pairs_TTD = [('67', '21'), ('67', '31'), ('21', '31')]
     self.value_pairs_Treatment = [('CCCC', 'AAAA')]

     # expected category value -> sample ids
     self.cat_val_Dose = {'High': ['123'], 'Low': ['234'], 'Med': ['345']}
     self.cat_val_TTD = {'21': ['345'], '31': ['123'], '67': ['234']}
     self.cat_val_Treatment = {'AAAA': ['345', '123'], 'CCCC': ['234']}

     # expected pairs of sample id groups
     self.Id_pairs_Dose = [
         (['234'], ['345']), (['234'], ['123']), (['345'], ['123'])]
     self.Id_pairs_TTD = [
         (['234'], ['345']), (['234'], ['123']), (['345'], ['123'])]
     self.Id_pairs_Treatment = [(['234'], ['345', '123'])]

     # sample id -> column position
     self.rarefaction_cols_dict = {'123': 0, '234': 1, '345': 2}

     # sample-value matrices for depths 10, 310 and 910
     self.extracted_mtx_10 = array([
         [1.99181, 5.42877, 2.13996],
         [2.07163, 1.42877, 2.37055],
     ])
     self.extracted_mtx_310 = array([
         [8.83115, 6.42877, 11.00725],
         [10.05242, 9.42877, 8.24474],
     ])
     self.extracted_mtx_910 = array([[12.9862, 2.42877, 11.58642]])

     # one sample-id pair and the matrix expected for each side of it
     self.sample_pair1 = (['234'], ['345', '123'])
     self.rarefaction_mtx_for_sample_pair1_0 = array([
         [5.42877],
         [1.42877],
     ])
     self.rarefaction_mtx_for_sample_pair1_1 = array([
         [2.13996, 1.99181],
         [2.37055, 2.07163],
     ])

     # expected comparison results for the TTD category
     self.compared_alpha_diversities_TTD = {
         'TTD': {
             ('21', '31'): (1.8321466933860993, 0.20839398129924847),
             ('67', '21'): (0.58578495700890432, 0.61731739324369639),
             ('67', '31'): (0.69838596448703294, 0.55721515283248324),
         },
     }
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:72,代码来源:test_compare_alpha_diversities.py


示例15: compare_alpha_diversities

def compare_alpha_diversities(rarefaction_lines, mapping_lines, category,
                              depth=None, test_type='nonparametric', num_permutations=999):
    """Compares alpha diversity values for differences per category treatment.

    Notes:
     Returns a defaultdict which as keys has the pairs of treatments being
     compared, and as values, lists of (pval,tval) tuples for each comparison at
     for a given iteration.
    Inputs:
     rarefaction_lines - list of lines, result of multiple rarefactions.
     mapping_lines - list of lines, mapping file lines.
     category - str, the category to be compared, eg 'Treatment' or 'Age'.
     depth - int, depth of the rarefaction file to use. if None, then will use
     the deepest available in the file.
     test_type - str, the type of t-test to perform. Must be either
     'parametric' or 'nonparametric'.
     num_permutations - int, the number of Monte Carlo permutations to use if
     test_type is 'nonparametric'.
    """
    if test_type == 'nonparametric' and num_permutations < 1:
        raise ValueError("Invalid number of permutations: %d. Must be greater "
                         "than zero." % num_permutations)

    rarefaction_data = parse_rarefaction(rarefaction_lines)
    mapping_data = parse_mapping_file_to_dict(mapping_lines)[0]
    # samid_pairs, treatment_pairs are in the same order
    samid_pairs, treatment_pairs = sampleId_pairs(mapping_data,
                                                  rarefaction_data, category)

    ps_avg_div = get_per_sample_average_diversities(rarefaction_data, depth)

    ttest_results, ad_avgs = {}, {}
    for sid_pair, treatment_pair in zip(samid_pairs, treatment_pairs):
        # if there is only 1 sample for each treatment in a comparison, and mc
        # using mc method, will error (e.g. mc_t_two_sample([1],[1]).
        if len(sid_pair[0]) == 1 and len(sid_pair[1]) == 1:
            ttest_results[treatment_pair] = (None, None)
            # add alpha diversity averages and standard deviations. since their
            # is only a single sample if we are in this part of the loop, we can
            # just record the sample value as the avg and 0 as the std.
            ad_avgs[treatment_pair[0]] = (sid_pair[0][0], 0.)
            ad_avgs[treatment_pair[1]] = (sid_pair[1][0], 0.)
        else:
            i = array([ps_avg_div[x] for x in sid_pair[0]])
            j = array([ps_avg_div[x] for x in sid_ 

鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python rarefaction.RarefactionMaker类代码示例发布时间:2022-05-26
下一篇:
Python parse.parse_qiime_parameters函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap