本文整理汇总了Python中qiime.parse.parse_rarefaction函数的典型用法代码示例。如果您正苦于以下问题：Python parse_rarefaction函数的具体用法？Python parse_rarefaction怎么用？Python parse_rarefaction使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parse_rarefaction函数的17个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: setUp
def setUp(self):
    """Define the rarefaction and mapping fixtures used by the tests.

    Stores the raw rarefaction and mapping lines on the test instance
    together with the values returned by the parsers for them.
    """
    # small amount of redundancy here since setUp called at each test, but
    # limited tests means little concern

    # Collated rarefaction lines: a header row followed by one row per
    # file/depth/iteration with a value for each of Sam1..Sam6. Depths 480
    # and 910 have four iterations each, 500 has one and 850 has three; a
    # few cells are 'n/a'.
    self.rarefaction_file = \
        ['\tsequences per sample\titeration\tSam1\tSam2\tSam3\tSam4\tSam5\tSam6',
         'rare480.txt\t480\t0\t2.52800404052\t2.3614611247\t2.59867416108\t3.56970811181\t3.44800265895\t1.9433560517',
         'rare480.txt\t480\t1\t2.06375457238\t3.32293450758\t3.4189896645\t3.35312890712\t3.10763472113\t2.78155253726',
         'rare480.txt\t480\t2\t2.44788730109\t3.42464996459\t2.24541787295\t2.491419231\t2.60106690099\t5.40828403581',
         'rare480.txt\t480\t3\t5.1846120153\t3.67022675065\t1.54879964908\t2.8055801405\t4.3086171269\t3.87761898868',
         'rare910.txt\t910\t0\t2.67580703282\t1.72405794627\t2.15312863498\t2.4300954476\t3.7753658185\t3.36198860355',
         'rare910.txt\t910\t1\t4.10226466956\t2.24587945345\t3.02932964779\t2.98218513619\t3.73316846484\t1.85879566537',
         'rare910.txt\t910\t2\t1.65800670063\t2.42281993323\t3.02400997565\t3.271608097\t2.99265263795\t3.68802382515',
         'rare910.txt\t910\t3\t2.50976021964\t2.43976761056\t3.32119905587\t2.47487750248\t1.901408525\t3.42883742207',
         'rare500.txt\t500\t0\t3.42225118215\tn/a\t4.03758268426\t2.35344629448\t2.26690085385\t1.80164570104',
         'rare850.txt\t850\t0\t4.2389858006\t4.97464230229\t1.53451087057\t3.35785261181\t1.91658777533\t2.32583475424',
         'rare850.txt\t850\t1\t2.81445883827\tn/a\t2.54767461948\t1.38835207925\t3.70018890199\t1.57359105209',
         'rare850.txt\t850\t2\t2.9340493412\t3.95897035158\tn/a\t2.07761860166\t3.42393336685\t2.6927305603']
    self.rarefaction_data = parse_rarefaction(self.rarefaction_file)

    # Mapping file lines: a header, one comment line and one row for each of
    # the six samples named in the rarefaction header above.
    self.mapping_file = \
        ['#SampleID\tDose\tLinkerPrimerSequence\tWeight\tTTD\tDescription',
         '#Comment Line',
         'Sam1\t1xDose\tATCG\tHigh\t31\ts1_desc',
         'Sam2\t1xDose\tACCG\tLow\t67\ts2_desc',
         'Sam3\t2xDose\tACGT\tMed\t21\ts3_desc',
         'Sam4\t2xDose\tAACG\tLow\t55\ts4_desc',
         'Sam5\tControl\tCGTC\tLow\t67\ts5_desc',
         'Sam6\t1xDose\tACCT\tLow\t55\ts6_desc']
    # Only the first element of the parser's return value is kept.
    self.mapping_data = parse_mapping_file_to_dict(self.mapping_file)[0]
开发者ID:Jorge-C,项目名称:qiime,代码行数:29,代码来源:test_compare_alpha_diversity.py
示例2: generate_alpha_diversity_boxplots
def generate_alpha_diversity_boxplots(rarefaction_lines,
                                      mapping_lines,
                                      category,
                                      depth=None):
    """Build box plots of average alpha diversity, one box per category value.

    Inputs:
     rarefaction_lines - lines of a collated rarefaction file, passed to
      parse_rarefaction.
     mapping_lines - mapping file lines, used to group sample ids by their
      value in ``category``.
     category - the mapping file category whose values define the boxes.
     depth - forwarded unchanged to get_per_sample_average_diversities
      (default None).

    Returns whatever generate_box_plots returns for the collected
    distributions. Boxes are ordered by category value and labelled
    '<value> (n=<number of samples>)'.
    """
    rarefaction_data = parse_rarefaction(rarefaction_lines)
    category_value_to_sample_ids = \
        get_category_value_to_sample_ids(mapping_lines,
                                         category)
    per_sample_average_diversities = \
        get_per_sample_average_diversities(rarefaction_data,
                                           category,
                                           depth)
    per_category_value_average_diversities = \
        collapse_sample_diversities_by_category_value(
            category_value_to_sample_ids,
            per_sample_average_diversities)

    x_tick_labels = []
    distributions = []
    # sorted() orders the (value, diversities) pairs without relying on
    # items() returning a list that can be sorted in place.
    for cat, avg_diversities in sorted(
            per_category_value_average_diversities.items()):
        x_tick_labels.append("%s (n=%d)" % (cat, len(avg_diversities)))
        distributions.append(avg_diversities)

    return generate_box_plots(distributions,
                              x_tick_labels=x_tick_labels)
开发者ID:icaro-henrique,项目名称:qiime,代码行数:32,代码来源:compare_alpha_diversity.py
示例3: compare_alpha_diversities
def compare_alpha_diversities(rarefaction_lines, mapping_lines,
                              category, depth):
    """Run a two-sample t-test for every pair of values in a category.

    inputs:
        rarefaction_lines - lines of the rarefaction file, which gives
         scores for various rarefactions and depths
        mapping_lines - lines of the mapping file, which gives the sample
         ID's and the categories they fall in
        category - the category to be compared, is a string
        depth - the depth of the rarefaction data to use, is an integer
    outputs:
        a nested dictionary with the category as its only top level key;
        its value maps each pair of category values (as a tuple of two
        strings) to the result of t_two_sample for that pair
    """
    parsed_rarefaction = parse_rarefaction(rarefaction_lines)
    parsed_mapping = parse_mapping_file_to_dict(mapping_lines)[0]

    category_value_pairs = make_value_pairs_from_category(parsed_mapping,
                                                          category)
    ids_by_value = make_category_values_Id_dict(parsed_mapping, category)
    id_pairs = map_category_value_pairs_to_Ids(category_value_pairs,
                                               ids_by_value)
    column_of_id = make_SampleIds_rarefaction_columns_dict(
        parsed_rarefaction)
    scores_at_depth = extract_rarefaction_scores_at_depth(depth,
                                                          parsed_rarefaction)

    comparisons = {}
    results = {category: comparisons}
    # id_pairs and category_value_pairs are walked in lockstep by position
    for position, id_pair in enumerate(id_pairs):
        first_scores = convert_SampleIds_to_rarefaction_mtx(
            id_pair[0], scores_at_depth, column_of_id)
        second_scores = convert_SampleIds_to_rarefaction_mtx(
            id_pair[1], scores_at_depth, column_of_id)
        outcome = t_two_sample(first_scores, second_scores)
        label = (str(category_value_pairs[position][0]),
                 str(category_value_pairs[position][1]))
        comparisons[label] = outcome
    return results
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:53,代码来源:compare_alpha_diversity.py
示例4: mean_alpha
def mean_alpha(alpha_dict, depth):
    """Mean collated alpha diversity data at a given depth.

    Input:
    alpha_dict: dictionary where the values are the lines of collated alpha
    diversity data files and the keys are the names of each of these files
    with no extension; this name is usually the metric used to compute the
    alpha diversity.
    depth: depth at which the alpha diversity values are averaged; must be an
    int greater than or equal to zero.

    Output:
    metrics: list of metric names, built as '<key>_even_<depth>'
    sample_ids: list of sample identifiers represented
    data: a list of lists with the mean of alpha diversity data at the given
    depth for the different metrics, each column is a different metric.

    Raises:
    AssertionError: if alpha_dict is not a dict or depth is not a
    non-negative int.
    ValueError: if the files do not all list the same sample ids in the same
    order.
    """
    assert isinstance(alpha_dict, dict), "Input data must be a dictionary"
    # check the type first so a non-numeric depth fails the assertion instead
    # of failing inside the comparison
    assert type(depth) is int and depth >= 0, ("The specified depth must be "
                                               "a positive integer.")

    metrics = []
    sample_ids = []
    data = []

    for key, value in alpha_dict.items():
        metrics.append('{0}_even_{1}'.format(key, depth))
        identifiers, _, _, rarefaction_data = parse_rarefaction(value)

        # check all the files have the same sample ids in the same order;
        # the first three identifiers are header labels, not sample ids
        if sample_ids:
            if sample_ids != identifiers[3:]:
                raise ValueError("Non-matching sample ids were found in the "
                                 "collated alpha diversity files. Make sure "
                                 "all the files contain data for the same "
                                 "samples.")
        else:
            sample_ids = identifiers[3:]

        # find all the data at the desired depth and get the mean values,
        # remove the first two elements ([depth, iteration]) as those are not
        # needed
        data.append(array([row[2:] for row in rarefaction_data
                           if row[0] == depth]).mean(axis=0))

    # transpose the data to match the formatting of non-collated alpha div
    # data
    data = array(data).T.tolist()
    return metrics, sample_ids, data
开发者ID:cmhill,项目名称:qiime,代码行数:48,代码来源:add_alpha_to_mapping_file.py
示例5: _collect_alpha_diversity_boxplot_data
def _collect_alpha_diversity_boxplot_data(rarefaction_f, metadata_map,
                                          rarefaction_depth, split_category,
                                          comparison_category):
    """Group alpha diversity values at one depth by two metadata categories.

    Returns a tuple (x_tick_labels, dists): labels are formatted as
    '<split value> (<comparison value>)' and sorted, and dists holds the list
    of non-nan alpha diversity values that belongs to each label. Raises
    ValueError when no row of the rarefaction data has the requested depth.
    """
    parsed = parse_rarefaction(rarefaction_f)

    # The header starts with three non-sample labels.
    ids = parsed[0][3:]

    # Keep rows at the requested depth, dropping depth and iteration number.
    rows_at_depth = []
    for record in parsed[3]:
        if record[0] == rarefaction_depth:
            rows_at_depth.append(record[2:])

    if not rows_at_depth:
        raise ValueError("Rarefaction depth of %d could not be found in "
                         "collated alpha diversity file." % rarefaction_depth)

    # (split value, comparison value) -> list of alpha diversity values
    grouped = defaultdict(list)
    for values in rows_at_depth:
        assert len(ids) == len(values)
        for sid, diversity in zip(ids, values):
            if isnan(diversity):
                continue
            split_val = metadata_map.getCategoryValue(sid, split_category)
            comp_val = metadata_map.getCategoryValue(sid,
                                                     comparison_category)
            grouped[split_val, comp_val].append(diversity)

    # Turn each key into its tick label, then order the pairs.
    labelled = sorted(('%s (%s)' % (key[0], key[1]), dist)
                      for key, dist in grouped.items())

    x_tick_labels = [label for label, _ in labelled]
    dists = [dist for _, dist in labelled]
    return x_tick_labels, dists
开发者ID:biocore,项目名称:my-microbes,代码行数:42,代码来源:util.py
示例6: setUp
def setUp(self):
    """Define the top-level data shared by the rarefaction plot tests.

    Everything is stored on the test instance: small hand-written plotting
    inputs, raw mapping and rarefaction lines, the values parsed from the
    rarefaction lines, and literal values the tests compare against.
    """
    # Hand-written inputs for a single sample, 'Sample1'.
    self.data={}
    self.data['xaxis']=[10.0]
    self.sample_dict={'Sample1':{10.00: [1.3276140000000001]}}
    self.data['yvals']={'Sample1': [1.3276140000000001]}
    self.data['err']={'Sample1': [.1]}
    self.xmax=140
    self.ymax=20
    self.std_type='stddev'
    self.ops=['Sample1']
    self.mapping_category='SampleID'
    self.imagetype='png'
    self.resolution=70
    self.mapping_lookup={'SampleID-Sample1':'col_0_row_0'}
    self.data['map']=[['SampleID','Day'],['Sample1','Day1']]
    self.color_prefs={'SampleID': {'column': 'SampleID', 'color': \
                    {'Sample1': '#ff0000'}}}
    self.groups={'Sample1':['Sample1']}
    self.background_color='black'
    self.label_color='white'
    self.labelname='SampleID'
    self.rare_data={'color': {'Sample1': '#ff0000'}, \
        'series': {'Sample1': [2.0515300000000001],}, \
         'headers': ['test.txt','SampleID'], 'xaxis': [10.0], \
         'error': {'Sample1': [0.0]}, 'options': ['Sample1']}
    self.fpath='/tmp/'
    self.output_dir='/tmp/'
    self.metric_name='test'
    # Start empty; presumably filled by tests that create files or folders
    # so they can be removed afterwards - confirm against tearDown.
    self._paths_to_clean_up = []
    self._folders_to_cleanup = []
    self.rarefaction_file_data=[[10.0, 0.0, 1.0], [10.0, 1.0, 3.0]]
    d = {'redtowhite3_0':'#7fff00','redtowhite3_1':'#7fff00'}
    self.data_colors = color_dict_to_objects(d)
    self.colors={'Sample1':'redtowhite3_0','Sample2':'redtowhite3_1'}
    self.colors2={'Sample1':'redtowhite3_0'}
    # Mapping file lines: a header and three samples (123, 234, 345).
    self.mappingfile = ['#SampleID\tSex\tAge',
                        '123\tF\t32',
                        '234\tM\t30',
                        '345\tM\t32']
    #self.p_mappingfile = parse_mapping_file(self.mappingfile,\
    #                                            strip_quotes=True)
    # Collated rarefaction lines: a header, then two iterations at each of
    # the depths 10, 310 and 610 for the three samples above.
    self.rarefactionfile=[\
                '\tsequences per sample\titeration\t123\t234\t345',
                'rare10.txt\t10\t0\t1.99181\t0.42877\t2.13996',
                'rare10.txt\t10\t1\t2.07163\t0.42877\t2.37055',
                'rare310.txt\t310\t0\t8.83115\t0.42877\t11.00725',
                'rare310.txt\t310\t1\t10.05242\t0.42877\t8.24474',
                'rare610.txt\t610\t0\t12.03067\t0.42877\t11.58928',
                'rare610.txt\t610\t1\t12.9862\t0.42877\t11.58642']
    # Hand-built dict keyed by file name whose value is a 4-tuple with the
    # same layout that parse_rarefaction is unpacked into below.
    self.rares = {'test.txt': (['', 'sequences per sample', 'iteration', \
                  'Sample1'], [], ['rare1.txt', 'rare2.txt'], \
                  [[10.0, 2.0, 7.0, 7.0, 9.0], [10.0, 2.0, 7.0, 7.0, 9.0]])}
    # Parse the raw lines, then derive the matrix, depths and sample ids
    # from the parsed values.
    self.col_headers, self.comments, self.rarefaction_fns, \
    self.rarefaction_data = parse_rarefaction(self.rarefactionfile)
    self.matrix, self.seqs_per_samp, self.sampleIDs = \
    get_rarefaction_data(self.rarefaction_data, self.col_headers)
    # Literal values with one entry per depth (10, 310, 610); presumably the
    # expected results of the functions under test - confirm against tests.
    self.ave_seqs_per_sample1 = {'Sample1':[2.03172,9.4417849999999994,\
    12.508435]}
    self.ave_seqs_per_sample = {'123':[2.03172,9.4417849999999994,\
    12.508435],'234':[0.42876999999999998,0.42876999999999998,\
    0.42876999999999998],'345':[2.255255,9.625995,11.58785]}
    self.collapsed_ser_sex = {'M':[1.3420125000000001,5.0273824999999999,\
    6.0083099999999998], 'F':[2.03172,9.4417849999999994,12.508435]}
    self.err_ser_sex = {'M':[0.91324250000000007,4.5986124999999998,\
    5.5795399999999997],'F':[0.0,0.0,0.0]}
    self.rarefaction_legend_mat_init={'test': {'SampleID': {}}}
    self.col_headers2=['', 'sequences per sample', 'iteration', 'Sample1', \
    'Sample2']
    self.rarefaction_data_mat={'SampleID': {'Sample1': {'test': {'ave': ['     7.000'], 'err': ['       nan']}}}}
    self.rarefaction_legend_mat={'test': {'samples': {'Sample1': {'color': '#ff0000', 'link': 'html_plots/testcol_0_row_0.png'}}, 'groups': {'SampleID': {'Sample1': {'groupcolor': '#ff0000', 'groupsamples': ['Sample1']}}}}}
    self.exp_err_series_ave={'M': [1.571915, 6.49885, 8.1750183333333339]}
开发者ID:DDomogala3,项目名称:qiime,代码行数:76,代码来源:test_make_rarefaction_plots.py
示例7: test_parse_rarefaction
def test_parse_rarefaction(self):
    # parse_rarefaction must split the collated lines into column headers,
    # comments, per-row file names and per-row numeric data.
    self.rarefactionfile = [
        '\tsequences per sample\titeration\t123\t234\t345',
        'rare10.txt\t10\t0\t1.99181\t0.42877\t2.13996',
        'rare10.txt\t10\t1\t2.07163\t0.42877\t2.37055',
        'rare310.txt\t310\t0\t8.83115\t0.42877\t11.00725',
        'rare310.txt\t310\t1\t10.05242\t0.42877\t8.24474',
        'rare610.txt\t610\t0\t12.03067\t0.42877\t11.58928',
        'rare610.txt\t610\t1\t12.9862\t0.42877\t11.58642',
    ]

    # expected values
    self.col_headers = [
        '', 'sequences per sample', 'iteration', '123', '234', '345',
    ]
    self.comments = []
    self.rarefaction_fns = [
        'rare10.txt', 'rare10.txt',
        'rare310.txt', 'rare310.txt',
        'rare610.txt', 'rare610.txt',
    ]
    self.rarefaction_data = [
        [10.0, 0.0, 1.9918100000000001, 0.42876999999999998, 2.1399599999999999],
        [10.0, 1.0, 2.0716299999999999, 0.42876999999999998, 2.3705500000000002],
        [310.0, 0.0, 8.8311499999999992, 0.42876999999999998, 11.007250000000001],
        [310.0, 1.0, 10.05242, 0.42876999999999998, 8.2447400000000002],
        [610.0, 0.0, 12.030670000000001, 0.42876999999999998, 11.58928],
        [610.0, 1.0, 12.9862, 0.42876999999999998, 11.58642],
    ]

    # observed values
    parsed = parse_rarefaction(self.rarefactionfile)
    obs_headers, obs_comments, obs_fns, obs_data = parsed

    self.assertEqual(obs_headers, self.col_headers)
    self.assertEqual(obs_comments, self.comments)
    self.assertEqual(obs_fns, self.rarefaction_fns)
    self.assertEqual(obs_data, self.rarefaction_data)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:19,代码来源:test_parse.py
示例8: compare_alpha_diversities
def compare_alpha_diversities(rarefaction_lines, mapping_lines, category,
                              depth=None, test_type='nonparametric',
                              num_permutations=999):
    """Compares alpha diversity values for differences per category treatment.

    Per-sample values are averaged over all rows at the chosen depth, and a
    two-sample t-test is run on those averages for each pair of treatments.

    Inputs:
     rarefaction_lines - list of lines, result of multiple rarefactions.
     mapping_lines - list of lines, mapping file lines.
     category - str, the category to be compared, eg 'Treatment' or 'Age'.
     depth - int, depth of the rarefaction file to use. if None, then will use
      the deepest available in the file.
     test_type - str, the type of t-test to perform. Must be either
      'parametric' or 'nonparametric'.
     num_permutations - int, the number of Monte Carlo permutations to use if
      test_type is 'nonparametric'.
    Returns:
     ttest_results - dict keyed by treatment pair whose values are
      (obs_t, p_val) tuples. The value is (None, None) when both treatments
      have a single sample, when either group contains nan, or when the
      nonparametric test yields no p-value.
     alphadiv_avgs - dict keyed by treatment whose values are (mean, std) of
      the averaged alpha diversities of that treatment's samples.
    Raises:
     ValueError - if num_permutations is below 1 for the nonparametric test,
      if no rarefaction row has the requested depth, or if a comparison is
      attempted with an unknown test_type.
    """
    if test_type == 'nonparametric' and num_permutations < 1:
        raise ValueError("Invalid number of permutations: %d. Must be greater "
                         "than zero." % num_permutations)

    rarefaction_data = parse_rarefaction(rarefaction_lines)
    mapping_data = parse_mapping_file_to_dict(mapping_lines)[0]
    # samid_pairs, treatment_pairs are in the same order
    samid_pairs, treatment_pairs = sampleId_pairs(mapping_data,
                                                  rarefaction_data, category)

    # extract only rows of the rarefaction data that are at the given depth
    # if depth is not given default to the deepest rarefaction available
    # rarefaction file is not guaranteed to be in order of rarefaction depth
    if depth is None:
        depth = array(rarefaction_data[3])[:, 0].max()

    rare_mat = array([row for row in rarefaction_data[3] if row[0] == depth])
    if rare_mat.shape[0] == 0:
        # without this check the averaging below fails with an opaque
        # IndexError on a scalar
        raise ValueError("Rarefaction depth %s could not be found in the "
                         "rarefaction data." % depth)

    # Average each col of the rarefaction mtx. Computing t test on averages
    # over all iterations. Avoids more comps which kills significance.
    rare_mat = (rare_mat.sum(0) / rare_mat.shape[0])[2:]  # drop depth, iter
    sids = rarefaction_data[0][3:]  # 0-2 are header strings

    ttest_results = {}
    for sid_pair, treatment_pair in zip(samid_pairs, treatment_pairs):
        # if there is only 1 sample for each treatment in a comparison, and
        # using mc method, will error (e.g. mc_t_two_sample([1],[1]).
        if len(sid_pair[0]) == 1 and len(sid_pair[1]) == 1:
            ttest_results[treatment_pair] = (None, None)
            continue

        i = rare_mat.take([sids.index(s) for s in sid_pair[0]])
        j = rare_mat.take([sids.index(s) for s in sid_pair[1]])
        # found discussion of how to quickly check an array for nan here:
        # http://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy
        if isnan(np_min(i)) or isnan(np_min(j)):
            ttest_results[treatment_pair] = (None, None)
            continue

        if test_type == 'parametric':
            obs_t, p_val = t_two_sample(i, j)
        elif test_type == 'nonparametric':
            obs_t, _, _, p_val = mc_t_two_sample(
                i, j, permutations=num_permutations)
            if p_val is not None:
                p_val = float(format_p_value_for_num_iters(
                    p_val, num_iters=num_permutations))
            else:
                # None will error in format_p_value_for_num_iters
                obs_t, p_val = None, None
        else:
            raise ValueError("Invalid test type '%s'." % test_type)
        ttest_results[treatment_pair] = (obs_t, p_val)

    # create dict of average alpha diversity values
    alphadiv_avgs = {}
    for sid_pair, treatment_pair in zip(samid_pairs, treatment_pairs):
        # calculate the alpha diversity average, std vals. choosing only the
        # first treatment of each pair doesn't guarantee full coverage, so
        # look at both
        for sid_list, treatment_str in zip(sid_pair, treatment_pair):
            # skip treatments that were already computed and added
            if treatment_str not in alphadiv_avgs:
                alphadiv_vals = rare_mat.take(
                    [sids.index(s) for s in sid_list])
                alphadiv_avgs[treatment_str] = (alphadiv_vals.mean(),
                                                alphadiv_vals.std())
    return ttest_results, alphadiv_avgs
开发者ID:icaro-henrique,项目名称:qiime,代码行数:84,代码来源:compare_alpha_diversity.py
示例9: setUp
def setUp(self):
    """Define the top-level data shared by the rarefaction plot tests.

    Everything is stored on the test instance: small hand-written plotting
    inputs, raw mapping and rarefaction lines, the values parsed from the
    rarefaction lines, and literal values the tests compare against.
    """
    # Hand-written inputs for a single sample, "Sample1".
    self.data = {}
    self.data["xaxis"] = [10.0]
    self.sample_dict = {"Sample1": {10.00: [1.3276140000000001]}}
    self.data["yvals"] = {"Sample1": [1.3276140000000001]}
    self.data["err"] = {"Sample1": [0.1]}
    self.xmax = 140
    self.ymax = 20
    self.std_type = "stddev"
    self.ops = ["Sample1"]
    self.mapping_category = "SampleID"
    self.imagetype = "png"
    self.resolution = 70
    self.mapping_lookup = {"SampleID-Sample1": "col_0_row_0"}
    self.data["map"] = [["SampleID", "Day"], ["Sample1", "Day1"]]
    self.color_prefs = {"SampleID": {"column": "SampleID", "color": {"Sample1": "#ff0000"}}}
    self.groups = {"Sample1": ["Sample1"]}
    self.background_color = "black"
    self.label_color = "white"
    self.labelname = "SampleID"
    self.rare_data = {
        "color": {"Sample1": "#ff0000"},
        "series": {"Sample1": [2.0515300000000001]},
        "headers": ["test.txt", "SampleID"],
        "xaxis": [10.0],
        "error": {"Sample1": [0.0]},
        "options": ["Sample1"],
    }
    self.fpath = "/tmp/"
    self.output_dir = "/tmp/"
    self.metric_name = "test"
    # Start empty; presumably filled by tests that create files or folders
    # so they can be removed afterwards - confirm against tearDown.
    self._paths_to_clean_up = []
    self._folders_to_cleanup = []
    self.rarefaction_file_data = [[10.0, 0.0, 1.0], [10.0, 1.0, 3.0]]
    d = {"redtowhite3_0": "#7fff00", "redtowhite3_1": "#7fff00"}
    self.data_colors = color_dict_to_objects(d)
    self.colors = {"Sample1": "redtowhite3_0", "Sample2": "redtowhite3_1"}
    self.colors2 = {"Sample1": "redtowhite3_0"}
    # Mapping file lines: a header and three samples (123, 234, 345).
    self.mappingfile = ["#SampleID\tSex\tAge", "123\tF\t32", "234\tM\t30", "345\tM\t32"]
    # self.p_mappingfile = parse_mapping_file(self.mappingfile,\
    #                                            strip_quotes=True)
    # Collated rarefaction lines: a header, then two iterations at each of
    # the depths 10, 310 and 610 for the three samples above.
    self.rarefactionfile = [
        "\tsequences per sample\titeration\t123\t234\t345",
        "rare10.txt\t10\t0\t1.99181\t0.42877\t2.13996",
        "rare10.txt\t10\t1\t2.07163\t0.42877\t2.37055",
        "rare310.txt\t310\t0\t8.83115\t0.42877\t11.00725",
        "rare310.txt\t310\t1\t10.05242\t0.42877\t8.24474",
        "rare610.txt\t610\t0\t12.03067\t0.42877\t11.58928",
        "rare610.txt\t610\t1\t12.9862\t0.42877\t11.58642",
    ]
    # Hand-built dict keyed by file name whose value is a 4-tuple with the
    # same layout that parse_rarefaction is unpacked into below.
    self.rares = {
        "test.txt": (
            ["", "sequences per sample", "iteration", "Sample1"],
            [],
            ["rare1.txt", "rare2.txt"],
            [[10.0, 2.0, 7.0, 7.0, 9.0], [10.0, 2.0, 7.0, 7.0, 9.0]],
        )
    }
    # Parse the raw lines, then derive the matrix, depths and sample ids
    # from the parsed values.
    self.col_headers, self.comments, self.rarefaction_fns, self.rarefaction_data = parse_rarefaction(
        self.rarefactionfile
    )
    self.matrix, self.seqs_per_samp, self.sampleIDs = get_rarefaction_data(self.rarefaction_data, self.col_headers)
    # Literal values with one entry per depth (10, 310, 610); presumably the
    # expected results of the functions under test - confirm against tests.
    self.ave_seqs_per_sample1 = {"Sample1": [2.03172, 9.4417849999999994, 12.508435]}
    self.ave_seqs_per_sample = {
        "123": [2.03172, 9.4417849999999994, 12.508435],
        "234": [0.42876999999999998, 0.42876999999999998, 0.42876999999999998],
        "345": [2.255255, 9.625995, 11.58785],
    }
    self.collapsed_ser_sex = {
        "M": [1.3420125000000001, 5.0273824999999999, 6.0083099999999998],
        "F": [2.03172, 9.4417849999999994, 12.508435],
    }
    self.err_ser_sex = {"M": [0.91324250000000007, 4.5986124999999998, 5.5795399999999997], "F": [0.0, 0.0, 0.0]}
    self.rarefaction_legend_mat_init = {"test": {"SampleID": {}}}
    self.col_headers2 = ["", "sequences per sample", "iteration", "Sample1", "Sample2"]
    self.rarefaction_data_mat = {"SampleID": {"Sample1": {"test": {"ave": ["     7.000"], "err": ["       nan"]}}}}
    self.rarefaction_legend_mat = {
        "test": {
            "samples": {"Sample1": {"color": "#ff0000", "link": "html_plots/testcol_0_row_0.png"}},
            "groups": {"SampleID": {"Sample1": {"groupcolor": "#ff0000", "groupsamples": ["Sample1"]}}},
        }
    }
    self.exp_err_series_ave = {"M": [1.571915, 6.49885, 8.1750183333333339]}
开发者ID:rob-knight,项目名称:qiime,代码行数:88,代码来源:test_make_rarefaction_plots.py
示例10: setUp
def setUp(self):
    """Define the rarefaction/mapping fixtures and literal comparison values.

    Stores raw rarefaction and mapping lines, their parsed forms, and
    hand-written values derived from them on the test instance.
    """
    # Collated rarefaction lines for four samples: two iterations at depths
    # 10 and 310, and a single row at each of depths 810 and 910. Sample 234
    # is 'n/a' at depth 810.
    self.rarefaction_file = \
        ['\tsequences per sample\titeration\t123\t234\t345\t456',
         'rare10.txt\t10\t0\t1.99181\t5.42877\t2.13996\t0.002322',
         'rare10.txt\t10\t1\t2.07163\t1.42877\t2.37055\t0.01219',
         'rare310.txt\t310\t0\t8.83115\t6.42877\t11.00725\t0.18233',
         'rare310.txt\t310\t1\t10.05242\t9.42877\t8.24474\t0.99229',
         'rare810.txt\t810\t0\t12.03067\tn/a\t11.58928\t0.8993',
         'rare910.txt\t910\t1\t12.9862\t2.42877\t11.58642\t1.22563']
    self.rarefaction_data = parse_rarefaction(self.rarefaction_file)
    # Mapping file lines: header (split over two string pieces), a comment
    # line and one row per sample.
    self.mapping_file = \
        ['#SampleID\tTreatment\tLinker'+\
         'PrimerSequence\tDose\tTTD\tDescription',
         '#Comment Line',
         '123\tAAAA\tBBBB\tHigh\t31\tM_ID_123',
         '234\tCCCC\tDDDD\tLow\t67\tM_ID_234',
         '345\tAAAA\tFFFF\tMed\t21\tM_ID_345',
         '456\tAAAA\tGGGG\tLow\t67\tM_ID_456'
         ]
    # Only the first element of the parser's return value is kept.
    self.mapping_data = \
        parse_mapping_file_to_dict(self.mapping_file)[0]
    # Pairs of the distinct values found in the Dose, TTD and Treatment
    # columns of the mapping lines above.
    self.value_pairs_Dose = \
        [('Low','Med'),('Low','High'),('Med','High')]
    self.value_pairs_TTD = \
        [('67', '21'), ('67', '31'), ('21', '31')]
    self.value_pairs_Treatment = \
        [('CCCC', 'AAAA')]
    # Sample ids grouped by their value in each of those columns.
    self.cat_val_Dose = \
        {'High': ['123'], 'Low': ['234', '456'], 'Med': ['345']}
    self.cat_val_TTD = \
        {'21': ['345'], '31': ['123'], '67': ['234', '456']}
    self.cat_val_Treatment = \
        {'AAAA': ['345', '123', '456'], 'CCCC': ['234']}
    # The value pairs above with each value replaced by its sample ids.
    self.Id_pairs_Dose = \
        [(['234', '456'], ['345']), (['234', '456'], ['123']),
         (['345'], ['123'])]
    self.Id_pairs_TTD = \
        [(['234', '456'], ['345']), (['234', '456'], ['123']),
         (['345'], ['123'])]
    self.Id_pairs_Treatment = \
        [(['234'], ['345', '123', '456'])]
    # Sample id -> zero-based position among the sample columns.
    self.rarefaction_cols_dict = \
        {'123': 0, '234': 1, '345': 2, '456':3}
    # Sample columns of the rarefaction rows at depths 10, 310 and 910.
    self.extracted_mtx_10 = \
        array([[ 1.99181, 5.42877, 2.13996, 0.002322],
               [ 2.07163, 1.42877, 2.37055, 0.01219]])
    self.extracted_mtx_310 = \
        array([[ 8.83115, 6.42877, 11.00725, 0.18233],
               [ 10.05242, 9.42877, 8.24474, 0.99229]])
    self.extracted_mtx_910 = \
        array([[ 12.9862 , 2.42877, 11.58642, 1.22563]])
    # One pair of sample id lists, and the depth-10 columns that belong to
    # its first (234) and second (345, 123) member.
    self.sample_pair1 = \
        (['234'], ['345', '123'])
    self.rarefaction_mtx_for_sample_pair1_0 = \
        array([[ 5.42877],
               [ 1.42877]])
    self.rarefaction_mtx_for_sample_pair1_1 = \
        array([[ 2.13996, 1.99181],
               [ 2.37055, 2.07163]])
    # Presumably the expected compare_alpha_diversities result for the TTD
    # category - confirm against the test that uses it.
    self.compared_alpha_diversities_TTD = {'TTD': {('67', '21'):
        (-0.27929839680103463, 0.79386220041241184), ('21', '31'):
        (1.8321466933860993, 0.20839398129924847), ('67', '31'):
        (-0.16318504125427058, 0.87828549279958279)}}
开发者ID:DDomogala3,项目名称:qiime,代码行数:76,代码来源:test_compare_alpha_diversity.py
示例11: mean_alpha
def mean_alpha(alpha_dict, depth):
    """Mean collated alpha diversity data at a given depth.

    Input:
    alpha_dict: dictionary where the values are the lines of collated alpha
    diversity data files and the keys are the names of each of these files
    with no extension; this name is usually the metric used to compute the
    alpha diversity.
    depth: depth at which the alpha diversity values are averaged; must be
    None or an int greater than or equal to zero. If None is passed, the
    highest depth found in each file is used for that file.

    Output:
    metrics: list of metric names, built as '<key>_even_<depth used>'
    sample_ids: list of sample identifiers represented
    data: a list of lists with the mean of alpha diversity data at a given
    depth for the different metrics, each column is a different metric.

    Raises:
    AssertionError: if alpha_dict is not a dict or depth is neither None nor
    a non-negative int.
    ValueError: if the depth is missing from one of the files, or if the
    files do not all list the same sample ids in the same order.
    """
    assert isinstance(alpha_dict, dict), "Input data must be a dictionary"
    # check the type first so a non-numeric depth fails the assertion instead
    # of failing inside the comparison
    assert depth is None or (type(depth) is int and depth >= 0), (
        "The specified depth must be a positive integer.")

    metrics = []
    sample_ids = []
    data = []

    for key, value in alpha_dict.items():
        identifiers, _, _, rarefaction_data = parse_rarefaction(value)

        # if depth is specified as None use the highest available, retrieve
        # it on a per file basis so you make sure the value exists for all
        # files
        if depth is None:
            _depth = int(max([row[0] for row in rarefaction_data]))
        else:
            _depth = depth
        metrics.append('{0}_even_{1}'.format(key, _depth))

        # check there are elements with the desired rarefaction depth
        if not any(row[0] == _depth for row in rarefaction_data):
            # get a sorted list of strings with the available depths
            available_rarefaction_depths = [
                str(d) for d in sorted(set(row[0] for row in rarefaction_data))]
            raise ValueError(
                "The depth %d does not exist in the collated "
                "alpha diversity file for the metric: %s. The available depths "
                "are: %s." % (_depth, key,
                              ', '.join(available_rarefaction_depths)))

        # check all the files have the same sample ids in the same order;
        # the first three identifiers are header labels, not sample ids
        if sample_ids:
            if sample_ids != identifiers[3:]:
                raise ValueError(
                    "Non-matching sample ids were found in the "
                    "collated alpha diversity files. Make sure all the files "
                    "contain data for the same samples.")
        else:
            sample_ids = identifiers[3:]

        # find all the data at the desired depth and get the mean values,
        # remove the first two elements ([depth, iteration]) as those are not
        # needed
        data.append(array([row[2:] for row in rarefaction_data
                           if row[0] == _depth]).mean(axis=0))

    # transpose the data to match the formatting of non-collated alpha div
    # data
    data = array(data).T.tolist()
    return metrics, sample_ids, data
开发者ID:EESI,项目名称:FizzyQIIME,代码行数:64,代码来源:add_alpha_to_mapping_file.py
示例12: main
def main():
    """Parse rarefaction files and write the rarefaction plots.

    Reads options from the command line. ``options.input_dir`` is either a
    directory, in which case every entry not starting with '.' is parsed, or
    a comma-separated list of file paths. The parsed files are handed to
    make_averages and, when that returns HTML, it is written to
    'rarefaction_plots.html' inside the output directory.

    Problems with the inputs (unreadable rarefaction file, unsupported image
    type, non-integer resolution, output directory that cannot be created)
    are reported through option_parser.error.
    """
    option_parser, options, args = parse_command_line_parameters(**script_info)

    input_dir = options.input_dir

    # file name -> parsed rarefaction data
    rares = {}
    # NOTE(review): mode "U" is kept for universal-newline reading, but it
    # was removed from open() in Python 3.11 - revisit when porting.
    if isdir(input_dir):
        rarenames = [r for r in listdir(input_dir) if not r.startswith(".")]
        for r in rarenames:
            try:
                # close the handle as soon as the lines have been read
                with open(path.join(input_dir, r), "U") as rare_f:
                    rarefl = rare_f.readlines()
                rares[r] = parse_rarefaction(rarefl)
            except (IOError):
                option_parser.error("Problem with rarefaction file. %s" % exc_info()[1])
                exit(0)
    else:
        try:
            for input_fp in input_dir.split(","):
                # key each file by the last component of its path
                input_path = split(input_fp)[-1]
                with open(input_fp, "U") as rare_f:
                    rarefl = rare_f.readlines()
                rares[input_path] = parse_rarefaction(rarefl)
        except (IOError):
            option_parser.error("Problem with rarefaction file. %s" % exc_info()[1])
            exit(0)

    if options.imagetype not in ["png", "svg", "pdf"]:
        option_parser.error("Supplied extension not supported.")
        exit(0)
    else:
        imagetype = options.imagetype

    try:
        resolution = int(options.resolution)
    except (ValueError):
        option_parser.error("Invalid resolution.")
        exit(0)

    # Get the command-line options.
    prefs, data, background_color, label_color, ball_scale, arrow_colors = sample_color_prefs_and_map_data_from_options(
        options
    )

    # output directory check: use the requested directory, creating it when
    # needed; fall back to a random directory name when none was given
    if isinstance(options.output_dir, str) and options.output_dir != ".":
        if exists(options.output_dir):
            output_dir = options.output_dir
        else:
            try:
                create_dir(options.output_dir, False)
                output_dir = options.output_dir
            except (ValueError):
                option_parser.error("Could not create output directory.")
                exit(0)
    else:
        output_dir = get_random_directory_name()

    # Generate the plots and html text
    ymax = options.ymax
    suppress_webpage = options.suppress_html_output
    html_output = make_averages(
        prefs, data, background_color, label_color, rares, output_dir, resolution, imagetype, ymax, suppress_webpage
    )

    if html_output:
        # Write the html file.
        with open(path.join(output_dir, "rarefaction_plots.html"), "w") as outfile:
            outfile.write(html_output)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:70,代码来源:make_rarefaction_plots.py
示例13: compare_alpha_diversities
def compare_alpha_diversities(rarefaction_lines, mapping_lines, category, depth,
                              test_type='nonparametric', num_permutations=999):
    """Compares alpha diversity values for differences per category treatment.

    Per-sample values are averaged over all rows at the given depth, and a
    two-sample t-test is run on those averages for each pair of treatments.

    Inputs:
     rarefaction_lines - list of lines, result of multiple rarefactions.
     mapping_lines - list of lines, mapping file lines.
     category - str, the category to be compared, eg 'Treatment' or 'Age'.
     depth - int, depth of the rarefaction file to use.
     test_type - str, the type of t-test to perform. Must be either
      'parametric' or 'nonparametric'.
     num_permutations - int, the number of Monte Carlo permutations to use if
      test_type is 'nonparametric'.
    Returns:
     dict keyed by '<treatment 1>,<treatment 2>' strings whose values are
     (obs_t, p_val) tuples. The value is (None, None) when both treatments
     have a single sample, when either group contains nan, or when the
     nonparametric test yields no p-value.
    Raises:
     ValueError - if num_permutations is below 1 for the nonparametric test,
      if no rarefaction row has the requested depth, or if a comparison is
      attempted with an unknown test_type.
    """
    if test_type == 'nonparametric' and num_permutations < 1:
        raise ValueError("Invalid number of permutations: %d. Must be greater "
                         "than zero." % num_permutations)

    rarefaction_data = parse_rarefaction(rarefaction_lines)
    mapping_data = parse_mapping_file_to_dict(mapping_lines)[0]
    # samid_pairs, treatment_pairs are in the same order
    samid_pairs, treatment_pairs = sampleId_pairs(mapping_data,
                                                  rarefaction_data, category)

    # extract only rows of the rarefaction data that are at the given depth
    rare_mat = array([row for row in rarefaction_data[3] if row[0] == depth])
    if rare_mat.shape[0] == 0:
        # without this check the averaging below fails with an opaque
        # IndexError on a scalar
        raise ValueError("Rarefaction depth %s could not be found in the "
                         "rarefaction data." % depth)

    # Average each col of the rarefaction mtx. Computing t test on averages
    # over all iterations. Avoids more comps which kills significance.
    rare_mat = (rare_mat.sum(0) / rare_mat.shape[0])[2:]  # drop depth, iter
    sids = rarefaction_data[0][3:]  # 0-2 are header strings

    results = {}
    for sid_pair, treatment_pair in zip(samid_pairs, treatment_pairs):
        t_key = '%s,%s' % (treatment_pair[0], treatment_pair[1])

        # if there is only 1 sample for each treatment in a comparison, and
        # using mc method, will error (e.g. mc_t_two_sample([1],[1]).
        if len(sid_pair[0]) == 1 and len(sid_pair[1]) == 1:
            results[t_key] = (None, None)
            continue

        i = rare_mat.take([sids.index(s) for s in sid_pair[0]])
        j = rare_mat.take([sids.index(s) for s in sid_pair[1]])
        # found discussion of how to quickly check an array for nan here:
        # http://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy
        if isnan(np_min(i)) or isnan(np_min(j)):
            results[t_key] = (None, None)
            continue

        if test_type == 'parametric':
            obs_t, p_val = t_two_sample(i, j)
        elif test_type == 'nonparametric':
            obs_t, _, _, p_val = mc_t_two_sample(
                i, j, permutations=num_permutations)
            if p_val is not None:
                p_val = float(format_p_value_for_num_iters(
                    p_val, num_iters=num_permutations))
            else:
                # None will error in format_p_value_for_num_iters
                obs_t, p_val = None, None
        else:
            raise ValueError("Invalid test type '%s'." % test_type)
        results[t_key] = (obs_t, p_val)
    return results
开发者ID:binma,项目名称:qiime,代码行数:66,代码来源:compare_alpha_diversity.py
示例14: setUp
def setUp(self):
    """Define the rarefaction/mapping fixtures and literal comparison values.

    Stores raw rarefaction and mapping lines, their parsed forms, and
    hand-written values derived from them on the test instance.
    """
    # Collated rarefaction lines for three samples: two iterations at depths
    # 10 and 310, and a single row at each of depths 810 and 910. Sample 234
    # is 'n/a' at depth 810.
    self.rarefaction_file = \
        ['\tsequences per sample\titeration\t123\t234\t345',
         'rare10.txt\t10\t0\t1.99181\t5.42877\t2.13996',
         'rare10.txt\t10\t1\t2.07163\t1.42877\t2.37055',
         'rare310.txt\t310\t0\t8.83115\t6.42877\t11.00725',
         'rare310.txt\t310\t1\t10.05242\t9.42877\t8.24474',
         'rare810.txt\t810\t0\t12.03067\tn/a\t11.58928',
         'rare910.txt\t910\t1\t12.9862\t2.42877\t11.58642']
    self.rarefaction_data = parse_rarefaction(self.rarefaction_file)
    # Mapping file lines: header (split over two string pieces), a comment
    # line and one row per sample.
    self.mapping_file = \
        ['#SampleID\tTreatment\tLinker'+\
         'PrimerSequence\tDose\tTTD\tDescription',
         '#Comment Line',
         '123\tAAAA\tBBBB\tHigh\t31\tM_ID_123',
         '234\tCCCC\tDDDD\tLow\t67\tM_ID_234',
         '345\tAAAA\tFFFF\tMed\t21\tM_ID_345']
    # Only the first element of the parser's return value is kept.
    self.mapping_data = \
        parse_mapping_file_to_dict(self.mapping_file)[0]
    # Pairs of the distinct values found in the Dose, TTD and Treatment
    # columns of the mapping lines above.
    self.value_pairs_Dose = \
        [('Low','Med'),('Low','High'),('Med','High')]
    self.value_pairs_TTD = \
        [('67', '21'), ('67', '31'), ('21', '31')]
    self.value_pairs_Treatment = \
        [('CCCC', 'AAAA')]
    # Sample ids grouped by their value in each of those columns.
    self.cat_val_Dose = \
        {'High': ['123'], 'Low': ['234'], 'Med': ['345']}
    self.cat_val_TTD = \
        {'21': ['345'], '31': ['123'], '67': ['234']}
    self.cat_val_Treatment = \
        {'AAAA': ['345', '123'], 'CCCC': ['234']}
    # The value pairs above with each value replaced by its sample ids.
    self.Id_pairs_Dose = \
        [(['234'], ['345']), (['234'], ['123']), (['345'], ['123'])]
    self.Id_pairs_TTD = \
        [(['234'], ['345']), (['234'], ['123']), (['345'], ['123'])]
    self.Id_pairs_Treatment = \
        [(['234'], ['345', '123'])]
    # Sample id -> zero-based position among the sample columns.
    self.rarefaction_cols_dict = \
        {'123': 0, '234': 1, '345': 2}
    # Sample columns of the rarefaction rows at depths 10, 310 and 910.
    self.extracted_mtx_10 = \
        array([[ 1.99181, 5.42877, 2.13996],
               [ 2.07163, 1.42877, 2.37055]])
    self.extracted_mtx_310 = \
        array([[ 8.83115, 6.42877, 11.00725],
               [ 10.05242, 9.42877, 8.24474]])
    self.extracted_mtx_910 = \
        array([[ 12.9862 , 2.42877, 11.58642]])
    # One pair of sample id lists, and the depth-10 columns that belong to
    # its first (234) and second (345, 123) member.
    self.sample_pair1 = \
        (['234'], ['345', '123'])
    self.rarefaction_mtx_for_sample_pair1_0 = \
        array([[ 5.42877],
               [ 1.42877]])
    self.rarefaction_mtx_for_sample_pair1_1 = \
        array([[ 2.13996, 1.99181],
               [ 2.37055, 2.07163]])
    # Presumably the expected compare_alpha_diversities result for the TTD
    # category - confirm against the test that uses it.
    self.compared_alpha_diversities_TTD = \
        {'TTD':{('21','31'):(1.8321466933860993,0.20839398129924847),
                ('67', '21'): (0.58578495700890432, 0.61731739324369639),
                ('67', '31'): (0.69838596448703294, 0.55721515283248324)}}
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:72,代码来源:test_compare_alpha_diversities.py
示例15: compare_alpha_diversities
def compare_alpha_diversities(rarefaction_lines, mapping_lines, category,
depth=None, test_type='nonparametric', num_permutations=999):
"""Compares alpha diversity values for differences per category treatment.
Notes:
Returns a defaultdict which as keys has the pairs of treatments being
compared, and as values, lists of (pval,tval) tuples for each comparison at
for a given iteration.
Inputs:
rarefaction_lines - list of lines, result of multiple rarefactions.
mapping_lines - list of lines, mapping file lines.
category - str, the category to be compared, eg 'Treatment' or 'Age'.
depth - int, depth of the rarefaction file to use. if None, then will use
the deepest available in the file.
test_type - str, the type of t-test to perform. Must be either
'parametric' or 'nonparametric'.
num_permutations - int, the number of Monte Carlo permutations to use if
test_type is 'nonparametric'.
"""
if test_type == 'nonparametric' and num_permutations < 1:
raise ValueError("Invalid number of permutations: %d. Must be greater "
"than zero." % num_permutations)
rarefaction_data = parse_rarefaction(rarefaction_lines)
mapping_data = parse_mapping_file_to_dict(mapping_lines)[0]
# samid_pairs, treatment_pairs are in the same order
samid_pairs, treatment_pairs = sampleId_pairs(mapping_data,
rarefaction_data, category)
ps_avg_div = get_per_sample_average_diversities(rarefaction_data, depth)
ttest_results, ad_avgs = {}, {}
for sid_pair, treatment_pair in zip(samid_pairs, treatment_pairs):
# if there is only 1 sample for each treatment in a comparison, and mc
# using mc method, will error (e.g. mc_t_two_sample([1],[1]).
if len(sid_pair[0]) == 1 and len(sid_pair[1]) == 1:
ttest_results[treatment_pair] = (None, None)
# add alpha diversity averages and standard deviations. since their
# is only a single sample if we are in this part of the loop, we can
# just record the sample value as the avg and 0 as the std.
ad_avgs[treatment_pair[0]] = (sid_pair[0][0], 0.)
ad_avgs[treatment_pair[1]] = (sid_pair[1][0], 0.)
else:
i = array([ps_avg_div[x] for x in sid_pair[0]])
j = array([ps_avg_div[x] for x in sid_
|
请发表评论