本文整理汇总了Python中qiime.parse.parse_mapping_file_to_dict函数的典型用法代码示例。如果您正苦于以下问题:Python parse_mapping_file_to_dict函数的具体用法?Python parse_mapping_file_to_dict怎么用?Python parse_mapping_file_to_dict使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parse_mapping_file_to_dict函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: make_node_attr_table
def make_node_attr_table(otu_node_lines, sample_node_lines,
                         sample_color, otu_color, sample_size, otu_size,
                         sample_shape, otu_shape):
    '''Make a preference table to load as node attributes for cytoscape.

    This file makes it easy to color, shape, and size the nodes according
    to the desire of the user. The color, size, and shape inputs are lists
    of strings that specify fields in the headers of otu_node_lines
    and sample_node_lines. The output will be as follows:

    #NodeID NodeType Abundance Color Size Shape
    otu1 otu 45 Bacteria_bacteriodales xyz spc56
    sample1 sample 56 post_treatment abc tp_5

    In the above example the user has passed sample_color as ['Treatment']
    and sample1 happens to be post treatment. For otu_color they passed
    ['kingdom', 'phylum'] and otu1 had kingdom Bacteria and phylum
    bacteriodales. This allows arbitrary numbers of color, size, shape
    combos to be created so that everything is fully customizable. If more
    than one field is passed the values for those fields will be joined
    with a '_'.

    Inputs:
     otu_node_lines - list of strs, output of make_otu_node_table
     sample_node_lines - list of strs, output of make_sample_node_table
     _color, _size, _shape - each of these 6 arguments must be a list of
     strings which identify which header fields are desired.

    Returns a list of tab-separated lines: the header followed by one line
    per sample node and then one line per otu node.
    '''
    # [0] is the {node_id: {field: value}} dict; [1] holds the comment
    # lines, which are not needed here.
    sample_nodes = parse_mapping_file_to_dict(sample_node_lines)[0]
    otu_nodes = parse_mapping_file_to_dict(otu_node_lines)[0]

    def _node_row(node, nodetype, fields, color, size, shape):
        # Each attribute is the '_'-joined values of the requested fields.
        return '\t'.join([node, nodetype, fields['Abundance'],
                          '_'.join([fields[i] for i in color]),
                          '_'.join([fields[i] for i in size]),
                          '_'.join([fields[i] for i in shape])])

    lines = ['#NodeID\tNodeType\tAbundance\tColor\tSize\tShape']
    # Samples and otus are walked separately so that an id present in both
    # tables yields one sample row and one otu row, instead of two otu rows.
    for node, fields in sample_nodes.items():
        lines.append(_node_row(node, 'sample', fields, sample_color,
                               sample_size, sample_shape))
    for node, fields in otu_nodes.items():
        lines.append(_node_row(node, 'otu', fields, otu_color, otu_size,
                               otu_shape))
    return lines
开发者ID:Springbudder,项目名称:qiime,代码行数:57,代码来源:make_bipartite_network.py
示例2: main
def main():
    """Build the cytoscape node, node-attribute and edge tables.

    Reads the BIOM table and mapping file named on the command line, checks
    that every requested color/shape/size field is available, and writes
    SampleNodeTable.txt, OTUNodeTable.txt, NodeAttrTable.txt and
    EdgeTable.txt into a newly created output directory.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    otu_table_fp = opts.biom_fp
    map_fp = opts.map_fp
    output_dir = opts.output_dir
    taxonomy_key = opts.observation_md_header_key
    scolors = opts.scolors.split(',')
    ocolors = opts.ocolors.split(',')
    sshapes = opts.sshapes.split(',')
    oshapes = opts.oshapes.split(',')
    ssizes = opts.ssizes.split(',')
    osizes = opts.osizes.split(',')
    md_fields = opts.md_fields.split(',')

    # check that the otu fields asked for are available
    shared_options = ['NodeType', 'Abundance']
    if not all([i in md_fields + shared_options
                for i in ocolors + oshapes + osizes]):
        option_parser.error(
            'The fields specified for observation colors, sizes, or '
            'shapes are not in either the shared options (NodeType,Abundance)'
            ' or the supplied md_fields. These fields must be a subset of '
            'the union of these sets. Have you passed ocolors, osizes or '
            'oshapes that are not in the md_fields?')

    # Parse the mapping file once; [1] is comments, which are not needed.
    pmf = parse_mapping_file_to_dict(map_fp)[0]
    if not pmf:
        option_parser.error('The mapping file contains no samples.')

    # check that the sample fields asked for are available. mapping file
    # elements should all have same metadata keys, so the first sample's
    # keys stand in for all of them.
    sopts = list(next(iter(pmf.values())))
    if not all([i in sopts + shared_options
                for i in scolors + sshapes + ssizes]):
        option_parser.error(
            'The fields specified for sample colors, sizes, or '
            'shapes are not in either the shared options (NodeType,Abundance)'
            ' or the supplied mapping file. These fields must be a subset of '
            'the union of these sets. Have you passed scolors, ssizes or '
            'sshapes that are not in the mapping file headers?')

    # actual computation begins
    try:
        create_dir(output_dir, fail_on_exist=True)
    except OSError:
        option_parser.error('Directory already exists. Will not overwrite.')

    # NOTE(review): assumes parse_biom_table reads the handle eagerly, so it
    # is safe to close once the call returns -- confirm.
    with open(otu_table_fp) as biom_f:
        bt = parse_biom_table(biom_f)

    sample_node_table = make_sample_node_table(bt, pmf)
    otu_node_table = make_otu_node_table(bt, taxonomy_key, md_fields)
    node_attr_table = make_node_attr_table(otu_node_table, sample_node_table,
                                           scolors, ocolors, ssizes, osizes,
                                           sshapes, oshapes)
    edge_table = make_edge_table(bt)

    _write_table(sample_node_table,
                 os.path.join(output_dir, 'SampleNodeTable.txt'))
    _write_table(otu_node_table,
                 os.path.join(output_dir, 'OTUNodeTable.txt'))
    _write_table(node_attr_table,
                 os.path.join(output_dir, 'NodeAttrTable.txt'))
    _write_table(edge_table, os.path.join(output_dir, 'EdgeTable.txt'))
开发者ID:oscaredd,项目名称:qiime,代码行数:53,代码来源:make_bipartite_network.py
示例3: view_metadata_categories_from_mapping_file
def view_metadata_categories_from_mapping_file(mapping_file):
    """Print each metadata category with an example value.

    mapping_file: path to the mapping file.

    The categories and example values come from one sample in the parsed
    mapping. Nothing is printed if the mapping holds no samples.
    """
    # The context manager closes the handle even if parsing raises.
    with open(mapping_file, 'rU') as mapping_fp:
        mapping_dict, comments = parse_mapping_file_to_dict(mapping_fp)
    if not mapping_dict:
        return
    key = next(iter(mapping_dict))
    for category, value in mapping_dict[key].items():
        print("%s (e.g. '%s')" % (category, value))
开发者ID:samfway,项目名称:machine-learning,代码行数:7,代码来源:parse.py
示例4: main
def main():
    """Report the between/within-cluster dissimilarity ratio.

    Reads the distance matrix (opts.input_path) and mapping file (opts.map),
    computes between- and within-category distances for opts.category, and
    writes either the bare ratio (opts.short) or a longer report to
    opts.output_path, or to stdout when no output path is given.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.output_path is not None:
        outf = open(opts.output_path, 'w')
    else:
        outf = sys.stdout
    try:
        with open(opts.input_path, 'U') as dist_f:
            dists = parse_distmat(dist_f)
        # The whole return value of the parser is handed on unchanged, as
        # in the original call.
        with open(opts.map, 'U') as map_f:
            map_data = parse_mapping_file_to_dict(map_f)
        diff_dists, same_dists = clust_qual_ratio(dists, map_data,
                                                  opts.category)

        ratio = numpy.mean(diff_dists) / numpy.mean(same_dists)
        if opts.short:
            report = [str(ratio)]
        else:
            report = [
                "dissimilarity ratio between/within (large for clustered data):",
                str(ratio),
                "dissimilarities between clusters: mean, std, num:",
                '\t'.join(map(str, [numpy.mean(diff_dists),
                                    numpy.std(diff_dists),
                                    len(diff_dists)])),
                "dissimilarities within clusters: mean, std, num:",
                '\t'.join(map(str, [numpy.mean(same_dists),
                                    numpy.std(same_dists),
                                    len(same_dists)])),
            ]
        for line in report:
            outf.write(line + '\n')
    finally:
        # Never close stdout; only close a file this function opened.
        if outf is not sys.stdout:
            outf.close()
开发者ID:gxenomics,项目名称:qiime,代码行数:25,代码来源:cluster_quality.py
示例5: test_make_sample_node_table
def test_make_sample_node_table(self):
    """Sample node table is built correctly for equal and subset ids."""
    header = "#NodeID\tNodeType\tAbundance\tTimePt\tStudy\tTreatment\tDiet"
    sample_rows = [
        "s1\tsample\t148.0\t1\ta\tpre\thf",
        "s2\tsample\t156.0\t2\ta\tpre\tlf",
        "s3\tsample\t164.0\t3\ta\tpre\thf",
        "s4\tsample\t172.0\t4\ta\tpost\tlf",
        "s5\tsample\t180.0\t5\ta\tpost\tmf",
    ]

    # sample ids in the biom table equal those in the mapping file
    table = parse_biom_table(BIOM_STRING_1)
    mapping = parse_mapping_file_to_dict(MF_LINES.split("\n"))[0]
    observed = make_sample_node_table(table, mapping)
    self.assertEqual(observed, [header] + sample_rows)

    # sample ids in the biom table are a subset of the mapping file's ids
    table = parse_biom_table(BIOM_STRING_2)
    observed = make_sample_node_table(table, mapping)
    self.assertEqual(observed, [header] + sample_rows[2:])
开发者ID:Honglongwu,项目名称:qiime,代码行数:25,代码来源:test_make_bipartite_network.py
示例6: distance_matrix
def distance_matrix(input_path, column):
    """Build a formatted distance matrix from one mapping file column.

    inputs:
     input_path (file handler) - mapping file to parse
     column (str) - name of the numeric column to compare

    The distance between two samples is the absolute difference of their
    values in `column`. A missing column or a non-numeric value writes a
    message to stderr and exits with status 1.
    """
    data, comments = parse_mapping_file_to_dict(input_path)
    values = []
    sample_ids = []
    for sid in data:
        metadata = data[sid]
        if column not in metadata:
            stderr.write("\n\nNo column: '%s' in the mapping file. Existing columns are: %s\n\n" % (column, metadata.keys()))
            exit(1)
        try:
            values.append(float(metadata[column]))
        except ValueError:
            stderr.write("\n\nall the values in the column '%s' must be numeric but '%s' has '%s'\n\n"
                         % (column, sid, metadata[column]))
            exit(1)
        sample_ids.append(sid)

    as_row = array(values)
    # Subtracting the (n, 1) column from the (n,) row broadcasts to (n, n).
    as_column = reshape(as_row, (1, len(as_row))).T
    return format_distance_matrix(sample_ids, abs(as_row - as_column))
开发者ID:DDomogala3,项目名称:qiime,代码行数:28,代码来源:distance_matrix_from_mapping.py
示例7: multiple_file_DA_DESeq2
def multiple_file_DA_DESeq2(input_dir, output_dir, mapping_fp, mapping_category, subcategory_1, subcategory_2, DESeq2_diagnostic_plots):
    """perform DESeq2 negative binomial Wald differential abundance test on a directory of raw abundance OTU matrices

    For every non-hidden, non-directory entry of input_dir, the table at
    <input_dir>/<base name>.biom is loaded, annotated with the mapping file
    metadata, written to a temporary JSON BIOM file and handed to
    run_DESeq2. Results go to <output_dir>/DESeq2_DA_<base name>.txt and
    diagnostic plots to
    <output_dir>/DESeq2_diagnostic_plots_<base name>.pdf.
    output_dir is created if it does not exist.
    """
    if not exists(output_dir):
        makedirs(output_dir)
    # listdir() returns bare names, so each one must be joined with
    # input_dir before the directory test; testing the bare name would look
    # in the current working directory instead.
    file_names = [fname for fname in listdir(input_dir)
                  if not (fname.startswith('.') or
                          isdir(join(input_dir, fname)))]
    for fname in file_names:
        base_fname, _ = splitext(fname)
        original_fname = base_fname+'.biom'
        hdf5_infile = join(input_dir, original_fname)
        tmp_bt = load_table(hdf5_infile)
        # NOTE(review): parsed per table, so tables never share metadata
        # dicts; hoisting looks possible but depends on add_metadata --
        # confirm before changing.
        tmp_pmf, _ = parse_mapping_file_to_dict(mapping_fp)
        check_mapping_file_category(tmp_bt, mapping_fp, mapping_category, subcategory_1, subcategory_2)
        tmp_bt.add_metadata(tmp_pmf, 'sample')
        outfile = join(output_dir, 'DESeq2_DA_'+base_fname+'.txt')
        outfile_diagnostic = join(output_dir, 'DESeq2_diagnostic_plots_'+base_fname+'.pdf')
        with tempfile.NamedTemporaryFile(dir=get_qiime_temp_dir(),
                                         prefix='QIIME-differential-abundance-temp-table-',
                                         suffix='.biom') as temp_fh:
            temp_fh.write(tmp_bt.to_json('forR'))
            # flush so the JSON is on disk before the file is read by name
            temp_fh.flush()
            run_DESeq2(temp_fh.name, outfile, mapping_category, subcategory_1, subcategory_2, DESeq2_diagnostic_plots, outfile_diagnostic)
开发者ID:ElDeveloper,项目名称:qiime,代码行数:25,代码来源:differential_abundance.py
示例8: multiple_file_DA_fitZIG
def multiple_file_DA_fitZIG(input_dir, output_dir, mapping_fp, mapping_category, subcategory_1, subcategory_2):
    """perform metagenomeSeq's Zero Inflated Gaussian (ZIG) OTU differential abundance test on a directory of raw abundance OTU matrices

    For every non-hidden, non-directory entry of input_dir, the table at
    <input_dir>/<base name>.biom is loaded, annotated with the mapping file
    metadata, written to a temporary JSON BIOM file and handed to
    run_fitZIG. Results go to <output_dir>/fitZIG_DA_<base name>.txt.
    output_dir is created if it does not exist.
    """
    if not exists(output_dir):
        makedirs(output_dir)
    # listdir() returns bare names, so each one must be joined with
    # input_dir before the directory test; testing the bare name would look
    # in the current working directory instead.
    file_names = [fname for fname in listdir(input_dir)
                  if not (fname.startswith('.') or
                          isdir(join(input_dir, fname)))]
    for fname in file_names:
        base_fname, _ = splitext(fname)
        original_fname = base_fname+'.biom'
        hdf5_infile = join(input_dir, original_fname)
        tmp_bt = load_table(hdf5_infile)
        # NOTE(review): parsed per table, so tables never share metadata
        # dicts; hoisting looks possible but depends on add_metadata --
        # confirm before changing.
        tmp_pmf, _ = parse_mapping_file_to_dict(mapping_fp)
        check_mapping_file_category(tmp_bt, mapping_fp, mapping_category, subcategory_1, subcategory_2)
        tmp_bt.add_metadata(tmp_pmf, 'sample')
        # make temporary json biom version - R currently does not have hdf5
        outfile = join(output_dir, 'fitZIG_DA_'+base_fname+'.txt')
        with tempfile.NamedTemporaryFile(dir=get_qiime_temp_dir(),
                                         prefix='QIIME-differential-abundance-temp-table-',
                                         suffix='.biom') as temp_fh:
            temp_fh.write(tmp_bt.to_json('forR'))
            # flush so the JSON is on disk before the file is read by name
            temp_fh.flush()
            run_fitZIG(temp_fh.name, outfile, mapping_category, subcategory_1, subcategory_2)
开发者ID:ElDeveloper,项目名称:qiime,代码行数:25,代码来源:differential_abundance.py
示例9: print_info
def print_info(opts):
    """Print selected information about a mapping file.

    @opts: options from the command line. see --help
      opts.map        - path of the mapping file to read
      opts.print_meta - if true, print every metadata field name
      opts.print_ids  - if true, print every sample id
      opts.print_col  - if not None, print the distinct values found in
                        that column

    Raises IOError if the mapping file cannot be opened.
    """
    # import the data using qiime's utilities. the o variable
    # contains the dictionary with the map file contents.
    try:
        with open(opts.map) as map_f:
            o, c = parse_mapping_file_to_dict(map_f)
    except IOError:
        # Raising a plain string is itself a TypeError, which hid the real
        # problem; raise a proper exception instead.
        raise IOError("File may not exist: %s" % opts.map)

    # ---- print the metadata
    if opts.print_meta:
        s = set()
        print("Meta-Data: ")
        for k in o.keys():
            s = s.union(set(o[k].keys()))
        for r in s:
            print(" " + r)

    # ---- print the sample IDs
    if opts.print_ids:
        print("SampleIDs: ")
        for k in o.keys():
            print(" " + k)

    # ---- print a column from the map file
    if opts.print_col is not None:
        s = set()
        print("Column: " + opts.print_col + ": ")
        for k in o.keys():
            s = s.union(set([o[k][opts.print_col]]))
        for r in s:
            print(" " + r)
    return None
开发者ID:EESI,项目名称:MiDaPP,代码行数:35,代码来源:map_analyzer.py
示例10: choose_gradient_subset
def choose_gradient_subset(otu_table_f, map_f, category, num_total_samples):
    """Pick num_total_samples samples spread along a numeric gradient.

    Samples present in both the OTU table and the mapping file are ordered
    by their float value in `category`, and a subset is chosen from them by
    _choose_items_from_bins. Returns the filtered OTU table and the
    filtered mapping file as a tuple.

    Raises InvalidSubsetSize if more samples are requested than the OTU
    table contains.
    """
    otu_table = parse_biom_table(otu_table_f)
    mdm, _ = parse_mapping_file_to_dict(map_f)

    # Rewind so the mapping file can be read again below; inputs without
    # seek() (e.g. lists of lines) are left alone.
    try:
        map_f.seek(0)
    except AttributeError:
        pass

    if num_total_samples > len(otu_table.SampleIds):
        raise InvalidSubsetSize("Too many total samples (%d) were specified "
                                "as a gradient subset size. There are only %d "
                                "total samples to choose a subset from." %
                                (num_total_samples, len(otu_table.SampleIds)))

    # Only keep the sample IDs that are in both the mapping file and OTU
    # table, paired with their gradient value.
    gradient_pairs = []
    for samp_id, metadata in mdm.items():
        if samp_id in otu_table.SampleIds:
            gradient_pairs.append((samp_id, float(metadata[category])))

    # Order the samples along the gradient.
    gradient_pairs = sorted(gradient_pairs, key=lambda pair: pair[1])

    chosen = _choose_items_from_bins(gradient_pairs, num_total_samples)
    samp_ids_to_keep = [pair[0] for pair in chosen]

    assert len(samp_ids_to_keep) == num_total_samples, \
        "%d != %d" % (len(samp_ids_to_keep), num_total_samples)
    assert len(samp_ids_to_keep) == len(set(samp_ids_to_keep)), \
        "Duplicate sample IDs in subset"

    filtered_table = filter_samples_from_otu_table(otu_table,
                                                   samp_ids_to_keep, 0, inf)
    filtered_map = filter_mapping_file_from_mapping_f(map_f, samp_ids_to_keep)
    return (filtered_table, filtered_map)
开发者ID:gregcaporaso,项目名称:microbiogeo,代码行数:32,代码来源:simulate.py
示例11: setUp
def setUp(self):
    """Build the rarefaction and mapping fixtures shared by the tests."""
    # Rebuilt before every test; the fixtures are small, so the redundancy
    # is of little concern.
    rarefaction_lines = [
        '\tsequences per sample\titeration\tSam1\tSam2\tSam3\tSam4\tSam5\tSam6',
        'rare480.txt\t480\t0\t2.52800404052\t2.3614611247\t2.59867416108\t3.56970811181\t3.44800265895\t1.9433560517',
        'rare480.txt\t480\t1\t2.06375457238\t3.32293450758\t3.4189896645\t3.35312890712\t3.10763472113\t2.78155253726',
        'rare480.txt\t480\t2\t2.44788730109\t3.42464996459\t2.24541787295\t2.491419231\t2.60106690099\t5.40828403581',
        'rare480.txt\t480\t3\t5.1846120153\t3.67022675065\t1.54879964908\t2.8055801405\t4.3086171269\t3.87761898868',
        'rare910.txt\t910\t0\t2.67580703282\t1.72405794627\t2.15312863498\t2.4300954476\t3.7753658185\t3.36198860355',
        'rare910.txt\t910\t1\t4.10226466956\t2.24587945345\t3.02932964779\t2.98218513619\t3.73316846484\t1.85879566537',
        'rare910.txt\t910\t2\t1.65800670063\t2.42281993323\t3.02400997565\t3.271608097\t2.99265263795\t3.68802382515',
        'rare910.txt\t910\t3\t2.50976021964\t2.43976761056\t3.32119905587\t2.47487750248\t1.901408525\t3.42883742207',
        'rare500.txt\t500\t0\t3.42225118215\tn/a\t4.03758268426\t2.35344629448\t2.26690085385\t1.80164570104',
        'rare850.txt\t850\t0\t4.2389858006\t4.97464230229\t1.53451087057\t3.35785261181\t1.91658777533\t2.32583475424',
        'rare850.txt\t850\t1\t2.81445883827\tn/a\t2.54767461948\t1.38835207925\t3.70018890199\t1.57359105209',
        'rare850.txt\t850\t2\t2.9340493412\t3.95897035158\tn/a\t2.07761860166\t3.42393336685\t2.6927305603',
    ]
    self.rarefaction_file = rarefaction_lines
    self.rarefaction_data = parse_rarefaction(self.rarefaction_file)

    # One tuple per sample, tab-joined below into mapping file lines.
    sample_rows = [
        ('Sam1', '1xDose', 'ATCG', 'High', '31', 's1_desc'),
        ('Sam2', '1xDose', 'ACCG', 'Low', '67', 's2_desc'),
        ('Sam3', '2xDose', 'ACGT', 'Med', '21', 's3_desc'),
        ('Sam4', '2xDose', 'AACG', 'Low', '55', 's4_desc'),
        ('Sam5', 'Control', 'CGTC', 'Low', '67', 's5_desc'),
        ('Sam6', '1xDose', 'ACCT', 'Low', '55', 's6_desc'),
    ]
    mapping_lines = [
        '#SampleID\tDose\tLinkerPrimerSequence\tWeight\tTTD\tDescription',
        '#Comment Line',
    ]
    mapping_lines.extend('\t'.join(row) for row in sample_rows)
    self.mapping_file = mapping_lines
    self.mapping_data = parse_mapping_file_to_dict(self.mapping_file)[0]
开发者ID:Jorge-C,项目名称:qiime,代码行数:29,代码来源:test_compare_alpha_diversity.py
示例12: make_profiles_by_category
def make_profiles_by_category(mapping_fp, taxa_level, category):
    """ Creates a list of profiles for each unique value in the category

    Inputs:
     mapping_fp: filepath to the mapping file
     taxa_level: passed through to make_profile_by_sid
     category: mapping file category to split data over; 'SampleID'
      yields one single-profile list per sample

    Returns a dictionary keyed by the values on that category and a list of
    profiles as values
    """
    # Parse the mapping file; the context manager closes the handle even if
    # parsing raises.
    with open(mapping_fp, 'U') as map_f:
        mapping_data, comments = parse_mapping_file_to_dict(map_f)

    result = {}
    if category == 'SampleID':
        for sid in mapping_data:
            result[sid] = [make_profile_by_sid(mapping_data, sid, taxa_level)]
        return result

    # Get the unique values for the specified category
    values = set([mapping_data[sid][category] for sid in mapping_data])
    # Loop over each value in that category
    for value in values:
        # Re-open the mapping file to get sample ids that match the value
        with open(mapping_fp, 'U') as map_f:
            sids = sample_ids_from_metadata_description(map_f,
                                                        category+":"+value)
        # Create the list with all the profiles of the sample IDs in this
        # category value
        result[value] = [make_profile_by_sid(mapping_data, sid, taxa_level)
                         for sid in sids]
    return result
开发者ID:squirrelo,项目名称:SCGM,代码行数:34,代码来源:profile.py
示例13: alpha_diversity_by_sample_type
def alpha_diversity_by_sample_type(adiv_fs, mapping_f,
                                   mapping_category='Sample_Type'):
    """Group alpha diversity values by a mapping category and box-plot them.

    adiv_fs: iterable of open alpha diversity files; each has a header line
     followed by tab-separated (sample id, value) rows
    mapping_f: mapping file accepted by parse_mapping_file_to_dict
    mapping_category: mapping file column used to group the samples

    Returns (plotting_data, plot_fig), where plotting_data is a sorted list
    of (median, '<value> (n=<count>)', values) tuples.
    """
    mapping_dict, mapping_comments = parse_mapping_file_to_dict(mapping_f)

    sample_type_map = {}
    for samp_id, metadata in mapping_dict.items():
        sample_type_map[samp_id] = metadata[mapping_category]

    sample_type_to_adiv = defaultdict(list)
    for adiv_f in adiv_fs:
        rows = [line.strip().split('\t') for line in adiv_f if line.strip()]
        # rows[0] is the header line
        for samp_id, adiv in rows[1:]:
            sample_type = sample_type_map[samp_id]
            # TODO do we need to normalize this? how?
            sample_type_to_adiv[sample_type].append(float(adiv))

    plotting_data = sorted(
        [(median(v), '%s (n=%d)' % (k, len(v)), v)
         for k, v in sample_type_to_adiv.items()])

    distributions = [entry[2] for entry in plotting_data]
    labels = [entry[1] for entry in plotting_data]
    plot_fig = generate_box_plots(
        distributions, x_tick_labels=labels,
        x_label=mapping_category, y_label='Alpha Diversity',
        title='Alpha Diversity by %s' % mapping_category)
    tight_layout()
    return plotting_data, plot_fig
开发者ID:seangibbons,项目名称:isme14,代码行数:31,代码来源:alpha_diversity_by_sample_type.py
示例14: main
def main():
    """Write a distance matrix computed from mapping file metadata.

    opts.column names either a single numeric column (distances come from
    compute_distance_matrix_from_metadata) or two comma-separated columns
    holding latitude and longitude (distances come from
    calculate_dist_vincenty). The formatted matrix is written to
    opts.output_fp.

    Raises ValueError if a column is missing, a value is not numeric, or
    more than two columns are passed.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    data, comments = parse_mapping_file_to_dict(opts.input_path)
    column_headers = []

    if ',' not in opts.column:
        column_data = []
        column_name = opts.column
        for i in data:
            if column_name not in data[i]:
                raise ValueError(
                    "No column: '%s' in the mapping file. Existing columns are: %s" %
                    (column_name, data[i].keys()))
            try:
                column_data.append(float(data[i][column_name]))
            except ValueError:
                raise ValueError(
                    "All the values in the column '%s' must be numeric but '%s' has '%s'" %
                    (column_name, i, data[i][column_name]))
            column_headers.append(i)
        dtx_mtx = compute_distance_matrix_from_metadata(column_data)
    else:
        latitudes = []
        longitudes = []
        try:
            latitude, longitude = opts.column.split(',')
        except ValueError:
            raise ValueError(
                "This script accepts a maximum of 2 colums separated by comma and you passed: %s" %
                (opts.column))
        for i in data:
            if latitude not in data[i] or longitude not in data[i]:
                raise ValueError(
                    "One of these columns or both do not exist: '%s' or '%s' in the mapping file. Existing columns are: %s" %
                    (latitude, longitude, data[i].keys()))
            try:
                latitudes.append(float(data[i][latitude]))
                longitudes.append(float(data[i][longitude]))
            except ValueError:
                # Report the values of both columns. The previous message
                # used a name that only exists in the single-column branch,
                # so it raised NameError instead of this error.
                raise ValueError(
                    "All the values in the columns '%s' & '%s' must be numeric but '%s' has '%s' & '%s'" %
                    (latitude, longitude, i, data[i][latitude],
                     data[i][longitude]))
            column_headers.append(i)
        dtx_mtx = calculate_dist_vincenty(latitudes, longitudes)

    dtx_txt = format_distance_matrix(column_headers, dtx_mtx)
    with open(opts.output_fp, 'w') as f:
        f.write(dtx_txt)
开发者ID:TheSchwa,项目名称:qiime,代码行数:57,代码来源:distance_matrix_from_mapping.py
示例15: test_sampleId_pairs
def test_sampleId_pairs(self):
    """Test that sampleId_pairs returns the correct combos/sampleId's."""
    def canonical(vps, sids):
        # Order-independent form of the (value pairs, sample id pairs)
        # result: each value stays attached to its own sample ids, while
        # the order of pairs, of members within a pair and of ids within a
        # group is normalized away.
        return sorted(
            tuple(sorted(zip(vp, [tuple(sorted(group)) for group in sid])))
            for vp, sid in zip(vps, sids))

    # expected values
    dose_vps = \
        [('1xDose', '2xDose'), ('1xDose', 'Control'),
         ('2xDose', 'Control')]
    ttd_vps = \
        [('31', '21'), ('31', '55'), ('31', '67'), ('21', '55'),
         ('21', '67'), ('55', '67')]
    dose_sids = \
        [(['Sam1', 'Sam2', 'Sam6'], ['Sam3', 'Sam4']),
         (['Sam1', 'Sam2', 'Sam6'], ['Sam5']),
         (['Sam3', 'Sam4'], ['Sam5'])]
    ttd_sids = \
        [(['Sam1'], ['Sam3']),
         (['Sam1'], ['Sam4', 'Sam6']),
         (['Sam1'], ['Sam2', 'Sam5']),
         (['Sam3'], ['Sam4', 'Sam6']),
         (['Sam3'], ['Sam2', 'Sam5']),
         (['Sam4', 'Sam6'], ['Sam2', 'Sam5'])]

    # observed values
    obs_dose_sids, obs_dose_vps = sampleId_pairs(self.mapping_data,
                                                 self.rarefaction_data,
                                                 'Dose')
    obs_ttd_sids, obs_ttd_vps = sampleId_pairs(self.mapping_data,
                                               self.rarefaction_data, 'TTD')

    # order is unimportant and depends on way presented in mf. list.sort()
    # returns None, so comparing its results always passed; compare
    # canonical forms instead.
    self.assertEqual(canonical(dose_vps, dose_sids),
                     canonical(obs_dose_vps, obs_dose_sids))
    self.assertEqual(canonical(ttd_vps, ttd_sids),
                     canonical(obs_ttd_vps, obs_ttd_sids))

    # check errors when no samples had this category
    self.assertRaises(ValueError, sampleId_pairs, self.mapping_data,
                      self.rarefaction_data, 'DNE')

    # check no error if map file has more sampleids than rarefaction data
    superset_mf = \
        ['#SampleID\tDose\tLinkerPrimerSequence\tWeight\tTTD\tDescription',
         '#Comment Line',
         'Sam1\t1xDose\tATCG\tHigh\t31\ts1_desc',
         'Sam2\t1xDose\tACCG\tLow\t67\ts2_desc',
         'Sam3\t2xDose\tACGT\tMed\t21\ts3_desc',
         'Sam4\t2xDose\tAACG\tLow\t55\ts4_desc',
         'Sam5\tControl\tCGTC\tLow\t67\ts5_desc',
         'Sam6\t1xDose\tACCT\tLow\t55\ts6_desc',
         'Sam7\t4xDose\tACCT\tLow\t55\ts7_desc',
         'Sam8\t3xDose\tACCT\tLow\t55\ts8_desc',
         'Sam9\t1xDose\tACCT\tLow\t55\ts9_desc']
    # (mf, comments)
    superset_mf = parse_mapping_file_to_dict(superset_mf)[0]
    obs_dose_sids, obs_dose_vps = sampleId_pairs(superset_mf,
                                                 self.rarefaction_data,
                                                 'Dose')
    self.assertEqual(canonical(dose_vps, dose_sids),
                     canonical(obs_dose_vps, obs_dose_sids))
开发者ID:ElDeveloper,项目名称:qiime,代码行数:57,代码来源:test_compare_alpha_diversity.py
示例16: test_parse_mapping_file_to_dict
def test_parse_mapping_file_to_dict(self):
    """parse_mapping_file_to_dict functions as expected"""
    # Input mixes a header, comment lines, a blank line and data rows with
    # padding whitespace around the values.
    s1 = ['#sample\ta\tb', '#comment line to skip',
          'x \t y \t z ', ' ', '#more skip', 'i\tj\tk']
    mapdict, comments = parse_mapping_file_to_dict(s1)
    expdict = {'x': {'a': 'y', 'b': 'z'}, 'i': {'a': 'j', 'b': 'k'}}
    self.assertEqual(mapdict, expdict)
    self.assertEqual(comments, ['comment line to skip', 'more skip'])
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:11,代码来源:test_parse.py
示例17: compare_alpha_diversities
def compare_alpha_diversities(rarefaction_lines, mapping_lines,
                              category, depth):
    """Compare alpha diversities between the values of a category.

    inputs:
     rarefaction_lines - rarefaction file lines which give scores for
      various rarefactions and depths
     mapping_lines - mapping file lines that have ID's and the categories
      the ID's fall in
     category - the category to be compared, is a string
     depth - the depth of the rarefaction file to use, is an integer
    outputs:
     results - a nested dictionary which specifies the category as
      the top level key, and as its value, a dictionary which gives the
      result of the t_two_sample test for each pair of values in the
      specified category, keyed by (value, value) string tuples
    """
    rarefaction_data = parse_rarefaction(rarefaction_lines)
    mapping_data = parse_mapping_file_to_dict(mapping_lines)[0]

    value_pairs = make_value_pairs_from_category(mapping_data, category)
    category_values_Ids = make_category_values_Id_dict(mapping_data,
                                                       category)
    SampleId_pairs = map_category_value_pairs_to_Ids(value_pairs,
                                                     category_values_Ids)
    map_from_Id_to_col = make_SampleIds_rarefaction_columns_dict(
        rarefaction_data)
    reduced_rarefaction_mtx = extract_rarefaction_scores_at_depth(
        depth, rarefaction_data)

    stats_by_pair = {}
    for idx, id_pair in enumerate(SampleId_pairs):
        first_scores = convert_SampleIds_to_rarefaction_mtx(
            id_pair[0], reduced_rarefaction_mtx, map_from_Id_to_col)
        second_scores = convert_SampleIds_to_rarefaction_mtx(
            id_pair[1], reduced_rarefaction_mtx, map_from_Id_to_col)
        stat = t_two_sample(first_scores, second_scores)
        # value_pairs is indexed in step with SampleId_pairs
        pair_key = (str(value_pairs[idx][0]), str(value_pairs[idx][1]))
        stats_by_pair[pair_key] = stat
    return {category: stats_by_pair}
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:53,代码来源:compare_alpha_diversity.py
示例18: load_data
def load_data(biom_path, map_path):
    """Load the data matrix and labels for a biom/mapping file pair.

    @biom_path - path handed to extract_data
    @map_path - path of the mapping file to parse
    Returns (data, labn, labs, classes): `data` comes from extract_data and
    the three label values from extract_labels on the parsed mapping.
    """
    # The context manager closes the mapping file even if parsing raises.
    with open(map_path, "U") as map_f:
        obj, comm = parse_mapping_file_to_dict(map_f)
    labn, labs, classes = extract_labels(obj)
    data = extract_data(biom_path)
    return data, labn, labs, classes
开发者ID:gditzler,项目名称:MiDaPP,代码行数:13,代码来源:hmp.py
示例19: DA_fitZIG
def DA_fitZIG(input_path, out_path, mapping_fp, mapping_category, subcategory_1, subcategory_2):
    """Run metagenomeSeq's Zero Inflated Gaussian (ZIG) test on one table.

    The table at input_path is annotated with the sample metadata from
    mapping_fp, written to a temporary JSON BIOM file and handed to
    run_fitZIG, which receives out_path as its output location.
    """
    table = load_table(input_path)
    sample_metadata, _ = parse_mapping_file_to_dict(mapping_fp)
    check_mapping_file_category(table, mapping_fp, mapping_category,
                                subcategory_1, subcategory_2)
    table.add_metadata(sample_metadata, 'sample')

    temp_file = tempfile.NamedTemporaryFile(
        dir=get_qiime_temp_dir(),
        prefix='QIIME-differential-abundance-temp-table-',
        suffix='.biom')
    with temp_file as temp_fh:
        temp_fh.write(table.to_json('forR'))
        # flush so the JSON is on disk before the file is read by name
        temp_fh.flush()
        run_fitZIG(temp_fh.name, out_path, mapping_category,
                   subcategory_1, subcategory_2)
开发者ID:ElDeveloper,项目名称:qiime,代码行数:13,代码来源:differential_abundance.py
示例20: _collate_gradient_pcoa_plot_data
def _collate_gradient_pcoa_plot_data(coords_f, map_f, category):
    """Collect the first two coordinates of each sample in gradient order.

    Samples are ordered by the float value of `category` in the mapping
    file (ties broken by sample id). Returns three parallel lists: the
    first coordinates, the second coordinates and the gradient values.
    """
    pc_data = parse_coords(coords_f)
    coords_d = dict(zip(pc_data[0], pc_data[1]))

    # Build list of (gradient value, sid) tuples.
    map_dict = parse_mapping_file_to_dict(map_f)[0]
    by_gradient = [(float(md[category]), sid)
                   for sid, md in map_dict.items()]
    by_gradient.sort()

    xs = []
    for _, sid in by_gradient:
        xs.append(coords_d[sid][0])
    ys = []
    for _, sid in by_gradient:
        ys.append(coords_d[sid][1])
    gradient = [pair[0] for pair in by_gradient]
    return xs, ys, gradient
开发者ID:gregcaporaso,项目名称:microbiogeo,代码行数:14,代码来源:simulate.py
注:本文中的qiime.parse.parse_mapping_file_to_dict函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论