本文整理汇总了 Python 中 qiime.parse.fields_to_dict 函数的典型用法代码示例。如果您正苦于以下问题：Python fields_to_dict 函数的具体用法？Python fields_to_dict 怎么用？Python fields_to_dict 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了fields_to_dict函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: map_otu_map_files
def map_otu_map_files(otu_files, failures_file=None):
    """Chain a series of OTU maps into a single map.

    The first entry of otu_files is parsed as the starting map. Every later
    entry is parsed and folded into the running result with
    expand_otu_map_seq_ids. When failures_file is supplied, the merged map is
    finally passed through expand_failures.

    Arguments:
        otu_files - sequence of OTU map inputs accepted by fields_to_dict;
         it must contain at least one entry
        failures_file - optional input handed to expand_failures
    """
    # delim=None makes fields_to_dict split on any whitespace, so maps that
    # mix tabs and spaces still parse.
    merged = fields_to_dict(otu_files[0], delim=None)
    for later_file in otu_files[1:]:
        merged = expand_otu_map_seq_ids(
            fields_to_dict(later_file, delim=None), merged)
    if not failures_file:
        return merged
    return expand_failures(failures_file, merged)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:10,代码来源:pick_otus.py
示例2: rewrite_otu_table_with_taxonomy
def rewrite_otu_table_with_taxonomy(taxon_lines, otu_lines, id_map_lines=None,
                                    outfile=stdout):
    """Write the OTU table to outfile with a taxonomy column appended.

    Arguments:
        taxon_lines - taxonomy assignment lines, parsed with fields_to_dict
        otu_lines - lines of the OTU table to rewrite
        id_map_lines - optional tab-separated lines (two columns each) used
         to translate taxonomy ids; only ids present in the map are kept
        outfile - writable file-like object that receives the new table

    The '#OTU ID' header line gains a 'Consensus Lineage' column, other
    comment lines are copied unchanged, and each data line gets the taxonomy
    of its OTU id, or the string 'None' when no taxonomy is known.
    """
    # Keys sometimes carry extra fields after the OTU id; keep only the
    # first whitespace-delimited token of each key.
    taxonomy = dict((otu_key.split()[0], lineage)
                    for otu_key, lineage
                    in fields_to_dict(taxon_lines).items())
    taxonomy = fix_taxonomy_delimiters(taxonomy)

    if id_map_lines:
        id_map = dict(map(strip, entry.split('\t')) for entry in id_map_lines)
        remapped = dict((id_map[old_id], lineage)
                        for old_id, lineage in taxonomy.items()
                        if old_id in id_map)
        assert remapped != taxonomy
        taxonomy = remapped

    for line in otu_lines:
        # Normalise so every line ends with exactly one newline to strip.
        if not line.endswith('\n'):
            line += '\n'
        without_newline = line[:-1]
        if line.startswith('#OTU ID'):
            outfile.write(without_newline + '\tConsensus Lineage\n')
            continue
        if line.startswith('#'):
            outfile.write(line)
            continue
        # Data lines must contain at least one tab; the id is the first field.
        otu_id, _ = line.split('\t', 1)
        lineage = taxonomy.get(otu_id, 'None')
        outfile.write(without_newline + '\t' + lineage + '\n')
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:30,代码来源:add_taxa.py
示例3: main
def main():
    """Run a Mantel test on every pair of input distance matrices.

    The comma-separated paths in opts.input_dms are compared pairwise and
    one tab-separated row per pair is written to opts.output_fp, after the
    module-level comment text and a column header. Pairs with fewer than two
    labels after make_compatible_distance_matrices get a 'Too few samples'
    row instead of a p-value.

    Every file is opened in a with block so its handle is released even when
    parsing or the test itself raises.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    sample_id_map = None
    if opts.sample_id_map_fp:
        with open(opts.sample_id_map_fp, "U") as sample_id_map_f:
            # Only the first value of each mapping line is used.
            sample_id_map = dict(
                (k, v[0])
                for k, v in fields_to_dict(sample_id_map_f).items())

    input_dm_fps = opts.input_dms.split(',')
    num_iterations = opts.num_iterations

    with open(opts.output_fp, 'w') as output_f:
        output_f.write(comment)
        output_f.write('DM1\tDM2\tNumber of entries\tMantel p-value\n')
        for i, fp1 in enumerate(input_dm_fps):
            # Each unordered pair is visited once.
            for fp2 in input_dm_fps[i+1:]:
                with open(fp1, 'U') as dm1_f:
                    with open(fp2, 'U') as dm2_f:
                        (dm1_labels, dm1), (dm2_labels, dm2) =\
                            make_compatible_distance_matrices(
                                parse_distmat(dm1_f),
                                parse_distmat(dm2_f),
                                lookup=sample_id_map)
                if len(dm1_labels) < 2:
                    output_f.write('%s\t%s\t%d\tToo few samples\n' %
                                   (fp1, fp2, len(dm1_labels)))
                    continue
                p = mantel(dm1, dm2, n=num_iterations)
                p_str = format_p_value_for_num_iters(p, num_iterations)
                output_f.write('%s\t%s\t%d\t%s\n' %
                               (fp1, fp2, len(dm1_labels), p_str))
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:29,代码来源:compare_distance_matrices.py
示例4: test_parallel_rdp_taxonomy_assigner
def test_parallel_rdp_taxonomy_assigner(self):
    """ParallelRdpTaxonomyAssigner writes one assignment per input sequence"""
    rdp_params = dict(
        id_to_taxonomy_fp=self.id_to_taxonomy_file.name,
        rdp_max_memory=1500,
        rdp_classifier_fp=getenv('RDP_JAR_PATH'),
        confidence=0.80,
        reference_seqs_fp=self.reference_seqs_file.name)
    assigner = ParallelRdpTaxonomyAssigner()
    run_result = assigner(self.tmp_seq_filepath,
                          self.test_out,
                          rdp_params,
                          job_prefix='RDPTEST',
                          poll_directly=True,
                          suppress_submit_jobs=False)

    # Parse the first assignments file found in the output directory.
    assignment_fps = glob(join(self.test_out, '*_tax_assignments.txt'))
    results = fields_to_dict(open(assignment_fps[0], 'U'))

    # Basic sanity checks: we should get the same number of sequences as our
    # input with the same seq IDs, and a taxonomy string plus a confidence
    # value for each seq.
    self.assertEqual(len(results), 2)
    for seq_id in ('X67228 some description', 'EF503697'):
        self.assertEqual(len(results[seq_id]), 2)
开发者ID:DDomogala3,项目名称:qiime,代码行数:25,代码来源:test_assign_taxonomy.py
示例5: _parse_taxonomic_information
def _parse_taxonomic_information(tax_map_lines, taxonomic_levels=8):
"""Parses a taxonomy mapping file to return mapping of seq ID to taxonomy.
Returns a dictionary with sequence ID as the key and a list containing the
taxonomy at each level. Empty taxonomic levels (i.e. ';;' or levels
containing only whitespace) are ignored.
Arguments:
tax_map_lines - list of lines from the taxonomy mapping file (the
result of calling readlines() on the open file handle)
taxonomic_levels - the number of taxonomic levels in the taxonomy
strings found in the taxonomy mapping file. All taxonomy strings
must have this number of levels (excluding empty taxonomic levels)
"""
tax_info = {}
if tax_map_lines[0] != \
"ID Number\tGenBank Number\tNew Taxon String\tSource\n":
raise ValueError("The taxonomy map file appears to be invalid "
"because it is either missing the header or has a "
"corrupt header.")
for seq_id, seq_info in fields_to_dict(tax_map_lines[1:]).items():
if len(seq_info) != 3:
raise ValueError("The taxonomy map file appears to be invalid "
"because it does not have exactly 4 columns.")
# Split at each level and remove any empty levels or levels that
# contain only whitespace.
taxonomy = [level for level in seq_info[1].split(';') \
if level.strip() != '']
if len(taxonomy) != taxonomic_levels:
raise ValueError("Encountered invalid taxonomy '%s'. Valid "
"taxonomy strings must have %d levels separated by "
"semicolons." % (seq_info[1], taxonomic_levels))
tax_info[seq_id] = taxonomy
return tax_info
开发者ID:infotroph,项目名称:nested_reference_otus,代码行数:35,代码来源:summarize_taxonomic_agreement.py
示例6: get_seqs_to_keep_lookup_from_otu_map
def get_seqs_to_keep_lookup_from_otu_map(seqs_to_keep_f):
    """Build a lookup of every sequence id listed in an OTU map.

    seqs_to_keep_f is parsed with fields_to_dict. The result is a dict whose
    keys are all the sequence ids found in the map's values; every value is
    None.
    """
    lookup = {}
    for member_ids in fields_to_dict(seqs_to_keep_f).values():
        for seq_id in member_ids:
            lookup[seq_id] = None
    return lookup
开发者ID:Honglongwu,项目名称:qiime,代码行数:7,代码来源:filter_fasta.py
示例7: test_write_otu_map_prefix
def test_write_otu_map_prefix(self):
    """write_otu_map output parses back with the OTU prefix applied"""
    map_fp = self.tmp_fp1
    write_otu_map(self.otu_map1, map_fp, "my.otu.")
    observed = fields_to_dict(open(map_fp))
    # Register the temp file for removal once it has been read back.
    self.files_to_remove.append(map_fp)
    expected = dict([
        ("my.otu.0", ["seq1", "seq2", "seq5"]),
        ("my.otu.1", ["seq3", "seq4"]),
        ("my.otu.2", ["seq6", "seq7", "seq8"]),
    ])
    self.assertEqual(observed, expected)
开发者ID:Gaby1212,项目名称:qiime,代码行数:8,代码来源:test_format.py
示例8: _generate_taxonomic_agreement_summary
def _generate_taxonomic_agreement_summary(otu_map_lines, tax_map_lines,
                                          taxonomic_levels=8):
    """Computes a summary of taxonomic agreement between ref and its seqs.

    Returns a dictionary with OTU ID as the key. The value is a four-element
    list:
        [0] the size of the OTU (the number of seqs in the OTU, including
            the reference)
        [1] the list of sequence identifiers in the OTU; the reference
            sequence ID is always first, followed by the other members in
            the order they appear in the OTU map
        [2] a list of percent agreement at each taxonomic level (floats)
        [3] a list, per level, of all taxonomic values encountered, with the
            reference's value listed first

    Elements [2] and [3] always have the same length because each holds one
    entry per taxonomic level of the reference sequence.

    Arguments:
        otu_map_lines - list of lines in the OTU map (the result of calling
         readlines() on the open file handle)
        tax_map_lines - list of lines from the taxonomy mapping file (the
         result of calling readlines() on the open file handle)
        taxonomic_levels - the number of taxonomic levels in the taxonomy
         strings found in the taxonomy mapping file. All taxonomy strings
         must have this number of levels to prevent inconsistent results in
         the summary
    """
    tax_map = _parse_taxonomic_information(tax_map_lines, taxonomic_levels)
    otu_map = fields_to_dict(otu_map_lines)

    taxonomic_agreement = {}
    for otu_id, seq_ids in otu_map.items():
        otu_size = len(seq_ids)
        percent_agreement = []
        levels_seen = []
        taxonomic_agreement[otu_id] = [otu_size, seq_ids, percent_agreement,
                                       levels_seen]

        # The reference sequence is always the first one listed in the OTU
        # map.
        ref_seq_tax = tax_map[seq_ids[0]]

        # Calculate percent agreement for each taxonomic level. If the OTU
        # only contains a reference sequence, the agreement is 100%. Also
        # track all unique taxonomic values encountered for each level; the
        # reference comes first in seq_ids, so its value is recorded first.
        for level_idx, ref_level in enumerate(ref_seq_tax):
            agreement_count = 0
            encountered_levels = []
            for seq_id in seq_ids:
                seq_level = tax_map[seq_id][level_idx]
                if ref_level == seq_level:
                    agreement_count += 1
                if seq_level not in encountered_levels:
                    encountered_levels.append(seq_level)
            # Force float division: under Python 2 without
            # "from __future__ import division" int / int truncates, which
            # would collapse every partial agreement to 0.
            percent_agreement.append(
                (float(agreement_count) / otu_size) * 100)
            levels_seen.append(encountered_levels)
    return taxonomic_agreement
开发者ID:infotroph,项目名称:nested_reference_otus,代码行数:57,代码来源:summarize_taxonomic_agreement.py
示例9: test_write_otu_map_prefix
def test_write_otu_map_prefix(self):
    """Round-trip an OTU map written with the 'my.otu.' prefix"""
    out_fp = self.tmp_fp1
    write_otu_map(self.otu_map1, out_fp, 'my.otu.')
    actual = fields_to_dict(open(out_fp))
    self.files_to_remove.append(out_fp)
    # Expected OTU ids paired positionally with their member sequences.
    exp = dict(zip(
        ('my.otu.0', 'my.otu.1', 'my.otu.2'),
        (['seq1', 'seq2', 'seq5'],
         ['seq3', 'seq4'],
         ['seq6', 'seq7', 'seq8'])))
    self.assertEqual(actual, exp)
开发者ID:oscaredd,项目名称:qiime,代码行数:10,代码来源:test_format.py
示例10: main
def main():
    """Run the selected Mantel-family comparison and write its results.

    Depending on opts.method ('mantel', 'partial_mantel' or 'mantel_corr')
    the matching results file is written into opts.output_dir. For
    'mantel_corr' each correlogram figure is saved there as well. Any other
    method value is reported through option_parser.error; previously it
    crashed with a NameError on the unbound output handle.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist. Exception (rather
    # than a bare except) keeps KeyboardInterrupt and SystemExit from being
    # reported as a directory problem.
    try:
        if not path.exists(opts.output_dir):
            create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    sample_id_map = None
    if opts.sample_id_map_fp:
        with open(opts.sample_id_map_fp, "U") as sample_id_map_f:
            # Only the first value of each mapping line is used.
            sample_id_map = dict(
                (k, v[0])
                for k, v in fields_to_dict(sample_id_map_f).items())

    input_dm_fps = opts.input_dms
    # NOTE(review): these handles are left for the garbage collector because
    # it is not visible here whether parse_distmat reads its input eagerly -
    # confirm before closing them explicitly.
    distmats = [parse_distmat(open(dm_fp, "U")) for dm_fp in input_dm_fps]

    if opts.method == "mantel":
        results_fp = path.join(opts.output_dir, "mantel_results.txt")
        with open(results_fp, "w") as output_f:
            output_f.write(
                run_mantel_test(
                    "mantel",
                    input_dm_fps,
                    distmats,
                    opts.num_permutations,
                    opts.tail_type,
                    comment_mantel_pmantel,
                    sample_id_map=sample_id_map,
                )
            )
    elif opts.method == "partial_mantel":
        results_fp = path.join(opts.output_dir, "partial_mantel_results.txt")
        with open(results_fp, "w") as output_f:
            output_f.write(
                run_mantel_test(
                    "partial_mantel",
                    input_dm_fps,
                    distmats,
                    opts.num_permutations,
                    opts.tail_type,
                    comment_mantel_pmantel,
                    control_dm_fp=opts.control_dm,
                    control_dm=parse_distmat(open(opts.control_dm, "U")),
                    sample_id_map=sample_id_map,
                )
            )
    elif opts.method == "mantel_corr":
        results_fp = path.join(opts.output_dir,
                               "mantel_correlogram_results.txt")
        with open(results_fp, "w") as output_f:
            result_str, correlogram_fps, correlograms = \
                run_mantel_correlogram(
                    input_dm_fps, distmats, opts.num_permutations,
                    comment_corr, opts.alpha, sample_id_map=sample_id_map)
            output_f.write(result_str)
        for corr_fp, corr in zip(correlogram_fps, correlograms):
            corr.savefig(path.join(opts.output_dir,
                                   corr_fp + opts.image_type),
                         format=opts.image_type)
    else:
        option_parser.error("Unrecognized method '%s'." % opts.method)
开发者ID:ranjit58,项目名称:qiime,代码行数:52,代码来源:compare_distance_matrices.py
示例11: test_fields_to_dict
def test_fields_to_dict(self):
    """fields_to_dict should make first field key, rest val"""
    # Cluster id followed by its member ids (output from cd-hit).
    clusters = [
        ('0', ['R27DLI_4812', 'R27DLI_600', 'R27DLI_727', 'U1PLI_403',
               'U1PLI_8969', 'U1PLI_9080', 'U1PLI_9526', 'W3Cecum_6642',
               'W3Cecum_8992']),
        ('1', ['U1PLI_7889']),
        ('2', ['W3Cecum_4858']),
        ('3', ['R27DLI_3243', 'R27DLI_4562', 'R27DLI_6828', 'R27DLI_9097',
               'U1PLI_2780', 'U1PLI_67', 'U9PSI_10475', 'U9PSI_4341',
               'W3Cecum_5191']),
    ]
    # Join the fields with an explicit '\t' instead of relying on literal tab
    # characters inside a multi-line string, which editors and copy/paste
    # silently turn into spaces. fields_to_dict is called with its default
    # delimiter (presumably a tab - confirm against qiime.parse).
    test_data = ['\t'.join([cluster_id] + members)
                 for cluster_id, members in clusters]
    obs = fields_to_dict(test_data)
    exp = {'0': ['R27DLI_4812', 'R27DLI_600', 'R27DLI_727', 'U1PLI_403',
                 'U1PLI_8969', 'U1PLI_9080', 'U1PLI_9526', 'W3Cecum_6642',
                 'W3Cecum_8992'],
           '1': ['U1PLI_7889'],
           '2': ['W3Cecum_4858'],
           '3': ['R27DLI_3243', 'R27DLI_4562', 'R27DLI_6828', 'R27DLI_9097',
                 'U1PLI_2780', 'U1PLI_67', 'U9PSI_10475', 'U9PSI_4341',
                 'W3Cecum_5191']}
    self.assertEqual(obs, exp)
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:13,代码来源:test_parse.py
示例12: main
def main():
    """Load the alignment and OTU map, then run filter_samples on them.

    The input FASTA and OTU map are read from the paths given on the command
    line. The output directory defaults to './' and is created when
    opts.output_dir names a directory that does not exist yet. A failure to
    create it now propagates as the underlying OSError; previously the error
    was swallowed and dir_path was left unbound for the later call.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    data = {}

    fasta_file = opts.input_fasta_fp

    # Load the input alignment.
    # NOTE(review): this handle is left open because it is not visible here
    # whether parse_fasta / from_fasta_records consume it eagerly - confirm.
    data['aln'] = SequenceCollection.from_fasta_records(
        parse_fasta(open(fasta_file)), DNA)

    # Load the otu file; the with block closes the handle even if parsing
    # raises.
    otu_path = opts.otu_map_fp
    with open(otu_path, 'U') as otu_f:
        otus = fields_to_dict(otu_f)
    data['otus'] = otus

    # Determine which samples to extract from representative seqs and from
    # the otus file.
    # NOTE(review): prefs is only bound when samples_to_extract is given;
    # presumably that option is required - confirm against script_info.
    if opts.samples_to_extract:
        prefs = process_extract_samples(opts.samples_to_extract)

    # Base name of the input file, up to its first '.'.
    filepath = opts.input_fasta_fp
    filename = filepath.strip().split('/')[-1]
    filename = filename.split('.')[0]

    if opts.output_dir:
        if not os.path.exists(opts.output_dir):
            os.mkdir(opts.output_dir)
        dir_path = opts.output_dir
    else:
        dir_path = './'

    try:
        action = filter_samples
    except NameError:
        action = None
    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(prefs, data, dir_path, filename)
开发者ID:AhmedAbdelfattah,项目名称:qiime,代码行数:47,代码来源:filter_otus_by_sample.py
示例13: main
def main():
    """Build an OTU table from an OTU map and write it out.

    The table goes to opts.output_fp when given, otherwise to stdout.
    Taxonomy is attached when opts.taxonomy_fname is set, and OTUs listed in
    opts.exclude_otus_fp are removed first. An output file opened here is
    always closed, even when building the table fails; stdout is left open.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    if opts.output_fp:
        outfile = open(opts.output_fp, 'w')
    else:
        outfile = stdout

    try:
        # NOTE(review): the taxonomy and exclude-list handles are left open
        # because it is not visible here whether parse_taxonomy / remove_otus
        # read their input eagerly - confirm before closing them.
        if not opts.taxonomy_fname:
            otu_to_taxonomy = None
        else:
            infile = open(opts.taxonomy_fname, 'U')
            otu_to_taxonomy = parse_taxonomy(infile)

        with open(opts.otu_map_fp, 'U') as otu_map_f:
            otu_to_seqid = fields_to_dict(otu_map_f)

        if exclude_otus_fp:
            otu_to_seqid = remove_otus(otu_to_seqid,
                                       open(exclude_otus_fp, 'U'))

        outfile.write(make_otu_table(otu_to_seqid, otu_to_taxonomy))
    finally:
        # Never close the process-wide stdout stream.
        if outfile is not stdout:
            outfile.close()
开发者ID:Ecogenomics,项目名称:FrankenQIIME,代码行数:21,代码来源:make_otu_table.py
示例14: test_parallel_blast_taxonomy_assigner
def test_parallel_blast_taxonomy_assigner(self):
    """ParallelBlastTaxonomyAssigner writes one assignment per input seq"""
    blast_params = dict(
        id_to_taxonomy_fp=self.id_to_taxonomy_file.name,
        blastmat_dir=None,
        e_value=0.001,
        blast_db=None,
        reference_seqs_fp=self.reference_seqs_file.name)
    assigner = ParallelBlastTaxonomyAssigner()
    run_result = assigner(self.tmp_seq_filepath,
                          self.test_out,
                          blast_params,
                          job_prefix='BTATEST',
                          poll_directly=True,
                          suppress_submit_jobs=False)

    # Parse the first assignments file found in the output directory.
    assignment_fps = glob(join(self.test_out, '*_tax_assignments.txt'))
    results = fields_to_dict(open(assignment_fps[0], 'U'))

    # Basic sanity checks: we should get the same number of sequences as our
    # input with the same seq IDs, and the expected number of fields for
    # each seq.
    self.assertEqual(len(results), 6)
    for seq_id in ('s1', 's6'):
        self.assertEqual(len(results[seq_id]), 3)
开发者ID:DDomogala3,项目名称:qiime,代码行数:24,代码来源:test_assign_taxonomy.py
示例15: test_parallel_uclust_taxonomy_assigner
def test_parallel_uclust_taxonomy_assigner(self):
    """ParallelUclustConsensusTaxonomyAssigner writes one row per input seq"""
    uclust_params = dict(
        id_to_taxonomy_fp=self.id_to_taxonomy_file.name,
        reference_seqs_fp=self.reference_seqs_file.name,
        min_consensus_fraction=0.51,
        similarity=0.90,
        uclust_max_accepts=3)
    assigner = ParallelUclustConsensusTaxonomyAssigner()
    run_result = assigner(self.tmp_seq_filepath,
                          self.test_out,
                          uclust_params,
                          job_prefix='UTATEST',
                          poll_directly=True,
                          suppress_submit_jobs=False)

    # Parse the first assignments file found in the output directory.
    assignment_fps = glob(join(self.test_out, '*_tax_assignments.txt'))
    results = fields_to_dict(open(assignment_fps[0], 'U'))

    # Basic sanity checks: we should get the same number of sequences as our
    # input with the same seq IDs, and the expected number of fields for
    # each seq.
    self.assertEqual(len(results), 6)
    for seq_id in ('s1', 's6'):
        self.assertEqual(len(results[seq_id]), 3)
开发者ID:ElDeveloper,项目名称:qiime,代码行数:24,代码来源:test_assign_taxonomy.py
示例16: compute_sequence_stats
def compute_sequence_stats(fasta_lines, tax_map_lines, unknown_keywords=None):
    """Generates statistics for the input sequences.

    Returns a dictionary with sequence ID as the key, and a list of
    statistics as the value. The statistics (in order of placement in the
    list) are:
        relevant taxonomic depth (integer)
        sequence read length (integer)
        sequence data (string)

    For example, the relevant taxonomic depth for the taxonomy string 'A;B;C'
    would be 3. If unknown_keywords is supplied, any taxonomic level matching
    a value in unknown_keywords will be ignored. Empty or whitespace-only
    taxonomic levels will also be ignored in the count. For example, if 'Z'
    is an unknown keyword, the relevant taxonomic depth of 'A;B;Z;C' will
    be 3.

    Sequences found in the FASTA input but not in the taxonomy map are
    reported on stdout and get a depth of 0. Sequences found only in the
    taxonomy map keep a one-element list (depth only).

    Raises ValueError if tax_map_lines is empty, has a missing or corrupt
    header, or contains a row without exactly four columns.

    Arguments:
        fasta_lines - list of lines in FASTA format (the result of calling
         readlines() on the open file handle)
        tax_map_lines - list of lines from the taxonomy mapping file (the
         result of calling readlines() on the open file handle)
        unknown_keywords - a list of strings corresponding to taxonomic
         level strings that should be ignored when computing the relevant
         taxonomic depth
    """
    expected_header = "ID Number\tGenBank Number\tNew Taxon String\tSource\n"
    # An empty input has no header at all; report it the same way as a bad
    # header instead of leaking an IndexError from tax_map_lines[0].
    if not tax_map_lines or tax_map_lines[0] != expected_header:
        raise ValueError("The taxonomy map file appears to be invalid "
                         "because it is either missing the header or has a "
                         "corrupt header.")

    # Build the ignore set once. Set membership replaces the repeated
    # list.remove() scans of the earlier version.
    ignored_levels = set(unknown_keywords) if unknown_keywords else None

    seq_stats = {}
    # Record the taxonomy depths for each sequence.
    for seq_id, seq_info in fields_to_dict(tax_map_lines[1:]).items():
        # The first column is the key, so three values means four columns.
        if len(seq_info) != 3:
            raise ValueError("The taxonomy map file appears to be invalid "
                             "because it does not have exactly 4 columns.")
        # Split at each level and remove any empty levels or levels that
        # contain only whitespace.
        taxonomy = [level for level in seq_info[1].split(';')
                    if level.strip() != '']
        # Remove any 'unknown' taxonomy levels before computing the known
        # taxonomy depth.
        if ignored_levels:
            taxonomy = [level for level in taxonomy
                        if level not in ignored_levels]
        seq_stats[seq_id] = [len(taxonomy)]

    # Record the sequence data and sequence length for each sequence.
    for seq_id, seq in MinimalFastaParser(fasta_lines):
        if seq_id in seq_stats:
            seq_stats[seq_id].extend([len(seq), seq])
        else:
            print("Found sequence id '%s' in the FASTA file that wasn't in "
                  "the taxonomy mapping file\n" % seq_id)
            # Assign a taxonomic depth of 0 because we don't have any
            # taxonomic information for the sequence.
            seq_stats[seq_id] = [0, len(seq), seq]
    return seq_stats
开发者ID:infotroph,项目名称:nested_reference_otus,代码行数:61,代码来源:sort_seqs.py
示例17: main
def main():
    """Procrustes-transform each query coordinate matrix onto the reference.

    The first entry of opts.input_fps is the reference and every later entry
    is a query. For each query the transformed reference and query matrices
    are written into opts.output_dir. One summary row per query is written
    to procrustes_results.txt, with Monte Carlo columns filled in when
    opts.random_trials is set and 'NA' otherwise.

    Input and output files are handled in with blocks so they are closed
    promptly.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fps = opts.input_fps
    sample_id_map_fps = opts.sample_id_map_fps
    num_dimensions = opts.num_dimensions
    max_dims_str = str(num_dimensions or 'alldim')
    output_dir = opts.output_dir
    random_trials = opts.random_trials

    if random_trials is not None and random_trials < 10:
        option_parser.error('Must perform >= 10 trails for Monte Carlo analysis.')

    if sample_id_map_fps and \
       (len(sample_id_map_fps) + 1) != len(opts.input_fps):
        option_parser.error('If providing sample id maps, there must be exactly'
                            ' one fewer sample id maps than input coordinate'
                            ' matrices.')

    if not exists(output_dir):
        makedirs(output_dir)

    reference_input_fp = input_fps[0]
    reference_input_fp_dir, input_fn1 = split(reference_input_fp)
    reference_input_fp_basename, reference_input_fp_ext = splitext(input_fn1)
    output_summary_fp = join(output_dir, 'procrustes_results.txt')
    summary_file_lines = \
        ['#FP1\tFP2\tNum included dimensions\tMonte Carlo p-value\tCount better\tM^2',
         '#Warning: p-values in this file are NOT currently adjusted for multiple comparisons.']

    for i, query_input_fp in enumerate(input_fps[1:]):
        query_input_fp_dir, query_input_fn = split(query_input_fp)
        query_input_fp_basename, query_input_fp_ext = splitext(query_input_fn)
        output_matrix1_fp = join(
            output_dir,
            '%s_transformed_reference.txt' % reference_input_fp_basename)
        output_matrix2_fp = join(
            output_dir,
            '%s_transformed_q%d.txt' % (query_input_fp_basename, i+1))

        # The i-th sample id map belongs to the i-th query matrix; only the
        # first value of each mapping line is used.
        if sample_id_map_fps:
            with open(sample_id_map_fps[i], "U") as sample_id_map_f:
                sample_id_map = dict(
                    (k, v[0])
                    for k, v in fields_to_dict(sample_id_map_f).items())
        else:
            sample_id_map = None

        with open(reference_input_fp, 'U') as reference_f:
            with open(query_input_fp, 'U') as query_f:
                transformed_coords1, transformed_coords2, m_squared, \
                    randomized_coords2 = get_procrustes_results(
                        reference_f,
                        query_f,
                        sample_id_map=sample_id_map,
                        randomize=False,
                        max_dimensions=num_dimensions)

        with open(output_matrix1_fp, 'w') as output_matrix1_f:
            output_matrix1_f.write(transformed_coords1)
        with open(output_matrix2_fp, 'w') as output_matrix2_f:
            output_matrix2_f.write(transformed_coords2)

        if random_trials:
            if opts.store_trial_details:
                # Parenthesise i + 2: '%' binds tighter than '+', so the
                # unparenthesised form tried to add an int to the formatted
                # string and raised TypeError.
                trial_output_dir = join(output_dir,
                                        'trial_details_%d' % (i + 2))
            else:
                trial_output_dir = None
            with open(reference_input_fp, 'U') as reference_f:
                coords_f1 = list(reference_f)
            with open(query_input_fp, 'U') as query_f:
                coords_f2 = list(query_f)
            actual_m_squared, trial_m_squareds, count_better, mc_p_value =\
                procrustes_monte_carlo(coords_f1,
                                       coords_f2,
                                       trials=random_trials,
                                       max_dimensions=num_dimensions,
                                       sample_id_map=sample_id_map,
                                       trial_output_dir=trial_output_dir)
            # truncate the p-value to the correct number of significant
            # digits
            mc_p_value_str = format_p_value_for_num_iters(mc_p_value,
                                                          random_trials)
            summary_file_lines.append(
                '%s\t%s\t%s\t%s\t%d\t%1.3f' %
                (reference_input_fp, query_input_fp, max_dims_str,
                 mc_p_value_str, count_better, actual_m_squared))
        else:
            summary_file_lines.append(
                '%s\t%s\t%s\tNA\tNA\t%1.3f' %
                (reference_input_fp, query_input_fp, max_dims_str, m_squared))

    # Write output summary
    with open(output_summary_fp, 'w') as f:
        f.write('\n'.join(summary_file_lines))
        f.write('\n')
开发者ID:rob-knight,项目名称:qiime,代码行数:86,代码来源:transform_coordinate_matrices.py
示例18: test_write_otu_map
def test_write_otu_map(self):
    """write_otu_map output parses back to the original mapping"""
    target_fp = self.tmp_fp1
    write_otu_map(self.otu_map1, target_fp)
    round_tripped = fields_to_dict(open(target_fp))
    # Register the temp file for removal once it has been read back.
    self.files_to_remove.append(target_fp)
    expected = dict(self.otu_map1)
    self.assertEqual(round_tripped, expected)
开发者ID:oscaredd,项目名称:qiime,代码行数:6,代码来源:test_format.py
示例19: load_otu_mapping
def load_otu_mapping(data_access, input_dir, analysis_id):
    """Load the OTU table into the DB.

    Registers the OTU-picking run described by <input_dir>/gg_97_otus/log.txt
    through data_access.loadAllOTUInfo, then loads the non-zero cells of
    <input_dir>/gg_97_otus/exact_uclust_ref_otu_table.txt in batches through
    data_access.loadOTUTable.

    Arguments:
        data_access - object providing loadAllOTUInfo, loadOTUTable and
         getSFFDatabaseConnection
        input_dir - analysis directory containing the 'gg_97_otus' and
         'split_libraries' subdirectories
        analysis_id - identifier passed through to loadAllOTUInfo

    Raises ValueError if the pick-otus command cannot be found in the log, or
    if either database load reports failure.

    The print and raise statements use forms that behave the same under
    Python 2 and Python 3.
    """
    # For OTU Tables
    # read in the workflow log file and determine timestamp and svn version
    # of Qiime used for the analysis
    pOTUs_threshold = '97'
    ref_set_threshold = '97'
    pOTUs_method = 'UCLUST_REF'
    reference_set_name = 'GREENGENES_REFERENCE'

    # Read the log once and close it; the full text is rebuilt from the
    # lines rather than opening the file a second time.
    with open(join(input_dir, 'gg_97_otus', 'log.txt')) as log_f:
        log_lines = log_f.readlines()
    otus_log_str = ''.join(log_lines)

    # From the workflow log file get the pick-otus cmd (the last matching
    # line wins).
    pick_otus_cmd = None
    for log_line in log_lines:
        if ('parallel_pick_otus_uclust_ref.py' in log_line or
                'pick_otus.py' in log_line):
            pick_otus_cmd = log_line
    if pick_otus_cmd is None:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError('Error: Unable to find the pick-otus command in '
                         'the workflow log!')

    # define values for otu_picking_run table
    otu_run_set_id = 0
    # This is temporarily defined, however a script will determine the value
    svn_version = '1418'
    run_date = datetime.now().strftime("%d/%m/%Y/%H/%M/%S")
    pick_otus_map = join(input_dir, 'gg_97_otus', 'exact_uclust_ref_otus.txt')

    # get md5 for split-lib seq file
    split_lib_seqs = join(input_dir, 'split_libraries', 'seqs.fna')
    with open(split_lib_seqs) as split_lib_f:
        split_lib_seqs_md5 = safe_md5(split_lib_f).hexdigest()

    # Insert the otu-picking log information in the DB
    print('calling loadAllOTUInfo with analysis_id %s' % str(analysis_id))
    valid, new_otu_run_set_id, otu_picking_run_id = \
        data_access.loadAllOTUInfo(True,
                                   otu_run_set_id, run_date,
                                   pOTUs_method, pOTUs_threshold,
                                   svn_version, pick_otus_cmd, otus_log_str,
                                   split_lib_seqs_md5, reference_set_name,
                                   ref_set_threshold, analysis_id)
    if not valid:
        raise ValueError('Error: Unable to load OTU run data into database!')
    else:
        print("Finished registering OTU run!")

    # define OTU mapping
    # NOTE(review): otu_map is built but never loaded anywhere in this
    # function - confirm whether a load step is missing.
    otu_map = []
    with open(pick_otus_map, 'U') as pick_otus_map_f:
        otu_to_seqid = fields_to_dict(pick_otus_map_f)
    for otu in otu_to_seqid:
        for sample in otu_to_seqid[otu]:
            otu_map.append('%s\t%s\t%s\t%s' % (otu, sample,
                                               new_otu_run_set_id,
                                               reference_set_name))
    print('Finished setting otu_map.')

    # The connection and cursor calls below are kept exactly as before, in
    # case they have side effects on the database session.
    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()
    #print 'Starting PK_SPLIT_LIBRARY_READ_MAP index rebuild...'
    #cur.execute('alter index "SFF"."PK_SPLIT_LIBRARY_READ_MAP" rebuild ')
    print('Fisnished rebuilding index PK_SPLIT_LIBRARY_READ_MAP.')
    cur = con.cursor()

    # prepare the OTU table for loading
    print('Loading OTU Table into the database!')
    pick_otus_table = join(input_dir, 'gg_97_otus',
                           'exact_uclust_ref_otu_table.txt')
    with open(pick_otus_table) as otu_table_f:
        otu_table_lines = otu_table_f.readlines()
    sample_ids, otu_ids, otu_table, lineages = \
        parse_classic_otu_table(otu_table_lines)

    # convert OTU table to tab-delimited list, keeping only non-zero cells
    otu_table_load = []
    for i, otu in enumerate(otu_ids):
        for j, sample in enumerate(sample_ids):
            if otu_table[i][j] > 0:
                otu_table_load.append(
                    "%s\t%s\t%s\t%s" %
                    (otu, sample, new_otu_run_set_id, otu_table[i][j]))

    # get DB connection
    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()

    # load otu table into DB
    data_types = ['s', 's', 'i', 'f']
    set_count = 0
    for input_set in input_set_generator(otu_table_load, cur, data_types,
                                         buffer_size=1000):
        valid = data_access.loadOTUTable(True, input_set)
        if not valid:
            raise ValueError('Error: Unable to load OTU table!')
        print("loading OTU Table: %s" % set_count)
        set_count += 1

    print('Successfully loaded the OTU Table into the database!')
    print('End of function')
开发者ID:ElDeveloper,项目名称:qiime_web_app,代码行数:95,代码来源:load_analysis_seqs_through_otu_table.py
示例20: main
def main():
    """Procrustes-transform the second coordinate matrix onto the first.

    The two transformed matrices are written to pc1_transformed.txt and
    pc2_transformed.txt in opts.output_dir. When opts.random_trials is set, a
    Monte Carlo analysis is also run and a one-row summary is written to
    <fp1 basename>_<fp2 basename>_procrustes_results.txt.

    The summary is only written in the Monte Carlo branch, because that is
    the only place summary_file_lines is defined. Input and output files are
    handled in with blocks so they are closed promptly.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    random_trials = opts.random_trials
    if random_trials is not None and random_trials < 10:
        option_parser.error('Must perform >= 10 trails for Monte Carlo analysis.')

    output_dir = opts.output_dir
    sample_id_map_fp = opts.sample_id_map_fp
    num_dimensions = opts.num_dimensions

    if not exists(output_dir):
        makedirs(output_dir)

    if opts.store_trial_details:
        trial_output_dir = '%s/trial_details/' % output_dir
    else:
        trial_output_dir = None

    input_fp1 = opts.input_fps[0]
    input_fp2 = opts.input_fps[1]
    input_fp1_dir, input_fn1 = split(input_fp1)
    input_fp1_basename, input_fp1_ext = splitext(input_fn1)
    input_fp2_dir, input_fn2 = split(input_fp2)
    input_fp2_basename, input_fp2_ext = splitext(input_fn2)
    output_summary_fp = '%s/%s_%s_procrustes_results.txt' %\
        (output_dir, input_fp1_basename, input_fp2_basename)
    output_matrix1_fp = '%s/pc1_transformed.txt' % output_dir
    output_matrix2_fp = '%s/pc2_transformed.txt' % output_dir

    # Only the first value of each mapping line is used.
    if sample_id_map_fp:
        with open(sample_id_map_fp, "U") as sample_id_map_f:
            sample_id_map = dict(
                (k, v[0])
                for k, v in fields_to_dict(sample_id_map_f).items())
    else:
        sample_id_map = None

    with open(input_fp1, 'U') as coords1_f:
        with open(input_fp2, 'U') as coords2_f:
            transformed_coords1, transformed_coords2, m_squared, \
                randomized_coords2 = get_procrustes_results(
                    coords1_f,
                    coords2_f,
                    sample_id_map=sample_id_map,
                    randomize=False,
                    max_dimensions=num_dimensions)

    with open(output_matrix1_fp, 'w') as output_matrix1_f:
        output_matrix1_f.write(transformed_coords1)
    with open(output_matrix2_fp, 'w') as output_matrix2_f:
        output_matrix2_f.write(transformed_coords2)

    if random_trials:
        summary_file_lines = ['FP1 FP2 Included_dimensions MC_p_value Count_better M^2']
        with open(input_fp1, 'U') as coords1_f:
            coords_f1 = list(coords1_f)
        with open(input_fp2, 'U') as coords2_f:
            coords_f2 = list(coords2_f)
        actual_m_squared, trial_m_squareds, count_better, mc_p_value =\
            procrustes_monte_carlo(coords_f1,
                                   coords_f2,
                                   trials=random_trials,
                                   max_dimensions=num_dimensions,
                                   sample_id_map=sample_id_map,
                                   trial_output_dir=trial_output_dir)
        # truncate the p-value to the correct number of significant
        # digits
        mc_p_value_str = format_p_value_for_num_iters(mc_p_value,
                                                      random_trials)
        max_dims_str = str(num_dimensions or 'alldim')
        summary_file_lines.append(
            '%s %s %s %s %d %1.3f' %
            (input_fp1, input_fp2, max_dims_str, mc_p_value_str,
             count_better, actual_m_squared))
        with open(output_summary_fp, 'w') as f:
            f.write('\n'.join(summary_file_lines))
            f.write('\n')
开发者ID:DDomogala3,项目名称:qiime,代码行数:71,代码来源:transform_coordinate_matrices.py
注：本文中的 qiime.parse.fields_to_dict 函数示例由纯净天空整理自 Github/MSDocs 等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的 License；未经允许，请勿转载。
请发表评论