This page collects typical usage examples of the Python function qiime.workflow.util.generate_log_fp. If you are wondering what generate_log_fp does, how to call it, or what real-world uses look like, the curated examples below should help.
The page shows 20 code examples of generate_log_fp, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
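Before the examples, here is a minimal sketch of the typical calling pattern (assuming QIIME 1.x is installed; the output directory name and the empty params/qiime_config dictionaries are illustrative placeholders, not required values):

from qiime.util import create_dir
from qiime.workflow.util import WorkflowLogger, generate_log_fp

output_dir = 'my_analysis_output'  # hypothetical output directory
create_dir(output_dir)

# generate_log_fp returns a timestamped log file path inside output_dir,
# e.g. my_analysis_output/log_20140101010101.txt
log_fp = generate_log_fp(output_dir)

# WorkflowLogger writes the params and qiime_config key/value pairs into
# the log header; empty dicts are acceptable placeholders for this sketch.
logger = WorkflowLogger(log_fp, params={}, qiime_config={})
logger.write('Starting workflow.\n')
logger.close()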
Example 1: main
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    match_barcodes = opts.match_barcodes
    barcode_indicator = opts.barcode_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
                            "--include_input_dir_path must also be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['join_paired_ends'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_files = []
    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']

    for root, dir, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_files += [abspath(join(root, fp))]

    pairs, bc_pairs = get_pairs(all_files, read1_indicator, read2_indicator,
                                match_barcodes, barcode_indicator)

    commands = create_commands_jpe(pairs, output_dir,
                                   params_str, leading_text, trailing_text,
                                   include_input_dir_path,
                                   remove_filepath_in_name, match_barcodes,
                                   bc_pairs)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
Author: Springbudder | Project: qiime | Lines: 60 | Source: multiple_join_paired_ends.py
Example 2: run_core_diversity_analyses
def run_core_diversity_analyses(
        biom_fp,
        mapping_fp,
        sampling_depth,
        output_dir,
        qiime_config,
        command_handler=call_commands_serially,
        tree_fp=None,
        params=None,
        categories=None,
        arare_min_rare_depth=10,
        arare_num_steps=10,
        parallel=False,
        suppress_taxa_summary=False,
        suppress_beta_diversity=False,
        suppress_alpha_diversity=False,
        suppress_otu_category_significance=False,
        status_update_callback=print_to_stdout):
    """
    """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError("Category '%s' is not a column header "
                                 "in your mapping file. "
                                 "Categories are case and white space sensitive. Valid "
                                 "choices are: (%s)" % (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError("Category '%s' contains only one value. "
                                 "Categories analyzed here require at least two values." % c)
    else:
        categories = []

    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []

    # begin logging
    old_log_fps = glob(join(output_dir, 'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log', log_fp, _index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(('Previous run log', old_log_fp, _index_headers['run_summary']))
    logger = WorkflowLogger(log_fp,
                            params=params,
                            qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
            "biom summarize-table -i %s -o %s --suppress-md5 %s" % \
            (biom_fp, biom_table_stats_output_fp, params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n"
                     % biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % \
            (biom_fp, filtered_biom_fp, sampling_depth)
        commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
                          filter_samples_cmd)])
    else:
        logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n"
                     % filtered_biom_fp)
    biom_fp = filtered_biom_fp

    # run initial commands and reset the command list
    if len(commands) > 0:
        command_handler(commands,
                        status_update_callback,
                        logger,
#......... rest of code omitted .........
Author: jasonbogovich | Project: qiime | Lines: 101 | Source: core_diversity_analyses.py
Example 3: pick_nested_reference_otus
def pick_nested_reference_otus(input_fasta_fp,
                               input_tree_fp,
                               output_dir,
                               run_id,
                               similarity_thresholds,
                               command_handler,
                               status_update_callback=print_to_stdout):

    # Prepare some variables for the later steps
    create_dir(output_dir)
    otu_dir = join(output_dir, 'otus')
    create_dir(otu_dir)
    rep_set_dir = join(output_dir, 'rep_set')
    create_dir(rep_set_dir)
    # currently not doing anything with taxonomies and trees
    # tax_dir = join(output_dir,'taxonomies')
    # create_dir(tax_dir)
    if input_tree_fp:
        tree_dir = join(output_dir, 'trees')
        create_dir(tree_dir)
    commands = []
    files_to_remove = []

    logger = WorkflowLogger(generate_log_fp(output_dir))

    similarity_thresholds.sort()
    similarity_thresholds.reverse()

    current_inseqs_fp = input_fasta_fp
    current_tree_fp = input_tree_fp
    previous_otu_map = None
    for similarity_threshold in similarity_thresholds:
        current_inseqs_basename = splitext(split(current_inseqs_fp)[1])[0]

        # pick otus command
        otu_fp = '%s/%d_otu_map.txt' % (otu_dir, similarity_threshold)
        clusters_fp = '%s/%d_clusters.uc' % (otu_dir, similarity_threshold)
        temp_otu_fp = '%s/%s_otus.txt' % (otu_dir, current_inseqs_basename)
        temp_log_fp = '%s/%s_otus.log' % (otu_dir, current_inseqs_basename)
        temp_clusters_fp = '%s/%s_clusters.uc' % (otu_dir, current_inseqs_basename)
        # divide by 100.0 (not 100) so integer thresholds like 97 yield 0.97
        # rather than 0 under Python 2 integer division
        pick_otus_cmd = \
            'pick_otus.py -m uclust -DBz -i %s -s %1.2f -o %s' % (
                current_inseqs_fp,
                similarity_threshold / 100.0,
                otu_dir)

        commands.append([('Pick OTUs (%d)' % similarity_threshold,
                          pick_otus_cmd)])
        commands.append([('Rename OTU file (%d)' % similarity_threshold,
                          'mv %s %s' % (temp_otu_fp, otu_fp))])
        commands.append([('Rename uc file (%d)' % similarity_threshold,
                          'mv %s %s' % (temp_clusters_fp, clusters_fp))])
        files_to_remove.append(temp_log_fp)

        # rep set picking
        temp_rep_set_fp = get_tmp_filename(prefix='NestedReference',
                                           suffix='.fasta')
        pick_rep_set_cmd = \
            'pick_rep_set.py -m first -i %s -o %s -f %s' % (
                otu_fp,
                temp_rep_set_fp,
                current_inseqs_fp)
        commands.append([('Pick Rep Set (%d)' % similarity_threshold,
                          pick_rep_set_cmd)])
        command_handler(commands, status_update_callback, logger,
                        close_logger_on_success=False)
        commands = []

        # rename representative sequences
        rep_set_fp = '%s/%d_otus_%s.fasta' % (
            rep_set_dir,
            similarity_threshold,
            run_id)
        logger.write('Renaming OTU representative sequences so OTU ids are '
                     'reference sequence ids.')
        rep_set_f = open(rep_set_fp, 'w')
        for e in rename_rep_seqs(open(temp_rep_set_fp, 'U')):
            rep_set_f.write('>%s\n%s\n' % e)
        rep_set_f.close()
        files_to_remove.append(temp_rep_set_fp)

        # filter the tree, if provided
        if current_tree_fp is not None:
            tree_fp = '%s/%d_otus_%s.tre' % (
                tree_dir,
                similarity_threshold,
                run_id)
            tree_cmd = 'filter_tree.py -i %s -f %s -o %s' % \
                (current_tree_fp, rep_set_fp, tree_fp)
            commands.append([('Filter tree (%d)' % similarity_threshold, tree_cmd)])
            command_handler(commands, status_update_callback, logger,
                            close_logger_on_success=False)
            # prep for the next iteration
            current_tree_fp = tree_fp

        # prep for the next iteration
        remove_files(files_to_remove)
        commands = []
        files_to_remove = []
        current_inseqs_fp = rep_set_fp

    logger.close()
Author: infotroph | Project: nested_reference_otus | Lines: 100 | Source: nested_reference_workflow.py
Example 4: run_core_diversity_analyses
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_group_significance=False,
    status_update_callback=print_to_stdout,
):
    """
    """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = parse_mapping_file_to_dict(open(mapping_fp, "U"))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
                )
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." % c
                )
    else:
        categories = []
    comma_separated_categories = ",".join(categories)
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
    create_dir(output_dir)
    index_fp = "%s/index.html" % output_dir
    index_links = []
    commands = []
    # begin logging
    old_log_fps = glob(join(output_dir, "log_20*txt"))
    log_fp = generate_log_fp(output_dir)
    index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
    for old_log_fp in old_log_fps:
        index_links.append(("Previous run log", old_log_fp, _index_headers["run_summary"]))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)
    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params["biom-summarize-table"])
    except KeyError:
        params_str = ""
    biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = "biom summarize-table -i %s -o %s --suppress-md5 %s" % (
            biom_fp,
            biom_table_stats_output_fp,
            params_str,
        )
        commands.append([("Generate BIOM table summary", biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" % biom_table_stats_output_fp)
    index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))
    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
            biom_fp,
            filtered_biom_fp,
            sampling_depth,
        )
        commands.append(
            [
                (
                    "Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
                    filter_samples_cmd,
                )
            ]
        )
    else:
#......... rest of code omitted .........
Author: EESI | Project: qiime | Lines: 101 | Source: core_diversity_analyses.py
Example 5: iterative_pick_subsampled_open_reference_otus
def iterative_pick_subsampled_open_reference_otus(
        input_fps,
        refseqs_fp,
        output_dir,
        percent_subsample,
        new_ref_set_id,
        command_handler,
        params,
        qiime_config,
        prefilter_refseqs_fp=None,
        prefilter_percent_id=0.60,
        min_otu_size=2,
        run_assign_tax=True,
        run_align_and_tree=True,
        step1_otu_map_fp=None,
        step1_failures_fasta_fp=None,
        parallel=False,
        suppress_step4=False,
        logger=None,
        suppress_md5=False,
        denovo_otu_picking_method='uclust',
        reference_otu_picking_method='uclust_ref',
        status_update_callback=print_to_stdout):
    """ Call the pick_subsampled_open_reference_otus workflow on multiple inputs
        and handle processing of the results.
    """
    create_dir(output_dir)
    commands = []

    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    # if the user has not passed a different reference collection for the pre-filter,
    # use the input refseqs_fp for all iterations. we want to pre-filter all data against
    # the input data as lower percent identity searches with uclust can be slow, so we
    # want the reference collection to stay at a reasonable size.
    if prefilter_refseqs_fp is None:
        prefilter_refseqs_fp = refseqs_fp

    otu_table_fps = []
    repset_fasta_fps = []
    for i, input_fp in enumerate(input_fps):
        iteration_output_dir = '%s/%d/' % (output_dir, i)
        if iteration_output_exists(iteration_output_dir, min_otu_size):
            # if the output from an iteration already exists, skip that
            # iteration (useful for continuing failed runs)
            log_input_md5s(logger, [input_fp, refseqs_fp])
            logger.write('Iteration %d (input file: %s) output data already exists. '
                         'Skipping and moving to next.\n\n' % (i, input_fp))
        else:
            pick_subsampled_open_reference_otus(input_fp=input_fp,
                                                refseqs_fp=refseqs_fp,
                                                output_dir=iteration_output_dir,
                                                percent_subsample=percent_subsample,
                                                new_ref_set_id='.'.join([new_ref_set_id, str(i)]),
                                                command_handler=command_handler,
                                                params=params,
                                                qiime_config=qiime_config,
                                                run_assign_tax=False,
                                                run_align_and_tree=False,
                                                prefilter_refseqs_fp=prefilter_refseqs_fp,
                                                prefilter_percent_id=prefilter_percent_id,
                                                min_otu_size=min_otu_size,
                                                step1_otu_map_fp=step1_otu_map_fp,
                                                step1_failures_fasta_fp=step1_failures_fasta_fp,
                                                parallel=parallel,
                                                suppress_step4=suppress_step4,
                                                logger=logger,
                                                suppress_md5=suppress_md5,
                                                denovo_otu_picking_method=denovo_otu_picking_method,
                                                reference_otu_picking_method=reference_otu_picking_method,
                                                status_update_callback=status_update_callback)

        # perform post-iteration file shuffling whether the previous iteration's
        # data previously existed or was just computed.
        # step1 otu map and failures can only be used for the first iteration
        # as subsequent iterations need to use updated refseqs files
        step1_otu_map_fp = step1_failures_fasta_fp = None
        new_refseqs_fp = '%s/new_refseqs.fna' % iteration_output_dir
        refseqs_fp = new_refseqs_fp
        otu_table_fps.append('%s/otu_table_mc%d.biom' % (iteration_output_dir, min_otu_size))
        repset_fasta_fps.append('%s/rep_set.fna' % iteration_output_dir)

    # Merge OTU tables - check for existence first as this step has historically
    # been a frequent failure, so is sometimes run manually in failed runs.
    otu_table_fp = '%s/otu_table_mc%d.biom' % (output_dir, min_otu_size)
    if not (exists(otu_table_fp) and getsize(otu_table_fp) > 0):
        merge_cmd = 'merge_otu_tables.py -i %s -o %s' % \
            (','.join(otu_table_fps), otu_table_fp)
        commands.append([("Merge OTU tables", merge_cmd)])

    # Build master rep set
    final_repset_fp = '%s/rep_set.fna' % output_dir
    final_repset_from_iteration_repsets_fps(repset_fasta_fps, final_repset_fp)

    command_handler(commands,
                    status_update_callback,
#......... rest of code omitted .........
Author: Jorge-C | Project: qiime | Lines: 101 | Source: pick_open_reference_otus.py
Example 6: assign_tax
def assign_tax(repset_fasta_fp,
               output_dir,
               command_handler,
               params,
               qiime_config,
               parallel=False,
               logger=None,
               status_update_callback=print_to_stdout):

    input_dir, input_filename = split(repset_fasta_fp)
    input_basename, input_ext = splitext(input_filename)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    # Prep the taxonomy assignment command
    try:
        assignment_method = params['assign_taxonomy']['assignment_method']
    except KeyError:
        assignment_method = 'rdp'
    assign_taxonomy_dir = '%s/%s_assigned_taxonomy' % \
        (output_dir, assignment_method)
    taxonomy_fp = '%s/%s_tax_assignments.txt' % \
        (assign_taxonomy_dir, input_basename)
    if parallel and (assignment_method == 'rdp' or
                     assignment_method == 'blast' or
                     assignment_method == 'uclust'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --assignment_method
            # option. This works for now though.
            d = params['assign_taxonomy'].copy()
            if 'assignment_method' in d:
                del d['assignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass

        # Build the parallel taxonomy assignment command
        assign_taxonomy_cmd = \
            'parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' % \
            (assignment_method, repset_fasta_fp, assign_taxonomy_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['assign_taxonomy'])
        except KeyError:
            params_str = ''
        # Build the taxonomy assignment command
        assign_taxonomy_cmd = 'assign_taxonomy.py -o %s -i %s %s' % \
            (assign_taxonomy_dir, repset_fasta_fp, params_str)
    if exists(assign_taxonomy_dir):
        rmtree(assign_taxonomy_dir)
    commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    return taxonomy_fp
Author: Jorge-C | Project: qiime | Lines: 71 | Source: pick_open_reference_otus.py
Example 7: run_pick_closed_reference_otus
def run_pick_closed_reference_otus(
        input_fp,
        refseqs_fp,
        output_dir,
        taxonomy_fp,
        command_handler,
        params,
        qiime_config,
        parallel=False,
        logger=None,
        suppress_md5=False,
        status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
          1) Pick OTUs;
          2) Build an OTU table with optional pre-defined taxonomy.
    """
    # confirm that a valid otu picking method was supplied before doing
    # any work
    reference_otu_picking_methods = ['blast', 'uclust_ref', 'usearch61_ref']

    try:
        otu_picking_method = params['pick_otus']['otu_picking_method']
    except KeyError:
        otu_picking_method = 'uclust_ref'
    assert otu_picking_method in reference_otu_picking_methods, \
        "Invalid OTU picking method supplied: %s. Valid choices are: %s" \
        % (otu_picking_method, ' '.join(reference_otu_picking_methods))

    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [input_fp, refseqs_fp, taxonomy_fp])

    # Prep the OTU picking command
    pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
    otu_fp = '%s/%s_otus.txt' % (pick_otu_dir, input_basename)
    if parallel and (otu_picking_method == 'blast' or
                     otu_picking_method == 'uclust_ref' or
                     otu_picking_method == 'usearch61_ref'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --alignment_method
            # option. This works for now though.
            d = params['pick_otus'].copy()
            if 'otu_picking_method' in d:
                del d['otu_picking_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/%s -i %s -o %s -r %s -T %s' % \
            (python_exe_fp,
             script_dir,
             otu_picking_script,
             input_fp,
             pick_otu_dir,
             refseqs_fp,
             params_str)
    else:
        try:
            params_str = get_params_str(params['pick_otus'])
        except KeyError:
            params_str = ''
        # Since this is reference-based OTU picking we always want to
        # suppress new clusters -- force it here.
        params_str += ' --suppress_new_clusters'
        logger.write("Forcing --suppress_new_clusters as this is closed-reference OTU picking.\n\n")
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/pick_otus.py -i %s -o %s -r %s -m %s %s' % \
            (python_exe_fp,
             script_dir,
             input_fp,
             pick_otu_dir,
             refseqs_fp,
             otu_picking_method,
#......... rest of code omitted .........
Author: rob-knight | Project: qiime | Lines: 101 | Source: upstream.py
Example 8: run_core_diversity_analyses
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout,
):
    """
    """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = parse_mapping_file_to_dict(open(mapping_fp, "U"))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
                )
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." % c
                )
    else:
        categories = []
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
    create_dir(output_dir)
    index_fp = "%s/index.html" % output_dir
    index_links = []
    commands = []
    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)
    # run print_biom_table_summary.py on input BIOM table
    try:
        params_str = get_params_str(params["print_biom_table_summary"])
    except KeyError:
        params_str = ""
    biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
    print_biom_table_summary_cmd = "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % (
        biom_fp,
        biom_table_stats_output_fp,
        params_str,
    )
    index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))
    commands.append([("Generate BIOM table summary", print_biom_table_summary_cmd)])
    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
        biom_fp,
        filtered_biom_fp,
        sampling_depth,
    )
    commands.append(
        [
            (
                "Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
                filter_samples_cmd,
            )
        ]
    )
    biom_fp = filtered_biom_fp
    # run initial commands and reset the command list
    command_handler(commands, status_update_callback, logger, close_logger_on_success=False)
    commands = []
    if not suppress_beta_diversity:
        bdiv_even_output_dir = "%s/bdiv_even%d/" % (output_dir, sampling_depth)
#......... rest of code omitted .........
Author: kartoffelpuffer | Project: qiime | Lines: 101 | Source: core_diversity_analyses.py
Example 9: run_alpha_rarefaction
def run_alpha_rarefaction(otu_table_fp,
                          mapping_fp,
                          output_dir,
                          command_handler,
                          params,
                          qiime_config,
                          tree_fp=None,
                          num_steps=10,
                          parallel=False,
                          logger=None,
                          min_rare_depth=10,
                          max_rare_depth=None,
                          suppress_md5=False,
                          status_update_callback=print_to_stdout,
                          plot_stderr_and_stddev=False,
                          retain_intermediate_files=True):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
          1) Generate rarefied OTU tables;
          2) Compute alpha diversity metrics for each rarefied OTU table;
          3) Collate alpha diversity results;
          4) Generate alpha rarefaction plots.
    """
    # Prepare some variables for the later steps
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    if max_rare_depth is None:
        min_count, max_count, median_count, mean_count, counts_per_sample = \
            compute_counts_per_sample_stats(parse_biom_table(open(otu_table_fp, 'U')))
        max_rare_depth = median_count
    step = int((max_rare_depth - min_rare_depth) / num_steps) or 1
    max_rare_depth = int(max_rare_depth)

    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    try:
        params_str = get_params_str(params['multiple_rarefactions'])
    except KeyError:
        params_str = ''
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        # Build the rarefaction command
        rarefaction_cmd = \
            '%s %s/parallel_multiple_rarefactions.py -T -i %s -m %s -x %s -s %s -o %s %s' % \
            (python_exe_fp, script_dir, otu_table_fp, min_rare_depth, max_rare_depth,
             step, rarefaction_dir, params_str)
    else:
        # Build the rarefaction command
        rarefaction_cmd = \
            '%s %s/multiple_rarefactions.py -i %s -m %s -x %s -s %s -o %s %s' % \
            (python_exe_fp, script_dir, otu_table_fp, min_rare_depth, max_rare_depth,
             step, rarefaction_dir, params_str)
    commands.append([('Alpha rarefaction', rarefaction_cmd)])

    # Prep the alpha diversity command
    alpha_diversity_dir = '%s/alpha_div/' % output_dir
    create_dir(alpha_diversity_dir)
    try:
        params_str = get_params_str(params['alpha_diversity'])
    except KeyError:
        params_str = ''
    if tree_fp:
        params_str += ' -t %s' % tree_fp
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        # Build the alpha diversity command
        alpha_diversity_cmd = \
            "%s %s/parallel_alpha_diversity.py -T -i %s -o %s %s" % \
            (python_exe_fp, script_dir, rarefaction_dir, alpha_diversity_dir,
             params_str)
    else:
        # Build the alpha diversity command
        alpha_diversity_cmd = \
            "%s %s/alpha_diversity.py -i %s -o %s %s" % \
            (python_exe_fp, script_dir, rarefaction_dir, alpha_diversity_dir,
             params_str)
    commands.append(
        [('Alpha diversity on rarefied OTU tables', alpha_diversity_cmd)])

    # Prep the alpha diversity collation command
    alpha_collated_dir = '%s/alpha_div_collated/' % output_dir
    create_dir(alpha_collated_dir)
#......... rest of code omitted .........
Author: rob-knight | Project: qiime | Lines: 101 | Source: downstream.py
Example 10: create_personal_results
def create_personal_results(output_dir,
                            mapping_fp,
                            coord_fp,
                            collated_dir,
                            otu_table_fp,
                            prefs_fp,
                            personal_id_column,
                            personal_ids=None,
                            column_title='Self',
                            individual_titles=None,
                            category_to_split='BodySite',
                            time_series_category='WeeksSinceStart',
                            rarefaction_depth=10000,
                            alpha=0.05,
                            rep_set_fp=None,
                            body_site_rarefied_otu_table_dir=None,
                            retain_raw_data=False,
                            suppress_alpha_rarefaction=False,
                            suppress_beta_diversity=False,
                            suppress_taxa_summary_plots=False,
                            suppress_alpha_diversity_boxplots=False,
                            suppress_otu_category_significance=False,
                            command_handler=call_commands_serially,
                            status_update_callback=no_status_updates):
    # Create our output directory and copy over the resources the personalized
    # pages need (e.g. javascript, images, etc.).
    create_dir(output_dir)

    support_files_dir = join(output_dir, 'support_files')
    if not exists(support_files_dir):
        copytree(join(get_project_dir(), 'my_microbes', 'support_files'),
                 support_files_dir)

    logger = WorkflowLogger(generate_log_fp(output_dir))

    mapping_data, header, comments = parse_mapping_file(open(mapping_fp, 'U'))
    try:
        personal_id_index = header.index(personal_id_column)
    except ValueError:
        raise ValueError("Personal ID field '%s' is not a mapping file column "
                         "header." % personal_id_column)
    try:
        bodysite_index = header.index(category_to_split)
    except ValueError:
        raise ValueError("Category to split field '%s' is not a mapping file "
                         "column header." % category_to_split)

    header = header[:-1] + [column_title] + [header[-1]]

    # column that differentiates between body-sites within a single individual
    # used for the creation of the vectors in make_3d_plots.py, this data is
    # created by concatenating the two columns when writing the mapping file
    site_id_category = '%s&&%s' % (personal_id_column, category_to_split)
    header.insert(len(header) - 1, site_id_category)

    all_personal_ids = get_personal_ids(mapping_data, personal_id_index)
    if personal_ids is None:
        personal_ids = all_personal_ids
    else:
        for pid in personal_ids:
            if pid not in all_personal_ids:
                raise ValueError("'%s' is not a personal ID in the mapping "
                                 "file column '%s'." %
                                 (pid, personal_id_column))

    if time_series_category not in header:
        raise ValueError("Time series field '%s' is not a mapping file column "
                         "header." % time_series_category)

    otu_table_title = splitext(basename(otu_table_fp))

    output_directories = []
    raw_data_files = []
    raw_data_dirs = []

    # Rarefy the OTU table and split by body site here (instead of on a
    # per-individual basis) as we can use the same rarefied and split tables
    # for each individual.
    if not suppress_otu_category_significance:
        rarefied_otu_table_fp = join(output_dir,
                                     add_filename_suffix(otu_table_fp,
                                                         '_even%d' % rarefaction_depth))

        if body_site_rarefied_otu_table_dir is None:
            commands = []
            cmd_title = 'Rarefying OTU table'
            cmd = 'single_rarefaction.py -i %s -o %s -d %s' % (
                otu_table_fp, rarefied_otu_table_fp, rarefaction_depth)
            commands.append([(cmd_title, cmd)])
            raw_data_files.append(rarefied_otu_table_fp)

            per_body_site_dir = join(output_dir, 'per_body_site_otu_tables')

            cmd_title = 'Splitting rarefied OTU table by body site'
            cmd = 'split_otu_table.py -i %s -m %s -f %s -o %s' % (
                rarefied_otu_table_fp, mapping_fp, category_to_split,
                per_body_site_dir)
            commands.append([(cmd_title, cmd)])

            raw_data_dirs.append(per_body_site_dir)
#......... rest of code omitted .........
Author: biocore | Project: my-microbes | Lines: 101 | Source: util.py
Example 11: run_beta_diversity_through_plots
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     histogram_categories=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_3d_plots=False,
                                     suppress_2d_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
          1) Compute a beta diversity distance matrix;
          2) Perform a principal coordinates analysis on the result of
             Step 1;
          3) Generate a 3D prefs file for optimized coloring of continuous
             variables;
          4) Generate a 3D plot for all mapping fields with colors
             optimized for continuous data;
          5) Generate a 3D plot for all mapping fields with colors
             optimized for discrete data.
    """
    # Prepare some variables for the later steps
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    mapping_data, mapping_header, mapping_comments = \
        parse_mapping_file(open(mapping_fp, 'U'))
    if histogram_categories:
        invalid_categories = set(histogram_categories) - set(mapping_header)
        if invalid_categories:
            raise ValueError(
                "Invalid histogram categories - these must exactly match "
                "mapping file column headers: %s" % (' '.join(invalid_categories)))

    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields = \
            get_interesting_mapping_fields(mapping_data, mapping_header) or \
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' % \
            (output_dir, otu_table_basename,
             sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            '%s %s/single_rarefaction.py -i %s -o %s -d %d' % \
            (python_exe_fp, script_dir, otu_table_fp,
             even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)])
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_dir, otu_table_filename = split(even_sampled_otu_table_fp)
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # Prep the 3d prefs file generator command
    prefs_fp = '%s/prefs.txt' % output_dir
    try:
        params_str = get_params_str(params['make_prefs_file'])
    except KeyError:
        params_str = ''
    if 'mapping_headers_to_use' not in params['make_prefs_file']:
        params_str = '%s --mapping_headers_to_use %s' \
            % (params_str, mapping_fields)
    # Build the 3d prefs file generator command
    prefs_cmd = \
        '%s %s/make_prefs_file.py -m %s -o %s %s' % \
#......... rest of code omitted .........
Author: gxenomics | Project: qiime | Lines: 101 | Source: downstream.py
Example 12: run_pick_closed_reference_otus
def run_pick_closed_reference_otus(
        input_fp,
        refseqs_fp,
        output_dir,
        taxonomy_fp,
        command_handler,
        params,
        qiime_config,
        assign_taxonomy=False,
        parallel=False,
        logger=None,
        suppress_md5=False,
        status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
          1) Pick OTUs;
          2) If assign_taxonomy is True, choose representative sequence
             for OTUs and assign taxonomy using a classifier.
          3) Build an OTU table with optional predefined taxonomy
             (if assign_taxonomy=False) or taxonomic assignments from step 2
             (if assign_taxonomy=True).
    """
    # confirm that a valid otu picking method was supplied before doing
    # any work
    reference_otu_picking_methods = ['blast', 'uclust_ref', 'usearch61_ref',
                                     'usearch_ref', 'sortmerna']

    try:
        otu_picking_method = params['pick_otus']['otu_picking_method']
    except KeyError:
        otu_picking_method = 'uclust_ref'
    assert otu_picking_method in reference_otu_picking_methods, \
        "Invalid OTU picking method supplied: %s. Valid choices are: %s" \
        % (otu_picking_method, ' '.join(reference_otu_picking_methods))

    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [input_fp, refseqs_fp, taxonomy_fp])

    # Prep the OTU picking command
    pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
    otu_fp = '%s/%s_otus.txt' % (pick_otu_dir, input_basename)
    if parallel and (otu_picking_method == 'blast' or
                     otu_picking_method == 'uclust_ref' or
                     otu_picking_method == 'usearch61_ref' or
#......... rest of code omitted .........