This article collects typical usage examples of the Python function qiime.util.get_qiime_scripts_dir. If you are unsure what get_qiime_scripts_dir does or how to call it, the hand-picked examples below should help.
Twenty code examples of get_qiime_scripts_dir are listed below, sorted by popularity by default. Upvoting the examples you find useful helps the site recommend better Python code examples.
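Before the individual examples, here is a minimal sketch of the pattern most of them share: resolve the scripts directory with get_qiime_scripts_dir(), join a script name onto it, and launch that script with the Python interpreter recorded in qiime_config. This sketch is not one of the collected examples; it assumes a QIIME 1.x (Python 2) install, and print_qiime_config.py with -h stands in for whichever script and options you actually need.

# Illustrative sketch only: locate the QIIME scripts directory and run one
# of its scripts. print_qiime_config.py / -h are placeholder choices.
from os.path import join
from subprocess import Popen, PIPE, STDOUT

from qiime.util import get_qiime_scripts_dir, load_qiime_config

qiime_config = load_qiime_config()
python_exe_fp = qiime_config['python_exe_fp']  # interpreter configured for QIIME
script_fp = join(get_qiime_scripts_dir(), 'print_qiime_config.py')

cmd = '%s %s -h' % (python_exe_fp, script_fp)  # ask the script for its usage text
proc = Popen(cmd, shell=True, universal_newlines=True, stdout=PIPE, stderr=STDOUT)
print proc.stdout.read()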
Example 1: setup_workers
def setup_workers(num_cpus, outdir, server_socket, verbose=True,
                  error_profile=None):
    """Start workers waiting for data.

    num_cpus: number of cores
    outdir: directory where the workers will work in
    server_socket: an open socket to the server
    verbose: verbose flag passed to the workers
    error_profile: filepath to the error profiles, passed to workers
    """
    qiime_config = load_qiime_config()
    DENOISE_WORKER = join(get_qiime_scripts_dir(), "denoiser_worker.py")
    CLOUD_DISPATCH = join(get_qiime_scripts_dir(), "ec2Dispatch")
    CLOUD_ENV = qiime_config['cloud_environment']
    CLOUD = not CLOUD_ENV == "False"

    workers = []
    client_sockets = []
    # somewhat unique id for cluster job
    tmpname = "".join(sample(list(lowercase), 8))

    host, port = server_socket.getsockname()

    # TODO: this should be set to a defined wait time using alarm()
    for i in range(num_cpus):
        name = outdir + ("/%sworker%d" % (tmpname, i))
        workers.append(name)
        if CLOUD:
            cmd = "%s %d %s %s -f %s -s %s -p %s" % (CLOUD_DISPATCH, i + 1,
                                                     qiime_config['python_exe_fp'],
                                                     DENOISE_WORKER, name, host, port)
        else:
            cmd = "%s %s -f %s -s %s -p %s" % (qiime_config['python_exe_fp'],
                                               DENOISE_WORKER, name, host, port)

        if verbose:
            cmd += " -v"
        if error_profile:
            cmd += " -e %s" % error_profile

        submit_jobs([cmd], tmpname)
        # wait until the client connects
        # This might be a race condition -> make the client robust
        client_socket, client_address = server_socket.accept()
        client_sockets.append((client_socket, client_address))

    return workers, client_sockets
Developer: cleme, Project: qiime, Lines: 52, Source: cluster_utils.py
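As a rough idea of how setup_workers might be driven, the sketch below opens a listening socket and hands it to the function. The bind address, port choice, and worker count are illustrative assumptions, not taken from the example above.

# Hypothetical caller for setup_workers (illustration only): the socket setup
# and the worker count are assumptions, not part of the original example.
import socket

server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind(('', 0))   # let the OS pick a free port
server_socket.listen(4)       # expect up to 4 workers to connect back

workers, client_sockets = setup_workers(num_cpus=4, outdir='/tmp/denoiser_run',
                                        server_socket=server_socket, verbose=True)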
Example 2: test_main
def test_main(self):
    """Denoiser should always give same result on test data"""

    expected = """>FS8APND01D3TW3 | cluster size: 94
CTCCCGTAGGAGTCTGGGCCGTATCTCAGTCCCAATGTGGCCGGTCACCCTCTCAGGCCGGCTACCCGTCAAAGCCTTGGTAAGCCACTACCCCACCAACAAGCTGATAAGCCGCGAGTCCATCCCCAACCGCCGAAACTTTCCAACCCCCACCATGCAGCAGGAGCTCCTATCCGGTATTAGCCCCAGTTTCCTGAAGTTATCCCAAAGTCAAGGGCAGGTTACTCACGTGTTACTCACCCGTTCGCC
"""

    expected_map = """FS8APND01EWRS4:
FS8APND01DXG45:
FS8APND01D3TW3:\tFS8APND01CSXFN\tFS8APND01DQ8MX\tFS8APND01DY7QW\tFS8APND01B5QNI\tFS8APND01CQ6OG\tFS8APND01C7IGN\tFS8APND01DHSGH\tFS8APND01DJ17E\tFS8APND01CUXOA\tFS8APND01EUTYG\tFS8APND01EKK7T\tFS8APND01D582W\tFS8APND01B5GWU\tFS8APND01D7N2A\tFS8APND01BJGHZ\tFS8APND01D6DYZ\tFS8APND01C6ZIM\tFS8APND01D2X6Y\tFS8APND01BUYCE\tFS8APND01BNUEY\tFS8APND01DKLOE\tFS8APND01C24PP\tFS8APND01EBWQX\tFS8APND01ELDYW\tFS8APND01B0GCS\tFS8APND01D4QXI\tFS8APND01EMYD9\tFS8APND01EA2SK\tFS8APND01DZOSO\tFS8APND01DHYAZ\tFS8APND01C7UD9\tFS8APND01BTZFV\tFS8APND01CR78R\tFS8APND01B39IE\tFS8APND01ECVC0\tFS8APND01DM3PL\tFS8APND01DELWS\tFS8APND01CIEK8\tFS8APND01D7ZOZ\tFS8APND01CZSAI\tFS8APND01DYOVR\tFS8APND01BX9XY\tFS8APND01DEWJA\tFS8APND01BEKIW\tFS8APND01DCKB9\tFS8APND01EEYIS\tFS8APND01DDKEA\tFS8APND01DSZLO\tFS8APND01C6EBC\tFS8APND01EE15M\tFS8APND01ELO9B\tFS8APND01C58QY\tFS8APND01DONCG\tFS8APND01DVXX2\tFS8APND01BL5YT\tFS8APND01BIL2V\tFS8APND01EBSYQ\tFS8APND01CCX8R\tFS8APND01B2YCJ\tFS8APND01B1JG4\tFS8APND01DJ024\tFS8APND01BIJY0\tFS8APND01CIA4G\tFS8APND01DV74M\tFS8APND01ECAX5\tFS8APND01DC3TZ\tFS8APND01EJVO6\tFS8APND01D4VFG\tFS8APND01DYYYO\tFS8APND01D1EDD\tFS8APND01DQUOT\tFS8APND01A2NSJ\tFS8APND01DDC8I\tFS8APND01BP1T2\tFS8APND01DPY6U\tFS8APND01CIQGV\tFS8APND01BPUT8\tFS8APND01BDNH4\tFS8APND01DOZDN\tFS8APND01DS866\tFS8APND01DGS2J\tFS8APND01EDK32\tFS8APND01EPA0T\tFS8APND01CK3JM\tFS8APND01BKLWW\tFS8APND01DV0BO\tFS8APND01DPNXE\tFS8APND01B7LUA\tFS8APND01BTTE2\tFS8APND01CKO4X\tFS8APND01DGGBY\tFS8APND01C4NHX\tFS8APND01DYPQN
FS8APND01BSTVP:
FS8APND01EFK0W:
FS8APND01DCIOO:
FS8APND01CKOMZ:
"""

    command = " ".join(["%s/denoiser.py" % get_qiime_scripts_dir(),
                        "--force", "-o", self.test_dir, "-i",
                        "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME])

    result = Popen(command, shell=True, universal_newlines=True,
                   stdout=PIPE, stderr=STDOUT).stdout.read()

    self.result_dir = self.test_dir

    observed = "".join(list(open(self.result_dir + "centroids.fasta")))
    self.assertEqual(observed, expected)

    self.assertEqual(len(list(MinimalFastaParser(open(self.result_dir + "singletons.fasta")))), 6)

    observed = "".join(list(open(self.result_dir + "denoiser_mapping.txt")))
    self.assertEqual(observed, expected_map)
Developer: cmhill, Project: qiime, Lines: 31, Source: test_denoiser.py
Example 3: test_main_split_cluster
def test_main_split_cluster(self):
    """Denoiser on cluster in split mode should always give same result on test data"""

    command = " ".join(
        [
            "%s/denoiser.py" % get_qiime_scripts_dir(),
            "-S",
            "--force",
            "-c",
            "-n 2",
            "-i",
            "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME,
            "-f",
            "%s/qiime/support_files/denoiser/TestData/test_set_seqs.fna" % PROJECT_HOME,
            "-o",
            self.test_dir,
        ]
    )

    result = Popen(command, shell=True, universal_newlines=True, stdout=PIPE, stderr=STDOUT).stdout.read()

    self.result_dir = self.test_dir

    for subdir in ["0/", "1/"]:
        observed = "".join(list(open(self.result_dir + subdir + "centroids.fasta")))
        self.assertEqual(observed, expected_centroids[subdir])

        observed = "".join(list(open(self.result_dir + subdir + "denoiser_mapping.txt")))
        self.assertEqual(observed, expected_map_string_on_cluster[subdir])
Developer: davidsoergel, Project: qiime, Lines: 28, Source: test_denoiser.py
Example 4: test_get_qiime_scripts_dir
def test_get_qiime_scripts_dir(self):
    """Test that we can find the directory containing QIIME scripts."""
    # get_qiime_scripts_dir will raise an error if it can't find a scripts
    # directory.
    scripts_dir = get_qiime_scripts_dir()
    self.assertTrue(isdir(scripts_dir), "The QIIME scripts directory does "
                    "not exist: %s" % scripts_dir)
Developer: Bonder-MJ, Project: qiime, Lines: 7, Source: print_qiime_config.py
Example 5: run_make_otu_heatmap_html
def run_make_otu_heatmap_html(otu_table_fp, mapping_fp, output_dir, params,
                              qiime_config,
                              command_handler, tree_fp,
                              status_update_callback=print_to_stdout):
    """ This function calls the make_otu_heatmap_html script """

    # define upper-level values
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    commands = []

    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)

    # get the user-defined parameters
    try:
        params_str = get_params_str(params['make_otu_heatmap_html'])
    except KeyError:
        params_str = ''

    # Build the make_otu_heatmap_html command
    heatmap_cmd = '%s %s/make_otu_heatmap_html.py -i %s -m %s -t %s -o %s %s' %\
        (python_exe_fp, script_dir, otu_table_fp, mapping_fp, tree_fp, output_dir,
         params_str)

    commands.append([('OTU Heatmap', heatmap_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands, status_update_callback, logger)

    return True
Developer: ElDeveloper, Project: qiime_web_app, Lines: 31, Source: handler_workflows.py
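Several of the workflow-style examples (run_make_otu_heatmap_html above, and the run_process_* and run_ampliconnoise functions below) build a commands list of [(description, shell_command)] groups and hand it to a command_handler callable. QIIME ships its own handlers (for example call_commands_serially); the simplified stand-in below is only a sketch of the expected shape, not QIIME's implementation.

# Simplified, illustrative command handler: runs each command serially and
# logs it. Not QIIME's own implementation, just a sketch of the expected shape.
from subprocess import Popen, PIPE, STDOUT

def simple_command_handler(commands, status_update_callback, logger,
                           close_logger_on_success=True):
    for command_group in commands:
        for description, cmd in command_group:
            status_update_callback('Running: %s' % description)
            logger.write('# %s\n%s\n' % (description, cmd))
            proc = Popen(cmd, shell=True, universal_newlines=True,
                         stdout=PIPE, stderr=STDOUT)
            stdout = proc.stdout.read()
            if proc.wait() != 0:
                logger.write(stdout)
                raise RuntimeError('Command failed: %s' % cmd)
    if close_logger_on_success:
        logger.close()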
Example 6: test_main_with_titanium_error
def test_main_with_titanium_error(self):
    """Denoiser with titanium error should always give same result on test data"""

    command = " ".join(
        [
            "%s/denoiser.py" % get_qiime_scripts_dir(),
            "--force",
            "-o",
            self.test_dir,
            "-i",
            "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME,
            "-f",
            "%s/qiime/support_files/denoiser/TestData/test_set_seqs.fna" % PROJECT_HOME,
            "-e",
            "%s/qiime/support_files/denoiser/Data/Titanium_error_profile.dat" % PROJECT_HOME,
        ]
    )

    result = Popen(command, shell=True, universal_newlines=True, stdout=PIPE, stderr=STDOUT).stdout.read()

    self.result_dir = self.test_dir

    observed = "".join(list(open(self.result_dir + "centroids.fasta")))
    self.assertEqual(observed, self.expected)

    observed = "".join(list(open(self.result_dir + "denoiser_mapping.txt")))
    self.assertEqual(observed, self.expected_titanium_map_string)
Developer: davidsoergel, Project: qiime, Lines: 26, Source: test_denoiser.py
Example 7: summarize_otus
def summarize_otus(processed_dir):
    """
    """
    per_library_stats_file = join(processed_dir, 'gg_97_otus/per_library_stats.txt')

    # Generate the per_library_stats_file if it doesn't already exist
    if not exists(per_library_stats_file):
        qiime_config = load_qiime_config()
        biom_file = join(processed_dir, 'gg_97_otus/exact_uclust_ref_otu_table.biom')
        python_exe_fp = qiime_config['python_exe_fp']
        script_dir = get_qiime_scripts_dir()
        per_library_stats_script = join(script_dir, 'per_library_stats.py')
        command = '{0} {1} -i {2}'.format(python_exe_fp, per_library_stats_script, biom_file)

        # Run the script and produce the per_library_stats.txt
        proc = Popen(command, shell=True, universal_newlines=True, stdout=PIPE, stderr=STDOUT)
        return_value = proc.wait()
        f = open(per_library_stats_file, 'w')
        f.write(proc.stdout.read())
        f.close()

    # File exists, parse out details
    start_lines = ['Seqs/sample detail:']
    header_lines, otu_summary_dict = parse_log_file(per_library_stats_file, start_lines)

    return header_lines, otu_summary_dict
Developer: ElDeveloper, Project: qiime_web_app, Lines: 25, Source: summarize_seqs_otu_hits.py
Example 8: run_process_illumina_through_split_lib
def run_process_illumina_through_split_lib(study_id, run_prefix, input_fp,
                                           mapping_fp, output_dir,
                                           command_handler, params, qiime_config,
                                           write_to_all_fasta=False,
                                           status_update_callback=print_to_stdout):
    """ NOTE: Parts of this function are directly copied from the
        run_qiime_data_preparation function from the workflow.py library file
        in QIIME.

        The steps performed by this function are:
            1) De-multiplex sequences. (split_libraries_fastq.py)
    """

    # Prepare some variables for the later steps
    filenames = input_fp.split(',')
    commands = []
    create_dir(output_dir)
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)

    # copy the mapping file
    copied_mapping = split(mapping_fp)[-1]
    mapping_input_fp_copy = join(output_dir, copied_mapping)
    copy_mapping_cmd = 'cp %s %s' % (mapping_fp, mapping_input_fp_copy)
    commands.append([('CopyMapping', copy_mapping_cmd)])

    # sort the filenames
    filenames.sort()

    # determine which file is seq-file and which is barcode-file and associate
    # to mapping file
    if len(filenames) == 1:
        try:
            # Format of sample_id needs to be seqs_<sample_name>.<sequence_prep_id>.fastq
            data_access = data_access_factory(ServerConfig.data_access_type)
            sql = """
                select s.sample_name || '.' || sp.sequence_prep_id
                from sample s
                    inner join sequence_prep sp
                        on s.sample_id = sp.sample_id
                where s.study_id = {0}
                    and sp.run_prefix = '{1}'
            """.format(study_id, run_prefix[:-1])
            sample_and_prep = data_access.dynamicMetadataSelect(sql).fetchone()[0]
            input_str = '-i {0} --sample_id {1}'.format(filenames[0], sample_and_prep)
        except Exception, e:
            error = 'Failed to obtain sample and sequence prep info for study_id {0} and run_prefix {1}\n'.format(study_id, run_prefix)
            error += 'SQL was: \n {0} \n'.format(sql)
            error += 'Original exception was: \n {0}'.format(str(e))
            raise Exception(error)
Developer: ElDeveloper, Project: qiime_web_app, Lines: 54, Source: run_process_sff_through_split_lib.py
Example 9: test_main_low_mem
def test_main_low_mem(self):
    """Denoiser works using low_memory"""

    command = " ".join(["%s/denoiser.py" % get_qiime_scripts_dir(),
                        "-f", "%s/qiime/support_files/denoiser/TestData/test_set_seqs.fna" % PROJECT_HOME,
                        "-i", "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME,
                        "-o", self.test_dir, "--low_memory"])

    result = Popen(command, shell=True, universal_newlines=True,
                   stdout=PIPE, stderr=STDOUT).stdout.read()

    self.result_dir = self.test_dir

    observed = "".join(list(open(self.result_dir + "centroids.fasta")))
    self.assertEqual(observed, self.expected)
Developer: cmhill, Project: qiime, Lines: 14, Source: test_denoiser.py
Example 10: __init__
def __init__(self,
             python_exe_fp=qiime_config['python_exe_fp'],
             cluster_jobs_fp=qiime_config['cluster_jobs_fp'],
             jobs_to_start=int(qiime_config['jobs_to_start']),
             poller_fp=join(get_qiime_scripts_dir(), 'poller.py'),
             retain_temp_files=False,
             suppress_polling=False,
             seconds_to_sleep=int(qiime_config['seconds_to_sleep'])):
    """ """
    self._python_exe_fp = python_exe_fp
    self._cluster_jobs_fp = cluster_jobs_fp
    self._jobs_to_start = jobs_to_start
    self._poller_fp = poller_fp
    self._retain_temp_files = retain_temp_files
    self._suppress_polling = suppress_polling
    self._seconds_to_sleep = seconds_to_sleep
Developer: DDomogala3, Project: qiime, Lines: 17, Source: util.py
Example 11: get_html
def get_html(script_name, column_headers, help_img):
    '''generate the html table rows for a given QIIME script'''

    script_dir = get_qiime_scripts_dir()
    dirname, fname = script_path_components(os.path.join(script_dir, script_name))
    script_info_dict = get_script_info(dirname, fname)
    fname, fname_ext = os.path.splitext(script_name)

    # get all the required options
    required_html = get_html_for_options(fname, script_info_dict,
                                         'required_options', column_headers,
                                         help_img)

    # get all the optional options
    optional_html = get_html_for_options(fname, script_info_dict,
                                         'optional_options', column_headers,
                                         help_img)

    return required_html, optional_html
Developer: ElDeveloper, Project: qiime_web_app, Lines: 18, Source: html_from_script.py
Example 12: setUp
def setUp(self):
    """ """
    self.headers = ['head1', 'head2', 'head3']

    self.test_option_file = make_option('-i', '--coord_fname',
                                        help='Input principal coordinates filepath',
                                        type='existing_path')

    self.test_option_colorby = make_option('-b', '--colorby', dest='colorby',
                                           help='Comma-separated list categories metadata categories' +
                                           ' (column headers) [default=color by all]')

    self.test_option_custom_axes = make_option('-a', '--custom_axes',
                                               help='This is the category from the metadata mapping file' +
                                               ' [default: %default]')

    self.test_option_choice = make_option('-k', '--background_color',
                                          help='Background color to use in the plots.[default: %default]',
                                          default='black', type='choice', choices=['black', 'white'])

    self.test_option_float = make_option('--ellipsoid_opacity',
                                         help='Used only when plotting ellipsoids for jackknifed' +
                                         ' beta diversity (i.e. using a directory of coord files' +
                                         ' [default=%default]',
                                         default=0.33, type=float)

    self.test_option_int = make_option('--n_taxa_keep',
                                       help='Used only when generating BiPlots. This is the number ' +
                                       ' to display. Use -1 to display all. [default: %default]',
                                       default=10, type=int)

    self.test_option_true = make_option('--suppress_html_output',
                                        dest='suppress_html_output',
                                        default=False, action='store_true',
                                        help='Suppress HTML output. [default: %default]')

    self.test_option_false = make_option('--suppress_html_output',
                                         dest='suppress_html_output',
                                         default=True, action='store_false',
                                         help='Suppress HTML output. [default: %default]')

    self.option_labels = {'coord_fname': 'Principal coordinates filepath',
                          'colorby': 'Colorby category',
                          'background_color': 'Background color',
                          'ellipsoid_opacity': 'Ellipsoid opacity',
                          'n_taxa_keep': '# of taxa to keep',
                          'custom_axes': 'Custom Axis'}

    self.script_dir = get_qiime_scripts_dir()
    self.test_script_info = get_script_info(self.script_dir,
                                            'make_qiime_rst_file')
Developer: ElDeveloper, Project: qiime_web_app, Lines: 44, Source: test_html_from_script.py
Example 13: preprocess_on_cluster
def preprocess_on_cluster(sff_fps, log_fp, fasta_fp=None, out_fp="/tmp/",
                          squeeze=False, verbose=False,
                          primer=STANDARD_BACTERIAL_PRIMER):
    """Call preprocess via cluster_jobs_script on the cluster.

    sff_fps: List of paths to flowgram files.
    log_fp: path to log file
    fasta_fp: Path to fasta file, formatted as from split_libraries.py.
              This file is used to filter the flowgrams in sff_fps. Only reads in
              fasta_fp are pulled from sff_fps.
    out_fp: path to output directory
    verbose: a binary verbose flag
    squeeze: a flag that controls if sequences are squeezed before phase I.
             Squeezing means consecutive identical nucs are collapsed to one.
    primer: The primer sequences of the amplification process. This seq will be
            removed from all reads during the preprocessing
    """
    qiime_config = load_qiime_config()
    python_bin = qiime_config['python_exe_fp']

    cmd = "%s %s/denoiser_preprocess.py -i %s -l %s -o %s" %\
        (python_bin, get_qiime_scripts_dir(),
         ",".join(sff_fps), log_fp, out_fp)

    if (fasta_fp):
        cmd += " -f %s" % fasta_fp
    if (squeeze):
        cmd += " -s"
    if verbose:
        cmd += " -v"
    if primer:
        cmd += " -p %s" % primer

    submit_jobs([cmd], "pp_" + make_tmp_name(6))
    wait_for_file(out_fp + "/prefix_mapping.txt", 10)
Developer: cleme, Project: qiime, Lines: 42, Source: preprocess.py
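The last two lines above show the asynchronous pattern used here: submit_jobs queues the command on the cluster and returns immediately, then wait_for_file blocks until the expected output appears. The helper below is only a rough, hypothetical sketch of what such a wait looks like; it is not QIIME's wait_for_file implementation, and the timeout value is an assumption.

# Illustrative only: a rough idea of a "wait for output file" helper.
# Not QIIME's wait_for_file; interval/timeout values are assumptions.
from os.path import exists
from time import sleep

def wait_for_file_sketch(filepath, interval=10, timeout=3600):
    waited = 0
    while not exists(filepath):
        if waited >= timeout:
            raise RuntimeError('Timed out waiting for %s' % filepath)
        sleep(interval)
        waited += interval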
Example 14: test_denoise_worker
def test_denoise_worker(self):
    """denoiser_worker.py is where it belongs and is callable."""

    qiime_config = load_qiime_config()

    PYTHON_BIN = qiime_config["python_exe_fp"]
    DENOISE_WORKER = get_qiime_scripts_dir() + "/denoiser_worker.py"

    self.assertTrue(exists(DENOISE_WORKER),
                    "DENOISER_WORKER is not where it's supposed to be: %s" % DENOISE_WORKER)

    # test if it's callable and actually works
    command = "%s %s -h" % (PYTHON_BIN, DENOISE_WORKER)
    proc = Popen(command, shell=True, universal_newlines=True, stdout=PIPE, stderr=STDOUT)

    if proc.wait() != 0:
        self.fail("Calling %s failed. Check permissions and that it is in fact an executable." % DENOISE_WORKER)

    result = proc.stdout.read()
    # check that the help string looks correct
    self.assertTrue(result.startswith("Usage"))
Developer: ranjit58, Project: qiime, Lines: 20, Source: test_settings.py
Example 15: add_taxa_to_biom
def add_taxa_to_biom(input_dir, study):
    """ Add the retrained taxonomy assignments to the biom-table """

    # get the study directory
    study_input_dir = join(input_dir, 'study_%s' % (str(study)))

    # get a list of all processed folders
    processed_folders = listdir(study_input_dir)
    for processed_folder in processed_folders:
        # make sure it is a processed folder produced by the DB
        if processed_folder.startswith('processed'):
            # get the biom file for this particular run
            gg_biom_fp = join(study_input_dir, processed_folder,
                              'gg_97_otus', 'exact_uclust_ref_otu_table.biom')

            # make sure the path exists and if not create the biom-table
            # only applicable to studies processed prior to biom-format
            if not exists(gg_biom_fp):
                # get the classic OTU table path
                gg_otu_table_fp = join(study_input_dir, processed_folder,
                                       'gg_97_otus',
                                       'exact_uclust_ref_otu_table.txt')

                # make sure path exists and convert classic to biom-table
                if exists(gg_otu_table_fp):
                    system("python %s/software/biom-format/scripts/convert_biom.py -i %s -o %s --biom_table_type='otu table'" %
                           (environ['HOME'], gg_otu_table_fp, gg_biom_fp))

            try:
                # add the taxa to the biom-table
                # if it exists already, this will fail out
                system("python %s/add_taxa.py -i %s -t %s/phpr_taxonomy.txt -o %s" %
                       (get_qiime_scripts_dir(), gg_biom_fp, environ['HOME'],
                        gg_biom_fp))
            except:
                pass
Developer: ElDeveloper, Project: qiime_web_app, Lines: 36, Source: collate_per_study_seqs_lib.py
Example 16: run_process_sff_through_split_lib
def run_process_sff_through_split_lib(study_id, run_prefix, sff_input_fp,
                                      mapping_fp, output_dir,
                                      command_handler, params, qiime_config,
                                      convert_to_flx=False, write_to_all_fasta=False,
                                      status_update_callback=print_to_stdout):
    """ NOTE: Parts of this function are directly copied from the
        run_qiime_data_preparation function from the workflow.py library file
        in QIIME.

        The steps performed by this function are:
            1) Process SFFs to generate .fna, .qual and flowgram file.
               (process_sff.py)
            2) De-multiplex sequences. (split_libraries.py)
    """

    # Prepare some variables for the later steps
    sff_filenames = sff_input_fp.split(',')
    commands = []
    create_dir(output_dir)
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    # generate a log file
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)

    make_flowgram = True
    split_lib_fasta_input_files = []
    split_lib_qual_input_files = []
    denoise_flow_input_files = []

    # make a copy of the mapping file
    copied_mapping = split(mapping_fp)[-1]
    mapping_input_fp_copy = join(output_dir, copied_mapping)
    copy_mapping_cmd = 'cp %s %s' % (mapping_fp, mapping_input_fp_copy)
    commands.append([('CopyMapping', copy_mapping_cmd)])

    # iterate over SFFs and match to the mapping file
    for sff_input_fp in sff_filenames:
        # GENERATE THE MD5 HERE AND STORE IN THE DATABASE AFTER FILE
        # SUCCESSFULLY PROCESSED

        # Copy the SFF into the processed files directory
        copied_sff = split(sff_input_fp)[-1]
        sff_input_fp_copy = join(output_dir, copied_sff)

        # Generate filenames for split_libraries
        input_dir, input_filename = split(sff_input_fp)
        if is_gzip(sff_input_fp) and sff_input_fp.endswith('.gz'):
            input_basename, input_ext = splitext(splitext(input_filename)[0])
        else:
            input_basename, input_ext = splitext(input_filename)

        # Convert sff file into fasta, qual and flowgram file
        if convert_to_flx:
            if study_id in ['496', '968', '969', '1069', '1002', '1066', '1194', '1195', '1457', '1458', '1460', '1536', '1918', '1962']:
                ### this function is for handling files where the barcode and
                ### linkerprimer are all lowercase (i.e. HMP data or SRA data)

                # write process_sff command
                process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s -t --no_trim --use_sfftools' %\
                    (python_exe_fp, script_dir, sff_input_fp,
                     output_dir)
                #process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s -t' % (python_exe_fp, script_dir, sff_input_fp, output_dir)
                commands.append([('ProcessSFFs', process_sff_cmd)])

                # define output fasta from process_sff
                no_trim_fasta_fp = join(output_dir, input_basename + '_FLX.fna')

                # define pprospector scripts dir
                pprospector_scripts_dir = join(ServerConfig.home, 'software',
                                               'pprospector', 'scripts')

                # clean fasta - basically converting lowercase to uppercase
                clean_fasta_cmd = '%s %s/clean_fasta.py -f %s -o %s' %\
                    (python_exe_fp, pprospector_scripts_dir,
                     no_trim_fasta_fp, output_dir)
                commands.append([('CleanFasta', clean_fasta_cmd)])

                # move the cleaned file to be consistent with other processes
                cleaned_fasta_fp = join(output_dir, input_basename +
                                        '_FLX_filtered.fasta')
                moved_fasta_fp = join(output_dir, input_basename + '_FLX.fna')
                mv_cmd = 'mv %s %s' % (cleaned_fasta_fp, moved_fasta_fp)
                commands.append([('RenameFasta', mv_cmd)])

                # update the split-lib files to use the cleaned file
                split_lib_fasta_input_files.append(moved_fasta_fp)
                split_lib_qual_input_files.append(join(output_dir,
                                                       input_basename + '_FLX.qual'))
                denoise_flow_input_files.append(join(output_dir,
                                                     input_basename + '_FLX.txt'))
            else:
                # write process_sff command
                process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s -t' %\
                # ......... (remainder of this example omitted in the source) .........
Developer: ElDeveloper, Project: qiime_web_app, Lines: 101, Source: run_process_sff_through_split_lib.py
Example 17: run_process_fasta_through_split_lib
def run_process_fasta_through_split_lib(study_id, run_prefix, input_fp,
                                        mapping_fp, output_dir,
                                        command_handler, params, qiime_config,
                                        write_to_all_fasta=False,
                                        status_update_callback=print_to_stdout):
    """ NOTE: Parts of this function are directly copied from the
        run_qiime_data_preparation function from the workflow.py library file
        in QIIME.

        The steps performed by this function are:
            1) Update sequence names using DB accessions
    """

    # Prepare some variables for the later steps
    filenames = input_fp.split(',')
    commands = []
    create_dir(output_dir)
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)

    # copy the mapping file
    copied_mapping = split(mapping_fp)[-1]
    mapping_input_fp_copy = join(output_dir, copied_mapping)
    copy_mapping_cmd = 'cp %s %s' % (mapping_fp, mapping_input_fp_copy)
    commands.append([('CopyMapping', copy_mapping_cmd)])

    # sort filenames
    filenames.sort()

    # create split_libraries directory
    split_library_output = join(output_dir, 'split_libraries')
    create_dir(split_library_output)

    # Call the command handler on the list of commands
    command_handler(commands, status_update_callback, logger=logger)

    # define output filepath
    output_fp = join(split_library_output, 'seqs.fna')

    # re-write the sequence file
    outf = open(output_fp, 'w')

    # get sample-info from mapping file
    map_data, map_header, map_comments = parse_mapping_file(
        open(mapping_input_fp_copy, 'U'))

    # create dictionary of original sample_ids to new sample_ids
    sample_ids_from_mapping = zip(*map_data)[0]
    sample_id_dict = {}
    for sample in sample_ids_from_mapping:
        sample_id_dict['.'.join(sample.split('.')[:-1])] = sample

    # NEED to be able to just pass fasta file, since mapping sample_ids
    # will not match input fasta file ever
    '''
    fasta_check=run_fasta_checks(input_fp,mapping_input_fp_copy)

    if float(fasta_check['invalid_labels']) > 0:
        raise ValueError, "There are invalid sequence names in the Original sequence file"
    #elif float(fasta_check['barcodes_detected']) > 0:
    #    raise ValueError, "There are barcode sequences found in the Original sequence file"
    elif float(fasta_check['duplicate_labels']) > 0:
        raise ValueError, "There are duplicate sequence names in the Original sequence file"
    elif float(fasta_check['invalid_seq_chars']) > 0:
        raise ValueError, "There are invalid nucleotides in the sequence Original file (i.e. not A,C,G,T or N)"
    #elif float(fasta_check['linkerprimers_detected']) > 0:
    #    raise ValueError, "There are linker primer sequences in the Original sequence file"
    #elif float(fasta_check['nosample_ids_map']) > 0.20:
    #    raise ValueError, "More than 20% of the samples in the mapping file do not have sequences"
    '''

    # parse the sequences
    sequences = MinimalFastaParser(open(input_fp), 'U')

    # update fasta file with new DB SampleIDs and create new split-lib seqs
    # file
    num = 1
    for seq_name, seq in sequences:
        seq_name_arr = seq_name.split()
        updated_seq_name = sample_id_dict['_'.join(seq_name_arr[0].split('_')[:-1])] + \
            '_' + str(num) + ' ' + ' '.join(seq_name_arr[1:])
        num = num + 1
        outf.write('>%s\n%s\n' % (updated_seq_name, seq))

    outf.close()

    # Return the fasta file paths
    return filenames
Developer: ElDeveloper, Project: qiime_web_app, Lines: 93, Source: run_process_sff_through_split_lib.py
Example 18: run_ampliconnoise
def run_ampliconnoise(mapping_fp,
                      output_dir, command_handler, params, qiime_config,
                      logger=None, status_update_callback=print_to_stdout,
                      chimera_alpha=-3.8228, chimera_beta=0.6200, sff_txt_fp=None, numnodes=2,
                      suppress_perseus=True, output_filepath=None, platform='flx',
                      seqnoise_resolution=None, truncate_len=None):
    """ Run the ampliconnoise pipeline

    The steps performed by this function are:
    1. Split input sff.txt file into one file per sample
    2. Run scripts required for PyroNoise
    3. Run scripts required for SeqNoise
    4. Run scripts required for Perseus (chimera removal)
    5. Merge output files into one file similar to the output of split_libraries.py

    output_filepath should be absolute
    seqnoise_resolution should be a string
    environment variable PYRO_LOOKUP_FILE must be set correctly. Thus be
    careful passing command handlers that don't spawn child processes, as they
    may not inherit the correct environment variable setting.
    """
    map_data, headers, comments = parse_mapping_file(open(mapping_fp, 'U'))
    create_dir(output_dir)

    if seqnoise_resolution == None:
        if platform == 'flx':
            seqnoise_resolution = '30.0'
        elif platform == 'titanium':
            seqnoise_resolution = '25.0'
        else:
            raise RuntimeError('seqnoise_resolution not set, and no' +
                               ' default for platform ' + platform)

    if truncate_len == None:
        if platform == 'flx':
            truncate_len = '220'
        elif platform == 'titanium':
            truncate_len = '400'
        else:
            raise RuntimeError('truncate_len not set, and no' +
                               ' default for platform ' + platform)

    sample_names = []  # these are filenames minus extension, and are sample IDs
    primer_seqs = []  # same order as sample_names
    bc_seqs = []  # same order as sample_names
    for i in range(len(map_data)):
        sample_names.append(map_data[i][headers.index('SampleID')])
        bc_seqs.append(map_data[i][headers.index('BarcodeSequence')])

        # don't know why don't just take off the primer now.
        # but that's done later
        # primer += (map_data[i][headers.index('LinkerPrimerSequence')])
        # for char, bases in IUPAC_DNA_ambiguities.items():
        #     primer = primer.replace(char,'['+''.join(bases)+']')

        primer = (map_data[i][headers.index('LinkerPrimerSequence')])
        for char, bases in IUPAC_DNA_ambiguities.items():
            primer = primer.replace(char, '[' + ''.join(bases) + ']')
        primer_seqs.append(primer)

    if len(set(primer_seqs)) != 1:
        raise RuntimeError(
            'Error: only one primer per mapping file supported.')
    one_primer = primer_seqs[0]

    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    log_input_md5s(logger, [mapping_fp, sff_txt_fp])

    # execute commands in output_dir
    called_dir = os.getcwd()
    os.chdir(output_dir)
    fh = open(os.path.join(output_dir, 'map.csv'), 'w')
    for i in range(len(sample_names)):
        fh.write(sample_names[i] + ',' + bc_seqs[i] + '\n')
    fh.close()

    # these are the fasta results, e.g. PC.636_Good.fa
    # later we merge them and copy to output file
    post_pyro_tail = '_' + truncate_len
    if suppress_perseus == True:
        fasta_result_names = [sample_name + post_pyro_tail + '_seqnoise_cd.fa'
                              for sample_name in sample_names]
    else:
        fasta_result_names = [sample_name + '_Good.fa'
                              for sample_name in sample_names]

    cmd = 'cd ' + output_dir  # see also os.chdir above
    commands.append([('change to output dir', cmd)])

    cmd = 'echo $PYRO_LOOKUP_FILE > pyro_lookup_filepath.txt'
    commands.append([('confirm pyro lookup filepath environment variable',
                      cmd)])
    # ......... (remainder of this example omitted in the source) .........
Developer: gxenomics, Project: qiime, Lines: 101, Source: ampliconnoise.py
Example 19: main
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.suppress_unit_tests and opts.suppress_script_tests and opts.suppress_script_usage_tests:
        option_parser.error("You're suppressing all three test types. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    unittest_good_pattern = re.compile("OK\s*$")
    application_not_found_pattern = re.compile("ApplicationNotFoundError")
    python_name = "python"
    bad_tests = []
    missing_application_tests = []

    # Run through all of QIIME's unit tests, and keep track of any files which
    # fail unit tests.
    if not opts.suppress_unit_tests:
        unittest_names = []
        if not opts.unittest_glob:
            for root, dirs, files in walk(test_dir):
                for name in files:
                    if name.startswith("test_") and name.endswith(".py"):
                        unittest_names.append(join(root, name))
        else:
            for fp in glob(opts.unittest_glob):
                fn = split(fp)[1]
                if fn.startswith("test_") and fn.endswith(".py"):
                    unittest_names.append(abspath(fp))

        unittest_names.sort()

        for unittest_name in unittest_names:
            print "Testing %s:\n" % unittest_name
            command = "%s %s -v" % (python_name, unittest_name)
            stdout, stderr, return_value = qiime_system_call(command)
            print stderr
            if not unittest_good_pattern.search(stderr):
                if application_not_found_pattern.search(stderr):
                    missing_application_tests.append(unittest_name)
                else:
                    bad_tests.append(unittest_name)

    bad_scripts = []
    if not opts.suppress_script_tests:
        # Run through all of QIIME's scripts, and pass -h to each one. If the
        # resulting stdout does not begin with the Usage text, that is an
        # indicator of something being wrong with the script. Issues that would
        # cause that are bad import statements in the script, SyntaxErrors, or
        # other failures prior to running qiime.util.parse_command_line_parameters.
        try:
            scripts_dir = get_qiime_scripts_dir()
            script_directory_found = True
        except AssertionError:
            script_directory_found = False

        if script_directory_found:
            script_names = []
            script_names = glob("%s/*py" % scripts_dir)
            script_names.sort()

            for script_name in script_names:
                script_good_pattern = r
                # ......... (remainder of this example omitted in the source) .........