本文整理汇总了Python中qiime.denoiser.utils.check_flowgram_ali_exe函数的典型用法代码示例。如果您正苦于以下问题:Python check_flowgram_ali_exe函数的具体用法?Python check_flowgram_ali_exe怎么用?Python check_flowgram_ali_exe使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了check_flowgram_ali_exe函数的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_denoiser_supported_version
def test_denoiser_supported_version(self):
    """denoiser aligner is ready to use """
    pass_test = True
    try:
        check_flowgram_ali_exe()
    except (ApplicationNotFoundError, ApplicationError):
        pass_test = False
    # Bug fix: the original message concatenated two sentences without a
    # separating space ("...not executable.This may...").  Use adjacent
    # string literals (implicit concatenation) instead of "+" and "\".
    self.assertTrue(pass_test,
                    "Denoiser flowgram aligner not found or not executable. "
                    "This may or may not be a problem depending on "
                    "which components of QIIME you plan to use.")
开发者ID:cmhill,项目名称:qiime,代码行数:12,代码来源:print_qiime_config.py
示例2: get_flowgram_distances
def get_flowgram_distances(id, flowgram, flowgrams, fc, ids, outdir,
                           error_profile=DENOISER_DATA_DIR +
                           'FLX_error_profile.dat'):
    """Computes distance scores of flowgram to all flowgrams in parser.

    id: The flowgram identifier, also used to name intermediate files
    flowgram: This flowgram is used to filter all the other flowgrams
    flowgrams: iterable filehandle of flowgram file
    fc: a sink for flowgrams, either a FlowgramContainerArray or
        FlowgramContainerFile object
    ids: dict of ids of flowgrams in flowgrams that should be aligned
    outdir: directory for intermediate files
    error_profile: path to error profile *.dat file

    Returns (scores, names, fc): score rows parsed from the external
    aligner's output, the flowgram names in the same order, and the
    filled flowgram sink.
    """
    check_flowgram_ali_exe()
    # File that serves as input for external alignment program
    (fh, tmpfile) = init_flowgram_file(prefix=outdir)
    try:
        append_to_flowgram_file(id, flowgram, fh)
        k = 0
        names = []
        for f in flowgrams:
            if (f.Name in ids):
                fc.add(f)
                append_to_flowgram_file(f.Name, f, fh, trim=False)
                k += 1
                names.append(f.Name)
        fh.close()
        # TODO: capture stderr and warn user
        scores_fh = popen("%s -relscore_pairid %s %s " %
                          (get_flowgram_ali_exe(),
                           error_profile, tmpfile), 'r')
        try:
            scores = [map(float, (s.split())) for s in scores_fh if s != "\n"]
        finally:
            # Bug fix: the pipe handle was never closed in the original.
            scores_fh.close()
        # One score row is expected per flowgram appended above.
        if (k != len(scores)):
            raise RuntimeError("Something bad has happened! I received less " +
                               "alignment scores than there are flowgrams. Most likely this " +
                               "means that the alignment program is not setup or corrupted. " +
                               "Please run the test scripts to figure out the cause of the error.")
    finally:
        # Bug fix: the temp file used to leak whenever an exception was
        # raised before the success-path remove(); always clean it up.
        remove(tmpfile)
    return (scores, names, fc)
开发者ID:Bonder-MJ,项目名称:qiime,代码行数:50,代码来源:flowgram_clustering.py
示例3: setUp
def setUp(self):
# abort all tests without the alignment binary
check_flowgram_ali_exe()
signal.signal(signal.SIGALRM, timeout)
# set the 'alarm' to go off in allowed_seconds seconds
signal.alarm(allowed_seconds_per_test)
self.test_dir = "denoiser_main_test" + make_tmp_name() + "/"
self.expected = ">FS8APND01D3TW3 | cluster size: 94 \nCTGGGCCGTATCTCAGTCCCAATGTGGCCGGTCACCCTCTCAGGCCGGCTACCCGTCAAAGCCTTGGTAAGCCACTACCCCACCAACAAGCTGATAAGCCGCGAGTCCATCCCCAACCGCCGAAACTTTCCAACCCCCACCATGCAGCAGGAGCTCCTATCCGGTATTAGCCCCAGTTTCCTGAAGTTATCCCAAAGTCAAGGGCAGGTTACTCACGTGTTACTCACCCGTTCGCC\n"
self.expected_map_string = """FS8APND01EWRS4:
FS8APND01BSTVP:
FS8APND01DXG45:
FS8APND01D3TW3:\tFS8APND01CSXFN\tFS8APND01DQ8MX\tFS8APND01DY7QW\tFS8APND01B5QNI\tFS8APND01CQ6OG\tFS8APND01C7IGN\tFS8APND01DHSGH\tFS8APND01DJ17E\tFS8APND01CUXOA\tFS8APND01EUTYG\tFS8APND01EKK7T\tFS8APND01D582W\tFS8APND01B5GWU\tFS8APND01D7N2A\tFS8APND01BJGHZ\tFS8APND01D6DYZ\tFS8APND01C6ZIM\tFS8APND01D2X6Y\tFS8APND01BUYCE\tFS8APND01BNUEY\tFS8APND01DKLOE\tFS8APND01C24PP\tFS8APND01EBWQX\tFS8APND01ELDYW\tFS8APND01B0GCS\tFS8APND01D4QXI\tFS8APND01EMYD9\tFS8APND01EA2SK\tFS8APND01DZOSO\tFS8APND01DHYAZ\tFS8APND01C7UD9\tFS8APND01BTZFV\tFS8APND01CR78R\tFS8APND01B39IE\tFS8APND01ECVC0\tFS8APND01DM3PL\tFS8APND01DELWS\tFS8APND01CIEK8\tFS8APND01D7ZOZ\tFS8APND01CZSAI\tFS8APND01DYOVR\tFS8APND01BX9XY\tFS8APND01DEWJA\tFS8APND01BEKIW\tFS8APND01DCKB9\tFS8APND01EEYIS\tFS8APND01DDKEA\tFS8APND01DSZLO\tFS8APND01C6EBC\tFS8APND01EE15M\tFS8APND01ELO9B\tFS8APND01C58QY\tFS8APND01DONCG\tFS8APND01DVXX2\tFS8APND01BL5YT\tFS8APND01BIL2V\tFS8APND01EBSYQ\tFS8APND01CCX8R\tFS8APND01B2YCJ\tFS8APND01B1JG4\tFS8APND01DJ024\tFS8APND01BIJY0\tFS8APND01CIA4G\tFS8APND01DV74M\tFS8APND01ECAX5\tFS8APND01DC3TZ\tFS8APND01EJVO6\tFS8APND01D4VFG\tFS8APND01DYYYO\tFS8APND01D1EDD\tFS8APND01DQUOT\tFS8APND01A2NSJ\tFS8APND01DDC8I\tFS8APND01BP1T2\tFS8APND01DPY6U\tFS8APND01CIQGV\tFS8APND01BPUT8\tFS8APND01BDNH4\tFS8APND01DOZDN\tFS8APND01DS866\tFS8APND01DGS2J\tFS8APND01EDK32\tFS8APND01EPA0T\tFS8APND01CK3JM\tFS8APND01BKLWW\tFS8APND01DV0BO\tFS8APND01DPNXE\tFS8APND01B7LUA\tFS8APND01BTTE2\tFS8APND01CKO4X\tFS8APND01DGGBY\tFS8APND01C4NHX\tFS8APND01DYPQN
FS8APND01EFK0W:
FS8APND01DCIOO:
FS8APND01CKOMZ:
"""
self.expected_titanium_map_string = """FS8APND01EWRS4:
开发者ID:cleme,项目名称:qiime,代码行数:19,代码来源:test_denoiser.py
示例4: denoise_per_sample
def denoise_per_sample(sff_fps, fasta_fp, tmpoutdir, cluster=False,
                       num_cpus=1, squeeze=True, percent_id=0.97, bail=1,
                       primer="", low_cutoff=3.75, high_cutoff=4.5,
                       log_fp="denoiser.log", low_memory=False, verbose=False,
                       error_profile=DENOISER_DATA_DIR +
                       'FLX_error_profile.dat',
                       max_num_rounds=None, titanium=False):
    """Denoise each sample separately.

    sff_fps: list of input sff file paths
    fasta_fp: path to fasta file used to split the input per sample
    tmpoutdir: directory for intermediate and combined result files
    cluster: if True, run the denoising phases on a cluster
    num_cpus: number of CPUs to use
    squeeze: if True, use homopolymer-collapsed prefix matching
    percent_id: sequence identity threshold for the first phase
    bail: minimal sequence coverage for the first phase
    primer: primer sequence to strip
    low_cutoff/high_cutoff: score thresholds for greedy/secondary clustering
    log_fp: log file name (created inside tmpoutdir); falsy disables logging
    low_memory: trade speed for a smaller memory footprint
    verbose: write progress information to the log
    error_profile: path to flowgram error profile *.dat file
    max_num_rounds: maximal number of clustering iterations
    titanium: if True, switch to Titanium error profile and cut-offs

    Returns tmpoutdir, which then contains the combined denoiser output.
    """
    # abort early if binary is missing
    check_flowgram_ali_exe()
    log_fh = None
    if log_fp:
        # switch off buffering for global log file (Python 2: buffering=0)
        log_fh = open(tmpoutdir + "/" + log_fp, "w", 0)
    # overwrite settings if titanium is set
    # This flag is only used from qiime. Remove after qiime integration
    if titanium:
        error_profile = DENOISER_DATA_DIR + "Titanium_error_profile.dat"
        low_cutoff = 4
        high_cutoff = 5
    # Bug fix: the original wrote to log_fh whenever verbose was set, which
    # raised AttributeError when log_fp was empty/None (log_fh stays None).
    if verbose and log_fh is not None:
        log_fh.write("Denoiser version: %s\n" % __version__)
        log_fh.write("SFF files: %s\n" % ', '.join(sff_fps))
        log_fh.write("Fasta file: %s\n" % fasta_fp)
        log_fh.write("Cluster: %s\n" % cluster)
        log_fh.write("Num CPUs: %d\n" % num_cpus)
        log_fh.write("Squeeze Seqs: %s\n" % squeeze)
        log_fh.write("tmpdir: %s\n\n" % tmpoutdir)
        log_fh.write("percent_id threshold: %.2f\n" % percent_id)
        log_fh.write("Minimal sequence coverage for first phase: %d\n" % bail)
        log_fh.write("Error profile: %s\n" % error_profile)
        log_fh.write("Maximal number of iteration: %s\n\n" % max_num_rounds)
    # here we go ...
    sff_files = split_sff(map(open, sff_fps), open(fasta_fp), tmpoutdir)
    combined_mapping = {}
    result_centroids = []
    result_singletons_files = []
    # denoise each sample separately, each in its own numbered subdir
    for i, sff_file in enumerate(sff_files):
        if not exists(tmpoutdir + ("/%d" % i)):
            makedirs(tmpoutdir + ("/%d" % i))
        out_fp = tmpoutdir + ("/%d/" % i)
        denoise_seqs([sff_file], fasta_fp, out_fp, None, cluster,
                     num_cpus, squeeze, percent_id, bail, primer,
                     low_cutoff, high_cutoff, log_fp, low_memory,
                     verbose, error_profile, max_num_rounds)
        # collect partial results
        this_rounds_mapping = read_denoiser_mapping(
            open(out_fp + "/denoiser_mapping.txt"))
        combined_mapping.update(this_rounds_mapping)
        result_centroids.append(
            parse_fasta(open(out_fp + "/centroids.fasta")))
        result_singletons_files.append(out_fp + "/singletons.fasta")
    # write the combined files
    store_mapping(combined_mapping, tmpoutdir, "denoiser")
    seqs = chain(*result_centroids)
    fasta_fh = open(tmpoutdir + "/denoised.fasta", "w")
    # write centroids sorted by clustersize
    write_Fasta_from_name_seq_pairs(
        sort_seqs_by_clustersize(seqs, combined_mapping),
        fasta_fh)
    for singleton_file in result_singletons_files:
        write_Fasta_from_name_seq_pairs(
            parse_fasta(open(singleton_file, "r")),
            fasta_fh)
    fasta_fh.close()
    # return outdir for tests/test_denoiser
    return tmpoutdir
开发者ID:Bonder-MJ,项目名称:qiime,代码行数:76,代码来源:flowgram_clustering.py
示例5: denoise_seqs
def denoise_seqs(
        sff_fps, fasta_fp, tmpoutdir, preprocess_fp=None, cluster=False,
        num_cpus=1, squeeze=True, percent_id=0.97, bail=1, primer="",
        low_cutoff=3.75, high_cutoff=4.5, log_fp="denoiser.log",
        low_memory=False, verbose=False,
        error_profile=DENOISER_DATA_DIR + 'FLX_error_profile.dat',
        max_num_rounds=None, titanium=False, checkpoint_fp=None):
    """The main routine to denoise flowgrams.

    Runs three phases over the input sff data:
      I.   preprocess/truncate the input (or reuse/resume prior results),
      II.  greedy clustering with a low score threshold,
      III. assignment of remaining seqs to centroids with a high threshold.

    sff_fps: list of input sff file paths
    fasta_fp: path to fasta file of basecalled reads
    tmpoutdir: directory for intermediate and result files
    preprocess_fp: directory with already-preprocessed data to reuse
    cluster: if True, run preprocessing/clustering on a cluster
    num_cpus: number of CPUs to use
    squeeze: if True, use homopolymer-collapsed prefix matching
    percent_id: sequence identity threshold for the first phase
    bail: minimal sequence coverage for the first phase
    primer: primer sequence to strip
    low_cutoff/high_cutoff: score thresholds for phases II and III
    log_fp: log file name (created inside tmpoutdir when verbose)
    low_memory: trade speed for a smaller memory footprint
    verbose: open the log file and write progress information
    error_profile: path to flowgram error profile *.dat file
    max_num_rounds: maximal number of clustering iterations
    titanium: if True, switch to Titanium error profile and cut-offs
    checkpoint_fp: checkpoint file to resume from (requires preprocess_fp)

    Raises ApplicationError when checkpoint_fp is given without
    preprocess_fp.
    """
    # abort if binary is missing
    check_flowgram_ali_exe()
    if verbose:
        # switch of buffering for log file (Python 2: buffering=0)
        log_fh = open(tmpoutdir + "/" + log_fp, "w", 0)
    else:
        log_fh = None
    # overwrite settings if titanium is set
    # This flag is only used from qiime. Remove after qiime integration
    if titanium:
        error_profile = DENOISER_DATA_DIR + "Titanium_error_profile.dat"
        low_cutoff = 4
        high_cutoff = 5
    if verbose:
        # log_fh is guaranteed open here, since verbose controls both.
        log_fh.write("Denoiser version: %s\n" % __version__)
        log_fh.write("SFF files: %s\n" % ', '.join(sff_fps))
        log_fh.write("Fasta file: %s\n" % fasta_fp)
        log_fh.write("Preprocess dir: %s\n" % preprocess_fp)
        if checkpoint_fp:
            log_fh.write("Resuming denoiser from %s\n" % checkpoint_fp)
        log_fh.write("Primer sequence: %s\n" % primer)
        log_fh.write("Running on cluster: %s\n" % cluster)
        log_fh.write("Num CPUs: %d\n" % num_cpus)
        log_fh.write("Squeeze Seqs: %s\n" % squeeze)
        log_fh.write("tmpdir: %s\n" % tmpoutdir)
        log_fh.write("percent_id threshold: %.2f\n" % percent_id)
        log_fh.write("Minimal sequence coverage for first phase: %d\n" % bail)
        log_fh.write("Low cut-off: %.2f\n" % low_cutoff)
        log_fh.write("High cut-off: %.2f\n" % high_cutoff)
        log_fh.write("Error profile: %s\n" % error_profile)
        log_fh.write("Maximal number of iteration: %s\n\n" % max_num_rounds)
    # here we go ...
    # Phase I - clean up and truncate input sff
    if(checkpoint_fp):
        if (preprocess_fp):
            # skip preprocessing as we should have data
            # we already have preprocessed data, so use it
            (deprefixed_sff_fp, l, mapping,
             seqs) = read_preprocessed_data(preprocess_fp)
        else:
            # resuming makes no sense without the preprocessed inputs
            raise ApplicationError(
                "Resuming from checkpoint requires --preprocess option")
    else:
        if(preprocess_fp):
            # we already have preprocessed data, so use it
            (deprefixed_sff_fp, l, mapping,
             seqs) = read_preprocessed_data(preprocess_fp)
        elif(cluster):
            preprocess_on_cluster(sff_fps, log_fp, fasta_fp=fasta_fp,
                                  out_fp=tmpoutdir, verbose=verbose,
                                  squeeze=squeeze, primer=primer)
            (deprefixed_sff_fp, l, mapping,
             seqs) = read_preprocessed_data(tmpoutdir)
        else:
            (deprefixed_sff_fp, l, mapping, seqs) = \
                preprocess(
                    sff_fps, log_fh, fasta_fp=fasta_fp, out_fp=tmpoutdir,
                    verbose=verbose, squeeze=squeeze, primer=primer)
    # preprocessor writes into same file, so better jump to end of file
    if verbose:
        log_fh.close()
        log_fh = open(tmpoutdir + "/" + log_fp, "a", 0)
    # phase II:
    # use prefix map based clustering as initial centroids and greedily
    # add flowgrams to clusters with a low threshold
    (new_sff_file, bestscores, mapping) = \
        greedy_clustering(deprefixed_sff_fp, seqs, mapping, tmpoutdir, l,
                          log_fh, num_cpus=num_cpus, on_cluster=cluster,
                          bail_out=bail, pair_id_thresh=percent_id,
                          threshold=low_cutoff, verbose=verbose,
                          fast_method=not low_memory,
                          error_profile=error_profile,
                          max_num_rounds=max_num_rounds,
                          checkpoint_fp=checkpoint_fp)
    # phase III phase:
    # Assign seqs to nearest existing centroid with high threshold
    secondary_clustering(new_sff_file, mapping, bestscores, log_fh,
                         verbose=verbose, threshold=high_cutoff)
    remove(new_sff_file)
    if (verbose):
        log_fh.write("Finished clustering\n")
        log_fh.write("Writing Clusters\n")
#.........这里部分代码省略.........
开发者ID:Bonder-MJ,项目名称:qiime,代码行数:101,代码来源:flowgram_clustering.py
示例6: get_flowgram_distances_on_cluster
def get_flowgram_distances_on_cluster(
        id, flowgram, flowgrams, fc, ids, num_cores,
        num_flows, spread, client_sockets=[]):
    """Computes distance scores of flowgram to all flowgrams in parser.

    Distributes the alignment work over already-connected worker clients,
    then collects the per-worker score lists asynchronously.

    id: The flowgram identifier, also used to name intermediate files
    flowgram: This flowgram is used to filter all the other flowgrams
    flowgrams: iterable filehandle of flowgram file
    fc: a sink of flowgrams, which serves as source in the next round
    ids: list of flowgram ids that should be used from flowgrams
    num_cores: number of cpus
    num_flows: Number of flows in parser
    client_sockets: A list of open sockets for client-server communication
        (NOTE(review): mutable default argument; it is only read here,
        never mutated, so it is harmless but unidiomatic)
    spread: historical distribution of processing runtimes

    Returns (scores, names, fc).
    """
    # wall-clock start, used to rebalance future workloads
    epoch = time()
    check_flowgram_ali_exe()
    qiime_config = load_qiime_config()
    min_per_core = int(qiime_config['denoiser_min_per_core'])
    # if using from future import division this has to be checked,
    # as we want true integer division here
    per_core = max(min_per_core, (num_flows / num_cores) + 1)
    names = []
    scores = []
    # Need to call this here, since we iterate over the same iterator repeatedly.
    # Otherwise the call in ifilter will reset the iterator by implicitely calling __iter__.
    # test if iter does the same
    flowgrams_iter = flowgrams.__iter__()
    # prepare input files and commands
    # synchronous client-server communication
    workload = compute_workload(num_cores, num_flows, spread)
    # counts flowgrams actually dispatched; checked against score count below
    debug_count = 0
    for i in range(num_cores):
        socket = client_sockets[i]
        # send master flowgram to file first
        send_flowgram_to_socket(id, flowgram, socket)
        if(workload[i] < 1):
            # no data left for this poor guy
            save_send(socket, "--END--")
            continue
        else:
            # Then add all others which are still valid, i.e. in ids
            # The shared flowgrams_iter advances across workers, so each
            # worker receives a disjoint slice of at most workload[i] items.
            for (k, f) in (izip(range(workload[i]),
                                ifilter(lambda f: f.Name in ids, flowgrams_iter))):
                fc.add(f)
                send_flowgram_to_socket(k, f, socket, trim=False)
                names.append(f.Name)
                debug_count += 1
            # send the termination signal
            save_send(socket, "--END--")
    # asynchronous client-server communication
    # ClientHandlers write data in results
    results = [None] * num_cores
    timing = [0.0 for x in xrange(num_cores)]
    for i in range(num_cores):
        socket = client_sockets[i]
        ClientHandler(socket, i, results, timing)
    loop()
    # end asynchronous loop
    spread = adjust_processing_time(num_cores, workload, timing, epoch)
    # flatten list
    scores = [item for list in results for item in list]
    # sanity check: every dispatched flowgram must yield one score row
    if (debug_count != len(scores)):
        raise RuntimeError("Something bad has happened! I received less " +
                           "alignment scores %d than there are flowgrams %d. Most likely this "
                           % (len(scores), debug_count) +
                           "means that the alignment program is not setup correctly or corrupted. " +
                           "Please run the test scripts to figure out the cause of the error.")
    return (scores, names, fc)
开发者ID:Bonder-MJ,项目名称:qiime,代码行数:90,代码来源:flowgram_clustering.py
注:本文中的qiime.denoiser.utils.check_flowgram_ali_exe函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论