本文整理汇总了Python中quast_libs.qutils.label_from_fpath函数的典型用法代码示例。如果您正苦于以下问题:Python label_from_fpath函数的具体用法?Python label_from_fpath怎么用?Python label_from_fpath使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了label_from_fpath函数的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: save_features_in_contigs
def save_features_in_contigs(output_dirpath, contigs_fpaths, feature_name, features_in_contigs, ref_features_num):
    """Save per-assembly amounts of one feature type through ``save``.

    The record goes to ``<feature_name><in_contigs_suffix_fn>`` inside
    ``output_dirpath`` and holds the assembly labels, the feature amounts
    re-keyed by assembly label, and the number of features in the reference.
    Returns whatever ``save`` returns.
    """
    record_fpath = join(output_dirpath, feature_name + in_contigs_suffix_fn)
    assembly_labels = [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths]
    # Callers key the amounts by file path; the saved record is keyed by label.
    amounts_by_label = {}
    for fpath, amounts in features_in_contigs.items():
        amounts_by_label[qutils.label_from_fpath(fpath)] = amounts
    record = {
        'filenames': assembly_labels,
        feature_name + '_in_contigs': amounts_by_label,
        'ref_' + feature_name + '_number': ref_features_num,
    }
    return save(record_fpath, record)
开发者ID:student-t,项目名称:quast,代码行数:7,代码来源:json_saver.py
示例2: draw_coverage_histograms
def draw_coverage_histograms(coverage_dict, contigs_fpaths, output_dirpath):
    """Draw one combined coverage histogram and one histogram per assembly.

    Only assemblies with a truthy entry in ``coverage_dict`` are plotted.
    The combined plot is written to ``<output_dirpath>/coverage_histogram``;
    each per-assembly plot goes to ``<label>_coverage_histogram`` in the same
    directory, where ``<label>`` is the file-name-safe assembly label.

    :param coverage_dict: maps a contigs file path to its coverage values.
    :param contigs_fpaths: paths of all assemblies; each must be a key of
        ``coverage_dict``.
    :param output_dirpath: directory that receives the plots.
    """
    contigs_with_coverage = [contigs_fpath for contigs_fpath in contigs_fpaths if coverage_dict[contigs_fpath]]

    # The CONTIGS field of every assembly report is passed to the binning step.
    contigs_dict = dict()
    for contigs_fpath in contigs_fpaths:
        contigs_dict[contigs_fpath] = reporting.get(contigs_fpath).get_field(reporting.Fields.CONTIGS)

    # Combined histogram: all assemblies binned together.
    cov_values = [coverage_dict[contigs_fpath] for contigs_fpath in contigs_with_coverage]
    num_contigs = [contigs_dict[contigs_fpath] for contigs_fpath in contigs_with_coverage]
    common_coverage_values, bin_size, low_threshold, high_threshold, max_cov = binning_coverage(cov_values, num_contigs)
    histogram_title = 'Coverage histogram (bin size: ' + str(bin_size) + 'x)'
    plotter.coverage_histogram(contigs_with_coverage, common_coverage_values,
                               os.path.join(output_dirpath, 'coverage_histogram'),
                               histogram_title, bin_size=bin_size, max_cov=max_cov,
                               low_threshold=low_threshold, high_threshold=high_threshold)

    # Individual histograms: each assembly is re-binned on its own.
    for contigs_fpath in contigs_with_coverage:
        coverage_values, bin_size, low_threshold, high_threshold, max_cov = binning_coverage(
            [coverage_dict[contigs_fpath]], [contigs_dict[contigs_fpath]])
        label = qutils.label_from_fpath(contigs_fpath)
        corr_label = qutils.label_from_fpath_for_fname(contigs_fpath)
        histogram_title = label + ' coverage histogram (bin size: ' + str(bin_size) + 'x)'
        histogram_fpath = os.path.join(output_dirpath, corr_label + '_coverage_histogram')
        plotter.coverage_histogram([contigs_fpath], coverage_values, histogram_fpath,
                                   histogram_title, draw_bars=True, bin_size=bin_size, max_cov=max_cov,
                                   low_threshold=low_threshold, high_threshold=high_threshold)
开发者ID:student-t,项目名称:quast,代码行数:25,代码来源:basic_stats.py
示例3: save_contigs_lengths
def save_contigs_lengths(output_dirpath, contigs_fpaths, lists_of_lengths):
    """Save the contig lengths of every assembly, longest first, through ``save``.

    :param output_dirpath: directory that receives ``contigs_lengths_fn``.
    :param contigs_fpaths: assembly file paths; their labels are stored under
        ``'filenames'``.
    :param lists_of_lengths: one iterable of contig lengths per assembly.
    :return: whatever ``save`` returns.
    """
    # The loop variable must not be called `list`: that shadows the builtin,
    # and on Python 2 a comprehension variable leaks into the function scope.
    lists_of_lengths = [sorted(lengths, reverse=True) for lengths in lists_of_lengths]
    return save(join(output_dirpath, contigs_lengths_fn), {
        'filenames': [qutils.label_from_fpath(contigs_fpath) for contigs_fpath in contigs_fpaths],
        'lists_of_lengths': lists_of_lengths
    })
开发者ID:student-t,项目名称:quast,代码行数:7,代码来源:json_saver.py
示例4: save_coord
def save_coord(output_dirpath, coord_x, coord_y, name_coord, contigs_fpaths):
    """Save one set of plot coordinates with the assembly labels through ``save``.

    The target file is ``<name_coord><suffix_fn>`` inside ``output_dirpath``.
    Returns whatever ``save`` returns.
    """
    coord_fpath = join(output_dirpath, name_coord + suffix_fn)
    record = {
        'coord_x': coord_x,
        'coord_y': coord_y,
        'filenames': [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths],
    }
    return save(coord_fpath, record)
开发者ID:student-t,项目名称:quast,代码行数:7,代码来源:json_saver.py
示例5: get_assemblies_data
def get_assemblies_data(contigs_fpaths, icarus_dirpath, stdout_pattern, nx_marks):
    """Build the JavaScript snippets that describe every assembly.

    Returns a 3-tuple:

    * JS text declaring the ``assemblies_*`` objects, followed by one
      ``assemblies_links[...]`` assignment per assembly (only when
      ``stdout_pattern`` is truthy);
    * JS text assigning total length, contig count and N50 per assembly;
    * a ``defaultdict`` mapping assembly label -> {nx field: report value}
      for every field in ``nx_marks``.
    """
    assemblies_n50 = defaultdict(dict)
    links_js = [
        'var assemblies_links = {};\n',
        'var assemblies_len = {};\n',
        'var assemblies_contigs = {};\n',
        'var assemblies_misassemblies = {};\n',
        'var assemblies_n50 = {};\n',
    ]
    sizes_js = []

    for contigs_fpath in contigs_fpaths:
        assembly_label = qutils.label_from_fpath(contigs_fpath)
        report = reporting.get(contigs_fpath)
        total_len = report.get_field(reporting.Fields.TOTALLEN)
        contigs_num = report.get_field(reporting.Fields.CONTIGS)
        n50 = report.get_field(reporting.Fields.N50)

        if stdout_pattern:
            # Link to the assembly's .stdout log, relative to the Icarus directory.
            stdout_fpath = stdout_pattern % qutils.label_from_fpath_for_fname(contigs_fpath) + '.stdout'
            stdout_fpath = qutils.relpath(stdout_fpath, icarus_dirpath)
            links_js.append('assemblies_links["' + assembly_label + '"] = "' + stdout_fpath + '";\n')

        sizes_js.append('assemblies_len["' + assembly_label + '"] = ' + str(total_len) + ';\n')
        sizes_js.append('assemblies_contigs["' + assembly_label + '"] = ' + str(contigs_num) + ';\n')
        # N50 is emitted as a quoted string, unlike the two numeric values above.
        sizes_js.append('assemblies_n50["' + assembly_label + '"] = "' + str(n50) + '";\n')

        nx_values = assemblies_n50[assembly_label]
        for nx in nx_marks:
            nx_values[nx] = report.get_field(nx)

    return ''.join(links_js), ''.join(sizes_js), assemblies_n50
开发者ID:student-t,项目名称:quast,代码行数:25,代码来源:icarus_builder.py
示例6: get_color_and_ls
def get_color_and_ls(fpath, label=None):
    """
    Returns tuple: color, line style

    The pair is looked up in the module-level ``dict_color_and_ls`` by assembly
    label. When ``label`` is falsy it is derived from ``fpath`` with
    ``qutils.label_from_fpath``. If ``dict_color_and_ls`` is empty,
    ``(None, None)`` is returned; a label missing from a non-empty dict
    raises ``KeyError``.
    """
    # Function-scope import kept from the original; presumably it avoids a
    # circular import at module load time -- TODO confirm.
    from quast_libs import qutils
    if not label:
        label = qutils.label_from_fpath(fpath)
    if not dict_color_and_ls:
        return None, None
    return dict_color_and_ls[label]
开发者ID:student-t,项目名称:quast,代码行数:10,代码来源:plotter_data.py
示例7: parallel_partition_contigs
def parallel_partition_contigs(asm, assemblies_by_ref, corrected_dirpath, alignments_fpath_template):
    """Split the contigs of one assembly into per-reference FASTA files.

    The alignments file for this assembly is read if it exists. Every line
    whose first token is a key of ``contigs_analyzer.ref_labels_by_chromosomes``
    is treated as "chromosome followed by contig names". Contigs named on such
    a line are appended to ``<label>_to_<ref>.fasta`` in ``corrected_dirpath``.
    Contigs that were never matched are written to
    ``<label>_not_aligned_anywhere.fasta``.

    Returns ``(assemblies_by_ref, not_aligned_asm)``; ``assemblies_by_ref`` is
    extended in place.

    NOTE(review): indentation was lost in this copy of the code; the nesting
    below is reconstructed from the data flow -- confirm against upstream.
    """
    assembly_label = qutils.label_from_fpath(asm.fpath)
    # File-name-safe variant of the label, used for every path built below.
    corr_assembly_label = qutils.label_from_fpath_for_fname(asm.fpath)
    logger.info(' ' + 'processing ' + assembly_label)
    added_ref_asm = []
    not_aligned_fname = corr_assembly_label + '_not_aligned_anywhere.fasta'
    not_aligned_fpath = os.path.join(corrected_dirpath, not_aligned_fname)
    contigs = {}
    aligned_contig_names = set()
    aligned_contigs_for_each_ref = {}
    # presumably a re-iterable sequence of (name, seq) pairs: it is iterated
    # once per matching alignment line -- TODO confirm it is not a generator
    contigs_seq = fastaparser.read_fasta_one_time(asm.fpath)
    alignments_fpath = alignments_fpath_template % corr_assembly_label
    if os.path.exists(alignments_fpath):
        with open(alignments_fpath) as f:
            for line in f:
                values = line.split()
                # NOTE(review): an empty line would make values[0] raise IndexError.
                if values[0] in contigs_analyzer.ref_labels_by_chromosomes.keys():
                    ref_name = contigs_analyzer.ref_labels_by_chromosomes[values[0]]
                    ref_contigs_names = values[1:]
                    ref_contigs_fpath = os.path.join(
                        corrected_dirpath, corr_assembly_label + '_to_' + ref_name + '.fasta')
                    if ref_name not in aligned_contigs_for_each_ref:
                        aligned_contigs_for_each_ref[ref_name] = []
                    for (cont_name, seq) in contigs_seq:
                        # `contigs` is only filled here, i.e. only while a
                        # matching alignment line is being processed.
                        if not cont_name in contigs:
                            contigs[cont_name] = seq
                        if cont_name in ref_contigs_names and cont_name not in aligned_contigs_for_each_ref[ref_name]:
                            # Collecting all aligned contigs names in order to further extract not aligned
                            aligned_contig_names.add(cont_name)
                            aligned_contigs_for_each_ref[ref_name].append(cont_name)
                            # Append mode: one reference file collects contigs from several lines.
                            fastaparser.write_fasta(ref_contigs_fpath, [(cont_name, seq)], 'a')
                    ref_asm = Assembly(ref_contigs_fpath, assembly_label)
                    # Register each per-reference assembly only once.
                    if ref_asm.name not in added_ref_asm:
                        if ref_name in assemblies_by_ref:
                            assemblies_by_ref[ref_name].append(ref_asm)
                            added_ref_asm.append(ref_asm.name)
        if qconfig.space_efficient:
            os.remove(alignments_fpath)
    # Extraction not aligned contigs
    all_contigs_names = set(contigs.keys())
    not_aligned_contigs_names = all_contigs_names - aligned_contig_names
    fastaparser.write_fasta(not_aligned_fpath, [(name, contigs[name]) for name in not_aligned_contigs_names])
    not_aligned_asm = Assembly(not_aligned_fpath, asm.label)
    return assemblies_by_ref, not_aligned_asm
开发者ID:student-t,项目名称:quast,代码行数:49,代码来源:metautils.py
示例8: do
def do(contigs_fpaths, gene_lengths, out_dirpath):
    """Run GlimmerHMM gene prediction on every assembly and record the results.

    :param contigs_fpaths: paths of the assemblies to process.
    :param gene_lengths: passed through to ``predict_genes``.
    :param out_dirpath: output directory; a ``tmp`` sub-directory is created
        inside it and removed afterwards unless ``qconfig.debug`` is set.
    :return: dict mapping assembly label to the first element of that
        assembly's ``predict_genes`` result, or ``None`` when
        ``compile_glimmer`` returns a falsy value.
    """
    logger.print_timestamp()
    logger.main_info('Running GlimmerHMM...')

    tool_dirpath = os.path.join(qconfig.LIBS_LOCATION, 'glimmer')
    tmp_dirpath = os.path.join(out_dirpath, 'tmp')
    tool_exec_fpath = compile_glimmer(logger)
    if not tool_exec_fpath:
        return

    if not os.path.isdir(out_dirpath):
        os.makedirs(out_dirpath)
    if not os.path.isdir(tmp_dirpath):
        os.makedirs(tmp_dirpath)

    n_jobs = min(len(contigs_fpaths), qconfig.max_threads)
    if is_python2():
        from joblib import Parallel, delayed
    else:
        from joblib3 import Parallel, delayed
    if qconfig.memory_efficient:
        results = Parallel(n_jobs=n_jobs)(delayed(predict_genes)(
            index, contigs_fpath, gene_lengths, out_dirpath, tool_dirpath, tmp_dirpath)
            for index, contigs_fpath in enumerate(contigs_fpaths))
    else:
        results = [predict_genes(index, contigs_fpath, gene_lengths, out_dirpath, tool_dirpath, tmp_dirpath)
                   for index, contigs_fpath in enumerate(contigs_fpaths)]

    genes_by_labels = dict()
    # saving results
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        label = qutils.label_from_fpath(contigs_fpath)
        genes_by_labels[label], unique, full_genes, partial_genes = results[i]
        if unique is not None:
            report.add_field(reporting.Fields.PREDICTED_GENES_UNIQUE, unique)
        if full_genes is not None:
            genes = ['%s + %s part' % (full_cnt, partial_cnt) for full_cnt, partial_cnt in zip(full_genes, partial_genes)]
            report.add_field(reporting.Fields.PREDICTED_GENES, genes)
        if unique is None and full_genes is None:
            # `%` binds tighter than `+`, so the label must be formatted into
            # the first literal BEFORE the hint is appended. Formatting the
            # concatenation's right operand raised TypeError instead of logging.
            logger.error(
                'Failed running Glimmer for %s. ' % label + ('Run with the --debug option'
                ' to see the command line.' if not qconfig.debug else ''))

    if not qconfig.debug:
        shutil.rmtree(tmp_dirpath)

    logger.main_info('Done.')
    return genes_by_labels
开发者ID:student-t,项目名称:quast,代码行数:49,代码来源:glimmer.py
示例9: correct_assemblies
def correct_assemblies(contigs_fpaths, output_dirpath, labels):
    """Correct the input assemblies without applying min-contig filtration.

    :param contigs_fpaths: paths of the input assemblies.
    :param output_dirpath: output directory; corrected files go to its
        ``qconfig.corrected_dirname`` sub-directory.
    :param labels: labels passed to ``qutils.correct_contigs``.
    :return: ``(assemblies, corrected_labels)`` -- the ``Assembly`` objects
        built from the second list returned by ``qutils.correct_contigs``,
        and their labels.
    """
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    # we need correction but do not need min-contig filtration
    min_contig = qconfig.min_contig
    qconfig.min_contig = 0
    try:
        _, old_contigs_fpaths = qutils.correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting=None)
    finally:
        # Restore the global threshold even if correction raises, so a failure
        # here cannot leave min_contig silently set to 0.
        qconfig.min_contig = min_contig

    assemblies = [Assembly(fpath, qutils.label_from_fpath(fpath)) for fpath in old_contigs_fpaths]
    corrected_labels = [asm.label for asm in assemblies]

    if qconfig.draw_plots or qconfig.html_report:
        corr_fpaths = [asm.fpath for asm in assemblies]
        # Pass a copy so the callee cannot alter the list returned to the caller.
        plotter_data.save_colors_and_ls(corr_fpaths, labels=list(corrected_labels))
    return assemblies, corrected_labels
开发者ID:student-t,项目名称:quast,代码行数:15,代码来源:metautils.py
示例10: save_colors
def save_colors(results_dirpath, contigs_fpaths, dict_colors, meta=False):
    """Store the assembly colors and the list of broken scaffolds for the HTML report.

    With ``meta=True`` the ``colors`` and ``broken_scaffolds`` template
    placeholders in the existing report file are filled with fixed values.
    Otherwise both records are derived from ``dict_colors`` (label ->
    (color, line style, ...)) and written with ``save_record``.
    """
    if not meta:
        contig_labels = [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths]
        colors_and_ls = [dict_colors[label] for label in contig_labels]

        # Translate each plot color into its HTML counterpart by position.
        plot_colors = [pair[0] for pair in colors_and_ls]
        colors_for_html = [html_colors[plotter_data.colors.index(plot_color)] for plot_color in plot_colors]
        save_record(results_dirpath, 'colors', colors_for_html)

        # Assemblies drawn with the secondary line style are the broken scaffolds.
        broken_contig_names = [label for label, pair in zip(contig_labels, colors_and_ls)
                               if pair[1] == secondary_line_style]
        save_record(results_dirpath, 'broken_scaffolds', broken_contig_names)
        return

    html_fpath = os.path.join(results_dirpath, report_fname)
    with open(html_fpath) as f_html:
        html_text = f_html.read()
    for placeholder, value in (('colors', 'standard_colors'), ('broken_scaffolds', '[]')):
        html_text = re.sub('{{ ' + placeholder + ' }}', value, html_text)
    with open(html_fpath, 'w') as f_html:
        f_html.write(html_text)
开发者ID:ablab,项目名称:quast,代码行数:17,代码来源:html_saver.py
示例11: calculate_ave_read_support
def calculate_ave_read_support(combined_output_dirpath, assemblies):
    """Add the AVE_READ_SUPPORT field to every per-reference assembly report.

    For each assembly the unique-contigs report is read; every non-blank line
    holds ``ref_name<TAB>contig_len<TAB>contig_cov``. The length-weighted mean
    coverage per reference is stored, formatted with two decimals, in the
    report of ``<label>_to_<ref_name>.fasta``. The path is also registered in
    ``qconfig.assembly_labels_by_fpath`` under the assembly label.
    """
    unique_contigs_fpath = os.path.join(combined_output_dirpath, 'contigs_reports', qconfig.unique_contigs_fname_pattern)
    for assembly in assemblies:
        aligned_contigs_by_ref = dict()
        assembly_label = qutils.label_from_fpath(assembly.fpath)
        # Both values are constant for the assembly, so compute them once
        # rather than once per reference.
        corr_assembly_label = qutils.label_from_fpath_for_fname(assembly.fpath)
        assembly_dirpath = os.path.dirname(assembly.fpath)

        with open(unique_contigs_fpath % corr_assembly_label) as in_f:
            for line in in_f:
                line = line.strip()
                if not line:
                    # A blank line would break the 3-way unpack below.
                    continue
                ref_name, contig_len, contig_cov = line.split('\t')
                aligned_contigs_by_ref.setdefault(ref_name, []).append((float(contig_len), float(contig_cov)))

        for ref_name, contigs in aligned_contigs_by_ref.items():
            # Length-weighted average coverage over the contigs of this reference.
            ref_cov = sum(contig_cov * aligned_len for (aligned_len, contig_cov) in contigs)
            ref_cov /= sum(aligned_len for (aligned_len, contig_cov) in contigs)
            ref_contigs_fpath = os.path.join(assembly_dirpath, corr_assembly_label + '_to_' + ref_name + '.fasta')
            qconfig.assembly_labels_by_fpath[ref_contigs_fpath] = assembly_label
            report = reporting.get(ref_contigs_fpath, ref_name=ref_name)
            report.add_field(reporting.Fields.AVE_READ_SUPPORT, '%.2f' % ref_cov)
开发者ID:student-t,项目名称:quast,代码行数:19,代码来源:metautils.py
示例12: do
def do(contigs_fpaths, gene_lengths, out_dirpath):
    """Predict genes with GlimmerHMM for all assemblies and fill their reports.

    ``predict_genes`` is dispatched through ``run_parallel`` with one argument
    tuple per assembly. Returns a dict mapping assembly label to its entry of
    the first list returned by ``run_parallel``, or ``None`` when
    ``compile_glimmer`` returns a falsy value. The ``tmp`` working directory
    is removed unless ``qconfig.debug`` is set.
    """
    logger.print_timestamp()
    logger.main_info('Running GlimmerHMM...')

    tool_dirpath = os.path.join(qconfig.LIBS_LOCATION, 'glimmer')
    tmp_dirpath = os.path.join(out_dirpath, 'tmp')
    tool_exec_fpath = compile_glimmer(logger)
    if not tool_exec_fpath:
        return

    for dirpath in (out_dirpath, tmp_dirpath):
        if not os.path.isdir(dirpath):
            os.makedirs(dirpath)

    n_jobs = min(len(contigs_fpaths), qconfig.max_threads)
    parallel_args = []
    for index, contigs_fpath in enumerate(contigs_fpaths):
        parallel_args.append(
            (index, contigs_fpath, gene_lengths, out_dirpath, tool_dirpath, tool_exec_fpath, tmp_dirpath))
    genes_list, unique, full_genes, partial_genes = run_parallel(predict_genes, parallel_args, n_jobs)

    # Collect the per-assembly results into the reports.
    genes_by_labels = {}
    for pos, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        label = qutils.label_from_fpath(contigs_fpath)
        genes_by_labels[label] = genes_list[pos]

        unique_cnt = unique[pos]
        if unique_cnt is not None:
            report.add_field(reporting.Fields.PREDICTED_GENES_UNIQUE, unique_cnt)

        full_cnts = full_genes[pos]
        if full_cnts is not None:
            genes = []
            for full_cnt, partial_cnt in zip(full_cnts, partial_genes[pos]):
                genes.append('%s + %s part' % (full_cnt, partial_cnt))
            report.add_field(reporting.Fields.PREDICTED_GENES, genes)

        if unique_cnt is None and full_cnts is None:
            # The hint is only useful when the command line is not already logged.
            if qconfig.debug:
                hint = ''
            else:
                hint = 'Run with the --debug option to see the command line.'
            logger.error('Failed running Glimmer for %s. ' % label + hint)

    if not qconfig.debug:
        shutil.rmtree(tmp_dirpath)

    logger.main_info('Done.')
    return genes_by_labels
开发者ID:ablab,项目名称:quast,代码行数:41,代码来源:glimmer.py
示例13: save_total_report
def save_total_report(output_dirpath, min_contig, ref_fpath):
    """Save the full report table, with per-reference sub-reports, through ``save``.

    Sub-reports are produced only when ``qconfig.is_combined_ref`` is set and
    ``ref_labels_by_chromosomes`` is non-empty; otherwise both the reference
    list and the sub-report list are empty. Returns whatever ``save`` returns.
    """
    from quast_libs import reporting

    asm_names = [qutils.label_from_fpath(fpath) for fpath in reporting.assembly_fpaths]
    report = reporting.table(reporting.Fields.grouped_order)

    ref_names, subreports = [], []
    if qconfig.is_combined_ref and ref_labels_by_chromosomes:
        # Unique reference labels in sorted order.
        ref_names = sorted(set(ref_labels_by_chromosomes.values()))
        subreports = [reporting.table(reporting.Fields.grouped_order, ref_name=name)
                      for name in ref_names]

    now = datetime.datetime.now()
    total_report_fpath = join(output_dirpath, total_report_fname)
    return save(total_report_fpath, {
        'date': now.strftime('%d %B %Y, %A, %H:%M:%S'),
        'assembliesNames': asm_names,
        'referenceName': qutils.name_from_fpath(ref_fpath) if ref_fpath else '',
        'order': list(range(len(asm_names))),
        'report': report,
        'subreferences': ref_names,
        'subreports': subreports,
        'minContig': min_contig
    })
开发者ID:ablab,项目名称:quast,代码行数:21,代码来源:json_saver.py
示例14: main
def main(args):
check_dirpath(qconfig.QUAST_HOME, 'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '.\n' +
'Please, put QUAST in a different directory, then try again.\n', exit_code=3)
if not args:
qconfig.usage(stream=sys.stderr)
sys.exit(1)
metaquast_path = [os.path.realpath(__file__)]
quast_py_args, contigs_fpaths = parse_options(logger, metaquast_path + args)
output_dirpath, ref_fpaths, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
html_report = qconfig.html_report
test_mode = qconfig.test
# Directories
output_dirpath, _, _ = qutils.set_up_output_dir(
output_dirpath, None, not output_dirpath,
save_json=False)
corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
qconfig.set_max_threads(logger)
qutils.logger = logger
########################################################################
from quast_libs import reporting
try:
import imp
imp.reload(reporting)
except:
reload(reporting)
from quast_libs import plotter
if os.path.isdir(corrected_dirpath):
shutil.rmtree(corrected_dirpath)
os.mkdir(corrected_dirpath)
# PROCESSING REFERENCES
if ref_fpaths:
logger.main_info()
logger.main_info('Reference(s):')
corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
correct_meta_references(ref_fpaths, corrected_dirpath)
# PROCESSING CONTIGS
logger.main_info()
logger.main_info('Contigs:')
qconfig.no_check_meta = True
assemblies, labels = correct_assemblies(contigs_fpaths, output_dirpath, labels)
if not assemblies:
logger.error("None of the assembly files contains correct contigs. "
"Please, provide different files or decrease --min-contig threshold.")
return 4
# Running QUAST(s)
if qconfig.gene_finding:
quast_py_args += ['--mgm']
if qconfig.min_IDY is None: # special case: user not specified min-IDY, so we need to use MetaQUAST default value
quast_py_args += ['--min-identity', str(qconfig.META_MIN_IDY)]
downloaded_refs = False
# SEARCHING REFERENCES
if not ref_fpaths:
logger.main_info()
if qconfig.max_references == 0:
logger.notice("Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled")
else:
if qconfig.references_txt:
logger.main_info("List of references was provided, starting to download reference genomes from NCBI...")
else:
logger.main_info("No references are provided, starting to search for reference genomes in SILVA 16S rRNA database "
"and to download them from NCBI...")
downloaded_dirpath = os.path.join(output_dirpath, qconfig.downloaded_dirname)
if not os.path.isdir(downloaded_dirpath):
os.mkdir(downloaded_dirpath)
corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
ref_fpaths = search_references_meta.do(assemblies, labels, downloaded_dirpath, corrected_dirpath, qconfig.references_txt)
if ref_fpaths:
search_references_meta.is_quast_first_run = True
if not qconfig.references_txt:
downloaded_refs = True
logger.main_info()
logger.main_info('Downloaded reference(s):')
corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
correct_meta_references(ref_fpaths, corrected_dirpath, downloaded_refs=True)
elif test_mode and not ref_fpaths:
logger.error('Failed to download or setup SILVA 16S rRNA database for working without '
'references on metagenome datasets!', to_stderr=True, exit_with_code=4)
if not ref_fpaths:
# No references, running regular quast with MetaGenemark gene finder
logger.main_info()
logger.notice('No references are provided, starting regular QUAST with MetaGeneMark gene finder')
assemblies = [Assembly(fpath, qutils.label_from_fpath(fpath)) for fpath in contigs_fpaths]
_start_quast_main(quast_py_args, assemblies=assemblies, output_dirpath=output_dirpath, run_regular_quast=True)
exit(0)
#.........这里部分代码省略.........
开发者ID:ablab,项目名称:quast,代码行数:101,代码来源:metaquast.py
示例15: save_combined_ref_stats
def save_combined_ref_stats(results, contigs_fpaths, ref_labels_by_chromosomes, output_dir, logger):
    """Save per-reference interspecies-translocation statistics for every assembly.

    For each assembly the module-level ``intergenomic_misassemblies_by_asm`` is
    filled with the misassemblies per reference. When translocation data is
    available, a ``interspecies_translocations_by_refs_<name>.info`` file with
    a reference-by-reference table is written to ``output_dir``. If
    ``qconfig.draw_plots`` is set, a summary plot is drawn as well.

    ``results`` is indexed in parallel with ``contigs_fpaths``; a falsy entry
    means no data for that assembly. Returns ``None``.

    NOTE(review): indentation was lost in this copy of the code; the nesting
    below is reconstructed from the data flow -- confirm against upstream.
    """
    istranslocations_by_asm = [result['istranslocations_by_refs'] if result else None for result in results]
    misassemblies_by_asm = [result['misassemblies_by_ref'] if result else None for result in results]
    # Unique reference labels, in first-seen order unless sorting is requested.
    all_refs = []
    for ref in ref_labels_by_chromosomes.values():
        if ref not in all_refs:
            all_refs.append(ref)
    if not qconfig.use_input_ref_order:
        all_refs.sort()
    # First row is the header; one row per assembly is appended below.
    misassemblies_by_refs_rows = []
    row = {'metricName': 'References', 'values': all_refs}
    misassemblies_by_refs_rows.append(row)
    if not istranslocations_by_asm:
        return
    for i, fpath in enumerate(contigs_fpaths):
        label = qutils.label_from_fpath(fpath)
        row = {'metricName': label, 'values': []}
        misassemblies_by_refs_rows.append(row)
        istranslocations_by_ref = istranslocations_by_asm[i]
        # Module-level dict, mutated here.
        intergenomic_misassemblies_by_asm[label] = defaultdict(list)
        for ref in all_refs:
            intergenomic_misassemblies_by_asm[label][ref] = misassemblies_by_asm[i][ref] if misassemblies_by_asm[i] else []
        if istranslocations_by_ref:
            assembly_name = qutils.name_from_fpath(fpath)
            all_rows = []
            # Header row numbers the references 1..N; the legend is written at the end of the file.
            row = {'metricName': 'References', 'values': [ref_num + 1 for ref_num in range(len(all_refs))]}
            all_rows.append(row)
            for ref in all_refs:
                row = {'metricName': ref, 'values': []}
                for second_ref in all_refs:
                    # NOTE(review): membership is checked for second_ref in the
                    # outer dict, yet the lookup below indexes [ref] first --
                    # confirm `ref` is always a key.
                    if ref == second_ref or second_ref not in istranslocations_by_ref:
                        row['values'].append(None)
                    else:
                        row['values'].append(istranslocations_by_ref[ref][second_ref])
                possible_misassemblies = 0
                misassemblies_by_ref = misassemblies_by_asm[i]
                if misassemblies_by_ref:
                    possible_misassemblies = misassemblies_by_ref[ref].count(Misassembly.POSSIBLE_MISASSEMBLIES)
                # None and zero entries are skipped by the `if r` filter.
                istranslocations = max(0, sum([r for r in row['values'] if r]))
                # [-1] is this assembly's row, appended at the top of the outer loop.
                misassemblies_by_refs_rows[-1]['values'].append(istranslocations + possible_misassemblies)
                all_rows.append(row)
            misassembly_by_ref_fpath = os.path.join(output_dir, 'interspecies_translocations_by_refs_%s.info' % assembly_name)
            # The title is written and the file closed before print_file appends the table.
            with open(misassembly_by_ref_fpath, 'w') as misassembly_by_ref_file:
                misassembly_by_ref_file.write('Number of interspecies translocations by references: \n')
            print_file(all_rows, misassembly_by_ref_fpath, append_to_existing_file=True)
            with open(misassembly_by_ref_fpath, 'a') as misassembly_by_ref_file:
                misassembly_by_ref_file.write('References:\n')
                for ref_num, ref in enumerate(all_refs):
                    misassembly_by_ref_file.write(str(ref_num + 1) + ' - ' + ref + '\n')
            logger.info(' Information about interspecies translocations by references for %s is saved to %s' %
                        (assembly_name, misassembly_by_ref_fpath))
    misassemblies = []
    if qconfig.draw_plots:
        from quast_libs import plotter
        aligned_contigs_labels = []
        # Iterating over a slice copy, so removing from the original list is safe.
        for row in misassemblies_by_refs_rows[1:]:
            if row['values']:
                aligned_contigs_labels.append(row['metricName'])
            else:
                misassemblies_by_refs_rows.remove(row)
        # Transpose: one list per reference holding the value of every remaining assembly.
        for i in range(len(all_refs)):
            cur_results = []
            for row in misassemblies_by_refs_rows[1:]:
                if row['values']:
                    cur_results.append(row['values'][i])
            misassemblies.append(cur_results)
        is_translocations_plot_fpath = os.path.join(output_dir, 'intergenomic_misassemblies')
        plotter.draw_meta_summary_plot('', output_dir, aligned_contigs_labels, all_refs,
                                       misassemblies, is_translocations_plot_fpath,
                                       title='Intergenomic misassemblies (found and supposed)', reverse=False,
                                       yaxis_title=None, print_all_refs=True, logger=logger)
开发者ID:student-t,项目名称:quast,代码行数:73,代码来源:save_results.py
示例16: save_result
def save_result(result, report, fname, ref_fpath):
    """Copy the alignment statistics from ``result`` into ``report`` fields.

    :param result: dict of alignment statistics; every key read below must be
        present.
    :param report: report object that receives the fields via ``add_field``.
    :param fname: assembly file path; used to fetch per-reference sub-reports
        and to look the assembly up in ``qconfig.dict_of_broken_scaffolds``.
    :param ref_fpath: reference path; only used in the non-combined branch to
        derive the reference name.
    :return: the same ``report`` object.

    NOTE(review): indentation was lost in this copy of the code; the nesting
    below (in particular of the trailing SCAFFOLD_GAP / FRAGMENTED blocks) is
    reconstructed -- confirm against upstream.
    """
    region_misassemblies = result['region_misassemblies']
    misassemblies_by_ref = result['misassemblies_by_ref']
    region_struct_variations = result['region_struct_variations']  # not used below
    misassemblies_matched_sv = result['misassemblies_matched_sv']
    misassembled_contigs = result['misassembled_contigs']
    misassembled_bases = result['misassembled_bases']
    misassembly_internal_overlap = result['misassembly_internal_overlap']
    unaligned = result['unaligned']
    partially_unaligned = result['partially_unaligned']
    partially_unaligned_bases = result['partially_unaligned_bases']
    fully_unaligned_bases = result['fully_unaligned_bases']
    ambiguous_contigs = result['ambiguous_contigs']
    ambiguous_contigs_extra_bases = result['ambiguous_contigs_extra_bases']
    SNPs = result['SNPs']
    indels_list = result['indels_list']
    total_aligned_bases = result['total_aligned_bases']
    half_unaligned_with_misassembly = result['half_unaligned_with_misassembly']
    report.add_field(reporting.Fields.MISLOCAL, region_misassemblies.count(Misassembly.LOCAL))
    # Extensive misassemblies = relocations + inversions + translocations + interspecies translocations.
    report.add_field(reporting.Fields.MISASSEMBL, region_misassemblies.count(Misassembly.RELOCATION) +
                     region_misassemblies.count(Misassembly.INVERSION) + region_misassemblies.count(Misassembly.TRANSLOCATION) +
                     region_misassemblies.count(Misassembly.INTERSPECTRANSLOCATION))
    report.add_field(reporting.Fields.MISCONTIGS, len(misassembled_contigs))
    report.add_field(reporting.Fields.MISCONTIGSBASES, misassembled_bases)
    report.add_field(reporting.Fields.MISINTERNALOVERLAP, misassembly_internal_overlap)
    if qconfig.bed:
        report.add_field(reporting.Fields.STRUCT_VARIATIONS, misassemblies_matched_sv)
    report.add_field(reporting.Fields.UNALIGNED, '%d + %d part' % (unaligned, partially_unaligned))
    report.add_field(reporting.Fields.UNALIGNEDBASES, (fully_unaligned_bases + partially_unaligned_bases))
    report.add_field(reporting.Fields.AMBIGUOUS, ambiguous_contigs)
    report.add_field(reporting.Fields.AMBIGUOUSEXTRABASES, ambiguous_contigs_extra_bases)
    report.add_field(reporting.Fields.MISMATCHES, SNPs)
    # different types of indels:
    if indels_list is not None:
        report.add_field(reporting.Fields.INDELS, len(indels_list))
        report.add_field(reporting.Fields.INDELSBASES, sum(indels_list))
        report.add_field(reporting.Fields.MIS_SHORT_INDELS, len([i for i in indels_list if i <= qconfig.SHORT_INDEL_THRESHOLD]))
        report.add_field(reporting.Fields.MIS_LONG_INDELS, len([i for i in indels_list if i > qconfig.SHORT_INDEL_THRESHOLD]))
    if total_aligned_bases:
        # Error rates are scaled by 100000.0 aligned bases.
        report.add_field(reporting.Fields.SUBSERROR, "%.2f" % (float(SNPs) * 100000.0 / float(total_aligned_bases)))
        # NOTE(review): INDELS is only set above when indels_list is not None;
        # confirm get_field cannot return None here.
        report.add_field(reporting.Fields.INDELSERROR, "%.2f" % (float(report.get_field(reporting.Fields.INDELS))
                                                                 * 100000.0 / float(total_aligned_bases)))
    # for misassemblies report:
    report.add_field(reporting.Fields.MIS_ALL_EXTENSIVE, region_misassemblies.count(Misassembly.RELOCATION) +
                     region_misassemblies.count(Misassembly.INVERSION) + region_misassemblies.count(Misassembly.TRANSLOCATION) +
                     region_misassemblies.count(Misassembly.INTERSPECTRANSLOCATION))
    report.add_field(reporting.Fields.MIS_RELOCATION, region_misassemblies.count(Misassembly.RELOCATION))
    report.add_field(reporting.Fields.MIS_TRANSLOCATION, region_misassemblies.count(Misassembly.TRANSLOCATION))
    report.add_field(reporting.Fields.MIS_INVERTION, region_misassemblies.count(Misassembly.INVERSION))
    report.add_field(reporting.Fields.MIS_EXTENSIVE_CONTIGS, len(misassembled_contigs))
    report.add_field(reporting.Fields.MIS_EXTENSIVE_BASES, misassembled_bases)
    report.add_field(reporting.Fields.MIS_LOCAL, region_misassemblies.count(Misassembly.LOCAL))
    if qconfig.is_combined_ref:
        # Combined reference: totals go to the main report, and each reference
        # label gets its own sub-report with the same breakdown.
        report.add_field(reporting.Fields.MIS_ISTRANSLOCATIONS, region_misassemblies.count(Misassembly.INTERSPECTRANSLOCATION))
        report.add_field(reporting.Fields.CONTIGS_WITH_ISTRANSLOCATIONS, region_misassemblies.count(Misassembly.POTENTIALLY_MIS_CONTIGS))
        report.add_field(reporting.Fields.POSSIBLE_MISASSEMBLIES, region_misassemblies.count(Misassembly.POSSIBLE_MISASSEMBLIES))
        all_references = sorted(list(set([ref for ref in ref_labels_by_chromosomes.values()])))
        for ref_name in all_references:
            subreport = reporting.get(fname, ref_name=ref_name)
            ref_misassemblies = misassemblies_by_ref[ref_name]
            subreport.add_field(reporting.Fields.MIS_ALL_EXTENSIVE, ref_misassemblies.count(Misassembly.RELOCATION) +
                                ref_misassemblies.count(Misassembly.INVERSION) + ref_misassemblies.count(Misassembly.TRANSLOCATION) +
                                ref_misassemblies.count(Misassembly.INTERSPECTRANSLOCATION))
            subreport.add_field(reporting.Fields.MIS_RELOCATION, ref_misassemblies.count(Misassembly.RELOCATION))
            subreport.add_field(reporting.Fields.MIS_TRANSLOCATION, ref_misassemblies.count(Misassembly.TRANSLOCATION))
            subreport.add_field(reporting.Fields.MIS_INVERTION, ref_misassemblies.count(Misassembly.INVERSION))
            subreport.add_field(reporting.Fields.MIS_ISTRANSLOCATIONS, ref_misassemblies.count(Misassembly.INTERSPECTRANSLOCATION))
            subreport.add_field(reporting.Fields.MIS_LOCAL, ref_misassemblies.count(Misassembly.LOCAL))
            subreport.add_field(reporting.Fields.POSSIBLE_MISASSEMBLIES, ref_misassemblies.count(Misassembly.POSSIBLE_MISASSEMBLIES))
            subreport.add_field(reporting.Fields.CONTIGS_WITH_ISTRANSLOCATIONS, ref_misassemblies.count(Misassembly.POTENTIALLY_MIS_CONTIGS))
            if fname not in qconfig.dict_of_broken_scaffolds:
                subreport.add_field(reporting.Fields.MIS_SCAFFOLDS_GAP, ref_misassemblies.count(Misassembly.SCAFFOLD_GAP))
            if qconfig.check_for_fragmented_ref:
                subreport.add_field(reporting.Fields.MIS_FRAGMENTED, ref_misassemblies.count(Misassembly.FRAGMENTED))
    elif intergenomic_misassemblies_by_asm:
        # Non-combined run: the counts come from the module-level
        # intergenomic_misassemblies_by_asm, keyed by assembly label and reference name.
        label = qutils.label_from_fpath(fname)
        ref_name = qutils.name_from_fpath(ref_fpath)
        ref_misassemblies = intergenomic_misassemblies_by_asm[label][ref_name]
        report.add_field(reporting.Fields.MIS_ISTRANSLOCATIONS, ref_misassemblies.count(Misassembly.INTERSPECTRANSLOCATION))
        report.add_field(reporting.Fields.POSSIBLE_MISASSEMBLIES, ref_misassemblies.count(Misassembly.POSSIBLE_MISASSEMBLIES))
        report.add_field(reporting.Fields.CONTIGS_WITH_ISTRANSLOCATIONS, ref_misassemblies.count(Misassembly.POTENTIALLY_MIS_CONTIGS))
    if fname not in qconfig.dict_of_broken_scaffolds:
        report.add_field(reporting.Fields.MIS_SCAFFOLDS_GAP, region_misassemblies.count(Misassembly.SCAFFOLD_GAP))
    if qconfig.check_for_fragmented_ref:
        report.add_field(reporting.Fields.MIS_FRAGMENTED, region_misassemblies.count(Misassembly.FRAGMENTED))
    # for unaligned report:
    report.add_field(reporting.Fields.UNALIGNED_FULL_CNTGS, unaligned)
    report.add_field(reporting.Fields.UNALIGNED_FULL_LENGTH, fully_unaligned_bases)
    report.add_field(reporting.Fields.UNALIGNED_PART_CNTGS, partially_unaligned)
    report.add_field(reporting.Fields.UNALIGNED_PART_LENGTH, partially_unaligned_bases)
    report.add_field(reporting.Fields.UNALIGNED_MISASSEMBLED_CTGS, half_unaligned_with_misassembly)
    return report
开发者ID:student-t,项目名称:quast,代码行数:95,代码来源:save_results.py
示例17: main
def main(args):
check_dirpath(qconfig.QUAST_HOME, 'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n.' +
'Please, put QUAST in a different directory, then try again.\n', exit_code=3)
if not args:
qconfig.usage(stream=sys.stderr)
sys.exit(1)
try:
import imp
imp.reload(qconfig)
imp.reload(qutils)
except:
reload(qconfig)
reload(qutils)
try:
locale.setlocale(locale.LC_ALL, 'en_US.utf8')
except Exception:
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except Exception:
logger.warning('Python locale settings can\'t be changed')
quast_path = [os.path.realpath(__file__)]
quast_py_args, contigs_fpaths = parse_options(logger, quast_path + args)
output_dirpath, ref_fpath, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
logger.main_info()
logger.print_params()
########################################################################
from quast_libs import reporting
reports = reporting.reports
try:
import imp
imp.reload(reporting)
except:
reload(reporting)
reporting.reports = reports
reporting.assembly_fpaths = []
from quast_libs import plotter # Do not remove this line! It would lead to a warning in matplotlib.
if qconfig.is_combined_ref:
corrected_dirpath = os.path.join(output_dirpath, '..', qconfig.corrected_dirname)
else:
if os.path.isdir(corrected_dirpath):
shutil.rmtree(corrected_dirpath)
os.mkdir(corrected_dirpath)
qconfig.set_max_threads(logger)
check_reads_fpaths(logger)
# PROCESSING REFERENCE
if ref_fpath:
logger.main_info()
logger.main_info('Reference:')
original_ref_fpath = ref_fpath
ref_fpath = qutils.correct_reference(ref_fpath, corrected_dirpath)
if qconfig.ideal_assembly:
ideal_assembly_fpath = ideal_assembly.do(ref_fpath, original_ref_fpath,
os.path.join(output_dirpath, qconfig.ideal_assembly_basename))
if ideal_assembly_fpath is not None:
contigs_fpaths.insert(0, ideal_assembly_fpath)
labels.insert(0, 'IDEAL ASSEMBLY')
labels = qutils.process_labels(contigs_fpaths, labels)
else:
ref_fpath = ''
# PROCESSING CONTIGS
logger.main_info()
logger.main_info('Contigs:')
contigs_fpaths, old_contigs_fpaths = qutils.correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting)
for contigs_fpath in contigs_fpaths:
report = reporting.get(contigs_fpath)
report.add_field(reporting.Fields.NAME, qutils.label_from_fpath(contigs_fpath))
qconfig.assemblies_num = len(contigs_fpaths)
cov_fpath = qconfig.cov_fpath
physical_cov_fpath = qconfig.phys_cov_fpath
if qconfig.reads_fpaths or qconfig.reference_sam or qconfig.reference_sam or qconfig.sam_fpaths or qconfig.bam_fpaths:
bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(ref_fpath, contigs_fpaths,
os.path.join(output_dirpath, qconfig.reads_stats_dirname),
external_logger=logger)
qconfig.bed = bed_fpath
if not contigs_fpaths:
logger.error("None of the assembly files contains correct contigs. "
"Please, provide different files or decrease --min-contig threshold.",
fake_if_nested_run=True)
return 4
if qconfig.used_colors and qconfig.used_ls:
for i, label in enumerate(labels):
plotter_data.dict_color_and_ls[label] = (qconfig.used_colors[i], qconfig.used_ls[i])
qconfig.assemblies_fpaths = contigs_fpaths
# Where all pdfs will be saved
all_pdf_fpath = None
#.........这里部分代码省略.........
开发者ID:student-t,项目名称:quast,代码行数:101,代码来源:quast.py
示例18: align_and_analyze
def align_and_analyze(is_cyclic, index, contigs_fpath, output_dirpath, ref_fpath,
old_contigs_fpath, bed_fpath, parallel_by_chr=False, threads=1):
nucmer_output_dirpath = create_nucmer_output_dir(output_dirpath)
assembly_label = qutils.label_from_fpath(contigs_fpath)
corr_assembly_label = qutils.label_from_fpath_for_fname(contigs_fpath)
nucmer_fpath = join(nucmer_output_dirpath, corr_assembly_label)
logger.info(' ' + qutils.index_to_str(index) + assembly_label)
if not qconfig.space_efficient:
log_out_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.stdout')
log_err_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.stderr')
icarus_out_fpath = join(output_dirpath, qconfig.icarus_report_fname_pattern % corr_assembly_label)
misassembly_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.mis_contigs.info')
unaligned_info_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.unaligned.info')
else:
log_out_fpath = '/dev/null'
log_err_fpath = '/dev/null'
icarus_out_fpath = '/dev/null'
misassembly_fpath = '/dev/null'
unaligned_info_fpath = '/dev/null'
icarus_out_f = open(icarus_out_fpath, 'w')
icarus_header_cols = ['S1', 'E1', 'S2', 'E2', 'Reference', 'Contig', 'IDY', 'Ambiguous', 'Best_group']
icarus_out_f.write('\t'.join(icarus_header_cols) + '\n')
misassembly_f = open(misassembly_fpath, 'w')
if not qconfig.space_efficient:
logger.info(' ' + qutils.index_to_str(index) + 'Logging to files ' + log_out_fpath +
' and ' + os.path.basename(log_err_fpath) + '...')
else:
logger.info(' ' + qutils.index_to_str(index) + 'Logging is disabled.')
coords_fpath, coords_filtered_fpath, unaligned_fpath, show_snps_fpath, used_snps_fpath = \
get_nucmer_aux_out_fpaths(nucmer_fpath)
nucmer_status = align_contigs(nucmer_fpath, ref_fpath, contigs_fpath, old_contigs_fpath, index,
parallel_by_chr, threads, log_out_fpath, log_err_fpath)
if nucmer_status != NucmerStatus.OK:
with open(log_err_fpath, 'a') as log_err_f:
if nucmer_status == NucmerStatus.ERROR:
logger.error(' ' + qutils.index_to_str(index) +
'Failed aligning contigs ' + qutils.label_from_fpath(contigs_fpath) +
' to th
|
请发表评论