本文整理汇总了Python中pysam.tabix_compress函数的典型用法代码示例。如果您正苦于以下问题:Python tabix_compress函数的具体用法?Python tabix_compress怎么用?Python tabix_compress使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了tabix_compress函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: main
def main():
# Read options, args.
parser = optparse.OptionParser()
(options, args) = parser.parse_args()
input_fname, output_fname = args
pysam.tabix_compress(input_fname, output_fname, force=True)
开发者ID:ImmPortDB,项目名称:immport-galaxy,代码行数:7,代码来源:vcf_to_vcf_bgzip.py
示例2: make_bias_track
def make_bias_track(args, bases = 500000, splitsize = 1000):
"""function to compute bias track
"""
if args.out is None:
if args.bed is not None:
args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
else:
args.out = '.'.join(os.path.basename(args.fasta).split('.')[0:-1])
params = _BiasParams(args.fasta, args.pwm)
if args.bed is None:
chunks = ChunkList.convertChromSizes(params.chrs, splitsize = splitsize)
sets = chunks.split(items = bases/splitsize)
else:
chunks = ChunkList.read(args.bed)
chunks.merge()
sets = chunks.split(bases = bases)
maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
pool = mp.Pool(processes = max(1,args.cores-1))
out_handle = open(args.out + '.Scores.bedgraph','w')
out_handle.close()
write_queue = mp.JoinableQueue(maxsize = maxQueueSize)
write_process = mp.Process(target = _writeBias, args=(write_queue, args.out))
write_process.start()
for j in sets:
tmp = pool.map(_biasHelper, zip(j,itertools.repeat(params)))
for track in tmp:
write_queue.put(track)
pool.close()
pool.join()
write_queue.put('STOP')
write_process.join()
pysam.tabix_compress(args.out + '.Scores.bedgraph', args.out + '.Scores.bedgraph.gz', force = True)
shell_command('rm ' + args.out + '.Scores.bedgraph')
pysam.tabix_index(args.out + '.Scores.bedgraph.gz', preset = "bed", force = True)
开发者ID:kesteph,项目名称:NucleoATAC,代码行数:35,代码来源:make_bias_track.py
示例3: get_cov
def get_cov(args, bases = 50000, splitsize = 1000):
"""function to get coverages
"""
if not args.out:
if args.bed is None:
args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
else:
args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
if args.bed is None:
chrs = read_chrom_sizes_from_bam(args.bam)
chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize)
sets = chunks.split(items = bases/splitsize)
else:
chunks = ChunkList.read(args.bed)
chunks.merge()
sets = chunks.split(bases = bases)
maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
pool1 = mp.Pool(processes = max(1,args.cores-1))
out_handle = open(args.out + '.cov.bedgraph','w')
out_handle.close()
write_queue = mp.JoinableQueue(maxsize = maxQueueSize)
write_process = mp.Process(target = _writeCov, args=(write_queue, args.out))
write_process.start()
for j in sets:
tmp = pool1.map(_covHelper, zip(j,itertools.repeat(args)))
for track in tmp:
write_queue.put(track)
pool1.close()
pool1.join()
write_queue.put('STOP')
write_process.join()
pysam.tabix_compress(args.out + '.cov.bedgraph', args.out + '.cov.bedgraph.gz', force = True)
shell_command('rm ' + args.out + '.cov.bedgraph')
pysam.tabix_index(args.out + '.cov.bedgraph.gz', preset = "bed", force = True)
开发者ID:JordiAlbert,项目名称:NucleoATAC,代码行数:35,代码来源:get_cov.py
示例4: main
def main():
# Read options, args.
parser = optparse.OptionParser()
parser.add_option('-c', '--chr-col', type='int', dest='chrom_col')
parser.add_option('-s', '--start-col', type='int', dest='start_col')
parser.add_option('-e', '--end-col', type='int', dest='end_col')
parser.add_option('-P', '--preset', dest='preset')
(options, args) = parser.parse_args()
input_fname, output_fname = args
tmpfile = tempfile.NamedTemporaryFile()
sort_params = None
if options.chrom_col and options.start_col and options.end_col:
sort_params = [
"sort",
"-k%(i)s,%(i)s" % {'i': options.chrom_col},
"-k%(i)i,%(i)in" % {'i': options.start_col},
"-k%(i)i,%(i)in" % {'i': options.end_col}
]
elif options.preset == "bed":
sort_params = ["sort", "-k1,1", "-k2,2n", "-k3,3n"]
elif options.preset == "vcf":
sort_params = ["sort", "-k1,1", "-k2,2n"]
elif options.preset == "gff":
sort_params = ["sort", "-s", "-k1,1", "-k4,4n"] # stable sort on start column
# Skip any lines starting with "#" and "track"
grepped = subprocess.Popen(["grep", "-e", "^\"#\"", "-e", "^track", "-v", input_fname], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
after_sort = subprocess.Popen(sort_params, stdin=grepped.stdout, stderr=subprocess.PIPE, stdout=tmpfile)
grepped.stdout.close()
output, err = after_sort.communicate()
pysam.tabix_compress(tmpfile.name, output_fname, force=True)
开发者ID:ImmPortDB,项目名称:immport-galaxy,代码行数:33,代码来源:bgzip.py
示例5: _create_tabix
def _create_tabix(fname, ftype):
logger = logging.getLogger("pita")
tabix_file = ""
logger.info("Creating tabix index for %s", os.path.basename(fname))
logger.debug("Preparing %s for tabix", fname)
tmp = NamedTemporaryFile(prefix="pita", delete=False)
preset = "gff"
if ftype == "bed":
cmd = "sort -k1,1 -k2g,2 {0} | grep -v track | grep -v \"^#\" > {1}"
preset = "bed"
elif ftype in ["gff", "gff3", "gtf"]:
cmd = "sort -k1,1 -k4g,4 {0} | grep -v \"^#\" > {1}"
# Sort the input file
logger.debug(cmd.format(fname, tmp.name))
sp.call(cmd.format(fname, tmp.name), shell=True)
# Compress using bgzip
logger.debug("compressing %s", tmp.name)
tabix_file = tmp.name + ".gz"
pysam.tabix_compress(tmp.name, tabix_file)
tmp.close()
# Index (using tabix command line, as pysam.index results in a Segmentation fault
logger.debug("indexing %s", tabix_file)
sp.call("tabix {0} -p {1}".format(tabix_file, preset), shell=True)
return tabix_file
开发者ID:Jorisvansteenbrugge,项目名称:pita,代码行数:25,代码来源:io.py
示例6: ensureIndexed
def ensureIndexed(bedPath, preset="bed", trySorting=True):
if not bedPath.endswith(".gz"):
if not os.path.exists(bedPath + ".gz"):
logging.info("bgzf compressing {}".format(bedPath))
pysam.tabix_compress(bedPath, bedPath + ".gz")
if not os.path.exists(bedPath + ".gz"):
raise Exception(
"Failed to create compress {preset} file for {file}; make sure the {preset} file is "
"sorted and the directory is writeable".format(preset=preset, file=bedPath)
)
bedPath += ".gz"
if not os.path.exists(bedPath + ".tbi"):
logging.info("creating tabix index for {}".format(bedPath))
pysam.tabix_index(bedPath, preset=preset)
if not os.path.exists(bedPath + ".tbi"):
raise Exception(
"Failed to create tabix index file for {file}; make sure the {preset} file is "
"sorted and the directory is writeable".format(preset=preset, file=bedPath)
)
line = pysam.Tabixfile(bedPath).fetch().next()
if len(line.strip().split("\t")) < 6 and preset == "bed":
raise AnnotationError(
"BED files need to have at least 6 (tab-delimited) fields (including "
"chrom, start, end, name, score, strand; score is unused)"
)
if len(line.strip().split("\t")) < 9 and preset == "gff":
raise AnnotationError("GFF/GTF files need to have at least 9 tab-delimited fields")
return bedPath
开发者ID:apregier,项目名称:svviz,代码行数:30,代码来源:tabix.py
示例7: eff_vcf
def eff_vcf(self, inVcf, outVcf, genome, java_flags='-Xmx2g',
in_format='vcf', out_format='vcf', eff_options=''):
"""
TODO: docstring here
"""
if outVcf.endswith('.vcf.gz'):
tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
else:
tmpVcf = outVcf
args = ' '.join([
'eff',
'-c', '{}/snpEff.config'.format(self.executable_path()),
'-i', in_format,
'-o', out_format,
genome,
'-treatAllAsProteinCoding false',
'-noLog',
'-ud 0',
'-noStats',
eff_options
])
if inVcf.endswith('.gz'):
pre_pipe = "zcat {} | ".format(inVcf)
else:
pre_pipe = "cat {} | ".format(inVcf)
post_pipe = " > {}".format(tmpVcf)
self.execute(args, java_flags=java_flags, pre_pipe=pre_pipe,
post_pipe=post_pipe)
if outVcf.endswith('.vcf.gz'):
pysam.tabix_compress(tmpVcf, outVcf, force=True)
pysam.tabix_index(outVcf, force=True, preset='vcf')
os.unlink(tmpVcf)
开发者ID:mlin,项目名称:viral-ngs,代码行数:35,代码来源:snpeff.py
示例8: testIndexPresetCompressed
def testIndexPresetCompressed(self):
'''test indexing via preset.'''
pysam.tabix_compress(self.tmpfilename, self.tmpfilename + ".gz")
pysam.tabix_index(self.tmpfilename + ".gz", preset=self.preset)
checkBinaryEqual(self.tmpfilename + ".gz", self.filename)
checkBinaryEqual(self.tmpfilename + ".gz.tbi", self.filename_idx)
开发者ID:humburg,项目名称:pysam,代码行数:7,代码来源:tabix_test.py
示例9: annotate_vcf
def annotate_vcf(self, inVcf, genome, outVcf, JVMmemory=None):
"""
Annotate variants in VCF file with translation consequences using snpEff.
"""
if outVcf.endswith('.vcf.gz'):
tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
elif outVcf.endswith('.vcf'):
tmpVcf = outVcf
else:
raise Exception("invalid input")
args = [
'-treatAllAsProteinCoding', 'false',
'-t',
'-noLog',
'-ud', '0',
'-noStats',
'-noShiftHgvs',
genome,
inVcf
]
with open(tmpVcf, 'wt') as outf:
self.execute('ann', args, JVMmemory=JVMmemory, stdout=outf)
if outVcf.endswith('.vcf.gz'):
pysam.tabix_compress(tmpVcf, outVcf, force=True)
pysam.tabix_index(outVcf, force=True, preset='vcf')
os.unlink(tmpVcf)
开发者ID:ACEGID-Senegal,项目名称:viral-ngs,代码行数:28,代码来源:snpeff.py
示例10: annotate_vcf
def annotate_vcf(self, inVcf, genomes, outVcf, emailAddress, JVMmemory=None):
"""
Annotate variants in VCF file with translation consequences using snpEff.
"""
if outVcf.endswith('.vcf.gz'):
tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
elif outVcf.endswith('.vcf'):
tmpVcf = outVcf
else:
raise Exception("invalid input")
sortedAccessionString = ", ".join(sorted(genomes))
databaseId = hashlib.sha256(sortedAccessionString.encode('utf-8')).hexdigest()[:55]
genomeToUse = ""
# if we don't have the genome, by name (snpEff official) or by hash (custom)
if (not self.has_genome(databaseId)):
if (not self.has_genome(genomes[0])):
_log.info("Checking for snpEff database online...")
# check to see if it is available for download, and if so install it
for row in self.available_databases():
if (genomes[0].lower() in row['Genome'].lower()) or (
genomes[0].lower() in row['Bundle'].lower()
) or (
genomes[0].lower() in row['Organism'].lower()
):
self.download_db(row['Genome'])
# backward compatability for where a single genome name is provided
if self.has_genome(genomes[0]):
genomeToUse = genomes[0]
else:
# if the hash of the accessions passed in is not present in the genomes db
if not self.has_genome(databaseId):
self.create_db(genomes, emailAddress, JVMmemory)
if self.has_genome(databaseId):
genomeToUse = databaseId
if not genomeToUse:
raise Exception()
args = [
'-treatAllAsProteinCoding', 'false', '-t', '-noLog', '-ud', '0', '-noStats', '-noShiftHgvs', genomeToUse,
os.path.realpath(inVcf)
]
command_ps = self.execute('ann', args, JVMmemory=JVMmemory)
if command_ps.returncode == 0:
with open(tmpVcf, 'wt') as outf:
outf.write(command_ps.stdout.decode("utf-8"))
if outVcf.endswith('.vcf.gz'):
pysam.tabix_compress(tmpVcf, outVcf, force=True)
pysam.tabix_index(outVcf, force=True, preset='vcf')
os.unlink(tmpVcf)
else:
raise subprocess.CalledProcessError(cmd=command_ps.args, returncode=command_ps.returncode, output=command_ps.stdout)
开发者ID:mypandos,项目名称:viral-ngs,代码行数:59,代码来源:snpeff.py
示例11: run_nfr
def run_nfr(args):
"""run nfr calling
"""
if args.bam is None and args.ins_track is None:
raise Exception("Must supply either bam file or insertion track")
if not args.out:
args.out = '.'.join(os.path.basename(args.calls).split('.')[0:-3])
if args.fasta is not None:
chrs_fasta = read_chrom_sizes_from_fasta(args.fasta)
pwm = PWM.open(args.pwm)
chunks = ChunkList.read(args.bed, chromDict = chrs_fasta, min_offset = max(pwm.up, pwm.down))
else:
chunks = ChunkList.read(args.bed)
if args.bam is not None:
chrs_bam = read_chrom_sizes_from_bam(args.bam)
chunks.checkChroms(chrs_bam, chrom_source = "BAM file")
chunks.merge()
maxQueueSize = args.cores * 10
params = NFRParameters(args.occ_track, args.calls, args.ins_track, args.bam, max_occ = args.max_occ, max_occ_upper = args.max_occ_upper,
fasta = args.fasta, pwm = args.pwm)
sets = chunks.split(items = args.cores * 5)
pool1 = mp.Pool(processes = max(1,args.cores-1))
nfr_handle = open(args.out + '.nfrpos.bed','w')
nfr_handle.close()
nfr_queue = mp.JoinableQueue()
nfr_process = mp.Process(target = _writeNFR, args=(nfr_queue, args.out))
nfr_process.start()
if params.ins_track is None:
ins_handle = open(args.out + '.ins.bedgraph','w')
ins_handle.close()
ins_queue = mp.JoinableQueue()
ins_process = mp.Process(target = _writeIns, args=(ins_queue, args.out))
ins_process.start()
for j in sets:
tmp = pool1.map(_nfrHelper, zip(j,itertools.repeat(params)))
for result in tmp:
if params.ins_track is None:
nfr_queue.put(result[0])
ins_queue.put(result[1])
else:
nfr_queue.put(result)
pool1.close()
pool1.join()
nfr_queue.put('STOP')
nfr_process.join()
if params.ins_track is None:
ins_queue.put('STOP')
ins_process.join()
pysam.tabix_compress(args.out + '.nfrpos.bed', args.out + '.nfrpos.bed.gz',force = True)
shell_command('rm ' + args.out + '.nfrpos.bed')
pysam.tabix_index(args.out + '.nfrpos.bed.gz', preset = "bed", force = True)
if params.ins_track is None:
pysam.tabix_compress(args.out + '.ins.bedgraph', args.out + '.ins.bedgraph.gz', force = True)
shell_command('rm ' + args.out + '.ins.bedgraph')
pysam.tabix_index(args.out + '.ins.bedgraph.gz', preset = "bed", force = True)
开发者ID:JordiAlbert,项目名称:NucleoATAC,代码行数:56,代码来源:run_nfr.py
示例12: testEmptyFileVCFGZWithoutIndex
def testEmptyFileVCFGZWithoutIndex(self):
with get_temp_context("tmp_testEmptyFileWithoutIndex.vcf") as fn:
with open(fn, "w"):
pass
pysam.tabix_compress(fn,
fn + ".gz",
force=True)
self.assertRaises(ValueError, pysam.VariantFile, fn + ".gz")
开发者ID:msto,项目名称:pysam,代码行数:10,代码来源:VariantFile_test.py
示例13: convert_VariantFile_to_IndexedVariantFile
def convert_VariantFile_to_IndexedVariantFile(vf_path, ivf_path):
make_basedir(ivf_path)
tmp_path = get_tmp_path(ivf_path)
pysam.tabix_compress(vf_path, tmp_path, force=True)
os.rename(tmp_path, ivf_path)
pysam.tabix_index(
filename=ivf_path, force=True,
seq_col=0, start_col=1, end_col=1, # note: `pysam.tabix_index` calls the first column `0`, but cmdline `tabix` calls it `1`.
line_skip=1, # skip header
)
开发者ID:statgen,项目名称:pheweb,代码行数:11,代码来源:file_utils.py
示例14: run_merge
def run_merge(args):
if not args.out:
args.out = '.'.join(os.path.basename(args.nucpos).split('.')[0:-3])
occ = NucList.read(args.occpeaks, "occ", args.min_occ)
nuc = NucList.read(args.nucpos, "nuc", args.min_occ)
new = merge(occ, nuc, args.sep)
out = open(args.out + '.nucmap_combined.bed','w')
out.write(new.asBed())
out.close()
pysam.tabix_compress(args.out + '.nucmap_combined.bed', args.out + '.nucmap_combined.bed.gz',force = True)
shell_command('rm ' + args.out + '.nucmap_combined.bed')
pysam.tabix_index(args.out + '.nucmap_combined.bed.gz', preset = "bed", force = True)
开发者ID:JordiAlbert,项目名称:NucleoATAC,代码行数:12,代码来源:merge.py
示例15: testEmptyFileVCFGZ
def testEmptyFileVCFGZ(self):
with open("tmp_testEmptyFile.vcf", "w"):
pass
pysam.tabix_compress("tmp_testEmptyFile.vcf",
"tmp_testEmptyFile.vcf.gz")
self.assertRaises(ValueError, pysam.VariantFile,
"tmp_testEmptyFile.vcf.gz")
os.unlink("tmp_testEmptyFile.vcf")
os.unlink("tmp_testEmptyFile.vcf.gz")
开发者ID:nickwork,项目名称:pysam,代码行数:12,代码来源:VariantFile_test.py
示例16: testEmptyFileVCFGZWithoutIndex
def testEmptyFileVCFGZWithoutIndex(self):
with open("tmp_testEmptyFileWithoutIndex.vcf", "w"):
pass
pysam.tabix_compress("tmp_testEmptyFileWithoutIndex.vcf",
"tmp_testEmptyFileWithoutIndex.vcf.gz",
force=True)
self.assertRaises(ValueError, pysam.VariantFile,
"tmp_testEmptyFileWithoutIndex.vcf.gz")
os.unlink("tmp_testEmptyFileWithoutIndex.vcf")
os.unlink("tmp_testEmptyFileWithoutIndex.vcf.gz")
开发者ID:bayolau,项目名称:pysam,代码行数:13,代码来源:VariantFile_test.py
示例17: indexFile
def indexFile(options):
filename=options.output
if not options.ensembl is None:
sys.stdout.write('Compressing output file... ')
sys.stdout.flush()
pysam.tabix_compress(filename,filename+'.gz',force=True)
sys.stdout.write('OK\n')
sys.stdout.write('Indexing output file... ')
sys.stdout.flush()
pysam.tabix_index(filename+'.gz', seq_col=2, start_col=4, end_col=5, meta_char='#',force=True)
sys.stdout.write('OK\n')
else:
print 'Compressing file...'
pysam.tabix_compress(filename,filename+'.gz',force=True)
print 'Indexing file...'
pysam.tabix_index(filename+'.gz', seq_col=1, start_col=2, end_col=2, meta_char='#',force=True)
开发者ID:RahmanTeam,项目名称:OpEx,代码行数:16,代码来源:dbprep.py
示例18: bgzip_index
def bgzip_index(original_file, new_file, file_format):
"""
:param original_file:
:param new_file:
:param file_format:
:return:
"""
if file_format.lower() == 'fa':
tabix_compress(original_file, new_file)
faidx(new_file)
delete_file(original_file)
elif file_format.lower() == 'vcf':
tabix_index(original_file, preset="vcf", force=True)
else:
raise G2GValueError("Unknown file format: {0}".format(file_format))
开发者ID:churchill-lab,项目名称:g2gtools,代码行数:17,代码来源:g2g_fileutils.py
示例19: _make_assembly_vcf
def _make_assembly_vcf(self):
tmp_vcf = self.final_assembly_vcf + '.tmp'
cmd = ' '.join([
self.samtools_exe, 'mpileup',
'-t INFO/DPR,DV',
'-A',
'-f', self.final_assembly_fa,
'-u',
'-v',
self.final_assembly_bam,
'>',
tmp_vcf
])
common.syscall(cmd, verbose=self.verbose)
cmd = ' '.join([
self.bcftools_exe, 'call -m',
tmp_vcf,
'|',
self.bcftools_exe, 'query',
r'''-f '%CHROM\t%POS\t%REF\t%ALT\t%DP\t%DPR]\n' ''',
'>',
self.final_assembly_read_depths + '.tmp'
])
common.syscall(cmd, verbose=self.verbose)
pysam.tabix_compress(self.final_assembly_read_depths + '.tmp', self.final_assembly_read_depths)
pysam.tabix_index(self.final_assembly_read_depths, seq_col=0, start_col=1, end_col=1)
os.unlink(self.final_assembly_read_depths + '.tmp')
cmd = ' '.join([
self.bcftools_exe, 'call -m -v',
tmp_vcf,
'|',
self.bcftools_exe, 'filter',
'-i', '"MIN(DP)>=' + str(self.bcf_min_dp),
' & MIN(DV)>=' + str(self.bcf_min_dv),
' & MIN(DV/DP)>=' + str(self.bcf_min_dv_over_dp),
' & QUAL >=', str(self.bcf_min_qual), '"',
'-o', self.final_assembly_vcf
])
common.syscall(cmd, verbose=self.verbose)
os.unlink(tmp_vcf)
开发者ID:codemeleon,项目名称:ariba,代码行数:45,代码来源:cluster.py
示例20: main
def main():
# Read options, args.
usage = "Usage: %prog [options] tabular_input_file bgzip_output_file"
parser = optparse.OptionParser(usage=usage)
parser.add_option('-c', '--chr-col', type='int', default=0, dest='chrom_col')
parser.add_option('-s', '--start-col', type='int', default=1, dest='start_col')
parser.add_option('-e', '--end-col', type='int', default=1, dest='end_col')
(options, args) = parser.parse_args()
if len(args) != 2:
parser.print_usage()
exit(1)
input_fname, output_fname = args
output_dir = os.path.dirname(output_fname)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
pysam.tabix_compress(input_fname, output_fname, force=True)
# Column indices are 0-based.
pysam.tabix_index(output_fname, seq_col=options.chrom_col, start_col=options.start_col, end_col=options.end_col)
开发者ID:ImmPortDB,项目名称:immport-galaxy,代码行数:18,代码来源:tabular_to_dbnsfp.py
注:本文中的pysam.tabix_compress函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论