This article collects typical usage examples of the pysam.asVCF function in Python. If you have been wondering what asVCF does, how to call it, or where to find working examples, the curated code samples below should help.
The following section presents 20 code examples of the asVCF function, sorted by popularity by default. You can vote for the examples you like or find useful; your votes help our system recommend better Python code examples.
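Before the examples, here is a minimal, self-contained sketch of the pattern most of these snippets share: open a bgzip-compressed, Tabix-indexed VCF and pass pysam.asVCF() as the parser so each fetched row arrives as a structured record instead of a raw string. The file name and region below are placeholders for illustration, not files referenced elsewhere on this page.

import pysam

# "example.vcf.gz" is a hypothetical bgzip-compressed VCF with a .tbi index.
tbx = pysam.TabixFile("example.vcf.gz")

# With parser=pysam.asVCF(), fetch() yields records with named attributes
# (contig, pos, id, ref, alt, qual, filter, info, format); pos is zero-based.
for rec in tbx.fetch("chr1", 10000, 20000, parser=pysam.asVCF()):
    print(rec.contig, rec.pos + 1, rec.ref, rec.alt)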
Example 1: testFromTabix
def testFromTabix(self):
    # use ascii encoding - should raise error
    t = pysam.TabixFile(self.tmpfilename + ".gz", encoding="ascii")
    results = list(t.fetch(parser=pysam.asVCF()))
    self.assertRaises(UnicodeDecodeError, getattr, results[1], "id")

    t = pysam.TabixFile(self.tmpfilename + ".gz", encoding="utf-8")
    results = list(t.fetch(parser=pysam.asVCF()))
    self.assertEqual(getattr(results[1], "id"), u"Rene\xe9")
Author: humburg | Project: pysam | Lines: 10 | Source: tabix_test.py
Example 2: fetch
def fetch(self, chrom, start, end):
    """Yield tuples (vcf record, info dict key->val) for a range.

    VCF row attributes are:
        contig
        pos     chromosomal position, zero-based
        id
        ref     reference allele
        alt     alternate allele
        qual
        filter
        info
        format  format specifier
    """
    chrom = chrom.replace("chr", "")
    # fname = self.fnameDict[chrom]
    # vcf = pysam.VCF()
    # vcf.connect(fname)
    tbi = self.fhDict[chrom]
    it = tbi.fetch(chrom, start, end, parser=pysam.asVCF())
    for row in it:
        infoDict = {}
        infoStr = row.info
        for keyVal in infoStr.split(";"):
            if "=" not in keyVal:
                continue
            # split on the first '=' only, so values containing '=' survive intact
            key, val = keyVal.split("=", 1)
            infoDict[key] = val
        yield row, infoDict
Author: Moxikai | Project: pubMunch | Lines: 28 | Source: pubVcf.py
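The INFO-parsing idiom in Example 2 is worth isolating. Below is a hedged, self-contained sketch: the INFO string is invented for illustration, and split(..., 1) mirrors the fix above so values containing '=' survive.

# A hypothetical INFO field; flag entries such as INDEL carry no value.
info_str = "DP=100;AF=0.5;INDEL"
info_dict = {}
for key_val in info_str.split(";"):
    if "=" not in key_val:
        continue  # skip value-less flags
    key, val = key_val.split("=", 1)
    info_dict[key] = val
print(info_dict)  # {'DP': '100', 'AF': '0.5'}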
Example 3: testRead
def testRead(self):
    ncolumns = len(self.columns)
    for x, r in enumerate(self.tabix.fetch(parser=pysam.asVCF())):
        c = self.compare[x]
        for y, field in enumerate(self.columns):
            # it is ok to have a missing format column
            if y == 8 and y == len(c):
                continue
            if field == "pos":
                self.assertEqual(int(c[y]) - 1, getattr(r, field))
                self.assertEqual(int(c[y]) - 1, r.pos)
            else:
                self.assertEqual(c[y], getattr(r, field),
                                 "mismatch in field %s: %s != %s" %
                                 (field, c[y], getattr(r, field)))
        if len(c) == 8:
            self.assertEqual(0, len(r))
        else:
            self.assertEqual(len(c), len(r) + ncolumns)
            for y in range(len(c) - ncolumns):
                self.assertEqual(c[ncolumns + y], r[y])
        self.assertEqual("\t".join(map(str, c)),
                         str(r))
Author: zengfengbo | Project: pysam | Lines: 26 | Source: tabix_test.py
Example 4: filter_vcfs
def filter_vcfs(genotype, contig, start, end):
    if contig in genotype.contigs:
        parser = pysam.asVCF()
        # This raises a ValueError if the VCF does not
        # contain any entries for the specified contig.
        for vcf in genotype.fetch(contig, start, end, parser=parser):
            if vcf.filter in ("PASS", "."):
                yield vcf
Author: health1987 | Project: paleomix | Lines: 8 | Source: vcf_to_fasta.py
Example 5: fetch
def fetch(self,
          reference=None,
          start=None,
          end=None,
          region=None):
    """Parse a stream of VCF-formatted lines. Initializes the class instance and returns a generator."""
    iter = self.tabixfile.fetch(reference, start, end, region, parser=pysam.asVCF())
    for x in iter:
        yield VCFRecord(x, self)
Author: pkaleta | Project: pysam | Lines: 10 | Source: VCF.py
Example 6: check_nth_sample
def check_nth_sample(options, genotype):
    parser = pysam.asVCF()
    for contig in genotype.contigs:
        for record in text.parse_lines(genotype.fetch(contig), parser):
            if len(record) <= options.nth_sample:
                sys.stderr.write("ERROR: Sample %i selected with --nth-sample,"
                                 " but file only contains %i sample(s)!\n"
                                 % (options.nth_sample + 1, len(record)))
                return False
            return True
    return True
Author: MikkelSchubert | Project: paleomix | Lines: 12 | Source: vcf_to_fasta.py
Example 7: records
def records(self, chromosome_num, start_pos_bp, end_pos_bp, parser=pysam.asVCF()):
    '''
    Returns an iterator for the file records.
    '''
    start_pos_bp = 1 if start_pos_bp is None else start_pos_bp
    end_pos_bp = self.clens[str(chromosome_num)] if end_pos_bp is None else end_pos_bp

    # adjust by indexDelta since pysam uses 0-based positions
    records = self.tabix_file.fetch(str(chromosome_num), start_pos_bp + self.indexDelta, end_pos_bp + self.indexDelta, parser)
    return records
Author: quank | Project: cms | Lines: 12 | Source: vcf_reader.py
Example 8: _read_files
def _read_files(filenames, args):
    in_header = True
    has_filters = False
    vcf_parser = pysam.asVCF()
    for line in fileinput.input(filenames):
        if not line.startswith("#"):
            in_header = False
            line = line.rstrip("\n\r")
            yield vcf_parser(line, len(line))
        elif in_header:
            if not (line.startswith("##") or has_filters):
                has_filters = True
                for item in sorted(vcffilter.describe_filters(args).items()):
                    print('##FILTER=<ID=%s,Description="%s">' % item)
            print(line, end="")
Author: CarlesV | Project: paleomix | Lines: 16 | Source: vcf_filter.py
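Example 8 highlights a less obvious property: the object returned by pysam.asVCF() is itself callable on a single tab-separated VCF line, which is how you parse lines that do not come from a Tabix index. A minimal sketch, assuming a hand-written line (all values invented):

import pysam

line = "chr1\t100\trs123\tA\tG\t50\tPASS\tDP=10"  # the 8 fixed VCF columns
rec = pysam.asVCF()(line, len(line))
assert rec.pos == 99  # POS is 1-based in the file, zero-based on the record
print(rec.contig, rec.id, rec.ref, rec.alt)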
Example 9: __init__
def __init__(self, inFile, ploidy=1, parser=pysam.asVCF()):
    TabixReader.__init__(self, inFile, parser=parser)
    assert ploidy in (1, 2)
    self.ploidy = ploidy
    self.clens = []
    self.sample_names = None
    for line in self.header:
        if line.startswith('##contig=<ID=') and line.endswith('>'):
            line = line[13:-1]
            c = line.split(',')[0]
            clen = int(line.split('=')[1])
            self.clens.append((c, clen))
        elif line.startswith('#CHROM'):
            row = line.split('\t')
            self.sample_names = row[9:]
    self.clens = dict(self.clens)
    assert self.sample_names
Author: elimoss | Project: broad_malaria | Lines: 17 | Source: util_vcf.py
Example 10: testRead
def testRead(self):
    ncolumns = len(self.columns)
    for x, r in enumerate(self.tabix.fetch(parser=pysam.asVCF())):
        c = self.compare[x]
        for y, field in enumerate(self.columns):
            if field == "pos":
                self.assertEqual(int(c[y]) - 1, getattr(r, field))
                self.assertEqual(int(c[y]) - 1, r.pos)
            else:
                self.assertEqual(c[y], getattr(r, field),
                                 "mismatch in field %s: %s != %s" %
                                 (field, c[y], getattr(r, field)))
        self.assertEqual(len(c), len(r) + ncolumns)
        for y in range(len(c) - ncolumns):
            self.assertEqual(c[ncolumns + y], r[y])
Author: RLuisier | Project: RSeQC | Lines: 18 | Source: tabix_test.py
Example 11: _get_hits
def _get_hits(coords, annotation, parser_type):
    """Retrieve BED information, recovering if the annotation file does not have a chromosome.
    """
    if parser_type == "bed":
        parser = pysam.asBed()
    elif parser_type == "vcf":
        parser = pysam.asVCF()
    elif parser_type == "tuple":
        parser = pysam.asTuple()
    elif parser_type is None:
        parser = None
    else:
        # parser_type, not parser: parser is unbound in this branch
        raise ValueError("Unexpected parser type: %s" % parser_type)
    chrom, start, end = coords
    try:
        hit_iter = annotation.fetch(str(chrom), start, end, parser=parser)
    # catch invalid region errors raised by ctabix
    except ValueError:
        hit_iter = []
    return hit_iter
Author: egafni | Project: gemini | Lines: 20 | Source: annotations.py
Example 12: GetSumOfDifferencesFromTheReference
def GetSumOfDifferencesFromTheReference(vcfpath):
    from subprocess import check_call
    from utilBMF.HTSUtils import TrimExt
    import pysam
    import numpy as np
    from sys import stderr
    from itertools import chain
    cfi = chain.from_iterable

    bgvcfpath = TrimExt(vcfpath) + ".gz"
    check_call("bgzip -c %s > %s" % (vcfpath, bgvcfpath), shell=True)
    stderr.write("bgvcf now at %s" % bgvcfpath)
    tabixstr = "tabix " + bgvcfpath
    stderr.write("Now calling tabixstr: '%s'" % tabixstr)
    check_call("tabix %s" % bgvcfpath, shell=True)

    infh = open(bgvcfpath, "rb")
    tabixhandle = pysam.tabix_iterator(infh, pysam.asVCF())
    # Sum fields 3-4 of the I16 INFO tag (non-reference read counts)
    # over all non-indel records.
    return np.sum(np.array(list(cfi(
        [dict(tup.split("=") for tup in i.info.split(";"))['I16'].split(",")[2:4]
         for i in tabixhandle if "INDEL" not in i.info])), dtype=np.int64))
Author: NoSeatbelts | Project: cybmf | Lines: 20 | Source: pileup.py
Example 13: testWrite
def testWrite(self):
    ncolumns = len(self.columns)
    for x, r in enumerate(self.tabix.fetch(parser=pysam.asVCF())):
        c = self.compare[x]
        # check unmodified string
        cmp_string = str(r)
        ref_string = "\t".join([x for x in c])
        self.assertEqual(ref_string, cmp_string)

        # set fields and compare field-wise
        for y, field in enumerate(self.columns):
            # it is ok to have a missing format column
            if y == 8 and y == len(c):
                continue
            if field == "pos":
                rpos = getattr(r, field)
                self.assertEqual(int(c[y]) - 1, rpos)
                self.assertEqual(int(c[y]) - 1, r.pos)
                # increment pos by 1
                setattr(r, field, rpos + 1)
                self.assertEqual(getattr(r, field), rpos + 1)
                c[y] = str(int(c[y]) + 1)
            else:
                setattr(r, field, "test_%i" % y)
                c[y] = "test_%i" % y
                self.assertEqual(c[y], getattr(r, field),
                                 "mismatch in field %s: %s != %s" %
                                 (field, c[y], getattr(r, field)))
        if len(c) == 8:
            self.assertEqual(0, len(r))
        else:
            self.assertEqual(len(c), len(r) + ncolumns)
            for y in range(len(c) - ncolumns):
                c[ncolumns + y] = "test_%i" % y
                r[y] = "test_%i" % y
                self.assertEqual(c[ncolumns + y], r[y])
Author: humburg | Project: pysam | Lines: 41 | Source: tabix_test.py
Example 14: __init__
def __init__(self, inFile, ploidy=1, parser=pysam.asVCF()):
    # using old-style class superclass calling here
    # since TabixReader is derived from pysam.TabixFile
    # which is itself an old-style class (due to Cython version?)
    TabixReader.__init__(self, inFile, parser=parser)
    # when pysam uses new-style classes, we can replace with:
    #   super(VcfReader, self).__init__(inFile, parser=parser)
    assert ploidy in (1, 2)
    self.ploidy = ploidy
    self.clens = []
    self.sample_names = None
    for line in self.header:
        line = bytes_to_string(line)
        if line.startswith('##contig=<ID=') and line.endswith('>'):
            line = line[13:-1]
            c = line.split(',')[0]
            clen = int(line.split('=')[1])
            self.clens.append((c, clen))
        elif line.startswith('#CHROM'):
            row = line.split('\t')
            self.sample_names = row[9:]
    self.clens = dict(self.clens)
    assert self.sample_names
Author: a113n | Project: viral-ngs | Lines: 23 | Source: vcf.py
Example 15: select_vcf_records
def select_vcf_records(bed_records, vcf_records):
    """Returns an iterable of VCF records, corresponding to the contents of each
    region specified by the BED records. Records are returned at most once, even
    if covered by multiple BED records."""
    contigs = frozenset(vcf_records.contigs)
    vcf_parser = pysam.asVCF()

    # Timer class used for processing progress; meant primarily for BAM files
    progress = timer.BAMTimer(None)

    # Cache of positions observed for this contig, to prevent returning
    # positions in overlapping regions multiple times
    contig_cache = None
    contig_cache_name = None
    for bed in sorted(bed_records):
        if bed.contig not in contigs:
            # Skip contigs for which no calls have been made (e.g. due to
            # low coverage); otherwise pysam raises an exception.
            continue
        elif contig_cache_name != bed.contig:
            # Reset cache per contig, to save memory
            contig_cache = set()
            contig_cache_name = bed.contig

        for record in vcf_records.fetch(bed.contig, bed.start, bed.end, parser=vcf_parser):
            progress.increment()
            if record.pos in contig_cache:
                # We've already reported this VCF record
                continue
            contig_cache.add(record.pos)

            # Keep only records that passed all filters
            if record.filter in ('.', "PASS"):
                yield record
    progress.finalize()
Author: MikkelSchubert | Project: paleomix | Lines: 37 | Source: summarize_heterozygosity.py
Example 16: process_vcf_into_selscan_tped
def process_vcf_into_selscan_tped(cls, vcf_file, gen_map_file, outfile_location,
                                  outfile_prefix, chromosome_num, samples_to_include=None,
                                  start_pos_bp=None, end_pos_bp=None, ploidy=2,
                                  consider_multi_allelic=True, rescale_genetic_distance=False,
                                  include_variants_with_low_qual_ancestral=False,
                                  coding_function=None, multi_alleli_merge_function="AND"):
    """
    Process a bgzipped-VCF (such as those included in the Phase 3 1000 Genomes release) into a
    gzip-compressed tped file of the sort expected by selscan.
    """
    assert ploidy > 0

    processor = VCFReader(vcf_file)
    end_pos = processor.clens[str(chromosome_num)] if end_pos_bp is None else end_pos_bp
    records = processor.records(str(chromosome_num), start_pos_bp, end_pos, pysam.asVCF())

    outTpedFile = outfile_location + "/" + outfile_prefix + ".tped.gz"
    outTpedMetaFile = outfile_location + "/" + outfile_prefix + ".tped.allele_metadata.gz"

    if samples_to_include is not None and len(samples_to_include) > 0:
        indices_of_matching_samples = sorted([processor.sample_names.index(x) for x in samples_to_include])
    else:
        indices_of_matching_samples = range(0, len(processor.sample_names))

    indices_of_matching_genotypes = [(x * 2, (x * 2) + 1) for x in indices_of_matching_samples]
    indices_of_matching_genotypes = list(np.ravel(np.array(indices_of_matching_genotypes)))

    rm = RecomMap(gen_map_file)

    for filePath in [outTpedFile, outTpedMetaFile]:
        assert not os.path.exists(filePath), "File {} already exists. Consider removing this file or specifying a different output prefix. Processing aborted.".format(filePath)

    mergeOperatorString = ""
    if multi_alleli_merge_function == "OR":
        mergeOperatorString = "|"
    if multi_alleli_merge_function == "AND":
        mergeOperatorString = "&"
    if multi_alleli_merge_function == "XOR":
        mergeOperatorString = "^"

    startTime = datetime.now()
    sec_remaining_avg = 0
    current_pos_bp = 1

    with util.file.open_or_gzopen(outTpedFile, 'w') as of1, util.file.open_or_gzopen(outTpedMetaFile, 'w') as of2:
        # write the header for the metadata file with the selected subset of sample_names
        headerString = "CHROM VARIANT_ID POS_BP MAP_POS_CM REF_ALLELE ALT_ALLELE ANCESTRAL_CALL ALLELE_FREQ_IN_POP\n".replace(" ", "\t")
        of2.write(headerString)

        of1linesToWrite = []
        of2linesToWrite = []

        recordLengths = set()
        recordCount = 0

        mostRecentRecordPosSeen = -1
        positionHasBeenSeenBefore = False
        previouslyCodedGenotypes = GenoRecord([])
        lineToWrite1 = None
        lineToWrite2 = None
        previousAncestral = None
        ancestralDiffersFromPrevious = True

        for record in records:
            # In some cases there may be records with duplicate positions in the VCF file.
            # To account for that, we collapse rows that pass our filter and write them out
            # once we encounter a record with a new position.
            if record.pos != mostRecentRecordPosSeen:
                if positionHasBeenSeenBefore and not consider_multi_allelic:
                    lineToWrite1 = None
                    lineToWrite2 = None

                if lineToWrite1 is not None and lineToWrite2 is not None:
                    if len(previouslyCodedGenotypes) == ploidy * len(indices_of_matching_samples):
                        # write the output line
                        of1.write(lineToWrite1)
                        of2.write(lineToWrite2)
                    else:
                        genotypesCount = len(previouslyCodedGenotypes)
                        countSpecificTpedName = outTpedFile.replace(outfile_prefix, outfile_prefix + "_" + str(genotypesCount))
                        countSpecificMetafileName = outTpedMetaFile.replace(outfile_prefix, outfile_prefix + "_" + str(genotypesCount))
                        with util.file.open_or_gzopen(countSpecificTpedName, 'a') as of1l, util.file.open_or_gzopen(countSpecificMetafileName, 'a') as of2l:
                            of1l.write(lineToWrite1)
                            of2l.write(lineToWrite2)

                lineToWrite1 = None
                lineToWrite2 = None
                mostRecentRecordPosSeen = record.pos
                positionHasBeenSeenBefore = False
            else:
                positionHasBeenSeenBefore = True

            # if the variant is a SNP
            # (the OLD style looked at the INFO VT value:
            #  processor.variant_is_type(record.info, "SNP"))
            VALID_BASES = ["A", "C", "G", "T", "N", "a", "c", "g", "t", "n"]
            if ((len(record.ref) == 1 and len(record.alt) == 1) or
                    (all(variant in VALID_BASES for variant in record.ref.split(",")) and
                     all(variant in VALID_BASES for variant in record.alt.split(",")))):
                # ... (remainder of code omitted) ...
Author: quank | Project: cms | Lines: 101 | Source: selscan.py
Example 17: process_piece
def process_piece(filename_vcf, chrom, chrom_length, sample_index, chain_info, diploid, passed, quality, vcf_keep, vcf_discard_file):
    ret = {'chrom': chrom, 'stats': {}, 'chain_info': chain_info}
    stats = OrderedDict()
    stats['ACCEPTED'] = 0

    if vcf_keep:
        vcf_discard = open(vcf_discard_file, "w")

    line_no = 0
    try:
        LOG.info("Processing Chromosome {0}...".format(chrom))
        tb = pysam.TabixFile(filename_vcf)

        for vcf_rec in tb.fetch(chrom, parser=pysam.asVCF()):
            line_no += 1
            try:
                gt = parse_gt_new(vcf_rec, sample_index)
            except:
                LOG.info("Unable to parse record, improper VCF file?")
                continue

            LOG.debug('\n')
            LOG.debug(vcf_rec)
            LOG.debug(gt)
            LOG.debug(vcf_rec[sample_index])

            # note: pysam's asVCF records expose the FILTER column as "filter"
            if passed and 'PASS' not in vcf_rec.filter:
                LOG.debug("TOSSED: FILTERED ON PASS")
                stats = update_stats(stats, 'FILTERED ON PASS')
                if vcf_keep:
                    vcf_discard.write(str(vcf_rec))  # str() so the record proxy can be written to a text file
                    vcf_discard.write("\n")
                continue
            elif quality and gt.fi == '0':
                # FI: whether a sample was a pass (1) or fail (0) based on FILTER values
                LOG.debug("TOSSED: FILTERED ON QUALITY")
                stats = update_stats(stats, 'FILTERED ON QUALITY')
                if vcf_keep:
                    vcf_discard.write(str(vcf_rec))
                    vcf_discard.write("\n")
                continue
            elif gt.left is None and gt.right is None:
                LOG.debug("TOSSED: NOT RELEVANT")
                stats = update_stats(stats, 'NOT RELEVANT')
                if vcf_keep:
                    vcf_discard.write(str(vcf_rec))
                    vcf_discard.write("\n")
                continue
            elif not diploid and gt.left != gt.right:
                # haploid or hexaploid: gt must be equal
                LOG.debug("TOSSED: HETEROZYGOUS")
                stats = update_stats(stats, 'HETEROZYGOUS')
                if vcf_keep:
                    vcf_discard.write(str(vcf_rec))
                    vcf_discard.write("\n")
                continue

            # START L AND R, ONLY R IF DIPLOID
            for ci, lr in chain_info.iteritems():
                if ci == 'left':
                    alt_seq = str(gt.left)
                else:
                    alt_seq = str(gt.right)

                if gt.ref == alt_seq:
                    LOG.debug("TOSSED, SAME AS REF")
                    lr.stats = update_stats(lr.stats, 'SAME AS REF')
                    if vcf_keep:
                        vcf_discard.write(str(vcf_rec))
                        vcf_discard.write("\n")
                    continue

                orig_alt_seq = alt_seq

                LOG.debug("SAMPLE: {0}".format(vcf_rec[sample_index]))
                LOG.debug("REF='{0}', ALT_L='{1}', ALT_R='{2}', POS={3}".format(gt.ref, gt.left, gt.right, vcf_rec.pos))

                position = vcf_rec.pos + 1
                ref_seq = str(gt.ref)
                len_ref = len(ref_seq)
                # ... (remainder of code omitted) ...
Author: churchill-lab | Project: g2gtools | Lines: 101 | Source: vcf2chain.py
Example 18: RunSnvplot
def RunSnvplot(args):
    cfg = Parse.generate_snvplot_cfg(args)
    Parse.print_snvplot_options(cfg)

    if not cfg['debug']:
        logging.disable(logging.CRITICAL)

    ro.r('suppressMessages(library(ggplot2))')
    ro.r('suppressMessages(library(grid))')

    handle = pysam.TabixFile(filename=cfg['file'], parser=pysam.asVCF())
    header = [x for x in handle.header]
    skip_rows = len(header) - 1
    cols = header[-1].split()
    pcols = cfg['pcol'].split(',')
    cols_extract = [cfg['chrcol'], cfg['bpcol']] + pcols
    if cfg['qq_strat_freq']:
        if cfg['freqcol'] not in cols:
            print Process.Error("frequency column " + cfg['freqcol'] + " not found, unable to proceed with frequency stratified plots").out
            return 1
        else:
            cols_extract = cols_extract + [cfg['freqcol']]
            print "frequency column " + cfg['freqcol'] + " found"
    if cfg['qq_strat_mac']:
        if cfg['maccol'] not in cols:
            print Process.Error("minor allele count column " + cfg['maccol'] + " not found, unable to proceed with minor allele count stratified plots").out
            return 1
        else:
            cols_extract = cols_extract + [cfg['maccol']]
            print "minor allele count column " + cfg['maccol'] + " found"

    print "importing data"
    r = pd.read_table(cfg['file'], sep='\t', skiprows=skip_rows, usecols=cols_extract, compression='gzip')
    print str(r.shape[0]) + " total variants found"

    for pcol in pcols:
        print "plotting p-values for column " + pcol + " ..."
        results = r[[cfg['chrcol'], cfg['bpcol'], cfg['freqcol'], pcol]] if cfg['freqcol'] in r else r[[cfg['chrcol'], cfg['bpcol'], pcol]]
        results.dropna(inplace=True)
        results = results[(results[pcol] > 0) & (results[pcol] <= 1)].reset_index(drop=True)
        print " " + str(results.shape[0]) + " variants with plottable p-values"

        results['logp'] = -1 * np.log10(results[pcol]) + 0.0
        ro.globalenv['results'] = results
        l = np.median(scipy.chi2.ppf([1 - x for x in results[pcol].tolist()], df=1)) / scipy.chi2.ppf(0.5, 1)
        # in R: median(qchisq(results$p, df=1, lower.tail=FALSE)) / qchisq(0.5, 1)
        print " genomic inflation (all variants) = " + str(l)

        if cfg['qq']:
            print " generating standard qq plot"
            print " minimum p-value: " + str(np.min(results[pcol]))
            a = -1 * np.log10(ro.r('ppoints(' + str(len(results.index)) + ')'))
            a.sort()
            results.sort_values(by=['logp'], inplace=True)
            print " maximum -1*log10(p-value): " + str(np.max(results['logp']))

            ci_upper = -1 * np.log10(scipy.beta.ppf(0.95, range(1, len(results[pcol]) + 1), range(len(results[pcol]), 0, -1)))
            ci_upper.sort()
            ci_lower = -1 * np.log10(scipy.beta.ppf(0.05, range(1, len(results[pcol]) + 1), range(len(results[pcol]), 0, -1)))
            ci_lower.sort()

            ro.globalenv['df'] = ro.DataFrame({'a': ro.FloatVector(a), 'b': ro.FloatVector(results['logp']), 'ci_lower': ro.FloatVector(ci_lower), 'ci_upper': ro.FloatVector(ci_upper)})
            dftext_label = 'lambda %~~% ' + str(l)
            ro.globalenv['dftext'] = ro.DataFrame({'x': ro.r('Inf'), 'y': 0.5, 'lab': dftext_label})

            if cfg['ext'] == 'tiff':
                ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,units="in",bg="white",compression="lzw",dpi=300)' % (cfg['out'] + '.' + pcol + '.qq.tiff')
            elif cfg['ext'] == 'eps':
                ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,bg="white",horizontal=True)' % (cfg['out'] + '.' + pcol + '.qq.eps')
            else:
                ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,bg="white")' % (cfg['out'] + '.' + pcol + '.qq.pdf')
            ro.r("""
                gp<-ggplot(df)
                pp<-gp +
                    aes_string(x='a',y='b') +
                    geom_ribbon(aes_string(x='a',ymin='ci_lower',ymax='ci_upper'), data=df, alpha=0.25, fill='black') +
                    geom_point(size=2) +
                    geom_abline(intercept=0, slope=1, alpha=0.5) +
                    scale_x_discrete(expression(Expected~~-log[10](italic(p)))) +
                    scale_y_discrete(expression(Observed~~-log[10](italic(p)))) +
                    coord_fixed() +
                    theme_bw(base_size = 12) +
                    geom_text(aes_string(x='x', y='y', label='lab'), data = dftext, colour="black", vjust=0, hjust=1, size = 4, parse=TRUE) +
                    theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), legend.position = 'none',
                          panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(),
                          panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.text = element_text(size=12))
                %s
            """ % (ggsave))

            if np.max(results['logp']) > cfg['crop']:
                print " generating cropped standard qq plot"
                ro.r('df$b[df$b > ' + str(cfg['crop']) + ']<-' + str(cfg['crop']))
                ro.r('df$shape<-0')
                ro.r('df$shape[df$b == ' + str(cfg['crop']) + ']<-1')
                if cfg['ext'] == 'tiff':
                    ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,units="in",bg="white",compression="lzw",dpi=300)' % (cfg['out'] + '.' + pcol + '.qq.cropped.tiff')
                elif cfg['ext'] == 'eps':
                    ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,bg="white",horizontal=True)' % (cfg['out'] + '.' + pcol + '.qq.cropped.eps')
                else:
                    # ... (remainder of code omitted) ...
Author: CongcongZhu | Project: uga | Lines: 101 | Source: RunSnvplot.py
Example 19: __init__
def __init__(self, file_path, parser=pysam.asVCF()):
    self.vcf_file_path = file_path
    self.tabix_file = pysam.TabixFile(file_path, parser=parser)
    self.sample_names = self.read_sample_names()
    self.clens = self.contig_lengths()
    self.indexDelta = -1 if tuple(map(int, pysam.__version__.split('.'))) > (0, 5) else 0
Author: quank | Project: cms | Lines: 6 | Source: vcf_reader.py
Example 20: RunTools
def RunTools(args):
    cfg = Parse.generate_tools_cfg(args)
    Parse.print_tools_options(cfg)

    if not cfg['debug']:
        logging.disable(logging.CRITICAL)

    regions_df = pd.read_table(cfg['region_file'], compression='gzip' if cfg['region_file'].split('.')[-1] == 'gz' else None)
    regions_df = regions_df[regions_df['job'] == int(cfg['job'])].reset_index(drop=True)
    return_values = {}

    print ''
    print "initializing out file"
    try:
        bgzfile = bgzf.BgzfWriter(cfg['out'] + '.gz', 'wb')
    except:
        print Process.Error("failed to initialize bgzip format out file " + cfg['out'] + '.gz').out
        return 1

    if cfg['cpus'] > 1:
        pool = mp.Pool(cfg['cpus'] - 1)
        for i in xrange(1, cfg['cpus']):
            return_values[i] = pool.apply_async(process_regions, args=(regions_df, cfg, i, True,))
            print "submitting job on cpu " + str(i) + " of " + str(cfg['cpus'])
        pool.close()
        print "executing job for cpu " + str(cfg['cpus']) + " of " + str(cfg['cpus']) + " via main process"
        main_return = process_regions(regions_df, cfg, cfg['cpus'], True)
        pool.join()

        if 1 in [return_values[i].get() for i in return_values] or main_return == 1:
            print Process.Error("error detected, see log files").out
            return 1
    else:
        main_return = process_regions(regions_df, cfg, 1, True)
        if main_return == 1:
            print Process.Error("error detected, see log files").out
            return 1

    for i in xrange(1, cfg['cpus'] + 1):
        try:
            logfile = open(cfg['out'] + '.cpu' + str(i) + '.log', 'r')
        except:
            print Process.Error("failed to initialize log file " + cfg['out'] + '.cpu' + str(i) + '.log').out
            return 1
        print logfile.read()
        logfile.close()
        os.remove(cfg['out'] + '.cpu' + str(i) + '.log')

    written = False
    for i in xrange(1, cfg['cpus'] + 1):
        cpu_regions_df = regions_df[regions_df['cpu'] == i].reset_index()
        for j in xrange(0, len(cpu_regions_df.index)):
            f_temp = glob.glob(cfg['out'] + '.cpu' + str(i) + '.chr' + cpu_regions_df['region'][j].replace(':', 'bp') + '*.gz')[0]
            try:
                h = pysam.TabixFile(filename=f_temp, parser=pysam.asVCF())
            except:
                print Process.Error("failed to load vcf file " + f_temp)
                return 1
            if not written:
                for row in h.header:
                    bgzfile.write(str(row) + '\n')
                written = True
            h_iter = h.fetch(region=str(cpu_regions_df['chr'][j]))
            for row in h_iter:
                bgzfile.write(str(row) + '\n')
            for f in glob.glob(cfg['out'] + '.cpu' + str(i) + '.chr' + cpu_regions_df['region'][j].replace(':', 'bp') + '.*'):
                os.remove(f)
    bgzfile.close()

    print "indexing out file"
    try:
        pysam.tabix_index(cfg['out'] + '.gz', preset="vcf", force=True)
    except:
        print Process.Error('failed to generate index').out
        return 1

    print "process complete"
    return 0
Author: CongcongZhu | Project: uga | Lines: 79 | Source: RunTools.py
Note: The pysam.asVCF function examples on this page were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers, and copyright remains with the original authors. Distribution and use should follow the license of the corresponding project; do not reproduce without permission.