本文整理汇总了Python中pysam.Samfile类的典型用法代码示例。如果您正苦于以下问题:Python Samfile类的具体用法?Python Samfile怎么用?Python Samfile使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Samfile类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_process_bam_mismatches
def test_process_bam_mismatches():
tbam = os.path.join(DATA, "tmp.bam")
bam = os.path.join(DATA, "ordered_umi.bam")
if os.path.exists(tbam):
os.remove(tbam)
with captured_output() as (out, err):
process_bam(bam, tbam, mismatches=1)
assert os.path.exists(tbam)
it = iter(out.getvalue().split("\n"))
assert it.next().strip() == "1\t9\t10\t4\t2"
assert it.next().strip() == "1\t11\t12\t2\t1"
assert it.next().strip() == "1\t29\t30\t2\t1"
bam_reader = Samfile(tbam)
it = iter(bam_reader)
r = it.next()
assert r.pos == 4
assert r.qname == "read8:UMI_ATTCAGGG"
r = it.next()
assert r.pos == 9
assert r.qname == "read1:UMI_AAAAAGGG"
r = it.next()
assert r.pos == 9
assert r.qname == "read4:UMI_AAAGGGGG"
r = it.next()
assert r.pos == 11
assert r.qname == "read5:UMI_ATTTAGGG"
bam_reader.close()
os.remove(tbam)
开发者ID:brwnj,项目名称:umitools,代码行数:29,代码来源:test_umitools.py
示例2: callbase
def callbase(bamfile, snpsites, out):
BF = Samfile(bamfile, 'rb') #open your bam file
SF = open(snpsites, 'r') #the file contain snp sites info
RF = open(out, 'w') #resulte file
RF.write('ref_name\tpos\tRbase\tAbase\tA\tT\tC\tG\tN\tothers\n')
for i in SF:
if i.startswith('#'):
continue
else:
line = ParseSNPsitesLine(i)
vcf_pos = line.pos-1 #change 1-base to 0-based
vcf_refname = line.chrom
print 'processing: %s %s...'%(vcf_refname, str(vcf_pos))
At, Tt, Ct, Gt, Nt, othert = 0, 0, 0, 0, 0, 0
for i in BF.pileup(vcf_refname, vcf_pos, vcf_pos+1):
if i.pos == vcf_pos:
vcf_Rbase = line.Rbase
vcf_Abase = line.Abase
for j in i.pileups:
yourbase = j.alignment.seq[j.qpos]
if yourbase == 'A': At += 1
elif yourbase == 'T': Tt += 1
elif yourbase == 'C': Ct += 1
elif yourbase == 'G': Gt += 1
elif yourbase == 'N': Nt += 1
else: othert += 1
RF.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n'%(vcf_refname, \
str(vcf_pos+1), vcf_Rbase, vcf_Abase, str(At), str(Tt), str(Ct), str(Gt), \
str(Nt), str(othert)))
BF.close()
开发者ID:freemao,项目名称:call-base-each-snp-site,代码行数:30,代码来源:callbase-pysam.py
示例3: parse_bam_differential
def parse_bam_differential(afn, bfn, regs, step):
"""(internal) Parses bam file in absolute mode. Proceeds by counting reads mapping
onto a segment (chr, start, end). No normalization is done at this step.
"""
abam = Samfile(str(afn), "rb")
bbam = Samfile(str(bfn), "rb")
acount = []
bcount = []
oldchr = "chr1"
for reg in regs:
chr, start, end = reg[:3]
if chr != oldchr:
log("files: %s - %s : %s counted" % (afn, bfn, oldchr))
oldchr = chr
# this could be improved
for s in xrange(start, end, step):
e = s + step
an = abam.count(chr, s, e)
bn = bbam.count(chr, s, e)
acount.append(an)
bcount.append(bn)
acount.append(-1)
bcount.append(-1)
log("files: %s - %s : %s counted (finished)" % (afn, bfn, oldchr))
return acount, bcount
开发者ID:BioinformaticsArchive,项目名称:epicode,代码行数:25,代码来源:epicode.py
示例4: bam_read_id
def bam_read_id(url):
'''Read first read id out of a remote bam file.
Note: requires a patched version of pysam
'''
stream = Samfile(url, 'rb')
read = stream.next()
return read.qname
开发者ID:detrout,项目名称:encode3-curation,代码行数:8,代码来源:validate_encode3_aliases.py
示例5: parse_bam_absolute
def parse_bam_absolute(fn, regs):
"""(internal) Parses bam file in absolute mode. Proceeds by counting reads mapping
onto a segment (chr, start, end) and normalizes the count by the segment's length.
"""
bam = Samfile(str(fn), "rb")
count = []
for reg in regs:
chr, start, end = reg[:3]
n = bam.count(chr, start, end)
count.append(float(n) / (end - start))
return count
开发者ID:BioinformaticsArchive,项目名称:epicode,代码行数:11,代码来源:epicode.py
示例6: main
def main():
bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
for al in bam:
chrom = bam.getrname(al.rname)
start = al.pos
end = al.aend
name = al.qname
for hit in rmsk.search(chrom, start, end):
print chrom, start, end, name,
print hit.chrom, hit.start, hit.end, hit.name
开发者ID:mikerlindberg,项目名称:bedtools-python,代码行数:13,代码来源:pysam-and-bedtools.py
示例7: main
def main(args=None):
if args is None:
args = sys.argv[1:]
f = Samfile(args[0])
header = f.header
f.close()
reflen = header['SQ'][0]['LN']
BamIO.write(clip(BamIO.parse(args[0]), reflen), args[1], header=header)
return 0
开发者ID:trident01,项目名称:BioExt-1,代码行数:13,代码来源:bamclip.py
示例8: main
def main(args):
option = "r" if args.samformat else "rb"
samfile = Samfile(args.bamfile, "rb")
#Iterates over each read instead of each contig
outputs = defaultdict(list)
#import ipdb; ipdb.set_trace()
for aln in samfile.fetch(until_eof = True):
ref = samfile.getrname(aln.tid)
outputs[ref].append(aln)
for ref, alns in outputs.iteritems():
print_reads(alns, ref, samfile.header)
开发者ID:alneberg,项目名称:bu,代码行数:13,代码来源:split_bam_per_reference.py
示例9: main
def main():
bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
# Example 1:
# Method: IntervalFile.all_hits()
# Report _all_ of the rmsk features that overlap with the BAM alignment
for al in bam:
strand = "+"
if al.is_reverse: strand = "-"
i = Interval(bam.getrname(al.rname), al.pos, al.aend, strand)
for hit in rmsk.all_hits(i, same_strand=True, ovlp_pct=0.75):
print "\t".join(str(x) for x in [i,hit])
开发者ID:Shima0,项目名称:bedtools-python,代码行数:15,代码来源:ex2-pysam-and-bedtools.py
示例10: __init__
def __init__(self, file_name):
"""
Initializes GenomicSignal.
"""
self.file_name = file_name
self.sg_coefs = None
self.bam = Samfile(file_name, "rb")
开发者ID:eggduzao,项目名称:reg-gen,代码行数:7,代码来源:signalProcessing.py
示例11: get_raw_tracks
def get_raw_tracks(args):
# Initializing Error Handler
err = ErrorHandler()
if len(args.input_files) != 2:
err.throw_error("ME_FEW_ARG", add_msg="You must specify reads and regions file.")
output_fname = os.path.join(args.output_location, "{}.wig".format(args.output_prefix))
bam = Samfile(args.input_files[0], "rb")
regions = GenomicRegionSet("Interested regions")
regions.read(args.input_files[1])
regions.merge()
reads_file = GenomicSignal()
with open(output_fname, "a") as output_f:
for region in regions:
# Raw counts
signal = [0.0] * (region.final - region.initial)
for read in bam.fetch(region.chrom, region.initial, region.final):
if not read.is_reverse:
cut_site = read.pos + args.forward_shift
if region.initial <= cut_site < region.final:
signal[cut_site - region.initial] += 1.0
else:
cut_site = read.aend + args.reverse_shift - 1
if region.initial <= cut_site < region.final:
signal[cut_site - region.initial] += 1.0
if args.norm:
signal = reads_file.boyle_norm(signal)
perc = scoreatpercentile(signal, 98)
std = np.std(signal)
signal = reads_file.hon_norm_atac(signal, perc, std)
output_f.write("fixedStep chrom=" + region.chrom + " start=" + str(region.initial + 1) + " step=1\n" +
"\n".join([str(e) for e in np.nan_to_num(signal)]) + "\n")
output_f.close()
if args.bigWig:
genome_data = GenomeData(args.organism)
chrom_sizes_file = genome_data.get_chromosome_sizes()
bw_filename = os.path.join(args.output_location, "{}.bw".format(args.output_prefix))
os.system(" ".join(["wigToBigWig", output_fname, chrom_sizes_file, bw_filename, "-verbose=0"]))
os.remove(output_fname)
开发者ID:CostaLab,项目名称:reg-gen,代码行数:45,代码来源:Tracks.py
示例12: subsample
def subsample(fn, ns=None):
if ns is None:
fn, ns = fn
sample = []
count = 0
outdir_base = path.join(path.dirname(fn), "subset")
sf = Samfile(fn)
try:
i_weight = float(sf.mapped) / max(ns)
print "Read out ", i_weight
except ValueError:
i_weight = 0.0
for read in sf:
i_weight += 1
print "Counted ", i_weight
i_weight /= float(max(ns))
sf = Samfile(fn)
print fn, count, i_weight
for i, read in enumerate(sf):
key = random() ** i_weight
if len(sample) < max(ns):
heappush(sample, (key, read, i + count))
else:
heappushpop(sample, (key, read, i + count))
count += i
for n in ns:
if n == min(ns):
outdir = outdir_base + "_min"
else:
outdir = outdir_base + "{:04.1f}M".format(n / 1e6)
try:
makedirs(outdir)
except OSError:
pass
sampN = sorted(sample, reverse=True)[: int(n)]
print "Kept {: >12,} of {: >12,} reads".format(len(sampN), count)
print fn, "->", outdir
stdout.flush()
of = Samfile(path.join(outdir, "accepted_hits.bam"), mode="wb", template=sf)
sample.sort(key=lambda (key, read, pos): (read.tid, read.pos))
for key, read, pos in sampN:
of.write(read)
of.close()
sf.close()
return [count for key, read, count in sample]
开发者ID:petercombs,项目名称:EisenLab-Code,代码行数:48,代码来源:SubSample.py
示例13: split_reads
def split_reads(reads, ref_reads, alt_reads):
read_results = Counter()
ref_bam = Samfile(ref_reads, 'wb', template=reads)
alt_bam = Samfile(alt_reads, 'wb', template=reads)
prev_phase = None
prev_read = None
prev_qname = None
test = 0
for read in reads.fetch(until_eof=True):
test += 1
chrom = read.reference_name
snps_on_chrom = snp_dict[chrom]
phase = get_phase(read, snps_on_chrom)
read_qname = read.qname
if read_qname == prev_qname:
read_results["tot_read"]+=1
phase_set = set([phase, prev_phase])
phase_set.discard(None)
if len(phase_set) == 1:
read_phase = phase_set.pop()
if read_phase == -1:
ref_bam.write(read)
ref_bam.write(prev_read)
read_results["ref_read"]+=1
elif read_phase == 1:
alt_bam.write(read)
alt_bam.write(prev_read)
read_results["alt_read"]+=1
elif read_phase == 0:
read_results['misphased_read']+=1
elif len(phase_set)==0:
read_results["no_snps_read"]+=1
else:
read_results['misphased_read']+=1
prev_read = read
prev_phase = phase
prev_qname = read_qname
return(read_results)
开发者ID:rmagoglia,项目名称:Genomics,代码行数:43,代码来源:splitSpeciesReads.py
示例14: _bowtie2_filter
def _bowtie2_filter(fnam, fastq_path, unmap_out, map_out):
"""
Divides reads in a map file in two categories: uniquely mapped, and not.
Writes them in two files
"""
try:
fhandler = Samfile(fnam)
except IOError:
raise Exception('ERROR: file "%s" not found' % fnam)
# getrname chromosome names
i = 0
crm_dict = {}
while True:
try:
crm_dict[i] = fhandler.getrname(i)
i += 1
except ValueError:
break
# iteration over reads
unmap_out = open(unmap_out, 'w')
map_out = open(map_out, 'w')
fastq_in = open(fastq_path , 'r')
for line in fhandler:
line_in = fastq_in.readline()
if line.is_unmapped or line.mapq < 4:
read = '%s\t%s\t%s\t%s\t%s\n' % (
line_in.split('\t', 1)[0].rstrip('\n')[1:],
line.seq, line.qual, '-', '-'
)
unmap_out.write(read)
else:
read = '%s\t%s\t%s\t%s\t%s:%s:%d:%d\n' % (
line.qname, line.seq, line.qual, '1',
crm_dict[line.tid],
'-' if line.is_reverse else '+', line.pos + 1, len(line.seq))
map_out.write(read)
for _ in range(3):
fastq_in.readline()
unmap_out.close()
map_out.close()
fastq_in.close()
开发者ID:3DGenomes,项目名称:TADbit,代码行数:42,代码来源:full_mapper.py
示例15: main
def main(args):
option = "r" if args.samformat else "rb"
samfile = Samfile(args.bamfile, "rb")
ref_ids = [samfile.gettid(r) for r in samfile.references]
#Iterates over each read instead of each contig
reads_to_print = []
for aln in samfile.fetch(until_eof = True):
if pair_is_aligned(aln, ref_ids):
if args.read_pair == 1 and aln.is_read1:
reads_to_print.append(aln)
elif args.read_pair == 2 and aln.is_read2:
reads_to_print.append(aln)
elif args.read_pair == 0:
reads_to_print.append(aln)
if len(reads_to_print) >= 10000:
# Flush the reads collected
print_reads(reads_to_print)
reads_to_print = []
print_reads(reads_to_print)
开发者ID:alneberg,项目名称:bu,代码行数:20,代码来源:extract_reads_from_bam.py
示例16: removeEdgeMismatches
def removeEdgeMismatches(self,bamFile,minDistance, minBaseQual):
startTime=Helper.getTime()
minDistance=int(minDistance)
counter=0;j=0
num_lines = len(self.variantDict)
Helper.info(" [%s] remove Missmatches from the first %s bp from read edges" % (startTime.strftime("%c"),str(minDistance)),self.logFile,self.textField)
bamFile = Samfile(bamFile, "rb")
for varKey in self.variantDict.keys():
variant = self.variantDict[varKey]
counter+=1
if counter%10000==0:
Helper.status('%s mm parsed ' % counter ,self.logFile, self.textField,"grey")
keepSNP=False
varPos=variant.position-1
iter = bamFile.pileup(variant.chromosome, variant.position-1, variant.position)
#walks up the region wich overlap this position
for x in iter:
if x.pos == varPos:
for pileupread in x.pileups: #walk through the single reads
if not pileupread.is_del and not pileupread.is_refskip:
distance=abs(pileupread.alignment.alen-pileupread.query_position) if pileupread.alignment.is_reverse else pileupread.query_position
if distance >= minDistance:
#check readBase and Base Quality
if pileupread.alignment.query_sequence[pileupread.query_position] == variant.alt and pileupread.alignment.query_qualities[pileupread.query_position]>=minBaseQual:
#if pileupread.alignment.query_sequence[pileupread.query_position] == variant.alt:
keepSNP=True
if keepSNP==False:
j+=1
del self.variantDict[varKey]
Helper.status('%s of %svariants were deleted' % (j,num_lines), self.logFile, self.textField,"black")
Helper.printTimeDiff(startTime, self.logFile, self.textField)
bamFile.close()
开发者ID:djhn75,项目名称:RNAEditor,代码行数:38,代码来源:VariantSet.py
示例17: __init__
def __init__(self, fname, referenceFastaFname=None):
self.filename = fname = abspath(expanduser(fname))
self.peer = Samfile(fname, "rb", check_sq=False)
self._checkFileCompatibility()
self._loadReferenceInfo()
self._loadReadGroupInfo()
self._loadProgramInfo()
self.referenceFasta = None
if referenceFastaFname is not None:
if self.isUnmapped:
raise ValueError, "Unmapped BAM file--reference FASTA should not be given as argument to BamReader"
self._loadReferenceFasta(referenceFastaFname)
开发者ID:extemporaneousb,项目名称:pbcore,代码行数:14,代码来源:BamIO.py
示例18: main
def main(args):
option = "r" if args.samformat else "rb"
samfile = Samfile(args.bamfile, option)
ref_ids = [samfile.gettid(r) for r in samfile.references]
#Iterates over each read instead of each contig
reads_to_print_1 = []
reads_to_print_2 = []
reads_to_print_u = []
for aln in samfile.fetch(until_eof = True):
if aln.tid in ref_ids: # This read is aligned
if aln.rnext in ref_ids: # The mate is also aligned
if aln.is_read1:
reads_to_print_1.append(aln)
reads_to_print_1 = flush_reads(reads_to_print_1, args.R1)
elif aln.is_read2:
reads_to_print_2.append(aln)
reads_to_print_2 = flush_reads(reads_to_print_2, args.R2)
else:
reads_to_print_u.append(aln)
reads_to_print_u = flush_reads(reads_to_print_u, args.u)
print_reads(reads_to_print_1, args.R1)
print_reads(reads_to_print_2, args.R2)
print_reads(reads_to_print_u, args.u)
开发者ID:alneberg,项目名称:bu,代码行数:24,代码来源:extract_reads_from_bam_and_separate.py
示例19: __init__
def __init__(self, fname, referenceFastaFname=None):
self.filename = fname = abspath(expanduser(fname))
self.peer = Samfile(fname, "rb")
# Check for sortedness, index.
# There doesn't seem to be a "public" way to do this right
# now, but that's fine because we're going to have to rewrite
# it all anyway once the pysam rewrite lands.
if not self.peer._hasIndex:
raise ValueError, "Specified bam file lacks a bam index---required for this API"
self._loadReferenceInfo()
self._loadReadGroupInfo()
self._loadProgramInfo()
self.referenceFasta = None
if referenceFastaFname is not None:
self._loadReferenceFasta(referenceFastaFname)
开发者ID:leehosung,项目名称:GenomicConsensus,代码行数:17,代码来源:BamIO.py
示例20: __init__
def __init__(self, file_name):
"""
Initializes GenomicSignal.
"""
self.file_name = file_name
self.bam = None
self.bw = None
self.sg_coefs = None
self.is_bam = False
self.is_bw = False
if(self.file_name.split(".")[-1].upper() == "BAM"):
self.is_bam = True
self.bam = Samfile(file_name,"rb")
elif(self.file_name.split(".")[-1].upper() == "BW" or self.file_name.split(".")[-1].upper() == "BIGWIG"):
self.is_bw = True
self.bw = BigWigFile(file_name)
else: pass # TODO ERROR
开发者ID:Marvin84,项目名称:reg-gen,代码行数:17,代码来源:signalProcessing.py
注:本文中的pysam.Samfile类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论