• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python pysam.Samfile类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中pysam.Samfile的典型用法代码示例。如果您正苦于以下问题:Python Samfile类的具体用法?Python Samfile怎么用?Python Samfile使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了Samfile类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_process_bam_mismatches

def test_process_bam_mismatches():
    tbam = os.path.join(DATA, "tmp.bam")
    bam = os.path.join(DATA, "ordered_umi.bam")
    if os.path.exists(tbam):
        os.remove(tbam)
    with captured_output() as (out, err):
        process_bam(bam, tbam, mismatches=1)
    assert os.path.exists(tbam)
    it = iter(out.getvalue().split("\n"))
    assert it.next().strip() == "1\t9\t10\t4\t2"
    assert it.next().strip() == "1\t11\t12\t2\t1"
    assert it.next().strip() == "1\t29\t30\t2\t1"

    bam_reader = Samfile(tbam)
    it = iter(bam_reader)
    r = it.next()
    assert r.pos == 4
    assert r.qname == "read8:UMI_ATTCAGGG"
    r = it.next()
    assert r.pos == 9
    assert r.qname == "read1:UMI_AAAAAGGG"
    r = it.next()
    assert r.pos == 9
    assert r.qname == "read4:UMI_AAAGGGGG"
    r = it.next()
    assert r.pos == 11
    assert r.qname == "read5:UMI_ATTTAGGG"
    bam_reader.close()
    os.remove(tbam)
开发者ID:brwnj,项目名称:umitools,代码行数:29,代码来源:test_umitools.py


示例2: callbase

def callbase(bamfile, snpsites, out):
    BF = Samfile(bamfile, 'rb') #open your bam file
    SF = open(snpsites, 'r')    #the file contain snp sites info
    RF = open(out, 'w')         #resulte file
    RF.write('ref_name\tpos\tRbase\tAbase\tA\tT\tC\tG\tN\tothers\n')
    for i in SF:
        if i.startswith('#'):
            continue
        else:
            line = ParseSNPsitesLine(i)
            vcf_pos = line.pos-1 #change 1-base to 0-based
            vcf_refname = line.chrom
            print 'processing: %s %s...'%(vcf_refname, str(vcf_pos))
            At, Tt, Ct, Gt, Nt, othert = 0, 0, 0, 0, 0, 0
            for i in BF.pileup(vcf_refname, vcf_pos, vcf_pos+1):
                if i.pos == vcf_pos:
                    vcf_Rbase = line.Rbase
                    vcf_Abase = line.Abase
                    for j in i.pileups:
                        yourbase = j.alignment.seq[j.qpos]
                        if yourbase == 'A': At += 1
                        elif yourbase == 'T': Tt += 1
                        elif yourbase == 'C': Ct += 1
                        elif yourbase == 'G': Gt += 1
                        elif yourbase == 'N': Nt += 1
                        else: othert += 1
        RF.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n'%(vcf_refname, \
str(vcf_pos+1), vcf_Rbase, vcf_Abase, str(At), str(Tt), str(Ct), str(Gt), \
str(Nt), str(othert)))
    BF.close()
开发者ID:freemao,项目名称:call-base-each-snp-site,代码行数:30,代码来源:callbase-pysam.py


示例3: parse_bam_differential

def parse_bam_differential(afn, bfn, regs, step):
    """(internal) Parses bam file in absolute mode. Proceeds by counting reads mapping 
    onto a segment (chr, start, end). No normalization is done at this step.
    """
    abam = Samfile(str(afn), "rb")
    bbam = Samfile(str(bfn), "rb")
    acount = []
    bcount = []
    oldchr = "chr1"
    for reg in regs:
        chr, start, end = reg[:3]
        if chr != oldchr:
            log("files: %s - %s : %s counted" % (afn, bfn, oldchr))
            oldchr = chr
        # this could be improved
        for s in xrange(start, end, step):
            e = s + step
            an = abam.count(chr, s, e)
            bn = bbam.count(chr, s, e)
            acount.append(an)
            bcount.append(bn)
        acount.append(-1)
        bcount.append(-1)
    log("files: %s - %s : %s counted (finished)" % (afn, bfn, oldchr))
    return acount, bcount
开发者ID:BioinformaticsArchive,项目名称:epicode,代码行数:25,代码来源:epicode.py


示例4: bam_read_id

def bam_read_id(url):
    '''Read first read id out of a remote bam file.

    Note: requires a patched version of pysam
    '''
    stream = Samfile(url, 'rb')
    read = stream.next()
    return read.qname
开发者ID:detrout,项目名称:encode3-curation,代码行数:8,代码来源:validate_encode3_aliases.py


示例5: parse_bam_absolute

def parse_bam_absolute(fn, regs):
    """(internal) Parses bam file in absolute mode. Proceeds by counting reads mapping 
    onto a segment (chr, start, end) and normalizes the count by the segment's length.
    """
    bam = Samfile(str(fn), "rb")
    count = []
    for reg in regs:
        chr, start, end = reg[:3]
        n = bam.count(chr, start, end)
        count.append(float(n) / (end - start))
    return count
开发者ID:BioinformaticsArchive,项目名称:epicode,代码行数:11,代码来源:epicode.py


示例6: main

def main():

    bam  = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    
    for al in bam:
        chrom = bam.getrname(al.rname)
        start = al.pos
        end   = al.aend
        name  = al.qname 
        for hit in rmsk.search(chrom, start, end):
            print chrom, start, end, name,
            print hit.chrom, hit.start, hit.end, hit.name
开发者ID:mikerlindberg,项目名称:bedtools-python,代码行数:13,代码来源:pysam-and-bedtools.py


示例7: main

def main(args=None):
    if args is None:
        args = sys.argv[1:]

    f = Samfile(args[0])
    header = f.header
    f.close()

    reflen = header['SQ'][0]['LN']

    BamIO.write(clip(BamIO.parse(args[0]), reflen), args[1], header=header)

    return 0
开发者ID:trident01,项目名称:BioExt-1,代码行数:13,代码来源:bamclip.py


示例8: main

def main(args):
    option = "r" if args.samformat else "rb"
    samfile = Samfile(args.bamfile, "rb")

    #Iterates over each read instead of each contig
    outputs = defaultdict(list)
    #import ipdb; ipdb.set_trace()
    for aln in samfile.fetch(until_eof = True):
        ref = samfile.getrname(aln.tid)
        outputs[ref].append(aln)

    for ref, alns in outputs.iteritems():
        print_reads(alns, ref, samfile.header)
开发者ID:alneberg,项目名称:bu,代码行数:13,代码来源:split_bam_per_reference.py


示例9: main

def main():

    bam  = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    
    # Example 1:
    #    Method: IntervalFile.all_hits()
    #    Report _all_ of the rmsk features that overlap with the BAM alignment
    for al in bam:
        strand = "+"
        if al.is_reverse: strand = "-"
        i = Interval(bam.getrname(al.rname), al.pos, al.aend, strand)
        
        for hit in rmsk.all_hits(i, same_strand=True, ovlp_pct=0.75):
            print "\t".join(str(x) for x in [i,hit])
开发者ID:Shima0,项目名称:bedtools-python,代码行数:15,代码来源:ex2-pysam-and-bedtools.py


示例10: __init__

 def __init__(self, file_name):
     """ 
     Initializes GenomicSignal.
     """
     self.file_name = file_name
     self.sg_coefs = None
     self.bam = Samfile(file_name, "rb")
开发者ID:eggduzao,项目名称:reg-gen,代码行数:7,代码来源:signalProcessing.py


示例11: get_raw_tracks

def get_raw_tracks(args):
    # Initializing Error Handler
    err = ErrorHandler()

    if len(args.input_files) != 2:
        err.throw_error("ME_FEW_ARG", add_msg="You must specify reads and regions file.")

    output_fname = os.path.join(args.output_location, "{}.wig".format(args.output_prefix))

    bam = Samfile(args.input_files[0], "rb")
    regions = GenomicRegionSet("Interested regions")
    regions.read(args.input_files[1])
    regions.merge()
    reads_file = GenomicSignal()

    with open(output_fname, "a") as output_f:
        for region in regions:
            # Raw counts
            signal = [0.0] * (region.final - region.initial)
            for read in bam.fetch(region.chrom, region.initial, region.final):
                if not read.is_reverse:
                    cut_site = read.pos + args.forward_shift
                    if region.initial <= cut_site < region.final:
                        signal[cut_site - region.initial] += 1.0
                else:
                    cut_site = read.aend + args.reverse_shift - 1
                    if region.initial <= cut_site < region.final:
                        signal[cut_site - region.initial] += 1.0

            if args.norm:
                signal = reads_file.boyle_norm(signal)
                perc = scoreatpercentile(signal, 98)
                std = np.std(signal)
                signal = reads_file.hon_norm_atac(signal, perc, std)

            output_f.write("fixedStep chrom=" + region.chrom + " start=" + str(region.initial + 1) + " step=1\n" +
                           "\n".join([str(e) for e in np.nan_to_num(signal)]) + "\n")
    output_f.close()

    if args.bigWig:
        genome_data = GenomeData(args.organism)
        chrom_sizes_file = genome_data.get_chromosome_sizes()
        bw_filename = os.path.join(args.output_location, "{}.bw".format(args.output_prefix))
        os.system(" ".join(["wigToBigWig", output_fname, chrom_sizes_file, bw_filename, "-verbose=0"]))
        os.remove(output_fname)
开发者ID:CostaLab,项目名称:reg-gen,代码行数:45,代码来源:Tracks.py


示例12: subsample

def subsample(fn, ns=None):
    if ns is None:
        fn, ns = fn
    sample = []
    count = 0
    outdir_base = path.join(path.dirname(fn), "subset")
    sf = Samfile(fn)
    try:
        i_weight = float(sf.mapped) / max(ns)
        print "Read out ", i_weight
    except ValueError:
        i_weight = 0.0
        for read in sf:
            i_weight += 1
        print "Counted ", i_weight
        i_weight /= float(max(ns))
        sf = Samfile(fn)

    print fn, count, i_weight
    for i, read in enumerate(sf):
        key = random() ** i_weight
        if len(sample) < max(ns):
            heappush(sample, (key, read, i + count))
        else:
            heappushpop(sample, (key, read, i + count))

    count += i

    for n in ns:
        if n == min(ns):
            outdir = outdir_base + "_min"
        else:
            outdir = outdir_base + "{:04.1f}M".format(n / 1e6)
        try:
            makedirs(outdir)
        except OSError:
            pass
        sampN = sorted(sample, reverse=True)[: int(n)]
        print "Kept {: >12,} of {: >12,} reads".format(len(sampN), count)
        print fn, "->", outdir
        stdout.flush()
        of = Samfile(path.join(outdir, "accepted_hits.bam"), mode="wb", template=sf)
        sample.sort(key=lambda (key, read, pos): (read.tid, read.pos))
        for key, read, pos in sampN:
            of.write(read)
        of.close()
    sf.close()
    return [count for key, read, count in sample]
开发者ID:petercombs,项目名称:EisenLab-Code,代码行数:48,代码来源:SubSample.py


示例13: split_reads

def split_reads(reads, ref_reads, alt_reads):
    read_results = Counter()

    ref_bam = Samfile(ref_reads, 'wb', template=reads)
    alt_bam = Samfile(alt_reads, 'wb', template=reads)

    prev_phase = None
    prev_read = None
    prev_qname = None
    test = 0

    for read in reads.fetch(until_eof=True):
        test += 1
        chrom = read.reference_name
        snps_on_chrom = snp_dict[chrom]
        phase = get_phase(read, snps_on_chrom)
        read_qname = read.qname
        if read_qname == prev_qname:
            read_results["tot_read"]+=1
            phase_set = set([phase, prev_phase])
            phase_set.discard(None)
            if len(phase_set) == 1:
                read_phase = phase_set.pop()
                if read_phase == -1:
                    ref_bam.write(read)
                    ref_bam.write(prev_read)
                    read_results["ref_read"]+=1
                elif read_phase == 1:
                    alt_bam.write(read)
                    alt_bam.write(prev_read)
                    read_results["alt_read"]+=1
                elif read_phase == 0:
                    read_results['misphased_read']+=1
            elif len(phase_set)==0:
                read_results["no_snps_read"]+=1
            else:
                read_results['misphased_read']+=1

        prev_read = read
        prev_phase = phase
        prev_qname = read_qname

    return(read_results)
开发者ID:rmagoglia,项目名称:Genomics,代码行数:43,代码来源:splitSpeciesReads.py


示例14: _bowtie2_filter

def _bowtie2_filter(fnam, fastq_path, unmap_out, map_out):
    """
    Divides reads in a map file in two categories: uniquely mapped, and not.
    Writes them in two files

    """
    try:
        fhandler = Samfile(fnam)
    except IOError:
        raise Exception('ERROR: file "%s" not found' % fnam)
    # getrname chromosome names
    i = 0
    crm_dict = {}
    while True:
        try:
            crm_dict[i] = fhandler.getrname(i)
            i += 1
        except ValueError:
            break
    # iteration over reads
    unmap_out = open(unmap_out, 'w')
    map_out   = open(map_out, 'w')
    fastq_in  = open(fastq_path , 'r')
    for line in fhandler:
        line_in = fastq_in.readline()
        if line.is_unmapped or line.mapq < 4:
            read = '%s\t%s\t%s\t%s\t%s\n' % (
                line_in.split('\t', 1)[0].rstrip('\n')[1:],
                line.seq, line.qual, '-', '-'
                )
            unmap_out.write(read)
        else:
            read = '%s\t%s\t%s\t%s\t%s:%s:%d:%d\n' % (
                line.qname, line.seq, line.qual, '1',
                crm_dict[line.tid],
                '-' if line.is_reverse else '+', line.pos + 1, len(line.seq))
            map_out.write(read)
        for _ in range(3):
            fastq_in.readline()
    unmap_out.close()
    map_out.close()
    fastq_in.close()
开发者ID:3DGenomes,项目名称:TADbit,代码行数:42,代码来源:full_mapper.py


示例15: main

def main(args):
    option = "r" if args.samformat else "rb"
    samfile = Samfile(args.bamfile, "rb")
    ref_ids = [samfile.gettid(r) for r in samfile.references]
    #Iterates over each read instead of each contig
    reads_to_print = []
    for aln in samfile.fetch(until_eof = True):
        if pair_is_aligned(aln, ref_ids):
            if args.read_pair == 1 and aln.is_read1:
                reads_to_print.append(aln)
            elif args.read_pair == 2 and aln.is_read2:
                reads_to_print.append(aln)
            elif args.read_pair == 0:
                reads_to_print.append(aln)
        if len(reads_to_print) >= 10000:
            # Flush the reads collected
            print_reads(reads_to_print)
            reads_to_print = []

    print_reads(reads_to_print)
开发者ID:alneberg,项目名称:bu,代码行数:20,代码来源:extract_reads_from_bam.py


示例16: removeEdgeMismatches

 def removeEdgeMismatches(self,bamFile,minDistance, minBaseQual):
     startTime=Helper.getTime()
     minDistance=int(minDistance)
     counter=0;j=0  
     num_lines = len(self.variantDict)
     Helper.info(" [%s] remove Missmatches from the first %s bp from read edges" % (startTime.strftime("%c"),str(minDistance)),self.logFile,self.textField)
     
     bamFile = Samfile(bamFile, "rb")
     
     for varKey in self.variantDict.keys():
         variant = self.variantDict[varKey]
         
         counter+=1
         if counter%10000==0:
             Helper.status('%s mm parsed ' % counter ,self.logFile, self.textField,"grey")
         
         keepSNP=False
         varPos=variant.position-1
         iter = bamFile.pileup(variant.chromosome, variant.position-1, variant.position)
         #walks up the region wich overlap this position
         for x in iter:
             if x.pos == varPos:
                 for pileupread in x.pileups: #walk through the single reads
                     if not pileupread.is_del and not pileupread.is_refskip:
                         distance=abs(pileupread.alignment.alen-pileupread.query_position) if pileupread.alignment.is_reverse else pileupread.query_position
                         if distance >= minDistance:
                             #check readBase and Base Quality
                             if pileupread.alignment.query_sequence[pileupread.query_position] == variant.alt and pileupread.alignment.query_qualities[pileupread.query_position]>=minBaseQual:
                             #if pileupread.alignment.query_sequence[pileupread.query_position] == variant.alt:
                                 keepSNP=True
                                 
         if keepSNP==False:
             j+=1
             del self.variantDict[varKey]
     
     Helper.status('%s of %svariants were deleted' % (j,num_lines), self.logFile, self.textField,"black") 
     Helper.printTimeDiff(startTime, self.logFile, self.textField)
     bamFile.close()
开发者ID:djhn75,项目名称:RNAEditor,代码行数:38,代码来源:VariantSet.py


示例17: __init__

    def __init__(self, fname, referenceFastaFname=None):
        self.filename = fname = abspath(expanduser(fname))
        self.peer = Samfile(fname, "rb", check_sq=False)
        self._checkFileCompatibility()

        self._loadReferenceInfo()
        self._loadReadGroupInfo()
        self._loadProgramInfo()

        self.referenceFasta = None
        if referenceFastaFname is not None:
            if self.isUnmapped:
                raise ValueError, "Unmapped BAM file--reference FASTA should not be given as argument to BamReader"
            self._loadReferenceFasta(referenceFastaFname)
开发者ID:extemporaneousb,项目名称:pbcore,代码行数:14,代码来源:BamIO.py


示例18: main

def main(args):
    option = "r" if args.samformat else "rb"
    samfile = Samfile(args.bamfile, option)
    ref_ids = [samfile.gettid(r) for r in samfile.references]
    #Iterates over each read instead of each contig
    reads_to_print_1 = []
    reads_to_print_2 = []
    reads_to_print_u = []
    for aln in samfile.fetch(until_eof = True):
        if aln.tid in ref_ids: # This read is aligned
            if aln.rnext in ref_ids: # The mate is also aligned
                if aln.is_read1:
                    reads_to_print_1.append(aln)
                    reads_to_print_1 = flush_reads(reads_to_print_1, args.R1)
                elif aln.is_read2:
                    reads_to_print_2.append(aln)
                    reads_to_print_2 = flush_reads(reads_to_print_2, args.R2)
            else:
                reads_to_print_u.append(aln)
                reads_to_print_u = flush_reads(reads_to_print_u, args.u)

    print_reads(reads_to_print_1, args.R1)
    print_reads(reads_to_print_2, args.R2)
    print_reads(reads_to_print_u, args.u)
开发者ID:alneberg,项目名称:bu,代码行数:24,代码来源:extract_reads_from_bam_and_separate.py


示例19: __init__

    def __init__(self, fname, referenceFastaFname=None):
        self.filename = fname = abspath(expanduser(fname))
        self.peer = Samfile(fname, "rb")
        # Check for sortedness, index.
        # There doesn't seem to be a "public" way to do this right
        # now, but that's fine because we're going to have to rewrite
        # it all anyway once the pysam rewrite lands.
        if not self.peer._hasIndex:
            raise ValueError, "Specified bam file lacks a bam index---required for this API"

        self._loadReferenceInfo()
        self._loadReadGroupInfo()
        self._loadProgramInfo()

        self.referenceFasta = None
        if referenceFastaFname is not None:
            self._loadReferenceFasta(referenceFastaFname)
开发者ID:leehosung,项目名称:GenomicConsensus,代码行数:17,代码来源:BamIO.py


示例20: __init__

 def __init__(self, file_name):
     """ 
     Initializes GenomicSignal.
     """
     self.file_name = file_name
     self.bam = None
     self.bw = None
     self.sg_coefs = None
     self.is_bam = False
     self.is_bw = False
     if(self.file_name.split(".")[-1].upper() == "BAM"):
         self.is_bam = True
         self.bam = Samfile(file_name,"rb")
     elif(self.file_name.split(".")[-1].upper() == "BW" or self.file_name.split(".")[-1].upper() == "BIGWIG"):
         self.is_bw = True
         self.bw = BigWigFile(file_name)
     else: pass # TODO ERROR
开发者ID:Marvin84,项目名称:reg-gen,代码行数:17,代码来源:signalProcessing.py



注:本文中的pysam.Samfile类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python connection_info.ConnectionState类代码示例发布时间:2022-05-27
下一篇:
Python pysam.view函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap