• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python pysam.idxstats函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中pysam.idxstats函数的典型用法代码示例。如果您正苦于以下问题:Python idxstats函数的具体用法?Python idxstats怎么用?Python idxstats使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了idxstats函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: testDoubleCalling

 def testDoubleCalling(self):
     # The following would fail if there is an
     # issue with stdout being improperly caught.
     retvals = pysam.idxstats(
         os.path.join(DATADIR, "ex1.bam"))
     retvals = pysam.idxstats(
         os.path.join(DATADIR, "ex1.bam"))
开发者ID:grayshiwb512,项目名称:pysam,代码行数:7,代码来源:samtools_test.py


示例2: bam_total_reads

def bam_total_reads(bam_handle, chroms_to_ignore):
    """Count the total number of mapped reads in a BAM file, filtering
    the chromosome given in chroms_to_ignore list
    """
    if chroms_to_ignore:
        import pysam

        lines = pysam.idxstats(bam_handle.filename)
        lines = toString(lines)
        if type(lines) is str:
            lines = lines.strip().split('\n')
        if len(lines) == 0:
            # check if this is a test running under nose
            # in which case it will fail.
            if len([val for val in sys.modules.keys() if val.find("nose") >= 0]):
                sys.stderr.write("To run this code inside a test use disable "
                                 "output buffering `nosetest -s`\n".format(bam_handle.filename))
            else:
                sys.stderr.write("Error running idxstats on {}\n".format(bam_handle.filename))
        tot_mapped_reads = 0
        for line in lines:
            chrom, _len, nmapped, _nunmapped = line.split('\t')
            if chrom not in chroms_to_ignore:
                tot_mapped_reads += int(nmapped)

    else:
        tot_mapped_reads = bam_handle.mapped

    return tot_mapped_reads
开发者ID:venuthatikonda,项目名称:deepTools,代码行数:29,代码来源:utilities.py


示例3: bam_blacklisted_reads

def bam_blacklisted_reads(bam_handle, chroms_to_ignore, blackListFileName=None, numberOfProcessors=1):
    blacklisted = 0
    if blackListFileName is None:
        return blacklisted

    # Get the chromosome lengths
    chromLens = {}
    lines = pysam.idxstats(bam_handle.filename)
    lines = toString(lines)
    if type(lines) is str:
        lines = lines.strip().split('\n')
    for line in lines:
        chrom, _len, nmapped, _nunmapped = line.split('\t')
        chromLens[chrom] = int(_len)

    bl = GTF(blackListFileName)
    regions = []
    for chrom in bl.chroms:
        if (not chroms_to_ignore or chrom not in chroms_to_ignore) and chrom in chromLens:
            for reg in bl.findOverlaps(chrom, 0, chromLens[chrom]):
                regions.append([bam_handle.filename, chrom, reg[0], reg[1]])

    if len(regions) > 0:
        import multiprocessing
        if len(regions) > 1 and numberOfProcessors > 1:
            pool = multiprocessing.Pool(numberOfProcessors)
            res = pool.map_async(bam_blacklisted_worker, regions).get(9999999)
        else:
            res = [bam_blacklisted_worker(x) for x in regions]
        for val in res:
            blacklisted += val

    return blacklisted
开发者ID:venuthatikonda,项目名称:deepTools,代码行数:33,代码来源:utilities.py


示例4: getNumReads

def getNumReads(bamfile):
    '''count number of reads in bam file.

    This methods works through pysam.idxstats.

    Arguments
    ---------
    bamfile : string
        Filename of :term:`bam` formatted file. The file needs
        to be indexed.
    Returns
    -------
    nreads : int
        Number of reads
    '''

    lines = pysam.idxstats(bamfile).splitlines()

    try:
        nreads = sum(
            map(int, [x.split("\t")[2]
                      for x in lines if not x.startswith("#")]))

    except IndexError, msg:
        raise IndexError(
            "can't get number of reads from bamfile, msg=%s, data=%s" %
            (msg, lines))
开发者ID:sudlab,项目名称:cgat,代码行数:27,代码来源:BamTools.py


示例5: _setup

    def _setup(self, config, temp):
        with open(os.path.join(temp, "contigs.table"), "w") as handle:
            handle.write("ID\tSize\tNs\tHits\n")

            # Workaround for pysam < 0.9 returning list, >= 0.9 returning str
            for line in "".join(pysam.idxstats(self._input_file)).split('\n'):
                line = line.strip()
                if not line:
                    continue

                name, size, hits, _ = line.split('\t')
                name = contig_name_to_plink_name(name)
                if name is None or not (name.isdigit() or name == 'X'):
                    continue

                if int(size) != self._contigs[name]['Size']:
                    raise NodeError("TODO: size mismatch")

                row = {
                    'ID': name,
                    'Size': self._contigs[name]['Size'],
                    'Ns': self._contigs[name]['Ns'],
                    'Hits': hits,
                }

                handle.write('{ID}\t{Size}\t{Ns}\t{Hits}\n'.format(**row))

        CommandNode._setup(self, config, temp)
开发者ID:muslih14,项目名称:paleomix,代码行数:28,代码来源:nuclear.py


示例6: print_sex

def print_sex(bam):
    """
    Print sex based on chr x ratio
    
    Args:
	bam (str): Path to bam file
    """

    idxstats = pysam.idxstats(bam)
    chr_ratio = []
    # Calculate read / chromosome length ratio per chromosome
    for chr in idxstats[0:24]:
        chr = chr.strip("\n").split("\t")
        chr_length = float(chr[1])
        chr_mapped = float(chr[2])
        ratio = chr_mapped / chr_length
        chr_ratio.append(ratio)

    chr_ratio_std = numpy.std(chr_ratio)
    chr_ratio_mean = numpy.mean(chr_ratio)

    chr_x = idxstats[22].strip("\n").split("\t")
    chr_x_ratio = float(chr_x[2]) / float(chr_x[1])

    if (chr_x_ratio > chr_ratio_mean - (2 * chr_ratio_std)) and (chr_x_ratio < chr_ratio_mean + (2 * chr_ratio_std)):
        print "female"
    elif chr_x_ratio < chr_ratio_mean - (2 * chr_ratio_std):
        print "male"
    else:
        print "unkown"
开发者ID:CuppenResearch,项目名称:IAP,代码行数:30,代码来源:determine_sex.py


示例7: main

def main(argv):

    fullname = os.path.abspath(argv[1])
    bamfile = pysam.Samfile(fullname)
    header = bamfile.header
    fullname_s = fullname.split("/")

    # Populate info dict
    info = dict()    
    info['idsequencing'] = fullname_s[-2].split("seq")[1]
    info['filename'] = os.path.basename(fullname)
    info['aligner_index'] = fullname_s[-4]
    bam_datetime = b = datetime.datetime.fromtimestamp(os.stat(fullname).st_mtime)
    info['align_datetime'] = bam_datetime.strftime("%Y-%m-%d %H:%M:%S")
    info['aligner'] = header['PG'][0]['PN']
    info['command'] = header['PG'][0]['cl']

    # Compute total number of aligned reads
    stats = pysam.idxstats(fullname)
    stats = [el.split("\t") for el in stats]
    total_reads = 0
    for el in stats:
        total_reads += int(el[2])
    info['total_reads'] = total_reads
 
    # Format dict entries for MySQL
    for i in info.iterkeys():
        info[i] = "'" + str(info[i]) + "'"    

    ## Connect to db
    try:
        conn = mdb.connect('localhost', 'brad', 'Eu23ler1', 'sample_db')
        cur = conn.cursor()
    except mdb.Error, e:
        print "MySQLdb error %d: %s " % (e.args[0] + e.args[1])
开发者ID:bradleycolquitt,项目名称:seqAnalysis,代码行数:35,代码来源:alignment2db.py


示例8: test_idxstats_parse

def test_idxstats_parse():
    bam_filename = "./pysam_data/ex2.bam"
    idxstats_string = pysam.idxstats(bam_filename, split_lines=False)  # Test pysam 0.9.X style output, which returns a string that needs to be split by \n
    lines = idxstats_string.splitlines()
    for line in lines:
        splt = line.split("\t")
        _seqname, _seqlen, nmapped, _nunmapped = splt
开发者ID:Bratdaking,项目名称:pysam,代码行数:7,代码来源:test_samtools_python.py


示例9: bamStats

def bamStats(bamfile):
    """ Extract average depths + idxstats data from BAM file, return data frame """
    istats = pysam.idxstats(bamfile)
    result = []
    samfile = pysam.Samfile(bamfile, "rb")
    for x in istats:
        xs = x.replace("\n", "").split("\t")
        rec = {
            "CHROM": xs[0],
            "NT": int(xs[1]),
            "MAPPED": int(xs[2]),
            "UNMAPPED": int(xs[3]),
            "READLEN": 0,
            "COVERAGE": 0.0,
        }
        count = 0
        rls = 0.0        
        try:
            for read in samfile.fetch(xs[0]):
                rls += float(read.rlen)
                count += 1
                if count > 10000:
                    break

            rls /= count
            rec["READLEN"] = rls
            rec["COVERAGE"] = float(rec["MAPPED"] * rec["READLEN"])/float(rec["NT"])
        except:
            pass
        result.append(rec)

    if result:
        return pandas.DataFrame(result, columns=["CHROM", "NT", "MAPPED", "UNMAPPED", "READLEN", "COVERAGE"])
    else:
        return pandas.DataFrame(columns=["CHROM", "NT", "MAPPED", "UNMAPPED", "READLEN", "COVERAGE"])
开发者ID:goranrakocevic,项目名称:hap.py,代码行数:35,代码来源:bamstats.py


示例10: get_num_reads

def get_num_reads(filename):
    num_reads = 0
    try:
        num_reads = reduce(lambda x, y: x + y,
                           [eval('+'.join(l.rstrip('\n').split('\t')[2:])) for l in pysam.idxstats(filename)])
    except:
        sys.stderr.write("Unable to count reads in file: %s" % filename)
    return num_reads
开发者ID:nebiolabs,项目名称:equalize_bam_reads,代码行数:8,代码来源:equalize_bam_files.py


示例11: testReturnValueString

 def testReturnValueString(self):
     retval = pysam.idxstats(os.path.join(BAM_DATADIR, "ex1.bam"))
     if IS_PYTHON3:
         self.assertFalse(isinstance(retval, bytes))
         self.assertTrue(isinstance(retval, str))
     else:
         self.assertTrue(isinstance(retval, bytes))
         self.assertTrue(isinstance(retval, basestring))
开发者ID:msto,项目名称:pysam,代码行数:8,代码来源:samtools_test.py


示例12: test_idxstats_parse

def test_idxstats_parse():
    bam_filename = os.path.join(BAM_DATADIR, "ex2.bam")
    # Test pysam 0.9.X style output, which returns a string that needs to be split by \n
    idxstats_string = pysam.idxstats(bam_filename, split_lines=False)
    lines = idxstats_string.splitlines()
    for line in lines:
        splt = line.split("\t")
        _seqname, _seqlen, nmapped, _nunmapped = splt
开发者ID:msto,项目名称:pysam,代码行数:8,代码来源:test_samtools_python.py


示例13: calculate_samples

def calculate_samples(bamfile, count, ref_prefix=None):
    refcounts = dict()
    for s in pysam.idxstats(bamfile).split("\n"):
        tok = s.rstrip().split("\t")
        if ref_prefix is not None and tok[0].startswith(ref_prefix) == False:
            continue
        refcounts[tok[0]] = int(tok[2])
    coef = (count * 1.0) / sum(refcounts.values())
    return dict((k, (v, int(np.round(v * coef)))) for k, v in refcounts.items())
开发者ID:brianpenghe,项目名称:bamsam-scripts,代码行数:9,代码来源:resample_bam.py


示例14: bam_total_reads

def bam_total_reads(bam_fname):
    """Count the total number of mapped reads in a BAM file.

    Uses the BAM index to do this quickly.
    """
    lines = pysam.idxstats(bam_fname)
    tot_mapped_reads = 0
    for line in lines:
        _seqname, _seqlen, nmapped, _nunmapped = line.split()
        tot_mapped_reads += int(nmapped)
    return tot_mapped_reads
开发者ID:roryk,项目名称:cnvkit,代码行数:11,代码来源:coverage.py


示例15: idxstats

def idxstats(bam_fname, drop_unmapped=False):
    """Get chromosome names, lengths, and number of mapped/unmapped reads.

    Use the BAM index (.bai) to get the number of reads and size of each
    chromosome. Contigs with no mapped reads are skipped.
    """
    handle = StringIO(pysam.idxstats(bam_fname, split_lines=False))
    table = pd.read_table(handle, header=None,
                          names=['chromosome', 'length', 'mapped', 'unmapped'])
    if drop_unmapped:
        table = table[table.mapped != 0].drop('unmapped', axis=1)
    return table
开发者ID:JimmyLiJing,项目名称:cnvkit,代码行数:12,代码来源:samutil.py


示例16: CountRandom

def CountRandom(BamFile):
	samIdxStats = pysam.idxstats(BamFile)
	samfile = pysam.Samfile(BamFile,"rb")
	TotalMapped = samfile.mapped
	samfile.close()
	countAlign = 0
	List = GetChromoList()
	for stat in samIdxStats:
		if stat.split()[0] in List:
			MappedforChromosome = stat.split()[2]
			countAlign = countAlign+long(MappedforChromosome)
	RandAlign = TotalMapped-countAlign
	return [BamFile,{"Random":RandAlign}]
开发者ID:ThomasCarroll,项目名称:ChIPSeqQC,代码行数:13,代码来源:CRIBamRun3.py


示例17: _init_read_number

 def _init_read_number(self, bamFile):
     """Compute number of reads and number of mapped reads for CoverageSet"""
     # XXX ToDo add number of mapped reads in all cases
     # try:
     from distutils.version import LooseVersion
     if LooseVersion("0.9.0") <= LooseVersion(pysam.__version__):
         a = pysam.idxstats(bamFile)
         mapped_reads = sum([int(el.split('\t')[2]) for el in a.split('\n')[:len(a.split('\n'))-1]])
         unmapped_read = sum([int(el.split('\t')[3]) for el in a.split('\n')[:len(a.split('\n'))-1]])
         self.reads = mapped_reads + unmapped_read
         self.mapped_reads = mapped_reads
     else:
         self.reads = reduce(lambda x, y: x + y, [ eval('+'.join(l.rstrip('\n').split('\t')[2:]) ) for l in pysam.idxstats(bamFile)])
         self.mapped_reads = None
开发者ID:eggduzao,项目名称:reg-gen,代码行数:14,代码来源:CoverageSet.py


示例18: _validate_mito_bam

def _validate_mito_bam(data, handle, info):
    if data.mitochondria is None:
        # No mitochondrial data .. skip phylogeny
        return True

    references = handle.references
    min_length = min((len(record.sequence))
                     for record in data.mitochondria.itervalues())

    for bam_contig, bam_length in zip(references, handle.lengths):
        if bam_contig not in data.mitochondria:
            continue

        db_sequence = data.mitochondria[bam_contig].sequence
        db_length = len(db_sequence) - db_sequence.count("-")

        if bam_length != db_length:
            print_err("ERROR: Length of mitochondrial contig %r (%i bp) "
                      "does not match the length of the corresponding "
                      "sequence in the database (%i bp)"
                      % (bam_contig, bam_length, db_length))
            return False

        if not os.path.exists(handle.filename + '.bai') \
                and not os.path.exists(swap_ext(handle.filename, '.bai')):
            print_info('    - Attempting to index BAM file %r!'
                       % (handle.filename,))
            pysam.index(handle.filename)

        # Workaround for pysam < 0.9 returning list, >= 0.9 returning str
        for line in "".join(pysam.idxstats(handle.filename)).split('\n'):
            line = line.strip()
            if not line:
                continue

            name, _, hits, _ = line.split('\t')
            if (name == bam_contig) and not int(hits):
                print_err("WARNING: Mitochondrial BAM (%r) does not contain "
                          "any reads aligned to contig %r; inferring an "
                          "phylogeny is not possible."
                          % (handle.filename, name))
                return True

        info.mt_contig = bam_contig
        info.mt_length = bam_length
        info.mt_padding = len(db_sequence) - min_length

        return True
    return True
开发者ID:MikkelSchubert,项目名称:paleomix,代码行数:49,代码来源:database.py


示例19: get_chrom_lengths

def get_chrom_lengths(path_to_bam):
    '''
    Uses pysam to retrieve chromosome sizes form bam.
    Useful helper to use with some pybedtools functions (e.g. coverage), when a bam was mapped with custom genome not available in UCSC.
    Input: path to bam file (should be indexed)
    Output: dictionary.
    Example output:
    {'chr4': (0, 1351857), 'chr3L': (0, 24543557), 'chr2L': (0, 23011544), '*': (0, 0), 'chrX': (0, 22422827), 'chr2R': (0, 21146708), 'chr3R': (0, 27905053)}
    '''
    idx = pysam.idxstats(path_to_bam).splitlines()
    chromsizes = {}
    for element in idx:
        stats = element.split("\t")
        chromsizes[stats[0]] = (0, int(stats[1]))
    return chromsizes
开发者ID:adomingues,项目名称:NGSpipe2go,代码行数:15,代码来源:piRNABaseTerminalBases.py


示例20: verify_chrom_in_paths

def verify_chrom_in_paths(genome_path, bamfile1, bamfile2, chrom_sizes):
    """Check whether the chromsome info overlap in bamfiles, genome path and chrom size path"""
    chrom_bams = set()
    chrom_genome = set()
    chrom_chrom_sizes = set()
    #check bam files
    try:
        if pysam.__version__ == '0.9.0':
            chrom_bams_1 = set([el.split('\t')[0] for el in pysam.idxstats(bamfile1).split('\n')[:len(pysam.idxstats(bamfile1).split('\n'))-1]])
            chrom_bams_2 = set([el.split('\t')[0] for el in pysam.idxstats(bamfile2).split('\n')[:len(pysam.idxstats(bamfile2).split('\n'))-1]])
        else:
            chrom_bams_1 = set(map(lambda x: x.split('\t')[0], pysam.idxstats(bamfile1)))
            chrom_bams_2 = set(map(lambda x: x.split('\t')[0], pysam.idxstats(bamfile2)))
    except:
        return True
            
    chrom_bams = chrom_bams_1 & chrom_bams_2
    #check chrom_sizes
    with open(chrom_sizes) as f:
        for line in f:
            line = line.split('\t')
            if line[0] not in chrom_chrom_sizes:
                chrom_chrom_sizes.add(line[0])
    
    tmp = chrom_bams & chrom_chrom_sizes
    if len(tmp) == 0:
        return False
    
    #check genome
    for s in FastaReader(genome_path):
        if s.name not in chrom_genome:
            chrom_genome.add(s.name)
	    if s.name in tmp: #one overlap is sufficient
		return True
    
    return len(chrom_bams & chrom_genome & chrom_chrom_sizes) >= 1
开发者ID:Marvin84,项目名称:reg-gen,代码行数:36,代码来源:dpc_help.py



注:本文中的pysam.idxstats函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python pysam.index函数代码示例发布时间:2022-05-27
下一篇:
Python pysam.faidx函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap