• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python tabix.open函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中tabix.open函数的典型用法代码示例。如果您正苦于以下问题:Python tabix.open函数的具体用法?Python tabix.open怎么用?Python tabix.open使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了tabix.open函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: init_resource

    def init_resource(self):
        """Open the tabix-indexed annotation resources named in the config.

        Reads the ``self.rv`` section of ``self.config``. A ``dbsnp`` option,
        when present, is opened and stored in ``self.resources['dbsnp']``.
        Every option whose value ends with ``.featuredb`` is opened and
        collected in ``self.features`` as ``(option_name, handle)`` pairs.
        """
        for rname in ['dbsnp']:
            if self.config.has_option(self.rv, rname):
                import tabix
                self.resources[rname] = tabix.open(self.config.get(self.rv, rname))

        self.features = []
        for rname in self.config.options(self.rv):
            featdb = self.config.get(self.rv, rname)
            if featdb.endswith('.featuredb'):
                # Import here as well: a function-level import binds a local
                # name, so relying on the dbsnp branch above left ``tabix``
                # unbound (UnboundLocalError) when no dbsnp entry existed.
                import tabix
                self.features.append((rname, tabix.open(featdb)))
开发者ID:zwdzwd,项目名称:transvar,代码行数:12,代码来源:annodb.py


示例2: get_cadd

def get_cadd(config, chrom, start, ref, alt):
    """Look up the CADD score of a single-position variant.

    Parameters
    ----------
    config : mapping
        Must provide ``config['data_paths']['cadd']['whole_genome_cadd']``,
        the path handed to ``tabix.open``.
    chrom, start
        Chromosome and position; the table is queried for the region
        ``chrom:start-start``.
    ref, alt
        Reference and alternate alleles, compared with columns 2 and 3 of
        each returned record.

    Returns
    -------
    float
        ``float(record[5])`` of the first record whose column 3 equals
        ``alt``. ``np.nan`` (after logging a warning) when the table cannot
        be opened or queried, when a record's reference allele differs from
        ``ref``, or when no record matches ``alt``.
    """
    tabix_fp = config['data_paths']['cadd']['whole_genome_cadd']
    try:
        tb = tabix.open(tabix_fp)
    except Exception:
        # Best effort: a missing or unreadable table means "no score".
        logging.warning('{0} not available'.format(tabix_fp))
        return np.nan

    try:
        records = tb.querys('{0}:{1}-{1}'.format(chrom, start))
    except Exception:
        logging.warning('Error when trying to query {0}-{1}-{2}-{3}'.format(chrom, start, ref, alt))
        return np.nan

    for record in records:
        if record[2] != ref:
            logging.warning('Reference {0} is not the one in CADD for entry {1}-{2}-{3}-{4}'.format(ref, chrom, start, ref, alt))
            return np.nan
        if record[3] == alt:
            return float(record[5])

    # The message previously used the indices {0}-{1}-{2}-{4} with ``alt``
    # passed first, so it printed alt-chrom-start-alt.
    logging.warning('I do not find a cadd entry for {0}-{1}-{2}-{3}'.format(chrom, start, ref, alt))
    return np.nan
开发者ID:DavidTamborero,项目名称:MTBR,代码行数:30,代码来源:ct_lib.py


示例3: checkAndOpen

def checkAndOpen(db):
    """Validate a database path and open it with tabix.

    Parameters
    ----------
    db : str
        Path of the database file; ``~`` is expanded.

    Returns
    -------
    A ``tabix.open`` handle for a bgzip-compressed, tabix-indexed,
    non-empty file. ``None`` when the ``tabix`` module has not been loaded,
    when the path does not exist, when the file cannot be read, or when it
    is empty.

    Uncompressed ``.vcf`` files, ``.gz`` files without a ``.tbi`` index and
    any other extension are reported through ``abortWithMessage``.
    """
    if 'tabix' not in sys.modules:
        return None

    db = os.path.expanduser(db)
    if not os.path.exists(db):
        return None

    extension = os.path.splitext(db)[1]
    # The explicit returns below keep the function well-defined even if
    # abortWithMessage comes back instead of terminating the program.
    if extension == ".vcf":
        abortWithMessage("Error: database file {0} must compressed with bgzip".format(db))
        return None
    if extension != ".gz":
        abortWithMessage("Error opening database files: {0}".format(db))
        return None
    if not os.path.exists(db + ".tbi"):
        abortWithMessage("Compressed database is not tabix indexed")
        return None

    try:
        # ``with`` closes the probe handle; it used to be leaked.
        with gzip.open(db) as database:
            first_line = database.readline()
    except IOError:
        print("WARNING: could not open {}".format(db))
        return None

    # readline() signals end-of-file with an empty result, not StopIteration.
    if not first_line:
        print("Empty file {}".format(db))
        return None

    return tabix.open(db)
开发者ID:blachlylab,项目名称:mucor,代码行数:30,代码来源:databases.py


示例4: get_job_results

def get_job_results(job_id, job=None):
    """Stream the EPACTS results of a job as tab-separated text.

    Query-string filters (read from ``request.args``):

    * ``region`` - restrict the rows to a tabix region.
    * ``non-monomorphic`` - keep only rows whose ``AC`` column is > 0.
    * ``max-pvalue`` - keep only rows whose ``PVALUE`` column is numeric and
      strictly below the given threshold.

    Parameters
    ----------
    job_id
        Not used in the body.
    job
        Object providing ``relative_path``.
        NOTE(review): the default of None would fail on the first use;
        presumably a decorator injects the job - confirm.

    Returns
    -------
    Response
        A ``text/plain`` response streaming the header line followed by the
        rows that pass every active filter.

    Raises
    ------
    Exception
        When a filter needs a column (``AC`` / ``PVALUE``) that the header
        does not contain.
    """
    filters = request.args.to_dict()
    epacts_filename = job.relative_path("output.epacts.gz")
    with gzip.open(epacts_filename, "rt") as f:
        header = f.readline().rstrip('\n').split('\t')
    # Rename the two header columns to the spellings emitted in the output.
    if len(header) > 1 and header[1] == "BEG":
        header[1] = "BEGIN"
    if header[0] == "#CHROM":
        header[0] = "CHROM"
    headerpos = {name: i for i, name in enumerate(header)}

    region = filters.get("region", "")
    region_rows = None
    if region:
        # The previous code queried with undefined chrom/start_pos/end_pos.
        # NOTE(review): assumes the filter carries a tabix region string
        # ("chrom:start-end") - confirm against the client.
        region_rows = tabix.open(epacts_filename).querys(region)

    pass_tests = []
    if filters.get("non-monomorphic", False):
        if "AC" not in headerpos:
            raise Exception("Column AC not found")
        ac_index = headerpos["AC"]

        def mono_pass(row):
            return float(row[ac_index]) > 0
        pass_tests.append(mono_pass)

    if "max-pvalue" in filters:
        if "PVALUE" not in headerpos:
            raise Exception("Column PVALUE not found")
        pval_index = headerpos["PVALUE"]
        thresh = float(filters.get("max-pvalue", 1))

        def pval_pass(row):
            value = row[pval_index]
            return value != "NA" and float(value) < thresh
        pass_tests.append(pval_pass)

    def file_rows():
        # Text mode so the fields are str, and ``with`` so the handle is
        # closed once streaming ends.
        with gzip.open(epacts_filename, "rt") as f:
            next(f, None)  # skip the header line
            for line in f:
                yield line.rstrip("\n").split("\t")

    def generate():
        yield "\t".join(header) + "\n"
        # Rows from a region query are all data rows, so nothing is skipped.
        rows = region_rows if region_rows is not None else file_rows()
        for row in rows:
            if all(test(row) for test in pass_tests):
                # Both row sources are newline-free at this point.
                yield "\t".join(row) + "\n"

    return Response(generate(), mimetype="text/plain")
开发者ID:statgen,项目名称:gasp,代码行数:60,代码来源:api_blueprint.py


示例5: extract_CADD_score

def extract_CADD_score(arguments, q):
	"""Annotate one VCF record with CADD scores and put the result on a queue.

	Parameters
	----------
	arguments : tuple
		``(vcf_record, caddfile)``: the record to annotate and the path
		handed to ``tabix.open``.
	q
		Object with a ``put`` method; receives the annotated columns.

	Returns
	-------
	list
		The CHROM, POS, ID, REF, ALT, QUAL, FILTER and INFO columns of the
		record as formatted by ``VCF_WRITER``. ``INFO["RAWCADD"]`` and
		``INFO["PHREDCADD"]`` hold columns 4 and 5 of the first CADD record
		whose column 3 equals the first ALT allele, or 0 when none matches.
	"""
	vcf_record, caddfile = arguments

	tb = tabix.open(caddfile)

	chromosome = (vcf_record.CHROM).replace("chr","")
	vcf_record.INFO["RAWCADD"]   = 0
	vcf_record.INFO["PHREDCADD"] = 0

	# Specific for CADD files
	# FIXME: get info about chr or not from provided VCF file
	records = tb.query(chromosome, vcf_record.POS-1, vcf_record.POS)

	# Look for matching mutation
	# Works for SNVs, InDels optimisation is ongoing
	for rec in records:
		if rec[3] == vcf_record.ALT[0]:
			# FIXME: Make requested fields optional through arguments
			vcf_record.INFO["RAWCADD"]   = rec[4]
			vcf_record.INFO["PHREDCADD"] = rec[5]
			break

	# ``str(QUAL) or '.'`` could never produce '.', because str(None) is the
	# non-empty string 'None'; test for a missing quality explicitly.
	qual = '.' if vcf_record.QUAL is None else str(vcf_record.QUAL)

	# workaround since multiprocess can't handle VCF record class objects
	# FIXME: use VCF class records rather than this ugly string
	annotated = VCF_WRITER._map(str, [vcf_record.CHROM, vcf_record.POS, vcf_record.ID, vcf_record.REF]) + [VCF_WRITER._format_alt(vcf_record.ALT), qual, VCF_WRITER._format_filter(vcf_record.FILTER), VCF_WRITER._format_info(vcf_record.INFO)]

	# Return results to Queue
	q.put(annotated)
	return annotated
开发者ID:jdeligt,项目名称:Genetics,代码行数:29,代码来源:Annotate_CADD_Scores_In_VCF.py


示例6: get_exons

def get_exons(chrom, start, stop, file):
	"""Return, as a list, every record that a tabix query of `file` yields for (chrom, start, stop)."""
	handle = tabix.open(file)
	return [row for row in handle.query(chrom, start, stop)]
开发者ID:niab,项目名称:exac-scripts,代码行数:7,代码来源:variants_reader.py


示例7: __init__

 def __init__(self, _snp, _ref, _vcf, _restrict,
              _num_ctrls, _window, _match_context):
     """Store the SNP and settings and open the reference, VCF and restriction files.

     ``_ref`` is opened with ``pyfasta.Fasta``; ``_vcf`` and, unless it is
     None, ``_restrict`` are opened with ``tabix.open``. When
     ``_match_context`` is >= 0 the context of the SNP is computed through
     ``self.GetContext`` and kept in ``self.snp_context``.
     """
     self.snp = _snp
     self.ref = pyfasta.Fasta(_ref)
     self.vcf = tabix.open(_vcf)
     self.restrict = None if _restrict is None else tabix.open(_restrict)
     # First whitespace-delimited token of each reference key -> full key.
     self.chromToKey = dict(
         (fasta_key.split()[0], fasta_key) for fasta_key in self.ref.keys()
     )
     self.num_ctrls = _num_ctrls
     self.window = _window
     self.match_context = _match_context
     if not self.match_context < 0:
         self.snp_context = self.GetContext(self.snp)
开发者ID:mgymrek,项目名称:non-coding-annotations,代码行数:17,代码来源:annotation_score.py


示例8: main

def main(args):
    """Write the coverage column ``c10`` for every coordinate of a BED file.

    ``loadCoords(args.bedFile)`` supplies the chromosome and the 1-based
    coordinates. Each coordinate is looked up in the per-chromosome coverage
    table and one ``chrom<TAB>pos<TAB>c10`` line per returned record is
    written to ``args.outFile``. The output file is created even when no
    chromosome was loaded.
    """
    chrom, coords = loadCoords(args.bedFile)
    tb = None
    if chrom:
        # Keep the handle: the result of tabix.open used to be discarded,
        # leaving ``tb`` as an empty string that has no ``query`` method.
        tb = tabix.open(
            "/home/evansj/me/data/ExAC/coverage/ftp.broadinstitute.org/pub/ExAC_release/current/coverage/Panel.chr%s.coverage.txt.gz"
            % (chrom,)
        )

    with open(args.outFile, "w") as fout:
        if tb is not None:
            for st in coords:
                # st is 1-idx in coords
                # tabix needs 0-based
                records = tb.query(chrom, st - 1, st)
                for record in records:
                    thisChrom, pos, mean, median, c1, c5, c10, c15 = record[0:8]
                    print("\t".join((thisChrom, pos, c10)), file=fout)
开发者ID:samesense,项目名称:target_exac_setup,代码行数:18,代码来源:pullGeneCov.py


示例9: tabix_vcf

def tabix_vcf(vcf_file, in_chr, in_start, in_stop):
	"""Generate the records of a tabix-indexed VCF for a location.

	``in_chr`` is converted with ``str`` and ``in_start`` / ``in_stop`` with
	``int`` before the query. Any ordinary error while opening or querying
	the file ends the iteration silently, so callers see an empty result.
	"""
	chrom = str(in_chr)
	start = int(in_start)
	stop = int(in_stop)
	try:
		vcf_tb = tabix.open(vcf_file)
		for rec in vcf_tb.query(chrom, start, stop):
			yield rec
	except Exception:
		# Deliberately best-effort, but no longer a bare ``except:`` so that
		# KeyboardInterrupt and SystemExit still propagate.
		return
开发者ID:Jana-A,项目名称:WTSI.DDD-VET,代码行数:9,代码来源:parsing_setups.py


示例10: get_tabixhandle

def get_tabixhandle(path):
    """Return a tabix handle for ``path`` after two sanity checks.

    Raises TabixError when the name does not end with '.gz', or when no
    ``<path>.tbi`` index file exists next to it.
    """
    looks_compressed = path.endswith('.gz')
    if not looks_compressed:
        raise TabixError("File {0} does not end with '.gz'".format(path))

    if os.path.isfile(path + '.tbi'):
        return tabix.open(path)

    raise TabixError("No index could be found for {0}".format(path))
开发者ID:moonso,项目名称:genmod,代码行数:11,代码来源:read_tabix_files.py


示例11: __init__

 def __init__(self, args):
     """Load the transcript annotation, then open the tabix and BAM inputs.

     ``args.gencode_gtf`` is parsed through ``self._parse_tx_infos`` and also
     opened with ``tabix.open``; ``args.alignment_bam`` is opened with
     ``pysam.AlignmentFile``. A progress message is printed to stderr before
     each step.
     """
     self.args = args

     # Transcript infos, plus a lookup table keyed by transcript id.
     print('Loading transcripts...', file=sys.stderr)
     self.tx_infos = self._parse_tx_infos(args.gencode_gtf)
     self.tx_info_by_id = {
         tx_info.transcript_id: tx_info for tx_info in self.tx_infos
     }

     # Tabix handle on the same GTF file.
     print('Opening tabix file...', file=sys.stderr)
     self.tabix = tabix.open(args.gencode_gtf)

     # Alignment file, opened in mode 'r'.
     print('Opening BAM file...', file=sys.stderr)
     self.sam_file = pysam.AlignmentFile(args.alignment_bam, 'r')
开发者ID:holtgrewe,项目名称:linc_splice,代码行数:12,代码来源:map_linc.py


示例12: ld_expand

def ld_expand(df, ld_beds):
    """
    Expand a set of SNVs into all SNVs with LD >= 0.8 and return a BedTool of
    the expanded SNPs.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas dataframe with SNVs. The index is of the form chrom:pos where pos
        is the one-based position of the SNV. The columns are chrom, start, end.
        chrom, start, end make a zero-based bed file with the SNV coordinates.

    ld_beds : dict
        Dict whose keys are chromosomes and whose values are filenames of
        tabixed LD bed files. The LD bed files should be formatted like this:
            chr1    14463   14464   14464:51479:0.254183
        where the first three columns indicate the zero-based coordinates of
        a SNV and the fourth column has the one-based coordinate of that
        SNV, the one-based coordinate of another SNV on the same chromosome, and
        the LD between these SNVs (all separated by colons).

    Returns
    -------
    bt : pybedtools.BedTool
        Sorted BedTool with the input SNVs and the SNVs they are in LD with.
        The name column of every entry is the index label of the input SNV
        it was derived from.
    """
    import pybedtools as pbt
    import tabix
    out_snps = []
    for chrom, ld_path in ld_beds.items():
        t = tabix.open(ld_path)
        tdf = df[df['chrom'].astype(str) == chrom]
        # Iterate labels and positions together instead of DataFrame.ix,
        # which no longer exists in current pandas.
        for ind, p in zip(tdf.index, tdf['end']):
            out_snps.append('{}\t{}\t{}\t{}\n'.format(chrom, p - 1, p, ind))
            try:
                # Plain iteration replaces the Python-2-only ``r.next()`` loop.
                for n in t.query('{}'.format(chrom), p - 1, p):
                    p1, p2, r2 = n[-1].split(':')
                    if float(r2) >= 0.8:
                        out_snps.append('{}\t{}\t{}\t{}\n'.format(
                            n[0], int(p2) - 1, int(p2), ind))
            except tabix.TabixError:
                # A failed query is treated as "no LD partners" for this SNV.
                continue
    bt = pbt.BedTool(''.join(out_snps), from_string=True)
    bt = bt.sort()
    return bt
开发者ID:cdeboever3,项目名称:cdpybio,代码行数:52,代码来源:analysis.py


示例13: get_genotypes

def get_genotypes(CpG_location, tb_file="/path/to/file/DF_meth_variants.gz", n_columns=782):
    """Collect the tabix records of a region into a DataFrame.

    Parameters
    ----------
    CpG_location : str
        Region string handed to ``querys``.
    tb_file : str, optional
        Path handed to ``tabix.open``; defaults to the previously
        hard-coded location.
    n_columns : int, optional
        Number of columns of the result (labelled 0 .. n_columns - 1);
        defaults to the previously hard-coded 782.

    Returns
    -------
    pandas.DataFrame
        One row per record, holding the record's fields from index 3 onward.
    """
    import tabix
    import pandas as pd
    tb = tabix.open(tb_file)
    # Build all rows first: growing the frame one ``df.loc[num]`` at a time
    # costs quadratic time. ``range`` replaces the Python-2-only ``xrange``.
    rows = [record[3:] for record in tb.querys(CpG_location)]
    return pd.DataFrame(rows, columns=range(n_columns))
开发者ID:CrystalHumphries,项目名称:MethylationCorrelationBlock,代码行数:13,代码来源:try_library.py


示例14: test_same_aa_different_positions

 def test_same_aa_different_positions(self):
     """same_aa() returns no pairs when the paired variants have different protein positions."""
     annotated_positions = (
         (5, 'Protein_position=2'),
         (7, 'Protein_position=3'),
         (8, 'Protein_position=4'),
     )
     vcf_lines = make_vcf_header()
     for position, annotation in annotated_positions:
         vcf_lines.append(make_vcf_line(pos=position, extra=annotation))
     self.write_vcf(vcf_lines)

     handle = tabix.open(self.path)
     candidate_pairs = [[('1', 7), ('1', 8)]]

     observed = same_aa(handle, candidate_pairs)
     self.assertEqual(observed, [])
开发者ID:jeremymcrae,项目名称:clinical-filter,代码行数:14,代码来源:test_multinucleotide_variants.py


示例15: test_same_aa

 def test_same_aa(self):
     """same_aa() keeps a pair whose two variants share a protein position."""
     # Two variants, both annotated with protein position 1.
     vcf_lines = make_vcf_header()
     for position in (2, 4):
         vcf_lines.append(make_vcf_line(pos=position, extra='Protein_position=1'))
     self.write_vcf(vcf_lines)

     handle = tabix.open(self.path)
     candidate_pairs = [[('1', 2), ('1', 4)]]

     observed = same_aa(handle, candidate_pairs)
     self.assertEqual(observed, [[('1', 2), ('1', 4)]])
开发者ID:jeremymcrae,项目名称:clinical-filter,代码行数:14,代码来源:test_multinucleotide_variants.py


示例16: __search_pos_bdg

 def __search_pos_bdg(self):
     """Load the per-base signal of every bedGraph file for the plotted region.

     For each file in ``self.l_bdg_file`` the region
     (``self.chrom``, ``self.beg``, ``self.end``) is queried through tabix
     and expanded to one value per position. Results are stored in
     ``self.pd_frame[bdg_file]`` as a DataFrame with columns ``pos`` and
     ``con``. The tick positions computed for the first file are stored in
     ``self.l_xpos`` and, as strings, in ``self.l_xticks``.
     """
     self.pd_frame = {}
     for i, bdg_file in enumerate(self.l_bdg_file):
         tb = tabix.open(bdg_file)
         record = tb.query(self.chrom, self.beg, self.end)
         l_pos = []
         l_cons = []

         l_xticks = [self.beg + 1]
         # Tick spacing: the largest power of ten not above a tenth of the
         # region. Regions shorter than 10 bp used to reach log10(0) and
         # crash, so fall back to a spacing of 1 there.
         span_tenth = int((self.end - self.beg) / 10)
         if span_tenth >= 1:
             bin_size = 10 ** int(np.log10(span_tenth))
         else:
             bin_size = 1

         # 0 means "no position seen yet".
         pre_pos = 0

         for rec in record:
             cons = float(rec[3])
             # ``range`` instead of the Python-2-only ``xrange``.
             for pos in range(int(rec[1]), int(rec[2])):
                 if pre_pos == 0:
                     pre_pos = int(rec[1])

                 # Only consider the given region.
                 if self.__is_intersect(pos):

                     # If the bedGraph has a gap before this position,
                     # fill the gap with zeros.
                     if pos > pre_pos + 1:
                         for p in range(pre_pos + 1, pos):
                             l_pos.append(p)
                             l_cons.append(0.0)

                     l_pos.append(pos)
                     l_cons.append(cons)

                     if pos % bin_size == 0:
                         l_xticks.append(pos)

                 # Updated for every position, inside the region or not.
                 pre_pos = pos

         l_xticks.append(self.end)
         data = {'pos': l_pos, 'con': l_cons}
         self.pd_frame[bdg_file] = pd.DataFrame(data)

         if i == 0:
             self.l_xpos = l_xticks
             self.l_xticks = [str(tick) for tick in l_xticks]
开发者ID:yzqheart,项目名称:ChIP,代码行数:49,代码来源:PlotChip_IGV.py


示例17: __init__

 def __init__(self, task_queue, results_queue, families=None, phased=False,
             vep=False, cadd_raw=False, cadd_file=None, cadd_1000g=None,
             cadd_exac=None, cadd_ESP=None, cadd_InDels=None,
             thousand_g=None, exac=None, dbNSFP=None, strict=False,
             verbosity=False):
     """Store the settings and open every configured annotation file.

     All arguments are stored on the instance under their own names.
     ``cadd_file``, ``cadd_1000g``, ``cadd_exac``, ``cadd_ESP``,
     ``cadd_InDels``, ``thousand_g``, ``exac`` and ``dbNSFP`` are paths;
     each one that is truthy is replaced by its ``tabix.open`` handle.
     ``self.any_cadd_info`` becomes True as soon as one of the five CADD
     files is given.

     ``families`` defaults to a fresh empty dict per instance.
     """
     Process.__init__(self)
     self.task_queue = task_queue
     # A ``{}`` default in the signature would be shared by every instance.
     self.families = {} if families is None else families
     self.results_queue = results_queue
     self.verbosity = verbosity
     self.phased = phased
     self.vep = vep
     self.cadd_raw = cadd_raw
     self.cadd_file = cadd_file
     self.cadd_1000g = cadd_1000g
     self.cadd_exac = cadd_exac
     self.cadd_ESP = cadd_ESP
     self.cadd_InDels = cadd_InDels
     self.thousand_g = thousand_g
     self.exac = exac
     self.dbNSFP = dbNSFP
     self.strict = strict
     self.any_cadd_info = False

     for name in ('cadd_file', 'cadd_1000g', 'cadd_exac', 'cadd_ESP',
                  'cadd_InDels'):
         path = getattr(self, name)
         if path:
             setattr(self, name, tabix.open(path))
             self.any_cadd_info = True

     # dbNSFP is opened into its own attribute; the previous code re-opened
     # ``self.exac`` in the dbNSFP branch.
     for name in ('thousand_g', 'exac', 'dbNSFP'):
         path = getattr(self, name)
         if path:
             setattr(self, name, tabix.open(path))
开发者ID:gpcr,项目名称:genmod,代码行数:44,代码来源:variant_consumer.py


示例18: test_same_aa_missing_protein_positions

 def test_same_aa_missing_protein_positions(self):
     """same_aa() drops a pair when one of its variants has no protein position."""
     # A variant without a listed protein position (i.e. residue number)
     # could be affecting the splice site, so a pair containing it cannot
     # be used. Here only the variant at position 8 carries the annotation.
     line_specs = (
         {'pos': 5},
         {'pos': 7},
         {'pos': 8, 'extra': 'Protein_position=4'},
     )
     vcf_lines = make_vcf_header()
     for spec in line_specs:
         vcf_lines.append(make_vcf_line(**spec))
     self.write_vcf(vcf_lines)

     handle = tabix.open(self.path)
     candidate_pairs = [[('1', 7), ('1', 8)]]

     observed = same_aa(handle, candidate_pairs)
     self.assertEqual(observed, [])
开发者ID:jeremymcrae,项目名称:clinical-filter,代码行数:17,代码来源:test_multinucleotide_variants.py


示例19: get_1mb_snps

def get_1mb_snps(snp_file='snps_all.gz', pos_file='newMethPosFile.txt_2-3_col_1', window=1000000):
    """Collect the SNPs lying within a window around each listed position.

    Parameters
    ----------
    snp_file : str, optional
        Path handed to ``tabix.open``; defaults to the previously
        hard-coded file.
    pos_file : str, optional
        Tab-separated file whose first two columns are a chromosome and a
        position; defaults to the previously hard-coded file.
    window : int, optional
        Distance searched on each side of a position (default 1,000,000).

    Returns
    -------
    dict
        Maps column 3 of every record found to 0.
    """
    import tabix
    tb = tabix.open(snp_file)

    snps = {}
    with open(pos_file) as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines, e.g. at the end of the file
            a = line.rstrip('\n').split('\t')
            centre = int(a[1])
            # Clamp at 1: a negative start would give a region string such
            # as "chr1:-999500-1000500", which is not a valid region.
            start = str(max(1, centre - window))
            stop = str(centre + window)
            pos = a[0] + ":" + start + "-" + stop
            for record in tb.querys(pos):
                snps[record[3]] = 0
    return snps
开发者ID:CrystalHumphries,项目名称:MethylationCorrelationBlock,代码行数:17,代码来源:get_1mb_snps.py


示例20: test_screen_pairs_nonstandard_pair

 def test_screen_pairs_nonstandard_pair(self):
     """screen_pairs() discards a 'pair' that holds three variants and keeps the real pair."""
     # Write a VCF with one unannotated variant at each position.
     vcf_lines = make_vcf_header()
     for position in (2, 4, 5, 7, 8):
         vcf_lines.append(make_vcf_line(pos=position))
     self.write_vcf(vcf_lines)

     handle = tabix.open(self.path)
     # The first 'pair' has three variants in it; 'pairs' where n != 2
     # are excluded.
     triple = [('1', 2), ('1', 4), ('1', 5)]
     true_pair = [('1', 7), ('1', 8)]
     candidate_pairs = [triple, true_pair]

     observed = screen_pairs(handle, candidate_pairs, is_not_indel)
     self.assertEqual(observed, [[('1', 7), ('1', 8)]])
开发者ID:jeremymcrae,项目名称:clinical-filter,代码行数:18,代码来源:test_multinucleotide_variants.py



注:本文中的tabix.open函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python table.Table类代码示例发布时间:2022-05-27
下一篇:
Python taal.Translator类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap