本文整理汇总了Python中pysam.asGTF函数的典型用法代码示例。如果您正苦于以下问题:Python asGTF函数的具体用法?Python asGTF怎么用?Python asGTF使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了asGTF函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: testDisjointIterators
def testDisjointIterators(self):
    """Check that two independent iterators yield the same first record.

    Both ``fetch`` calls request ``multiple_iterators=True``; the test
    asserts that the first record of each has the same string form.
    """
    # two iterators working on the same file
    tabix = pysam.TabixFile(self.filename)
    try:
        # Use the next() builtin rather than the Python-2-only ``.next()``
        # method so the test runs on Python 2.6+ and Python 3 alike.
        a = next(tabix.fetch(parser=pysam.asGTF(), multiple_iterators=True))
        b = next(tabix.fetch(parser=pysam.asGTF(), multiple_iterators=True))
        # both iterators are at top of file
        self.assertEqual(str(a), str(b))
    finally:
        # Release the file handle even when an assertion fails.
        tabix.close()
开发者ID:humburg,项目名称:pysam,代码行数:7,代码来源:tabix_test.py
示例2: testJoinedIterators
def testJoinedIterators(self):
    """Check the first record returned by each of two default ``fetch`` calls.

    The two calls do not request ``multiple_iterators``; the test expects
    the first call to return a "UTR" record and the second an "exon" record
    that are otherwise identical.
    """
    # two iterators working on the same file
    tabix = pysam.TabixFile(self.filename)
    try:
        # Use the next() builtin rather than the Python-2-only ``.next()``
        # method so the test runs on Python 2.6+ and Python 3 alike.
        a = next(tabix.fetch(parser=pysam.asGTF()))
        b = next(tabix.fetch(parser=pysam.asGTF()))
        # the first two lines differ only by the feature field
        self.assertEqual(a.feature, "UTR")
        self.assertEqual(b.feature, "exon")
        self.assertEqual(re.sub("UTR", "", str(a)), re.sub("exon", "", str(b)))
    finally:
        # Release the file handle even when an assertion fails.
        tabix.close()
开发者ID:dpryan79,项目名称:pysam,代码行数:10,代码来源:tabix_test.py
示例3: _TestMultipleIteratorsHelper
def _TestMultipleIteratorsHelper(filename, multiple_iterators):
    """Open *filename* locally, build a GTF iterator, close the file, return the iterator."""
    handle = pysam.TabixFile(filename)
    records = handle.fetch(
        parser=pysam.asGTF(),
        multiple_iterators=multiple_iterators,
    )
    # The handle is closed before the iterator is handed back to the caller.
    handle.close()
    return records
开发者ID:dpryan79,项目名称:pysam,代码行数:7,代码来源:tabix_test.py
示例4: read_gtf
def read_gtf(lines, scaffolds, contig_prefix):
    """Build a nested lookup table from the GTF records parsed out of *lines*.

    Records for which ``filter_gtf_record`` returns a true value are skipped;
    every other record is passed to ``update_gtf_table`` together with
    *scaffolds* and *contig_prefix*.
    """
    # gene_id -> transcript_id -> exon_number -> feature -> [items]
    table = {}
    for record in text.parse_lines(lines, pysam.asGTF()):
        if filter_gtf_record(record):
            continue
        update_gtf_table(table, record, scaffolds, contig_prefix)
    return table
开发者ID:UMNPonyClub,项目名称:paleomix,代码行数:7,代码来源:gtf_to_bed.py
示例5: subset_of_feature_in_region
def subset_of_feature_in_region(self, contig=None, start=None, end=None, types=None):
    '''
    Yield selected fields of every GTF record fetched for a region.

    Args:
     contig, start, end: region forwarded unchanged to ``Tabixfile.fetch``
     types: a single key (str) or a list of keys to read from each
      record's ``asDict()`` mapping; any other value yields nothing

    Yields:
     For a str ``types``: the value stored under that key (records
     lacking the key are reported on stdout and skipped).
     For a list ``types``: a dict with the keys that were found (missing
     keys are reported on stdout and left out).

    Example:
     # return dict of rna and tss id
     for rna_tss in tabix.subset_of_feature_in_region(contig="chr2L", end=9839,
                                                      types=["transcript_id", 'tss_id']):
         print(rna_tss)
    '''
    for gtf in pysam.Tabixfile.fetch(self.tabixfile, contig, start, end,
                                     parser=pysam.asGTF()):
        if isinstance(types, str):
            try:
                value = gtf.asDict()[types]
            except KeyError:
                # Report the key that was actually requested; the previous
                # message referenced an undefined name in this branch.
                print('key \'{0}\' is not found in {1}'.format(types, self.ingtf))
            else:
                yield value
        elif isinstance(types, list):
            # Convert the record once instead of once per requested key.
            attributes = gtf.asDict()
            tmp = dict()
            for t in types:
                try:
                    tmp[t] = attributes[t]
                except KeyError:
                    print('key \'{0}\' is not found in {1}'.format(t, self.ingtf))
            yield tmp
开发者ID:soh-i,项目名称:Ivy,代码行数:25,代码来源:gtf.py
示例6: gene_in_region
def gene_in_region(self, contig=None, start=None, end=None):
    '''
    Yield the 'gene_name' value of every GTF record fetched for a region.

    Args:
     contig, start, end: region forwarded unchanged to ``Tabixfile.fetch``

    Yields:
     The value stored under 'gene_name' in each record's ``asDict()``
     mapping; records without that key are reported on stdout and skipped.
    '''
    key = 'gene_name'
    for gtf in pysam.Tabixfile.fetch(self.tabixfile, contig, start, end,
                                     parser=pysam.asGTF()):
        try:
            name = gtf.asDict()[key]
        except KeyError:
            # The previous message formatted an undefined name, which turned
            # a missing key into a NameError.
            print('key \'{0}\' is not found in {1}'.format(key, self.ingtf))
        else:
            yield name
开发者ID:soh-i,项目名称:Ivy,代码行数:7,代码来源:gtf.py
示例7: testCopy
def testCopy(self):
    """Check that ``copy.copy`` of a fetched record compares equal to the original.

    The check is done once for a record parsed with ``asTuple`` and once for
    a record parsed with ``asGTF``.
    """
    # Use the next() builtin rather than the Python-2-only ``.next()``
    # method so the test runs on Python 2.6+ and Python 3 alike.
    a = next(self.tabix.fetch(parser=pysam.asTuple()))
    b = copy.copy(a)
    self.assertEqual(a, b)
    a = next(self.tabix.fetch(parser=pysam.asGTF()))
    b = copy.copy(a)
    self.assertEqual(a, b)
开发者ID:msto,项目名称:pysam,代码行数:8,代码来源:tabixproxies_test.py
示例8: __init__
def __init__(self, file_path):
    """Initialise the parent class with a ``.gz`` path and the GTF parser.

    If *file_path* does not already end in ``.gz``, an existing
    ``<file_path>.gz`` is used when present; otherwise the path returned by
    ``self.compress(file_path)`` is used.
    """
    path = str(file_path)
    if not path.endswith('.gz'):
        candidate = path + '.gz'
        path = candidate if os.path.exists(candidate) else self.compress(path)
    super().__init__(path, parser=pysam.asGTF())
开发者ID:jrderuiter,项目名称:ngs-tk,代码行数:9,代码来源:tabix.py
示例9: _open_file
def _open_file(self):  # type: (...) -> pysam.TabixFile
    """Yield an open tabix handle for ``self._gtf_path``; close it afterwards."""
    handle = pysam.TabixFile(
        native_str(self._gtf_path), parser=pysam.asGTF())
    try:
        # Hand the open file to whoever is driving this generator.
        yield handle
    finally:
        # Always release the handle, including on error.
        handle.close()
开发者ID:jrderuiter,项目名称:im-fusion,代码行数:10,代码来源:tabix.py
示例10: load_kg_gtf
def load_kg_gtf(gtf_file_name):
    """Load every record of a tabix-indexed GTF file as ``GFFFeature`` objects.

    Args:
        gtf_file_name: path handed to ``pysam.TabixFile``.

    Returns:
        A list with one ``GFFFeature`` per record, in file order.
    """
    f = pysam.TabixFile(gtf_file_name)
    try:
        feats = []
        for row in f.fetch(parser=pysam.asGTF()):
            attr = parse_gtf_attr(row.attributes)
            # score/strand/frame are read from the record; the bare names
            # used previously were never defined in this function.
            # NOTE(review): values are passed through as pysam returns them;
            # confirm GFFFeature accepts those types.
            currfeat = GFFFeature(row.seqname, row.source, row.feature,
                                  int(row.start), int(row.end), row.score,
                                  row.strand, row.frame, attr)
            feats.append(currfeat)
    finally:
        # Release the file handle even if a record fails to convert.
        f.close()
    return feats
开发者ID:weallen,项目名称:Pyseqwill,代码行数:11,代码来源:data.py
示例11: testRead
def testRead(self):
    """Compare each parsed GTF record with the matching row of ``self.compare``."""
    records = self.tabix.fetch(parser=pysam.asGTF())
    for index, record in enumerate(records):
        expected = self.compare[index]
        # Same number of fields and same field values.
        self.assertEqual(len(expected), len(record))
        self.assertEqual(list(expected), list(record))
        # The string form splits back into the reference fields.
        self.assertEqual(expected, str(record).split("\t"))
        self.assertTrue(record.gene_id.startswith("ENSG"))
        if record.feature != 'gene':
            self.assertTrue(record.transcript_id.startswith("ENST"))
        self.assertEqual(expected[0], record.contig)
开发者ID:humburg,项目名称:pysam,代码行数:11,代码来源:tabix_test.py
示例12: testSetting
def testSetting(self):
    """Assign to every standard field of each GTF record.

    The test contains no assertions; it only exercises the attribute setters.
    """
    for record in self.tabix.fetch(parser=pysam.asGTF()):
        # Text fields get a "_test" suffix.
        record.contig += "_test"
        record.source += "_test"
        record.feature += "_test"
        # Coordinates are shifted by ten.
        record.start = record.start + 10
        record.end = record.end + 10
        # Remaining fields are overwritten with fixed values.
        record.score = 20
        record.strand = "+"
        record.frame = 0
        record.attributes = 'gene_id "0001";'
开发者ID:zengfengbo,项目名称:pysam,代码行数:12,代码来源:tabix_test.py
示例13: fetch_gtf
def fetch_gtf(self, contig=None, start=None, end=None):
    '''
    Yield each GTF record that ``Tabixfile.fetch`` returns for a region.

    Args:
     contig, start, end: region forwarded unchanged to ``Tabixfile.fetch``

    Yields:
     records parsed with ``pysam.asGTF()``

    Example:
     # print each GTF line as a dict
     for rec in tabix.fetch_gtf(contig="chr2L", end=9839):
         print(rec.asDict())
    '''
    records = pysam.Tabixfile.fetch(
        self.tabixfile, contig, start, end, parser=pysam.asGTF())
    for record in records:
        yield record
开发者ID:soh-i,项目名称:Ivy,代码行数:14,代码来源:gtf.py
示例14: overlap_annotation
def overlap_annotation(junc, anno):
    """Annotate *junc* in place against the exons overlapping it in *anno*.

    The previous body was a C++ loop pasted after a short Python prelude and
    could not be parsed as Python; this is a direct Python port of that loop.

    Args:
        junc: junction object providing ``chrom``, ``start`` and ``end``.
            NOTE(review): the port assumes it also carries the set-like
            attributes ``exons_skipped``, ``donors_skipped`` and
            ``acceptors_skipped`` and accepts assignment to
            ``known_donor``, ``known_acceptor`` and ``known_junction``,
            mirroring the fields used by the C++ code - confirm.
        anno: object whose ``fetch(chrom, start, end, parser=...)`` returns
            GTF records (presumably a ``pysam.TabixFile``).

    Returns:
        None; results are written onto *junc*.
    """
    records = anno.fetch(junc.chrom, junc.start, junc.end, parser=pysam.asGTF())
    # Materialise as a list: the loop needs len() and look-ahead indexing,
    # which a lazy filter object does not support on Python 3.
    exons = [record for record in records if record.feature == 'exon']
    last_index = len(exons) - 1

    junction_start = False
    for idx, exon in enumerate(exons):
        if exon.start > junc.end:
            # No need to look any further:
            # the rest of the exons are outside the junction.
            break

        # Known junction: this exon ends at the junction start and the
        # following exon begins at the junction end. The last exon has no
        # successor, so the look-ahead is guarded instead of indexing past
        # the end of the list.
        if (idx < last_index
                and exon.end == junc.start
                and exons[idx + 1].start == junc.end):
            junc.known_acceptor = True
            junc.known_donor = True
            junc.known_junction = True
            continue

        if not junction_start and exon.end >= junc.start:
            junction_start = True
        if not junction_start:
            continue

        if exon.start > junc.start and exon.end < junc.end:
            # NOTE(review): ``name`` is carried over literally from the C++
            # exon type; confirm the GTF records expose such a field.
            junc.exons_skipped.add(exon.name)
        if exon.start > junc.start:
            junc.donors_skipped.add(exon.start)
        if exon.end < junc.end:
            junc.acceptors_skipped.add(exon.end)
        if exon.end == junc.start:
            junc.known_donor = True
        # TODO - check for last exon
        if exon.start == junc.end:
            junc.known_acceptor = True
开发者ID:CrescentLuo,项目名称:Amphisbaena,代码行数:49,代码来源:JunctionAnnotator.py
示例15: from_gtf
def from_gtf(
        cls,
        gtf_path,  # type: pathlib.Path
        chromosomes=None,  # type: List[str]
        record_filter=None  # type: Callable[[Any], bool]
):  # type: (...) -> TranscriptReference
    """Builds a reference instance from the given GTF file.

    Args:
        gtf_path: path of the GTF file, opened with ``pysam.TabixFile``.
        chromosomes: contigs to load; defaults to every contig in the file.
        record_filter: optional predicate; only records for which it
            returns a true value are used.

    Returns:
        ``cls(transcript_trees, exon_trees)``, where ``transcript_trees``
        maps each chromosome to an interval tree of its transcripts and
        ``exon_trees`` maps each transcript id to an interval tree of its
        exons.
    """

    def _transcript_id(rec):
        # Sort and grouping key for exons.
        return rec.transcript_id

    # Open gtf file.
    gtf = pysam.TabixFile(native_str(gtf_path), parser=pysam.asGTF())
    try:
        if chromosomes is None:
            chromosomes = gtf.contigs

        # Build the trees.
        transcript_trees = {}
        exon_trees = {}
        for chrom in chromosomes:
            # Collect exons and transcripts.
            transcripts = []
            exons = []
            records = gtf.fetch(reference=chrom)
            if record_filter is not None:
                records = (rec for rec in records if record_filter(rec))
            for record in records:
                if record.feature == 'transcript':
                    transcripts.append(cls._record_to_transcript(record))
                elif record.feature == 'exon':
                    exons.append(cls._record_to_exon(record))

            # Build transcript lookup tree.
            transcript_trees[chrom] = IntervalTree.from_tuples(
                (tr.start, tr.end, tr) for tr in transcripts)

            # Build exon lookup tree. groupby only merges adjacent items,
            # so the exons are sorted by the same key first.
            exons = sorted(exons, key=_transcript_id)
            grouped = itertools.groupby(exons, key=_transcript_id)
            for tr_id, grp in grouped:
                exon_trees[tr_id] = IntervalTree.from_tuples(
                    (exon.start, exon.end, exon) for exon in grp)
    finally:
        # The file used to be left open; close it once all records are read
        # (or if reading fails).
        gtf.close()

    return cls(transcript_trees, exon_trees)
开发者ID:jrderuiter,项目名称:im-fusion,代码行数:49,代码来源:util.py
示例16: strand_info
def strand_info(self, contig=None, start=None, end=None):
    """
    Return the strand of the first fetched record that has a truthy strand.

    Args:
     contig, start, end: region forwarded unchanged to ``Tabixfile.fetch``
    Returns:
     the ``strand`` value of that record, or "." when no record in the
     region has a truthy strand
    """
    records = pysam.Tabixfile.fetch(
        self.tabixfile, contig, start, end, parser=pysam.asGTF())
    for record in records:
        if record.strand:
            return record.strand
    return "."
开发者ID:soh-i,项目名称:Ivy,代码行数:18,代码来源:gtf.py
示例17: main
def main():
    """Print the transcripts of a GTF file that pass a ``frac`` threshold.

    Command line: ``[--frac FLOAT] gtf_file``. A 'transcript' record is
    printed when its ``frac`` attribute is at least ``--frac`` (default
    0.0); an 'exon' record is printed when its transcript was kept. An exon
    whose transcript id has not been seen on an earlier 'transcript' record
    trips an assertion.
    """
    logging.basicConfig(level=logging.DEBUG)
    parser = argparse.ArgumentParser()
    parser.add_argument('--frac', type=float, default=0.0)
    parser.add_argument('gtf_file')
    args = parser.parse_args()

    all_t_ids = set()  # every transcript id seen so far
    t_ids = set()  # transcript ids that passed the threshold

    # Open via a context manager so the input file is closed on exit.
    with open(args.gtf_file) as gtf_handle:
        for f in pysam.tabix_iterator(gtf_handle, pysam.asGTF()):
            if f.feature == 'transcript':
                t_id = f.transcript_id
                frac = float(f.frac)
                keep = (frac >= args.frac)
                all_t_ids.add(t_id)
                if keep:
                    t_ids.add(t_id)
                    # print() with a single argument behaves the same on
                    # Python 2 and Python 3.
                    print(str(f))
            elif f.feature == 'exon':
                t_id = f.transcript_id
                assert t_id in all_t_ids
                if t_id in t_ids:
                    print(str(f))
开发者ID:balajipandian,项目名称:taco,代码行数:23,代码来源:filter_assembly.py
示例18: readFromFile
def readFromFile(infile):
    """Return all GTF records that ``pysam.tabix_iterator`` yields for *infile*, as a list."""
    records = pysam.tabix_iterator(infile, pysam.asGTF())
    return list(records)
开发者ID:prasoonnema,项目名称:cgat,代码行数:6,代码来源:GTF.py
示例19: iterator
def iterator(infile):
    """Return the ``pysam.tabix_iterator`` over *infile* using the GTF parser."""
    gtf_parser = pysam.asGTF()
    return pysam.tabix_iterator(infile, gtf_parser)
开发者ID:prasoonnema,项目名称:cgat,代码行数:3,代码来源:GTF.py
示例20: helper
def helper(self, thread=0):
    """Run ``load_genes_helper`` on every ``num_threads``-th contig, starting at index *thread*."""
    reader = self.tabix_reader
    assigned_contigs = reader.contigs[thread::self.num_threads]
    for contig in assigned_contigs:
        records = reader.fetch(
            contig, start=0, parser=pysam.asGTF(), multiple_iterators=True)
        load_genes_helper(records, features=self.features)
开发者ID:NCBI-Hackathons,项目名称:NCBI_August_Hackathon_Push_Button_Genomics_Solution,代码行数:4,代码来源:load_genes.py
注:本文中的pysam.asGTF函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论