This article collects typical usage examples of the pysam.asBed function in Python. If you have been wondering what exactly asBed does, how to call it, or what real-world uses look like, the curated examples below should help.
Twenty code examples of the asBed function are shown below, ordered by popularity.
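Before the examples, here is a minimal sketch of what pysam.asBed is for: passed as the parser argument when fetching from a tabix-indexed file, it turns each raw tab-separated line into a proxy object with typed, lazily parsed BED fields (contig, start, end and, where present, name, score, strand). The file name and region below are placeholders, and the file is assumed to be bgzip-compressed and tabix-indexed (e.g. via `tabix -p bed regions.bed.gz`):

import pysam

tbx = pysam.TabixFile("regions.bed.gz")
for bed in tbx.fetch("chr1", 10000, 20000, parser=pysam.asBed()):
    # start/end are ints; name/score/strand are only present if the
    # file actually has those columns
    print(bed.contig, bed.start, bed.end)
tbx.close()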
Example 1: load_gap_intervals
def load_gap_intervals(gap_file):
    if gap_file is None: return []
    logger.info("Loading the gaps in the genome from %s" % gap_file)
    with open(gap_file) as gap_file_fd:
        gap_intervals = [SVInterval(it.contig, it.start, it.end, it.name, "gap") for it in
                         pysam.tabix_file_iterator(gap_file_fd, parser=pysam.asBed())]
    return merge_intervals(gap_intervals)
Author: BioinformaticsArchive, Project: metasv, Lines: 7, Source: vcf_utils.py
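Note that example 1 uses pysam.tabix_file_iterator rather than TabixFile.fetch: the iterator streams an ordinary (uncompressed, unindexed) file object, which is why the gap file can be opened with a plain open(). A hedged sketch of the same pattern in isolation (file name hypothetical):

import pysam

with open("gaps.bed") as handle:  # plain-text BED, no index needed
    for gap in pysam.tabix_file_iterator(handle, parser=pysam.asBed()):
        print(gap.contig, gap.start, gap.end)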
Example 2: _run
def _run(self, _config, temp):
    def keyfunc(bed):
        return (bed.contig, bed.name, bed.start)

    fastafile = pysam.Fastafile(self._reference)
    seqs = collections.defaultdict(list)
    with open(self._intervals) as bedfile:
        intervals = text.parse_lines_by_contig(bedfile, pysam.asBed()).items()

    for (contig, beds) in sorted(intervals):
        beds.sort(key=keyfunc)

        for (gene, gene_beds) in itertools.groupby(beds, lambda x: x.name):
            gene_beds = tuple(gene_beds)

            for bed in gene_beds:
                seqs[(contig, gene)].append(fastafile.fetch(contig, bed.start, bed.end))

            seq = "".join(seqs[(contig, gene)])
            if any((bed.strand == "-") for bed in gene_beds):
                assert all((bed.strand == "-") for bed in gene_beds)
                seq = sequences.reverse_complement(seq)
            seqs[(contig, gene)] = seq

    temp_file = os.path.join(temp, "sequences.fasta")
    with open(temp_file, "w") as out_file:
        for ((_, gene), sequence) in sorted(seqs.items()):
            fasta.print_fasta(gene, sequence, out_file)

    move_file(temp_file, self._outfile)
Author: schae234, Project: pypeline, Lines: 28, Source: genotype.py
Example 3: combineMergedIntervals
def combineMergedIntervals(bedfiles):
    '''combine intervals in a collection of bed files.

    Overlapping intervals between tracks are merged.

    Algorithm:

    1. collect all intervals in all tracks into a single track
    2. merge overlapping intervals
    3. report all intervals that overlap with an interval in each track.
    '''

    # get all intervals
    data_per_contig = collections.defaultdict(list)

    for bedfile in bedfiles:
        for contig in bedfile.contigs:
            i = []
            for bed in bedfile.fetch(contig, parser=pysam.asBed()):
                i.append((bed.start, bed.end))
            data_per_contig[contig].extend(i)

    # merge intervals
    for contig in data_per_contig.keys():
        data_per_contig[contig] = Intervals.combine(data_per_contig[contig])

    # filter intervals - take only those present in all bedfiles
    for contig, data in data_per_contig.iteritems():
        for start, end in data:
            if isContainedInAll(contig, start, end, bedfiles):
                yield contig, start, end
Author: Charlie-George, Project: cgat, Lines: 31, Source: beds2beds.py
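A hedged usage sketch for the generator above (file names hypothetical; Intervals.combine and isContainedInAll come from the surrounding CGAT module and are assumed to be in scope):

import pysam

bedfiles = [pysam.TabixFile(fn) for fn in ("rep1.bed.gz", "rep2.bed.gz")]
for contig, start, end in combineMergedIntervals(bedfiles):
    print("%s\t%i\t%i" % (contig, start, end))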
Example 4: _bed_getter
def _bed_getter(bedfile, contig, start=0, end=None, strand=".", dtype="uint16"):
    '''Get crosslink profiles from tabix indexed bedGraph/Bed'''

    # check that the file contains some data for the requested contig
    if contig not in bedfile.contigs:
        # print "%s not in bedfile" % contig
        return pd.Series(dict(), dtype=dtype)

    # fetch the records from the specified region
    crosslinks = bedfile.fetch(contig, start, end, parser=pysam.asBed())

    profile = dict()
    check_sum = 0

    for base in crosslinks:
        try:
            correct_strand = strand == "." or base.strand == strand
        except AttributeError:
            correct_strand = True

        if correct_strand:
            profile[float(base.start)] = int(base.score)
            check_sum += int(base.score)

    profile = pd.Series(dict(profile), dtype=dtype)

    # if not check_sum == profile.sum():
    #     raise OverflowError("Check sum failed (%i = %i). Possibly counts exceed specified dtype. Use bigger dtype"
    #                         % (check_sum, profile.sum()))

    return profile
Author: sudlab, Project: iCLIPlib, Lines: 33, Source: getters.py
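A hedged usage sketch for _bed_getter (file name hypothetical; the bedGraph must be bgzip-compressed and tabix-indexed):

import pysam

bedfile = pysam.TabixFile("crosslinks.bedGraph.gz")
profile = _bed_getter(bedfile, "chr1", start=0, end=100000, strand="+")
print(profile.sum())  # total crosslink count in the window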
Example 5: fetchProbeFragments
def fetchProbeFragments(probe_bed, digest_bed, outfile,
                        lookup_out):
    digest_fragments = pysam.TabixFile(digest_bed)
    bed = Bed.Bed()

    with IOTools.openFile(outfile, "w") as outf, \
         IOTools.openFile(lookup_out, "w") as lookup:

        lookup.write("probe\tfragment\n")

        for probe in Bed.iterator(IOTools.openFile(probe_bed)):
            frag = digest_fragments.fetch(probe.contig,
                                          probe.start,
                                          probe.end,
                                          parser=pysam.asBed())
            frag = list(frag)
            if not len(frag) == 1:
                E.warn("%i fragments found for probe %s, skipping" %
                       (len(frag), probe.name))
                continue

            frag = frag[0]
            bed.start = frag.start
            bed.end = frag.end
            bed.contig = frag.contig
            bed["name"] = probe.name
            bed["score"] = "."
            bed["strand"] = "+"

            lookup.write("%s\t%s\n" % (probe.name, frag.name))
            outf.write(str(bed) + "\n")
Author: sudlab, Project: Capture_C, Lines: 31, Source: PipelineCaptureC.py
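A hedged invocation sketch (all paths hypothetical; the digest file must be bgzip-compressed and tabix-indexed so pysam.TabixFile can open it):

fetchProbeFragments("probes.bed", "digest_fragments.bed.gz",
                    "probe_fragments.bed", "probe_to_fragment.tsv")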
Example 6: getProbeFragments
def getProbeFragments(probe_bed, digest_bed, outfile,
                      lookup_out):

    # First find the length of the restriction enzyme cut, required to
    # obtain the start and end coordinates from the pregenerated file.
    # First iteration, no comparison
    first_iteration = True

    length_RE_cut = 0
    last_bed = None

    for bed_digest in Bed.iterator(IOTools.openFile(digest_bed)):
        if first_iteration:
            first_iteration = False
        else:
            # If they are in the same contig they can be compared
            if bed_digest.contig == last_bed.contig:
                length_RE_cut = bed_digest.start - last_bed.end
                break
        last_bed = bed_digest

    digest_fragments = pysam.TabixFile(digest_bed)
    bed = Bed.Bed()

    with IOTools.openFile(outfile, "w") as outf, \
         IOTools.openFile(lookup_out, "w") as lookup:

        lookup.write("probe\tfragment\n")

        for probe in Bed.iterator(IOTools.openFile(probe_bed)):
            frag = digest_fragments.fetch(probe.contig,
                                          probe.start,
                                          probe.end,
                                          parser=pysam.asBed())
            frag = list(frag)
            if not len(frag) == 1:
                E.warn("%i fragments found for probe %s, skipping" %
                       (len(frag), probe.name))
                continue

            frag = frag[0]

            # The restriction enzyme cut on the left side of the fragment
            # is the end site of the last restriction enzyme fragment + 1
            # (+1 because, according to the manual, coordinates are specified
            # in 1-origin for the bed start.)
            bed.start = frag.start - length_RE_cut + 1
            bed.end = frag.end + length_RE_cut
            bed.contig = frag.contig
            bed["name"] = probe.name
            bed["score"] = "."
            bed["strand"] = "+"

            lookup.write("%s\t%s\n" % (probe.name, frag.name))
            outf.write(str(bed) + "\n")
Author: sudlab, Project: pipeline_capt_c_perl, Lines: 59, Source: pipelineCaptCPerl.py
Example 7: __init__
def __init__(self, file_path):
    file_path = str(file_path)

    if not file_path.endswith('.gz'):
        if os.path.exists(file_path + '.gz'):
            file_path += '.gz'
        else:
            file_path = self.compress(file_path)

    super().__init__(file_path, parser=pysam.asBed())
Author: jrderuiter, Project: ngs-tk, Lines: 9, Source: tabix.py
Example 8: testRead
def testRead(self):
    for x, r in enumerate(self.tabix.fetch(parser=pysam.asBed())):
        c = self.compare[x]
        self.assertEqual("\t".join(c), str(r))
        self.assertEqual(list(c), list(r))
        self.assertEqual(c[0], r.contig)
        self.assertEqual(int(c[1]), r.start)
        self.assertEqual(int(c[2]), r.end)
Author: RLuisier, Project: RSeQC, Lines: 9, Source: tabix_test.py
Example 9: _collect_and_validate_regions
def _collect_and_validate_regions(regions):
    contigs = _collect_fasta_contigs(regions)
    parser = pysam.asBed()
    sequences = set()
    with open(regions["BED"]) as bedhandle:
        for (line_num, line) in enumerate(bedhandle):
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            try:
                bed = parser(line, len(line))
                # Force evaluation of (lazily parsed) properties
                bed_start = bed.start
                bed_end = bed.end
            except ValueError, error:
                raise MakefileError(("Error parsing line %i in regions file:\n"
                                     "  Path = %r\n  Line = %r\n\n%s")
                                    % (line_num + 1, regions["BED"],
                                       line, error))

            if len(bed) < 6:
                url = "http://genome.ucsc.edu/FAQ/FAQformat.html#format1"
                name = repr(bed.name) if len(bed) > 3 else "unnamed record"
                raise MakefileError(("Region at line #%i (%s) does not "
                                     "contain the expected number of fields; "
                                     "the first 6 fields are required. C.f. "
                                     "definition at\n   %s\n\nPath = %r")
                                    % (line_num, name, url, regions["BED"]))

            contig_len = contigs.get(bed.contig)
            if contig_len is None:
                raise MakefileError(("Regions file contains contig not found "
                                     "in reference:\n  Path = %r\n  Contig = "
                                     "%r\n\nPlease ensure that all contig "
                                     "names match the reference names!")
                                    % (regions["BED"], bed.contig))
            elif not (0 <= int(bed_start) < int(bed_end) <= contig_len):
                raise MakefileError(("Regions file contains invalid region:\n"
                                     "  Path = %r\n  Contig = %r\n"
                                     "  Start = %s\n  End = %s\n\n"
                                     "Expected 0 <= Start < End <= %i!")
                                    % (regions["BED"], bed.contig, bed.start,
                                       bed.end, contig_len))
            elif bed.strand not in "+-":
                raise MakefileError(("Regions file contains invalid region: "
                                     "  Path = %r\n  Line = %i\n  Name = %r"
                                     "\nStrand is %r, expected '+' or '-'.")
                                    % (regions["BED"], line_num, bed.name,
                                       bed.strand))

            sequences.add(bed.name)
Author: CarlesV, Project: paleomix, Lines: 52, Source: makefile.py
Example 10: read_bed_records
def read_bed_records(filename):
    """Reads a BED file (i.e. for a set of regions of interest), and returns
    a list of records (one per line), each exposing the contig name, the
    start position, and the end position."""
    regions = []
    bed_parser = pysam.asBed()
    with open(filename) as bed_file:
        for line in bed_file:
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            regions.append(bed_parser(line, len(line)))

    return regions
Author: MikkelSchubert, Project: paleomix, Lines: 13, Source: summarize_heterozygosity.py
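This example illustrates the second common pattern with pysam.asBed: instead of passing the parser to fetch, the parser object returned by pysam.asBed() is called directly with a raw line and its length, yielding the same kind of proxy record. A hedged usage sketch (file name hypothetical):

for record in read_bed_records("targets.bed"):
    print(record.contig, record.start, record.end)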
Example 11: combineUnmergedIntervals
def combineUnmergedIntervals(foreground, background):
    '''combine intervals in a collection of bed files.

    Only intervals in the first track are reported.

    Algorithm:

    1. report all intervals in the first track that overlap with an
       interval in every other track.
    '''

    intervals = []
    c = 0
    for bed in foreground.fetch(parser=pysam.asBed()):
        c += 1
        if isContainedInAll(bed.contig, bed.start, bed.end, background):
            yield bed
Author: Charlie-George, Project: cgat, Lines: 16, Source: beds2beds.py
Example 12: testWrite
def testWrite(self):
    for x, r in enumerate(self.tabix.fetch(parser=pysam.asBed())):
        c = self.compare[x]
        self.assertEqual(c, str(r).split("\t"))
        self.assertEqual(list(c), list(r))

        r.contig = "test"
        self.assertEqual("test", r.contig)
        self.assertEqual("test", r[0])

        r.start += 1
        self.assertEqual(int(c[1]) + 1, r.start)
        self.assertEqual(str(int(c[1]) + 1), r[1])

        r.end += 1
        self.assertEqual(int(c[2]) + 1, r.end)
        self.assertEqual(str(int(c[2]) + 1), r[2])
Author: humburg, Project: pysam, Lines: 18, Source: tabix_test.py
Example 13: read_bed_file
def read_bed_file(filename):
    """Parses a (gzip/bzip2 compressed) BED file, and yields
    a sequence of records. Comments and empty lines are skipped."""
    handle = None
    try:
        handle = fileutils.open_ro(filename)
        parser = pysam.asBed()

        for record in text.parse_lines(handle, parser):
            # Force evaluation of (lazily parsed) properties
            _ = record.start
            _ = record.end

            yield record
    finally:
        if handle:
            handle.close()
Author: CarlesV, Project: paleomix, Lines: 18, Source: bedtools.py
Example 14: _get_hits
def _get_hits(coords, annotation, parser_type):
    """Retrieve BED information, recovering if the BED annotation file does
    not have a chromosome.
    """
    if parser_type == "bed":
        parser = pysam.asBed()
    elif parser_type == "vcf":
        parser = pysam.asVCF()
    elif parser_type == "tuple":
        parser = pysam.asTuple()
    elif parser_type is None:
        parser = None
    else:
        raise ValueError("Unexpected parser type: %s" % parser_type)

    chrom, start, end = coords
    try:
        hit_iter = annotation.fetch(str(chrom), start, end, parser=parser)
    # catch invalid region errors raised by ctabix
    except ValueError:
        hit_iter = []

    return hit_iter
Author: egafni, Project: gemini, Lines: 20, Source: annotations.py
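A hedged usage sketch for _get_hits (file name hypothetical; the annotation file must be bgzip-compressed and tabix-indexed, and printing hit.name assumes a BED file with at least four columns):

import pysam

annotation = pysam.TabixFile("annotations.bed.gz")
for hit in _get_hits(("chr1", 1000000, 1100000), annotation, "bed"):
    print(hit.contig, hit.start, hit.end, hit.name)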
Example 15: _run
def _run(self, _config, temp):
    def _by_name(bed):
        return bed.name

    fastafile = pysam.Fastafile(self._reference)
    seqs = collections.defaultdict(list)
    with open(self._bedfile) as bedfile:
        bedrecords = text.parse_lines_by_contig(bedfile, pysam.asBed())

    for (contig, beds) in sorted(bedrecords.iteritems()):
        beds.sort(key=lambda bed: (bed.contig, bed.name, bed.start))

        for (gene, gene_beds) in itertools.groupby(beds, _by_name):
            gene_beds = tuple(gene_beds)
            sequence = self._collect_sequence(fastafile, gene_beds)
            seqs[(contig, gene)] = sequence

    temp_file = os.path.join(temp, "sequences.fasta")
    with open(temp_file, "w") as out_file:
        for ((_, gene), sequence) in sorted(seqs.items()):
            FASTA(gene, None, sequence).write(out_file)

    fileutils.move_file(temp_file, self._outfile)
Author: beeso018, Project: paleomix, Lines: 22, Source: sequences.py
Example 16: main
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("--genotype", help="Tabix indexed pileup file.",
                        required=True)
    parser.add_argument("--intervals", help="BED file.", required=True)
    parser.add_argument("--padding", type=int, default=10,
                        help="Number of bases to expand intervals, when "
                             "filtering based on adjacent indels "
                             "[%(default)s]")
    parser.add_argument("--min-distance-to-indels", type=int, default=5,
                        help="Variants closer than this distance from indels "
                             "are filtered [%(default)s].")
    args = parser.parse_args(argv)

    genotype = pysam.Tabixfile(args.genotype)
    with open(args.intervals) as bed_file:
        intervals = text.parse_lines_by_contig(bed_file, pysam.asBed())

    for (_, beds) in sorted(intervals.items()):
        for (name, sequence) in build_genes(args, genotype, beds):
            FASTA(name, None, sequence).write(sys.stdout)

    return 0
Author: CarlesV, Project: paleomix, Lines: 22, Source: sample_pileup.py
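A hedged command-line sketch for the script above (paths hypothetical; the pileup must be bgzip-compressed and tabix-indexed):

python sample_pileup.py --genotype sample.pileup.gz --intervals genes.bed > genes.fasta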
Example 17: _stat_areas_of_interest
def _stat_areas_of_interest(cls, prefixes):
    """Returns (size, number of named intervals, total number of intervals)
    for a set of areas of interest."""
    areas_of_interest = {}
    for (prefix_name, prefix) in prefixes.iteritems():
        prefix_label = prefix.get("Label", prefix_name)
        for (roi_name, roi_filename) in prefix.get("RegionsOfInterest", {}).iteritems():
            count, names, size = 0, set(), 0
            with open(roi_filename) as handle:
                parser = pysam.asBed()
                for line in handle:
                    bed = parser(line, len(line))
                    names.add(bed.name if len(bed) >= 4 else (bed.contig + "*"))
                    size += (bed.end - bed.start)
                    count += 1

            areas_of_interest[(prefix_name, roi_name)] = {"Size": size,
                                                          "NFeatures": len(names),
                                                          "NIntervals": count,
                                                          "Genome": prefix["Name"],
                                                          "Name": roi_name,
                                                          "Label": "%s:%s" % (prefix_label, roi_name),
                                                          "Path": roi_filename}
    return areas_of_interest
Author: CarlesV, Project: paleomix, Lines: 23, Source: summary.py
Example 18: read_intervals
def read_intervals(filename):
    with open(filename) as bed_file:
        intervals = text.parse_lines_by_contig(bed_file, pysam.asBed())

    for (key, beds) in intervals.iteritems():
        bed_tuples = []
        for bed in beds:
            if len(bed) < 6:
                sys.stderr.write(("ERROR: Invalid BED record '%s', must "
                                  "have at least 6 fields ...\n") %
                                 ("\\t".join(bed),))
                return None

            # Transform to a named tuple, as Pysam has a tendency to
            # segfault if you do anything wrong
            bed = list(bed)[:6]   # BED6 only
            bed[1] = int(bed[1])  # start
            bed[2] = int(bed[2])  # end
            bed[4] = int(bed[4])  # score
            bed_tuples.append(BEDTuple(*bed))

        intervals[key] = bed_tuples

    return intervals
Author: health1987, Project: paleomix, Lines: 24, Source: vcf_to_fasta.py
Example 19: iterate_file_uncompressed
def iterate_file_uncompressed(fn):
    with open(fn) as f:
        return len(list(pysam.tabix_file_iterator(f, parser=pysam.asBed())))
Author: msto, Project: pysam, Lines: 3, Source: tabix_bench.py
Example 20: iterate_parsed_compressed
def iterate_parsed_compressed(fn):
    with gzip.open(fn) as f:
        return len(list(pysam.tabix_iterator(f, parser=pysam.asBed())))
Author: msto, Project: pysam, Lines: 3, Source: tabix_bench.py
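A hedged driver for the two benchmark helpers above (file names hypothetical; the compressed variant expects a gzip file, the uncompressed one a plain-text BED):

import timeit

print(timeit.timeit(lambda: iterate_file_uncompressed("windows.bed"), number=3))
print(timeit.timeit(lambda: iterate_parsed_compressed("windows.bed.gz"), number=3))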
Note: the pysam.asBed examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation hosting platforms. The snippets are drawn from open-source projects contributed by many developers; copyright remains with the original authors, and redistribution and use are subject to each project's license. Do not repost without permission.