本文整理汇总了Python中pysam.idxstats函数的典型用法代码示例。如果您正苦于以下问题:Python idxstats函数的具体用法?Python idxstats怎么用?Python idxstats使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了idxstats函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: testDoubleCalling
def testDoubleCalling(self):
# The following would fail if there is an
# issue with stdout being improperly caught.
retvals = pysam.idxstats(
os.path.join(DATADIR, "ex1.bam"))
retvals = pysam.idxstats(
os.path.join(DATADIR, "ex1.bam"))
开发者ID:grayshiwb512,项目名称:pysam,代码行数:7,代码来源:samtools_test.py
示例2: bam_total_reads
def bam_total_reads(bam_handle, chroms_to_ignore):
"""Count the total number of mapped reads in a BAM file, filtering
the chromosome given in chroms_to_ignore list
"""
if chroms_to_ignore:
import pysam
lines = pysam.idxstats(bam_handle.filename)
lines = toString(lines)
if type(lines) is str:
lines = lines.strip().split('\n')
if len(lines) == 0:
# check if this is a test running under nose
# in which case it will fail.
if len([val for val in sys.modules.keys() if val.find("nose") >= 0]):
sys.stderr.write("To run this code inside a test use disable "
"output buffering `nosetest -s`\n".format(bam_handle.filename))
else:
sys.stderr.write("Error running idxstats on {}\n".format(bam_handle.filename))
tot_mapped_reads = 0
for line in lines:
chrom, _len, nmapped, _nunmapped = line.split('\t')
if chrom not in chroms_to_ignore:
tot_mapped_reads += int(nmapped)
else:
tot_mapped_reads = bam_handle.mapped
return tot_mapped_reads
开发者ID:venuthatikonda,项目名称:deepTools,代码行数:29,代码来源:utilities.py
示例3: bam_blacklisted_reads
def bam_blacklisted_reads(bam_handle, chroms_to_ignore, blackListFileName=None, numberOfProcessors=1):
blacklisted = 0
if blackListFileName is None:
return blacklisted
# Get the chromosome lengths
chromLens = {}
lines = pysam.idxstats(bam_handle.filename)
lines = toString(lines)
if type(lines) is str:
lines = lines.strip().split('\n')
for line in lines:
chrom, _len, nmapped, _nunmapped = line.split('\t')
chromLens[chrom] = int(_len)
bl = GTF(blackListFileName)
regions = []
for chrom in bl.chroms:
if (not chroms_to_ignore or chrom not in chroms_to_ignore) and chrom in chromLens:
for reg in bl.findOverlaps(chrom, 0, chromLens[chrom]):
regions.append([bam_handle.filename, chrom, reg[0], reg[1]])
if len(regions) > 0:
import multiprocessing
if len(regions) > 1 and numberOfProcessors > 1:
pool = multiprocessing.Pool(numberOfProcessors)
res = pool.map_async(bam_blacklisted_worker, regions).get(9999999)
else:
res = [bam_blacklisted_worker(x) for x in regions]
for val in res:
blacklisted += val
return blacklisted
开发者ID:venuthatikonda,项目名称:deepTools,代码行数:33,代码来源:utilities.py
示例4: getNumReads
def getNumReads(bamfile):
'''count number of reads in bam file.
This methods works through pysam.idxstats.
Arguments
---------
bamfile : string
Filename of :term:`bam` formatted file. The file needs
to be indexed.
Returns
-------
nreads : int
Number of reads
'''
lines = pysam.idxstats(bamfile).splitlines()
try:
nreads = sum(
map(int, [x.split("\t")[2]
for x in lines if not x.startswith("#")]))
except IndexError, msg:
raise IndexError(
"can't get number of reads from bamfile, msg=%s, data=%s" %
(msg, lines))
开发者ID:sudlab,项目名称:cgat,代码行数:27,代码来源:BamTools.py
示例5: _setup
def _setup(self, config, temp):
with open(os.path.join(temp, "contigs.table"), "w") as handle:
handle.write("ID\tSize\tNs\tHits\n")
# Workaround for pysam < 0.9 returning list, >= 0.9 returning str
for line in "".join(pysam.idxstats(self._input_file)).split('\n'):
line = line.strip()
if not line:
continue
name, size, hits, _ = line.split('\t')
name = contig_name_to_plink_name(name)
if name is None or not (name.isdigit() or name == 'X'):
continue
if int(size) != self._contigs[name]['Size']:
raise NodeError("TODO: size mismatch")
row = {
'ID': name,
'Size': self._contigs[name]['Size'],
'Ns': self._contigs[name]['Ns'],
'Hits': hits,
}
handle.write('{ID}\t{Size}\t{Ns}\t{Hits}\n'.format(**row))
CommandNode._setup(self, config, temp)
开发者ID:muslih14,项目名称:paleomix,代码行数:28,代码来源:nuclear.py
示例6: print_sex
def print_sex(bam):
"""
Print sex based on chr x ratio
Args:
bam (str): Path to bam file
"""
idxstats = pysam.idxstats(bam)
chr_ratio = []
# Calculate read / chromosome length ratio per chromosome
for chr in idxstats[0:24]:
chr = chr.strip("\n").split("\t")
chr_length = float(chr[1])
chr_mapped = float(chr[2])
ratio = chr_mapped / chr_length
chr_ratio.append(ratio)
chr_ratio_std = numpy.std(chr_ratio)
chr_ratio_mean = numpy.mean(chr_ratio)
chr_x = idxstats[22].strip("\n").split("\t")
chr_x_ratio = float(chr_x[2]) / float(chr_x[1])
if (chr_x_ratio > chr_ratio_mean - (2 * chr_ratio_std)) and (chr_x_ratio < chr_ratio_mean + (2 * chr_ratio_std)):
print "female"
elif chr_x_ratio < chr_ratio_mean - (2 * chr_ratio_std):
print "male"
else:
print "unkown"
开发者ID:CuppenResearch,项目名称:IAP,代码行数:30,代码来源:determine_sex.py
示例7: main
def main(argv):
fullname = os.path.abspath(argv[1])
bamfile = pysam.Samfile(fullname)
header = bamfile.header
fullname_s = fullname.split("/")
# Populate info dict
info = dict()
info['idsequencing'] = fullname_s[-2].split("seq")[1]
info['filename'] = os.path.basename(fullname)
info['aligner_index'] = fullname_s[-4]
bam_datetime = b = datetime.datetime.fromtimestamp(os.stat(fullname).st_mtime)
info['align_datetime'] = bam_datetime.strftime("%Y-%m-%d %H:%M:%S")
info['aligner'] = header['PG'][0]['PN']
info['command'] = header['PG'][0]['cl']
# Compute total number of aligned reads
stats = pysam.idxstats(fullname)
stats = [el.split("\t") for el in stats]
total_reads = 0
for el in stats:
total_reads += int(el[2])
info['total_reads'] = total_reads
# Format dict entries for MySQL
for i in info.iterkeys():
info[i] = "'" + str(info[i]) + "'"
## Connect to db
try:
conn = mdb.connect('localhost', 'brad', 'Eu23ler1', 'sample_db')
cur = conn.cursor()
except mdb.Error, e:
print "MySQLdb error %d: %s " % (e.args[0] + e.args[1])
开发者ID:bradleycolquitt,项目名称:seqAnalysis,代码行数:35,代码来源:alignment2db.py
示例8: test_idxstats_parse
def test_idxstats_parse():
bam_filename = "./pysam_data/ex2.bam"
idxstats_string = pysam.idxstats(bam_filename, split_lines=False) # Test pysam 0.9.X style output, which returns a string that needs to be split by \n
lines = idxstats_string.splitlines()
for line in lines:
splt = line.split("\t")
_seqname, _seqlen, nmapped, _nunmapped = splt
开发者ID:Bratdaking,项目名称:pysam,代码行数:7,代码来源:test_samtools_python.py
示例9: bamStats
def bamStats(bamfile):
""" Extract average depths + idxstats data from BAM file, return data frame """
istats = pysam.idxstats(bamfile)
result = []
samfile = pysam.Samfile(bamfile, "rb")
for x in istats:
xs = x.replace("\n", "").split("\t")
rec = {
"CHROM": xs[0],
"NT": int(xs[1]),
"MAPPED": int(xs[2]),
"UNMAPPED": int(xs[3]),
"READLEN": 0,
"COVERAGE": 0.0,
}
count = 0
rls = 0.0
try:
for read in samfile.fetch(xs[0]):
rls += float(read.rlen)
count += 1
if count > 10000:
break
rls /= count
rec["READLEN"] = rls
rec["COVERAGE"] = float(rec["MAPPED"] * rec["READLEN"])/float(rec["NT"])
except:
pass
result.append(rec)
if result:
return pandas.DataFrame(result, columns=["CHROM", "NT", "MAPPED", "UNMAPPED", "READLEN", "COVERAGE"])
else:
return pandas.DataFrame(columns=["CHROM", "NT", "MAPPED", "UNMAPPED", "READLEN", "COVERAGE"])
开发者ID:goranrakocevic,项目名称:hap.py,代码行数:35,代码来源:bamstats.py
示例10: get_num_reads
def get_num_reads(filename):
num_reads = 0
try:
num_reads = reduce(lambda x, y: x + y,
[eval('+'.join(l.rstrip('\n').split('\t')[2:])) for l in pysam.idxstats(filename)])
except:
sys.stderr.write("Unable to count reads in file: %s" % filename)
return num_reads
开发者ID:nebiolabs,项目名称:equalize_bam_reads,代码行数:8,代码来源:equalize_bam_files.py
示例11: testReturnValueString
def testReturnValueString(self):
retval = pysam.idxstats(os.path.join(BAM_DATADIR, "ex1.bam"))
if IS_PYTHON3:
self.assertFalse(isinstance(retval, bytes))
self.assertTrue(isinstance(retval, str))
else:
self.assertTrue(isinstance(retval, bytes))
self.assertTrue(isinstance(retval, basestring))
开发者ID:msto,项目名称:pysam,代码行数:8,代码来源:samtools_test.py
示例12: test_idxstats_parse
def test_idxstats_parse():
bam_filename = os.path.join(BAM_DATADIR, "ex2.bam")
# Test pysam 0.9.X style output, which returns a string that needs to be split by \n
idxstats_string = pysam.idxstats(bam_filename, split_lines=False)
lines = idxstats_string.splitlines()
for line in lines:
splt = line.split("\t")
_seqname, _seqlen, nmapped, _nunmapped = splt
开发者ID:msto,项目名称:pysam,代码行数:8,代码来源:test_samtools_python.py
示例13: calculate_samples
def calculate_samples(bamfile, count, ref_prefix=None):
refcounts = dict()
for s in pysam.idxstats(bamfile).split("\n"):
tok = s.rstrip().split("\t")
if ref_prefix is not None and tok[0].startswith(ref_prefix) == False:
continue
refcounts[tok[0]] = int(tok[2])
coef = (count * 1.0) / sum(refcounts.values())
return dict((k, (v, int(np.round(v * coef)))) for k, v in refcounts.items())
开发者ID:brianpenghe,项目名称:bamsam-scripts,代码行数:9,代码来源:resample_bam.py
示例14: bam_total_reads
def bam_total_reads(bam_fname):
"""Count the total number of mapped reads in a BAM file.
Uses the BAM index to do this quickly.
"""
lines = pysam.idxstats(bam_fname)
tot_mapped_reads = 0
for line in lines:
_seqname, _seqlen, nmapped, _nunmapped = line.split()
tot_mapped_reads += int(nmapped)
return tot_mapped_reads
开发者ID:roryk,项目名称:cnvkit,代码行数:11,代码来源:coverage.py
示例15: idxstats
def idxstats(bam_fname, drop_unmapped=False):
"""Get chromosome names, lengths, and number of mapped/unmapped reads.
Use the BAM index (.bai) to get the number of reads and size of each
chromosome. Contigs with no mapped reads are skipped.
"""
handle = StringIO(pysam.idxstats(bam_fname, split_lines=False))
table = pd.read_table(handle, header=None,
names=['chromosome', 'length', 'mapped', 'unmapped'])
if drop_unmapped:
table = table[table.mapped != 0].drop('unmapped', axis=1)
return table
开发者ID:JimmyLiJing,项目名称:cnvkit,代码行数:12,代码来源:samutil.py
示例16: CountRandom
def CountRandom(BamFile):
samIdxStats = pysam.idxstats(BamFile)
samfile = pysam.Samfile(BamFile,"rb")
TotalMapped = samfile.mapped
samfile.close()
countAlign = 0
List = GetChromoList()
for stat in samIdxStats:
if stat.split()[0] in List:
MappedforChromosome = stat.split()[2]
countAlign = countAlign+long(MappedforChromosome)
RandAlign = TotalMapped-countAlign
return [BamFile,{"Random":RandAlign}]
开发者ID:ThomasCarroll,项目名称:ChIPSeqQC,代码行数:13,代码来源:CRIBamRun3.py
示例17: _init_read_number
def _init_read_number(self, bamFile):
"""Compute number of reads and number of mapped reads for CoverageSet"""
# XXX ToDo add number of mapped reads in all cases
# try:
from distutils.version import LooseVersion
if LooseVersion("0.9.0") <= LooseVersion(pysam.__version__):
a = pysam.idxstats(bamFile)
mapped_reads = sum([int(el.split('\t')[2]) for el in a.split('\n')[:len(a.split('\n'))-1]])
unmapped_read = sum([int(el.split('\t')[3]) for el in a.split('\n')[:len(a.split('\n'))-1]])
self.reads = mapped_reads + unmapped_read
self.mapped_reads = mapped_reads
else:
self.reads = reduce(lambda x, y: x + y, [ eval('+'.join(l.rstrip('\n').split('\t')[2:]) ) for l in pysam.idxstats(bamFile)])
self.mapped_reads = None
开发者ID:eggduzao,项目名称:reg-gen,代码行数:14,代码来源:CoverageSet.py
示例18: _validate_mito_bam
def _validate_mito_bam(data, handle, info):
if data.mitochondria is None:
# No mitochondrial data .. skip phylogeny
return True
references = handle.references
min_length = min((len(record.sequence))
for record in data.mitochondria.itervalues())
for bam_contig, bam_length in zip(references, handle.lengths):
if bam_contig not in data.mitochondria:
continue
db_sequence = data.mitochondria[bam_contig].sequence
db_length = len(db_sequence) - db_sequence.count("-")
if bam_length != db_length:
print_err("ERROR: Length of mitochondrial contig %r (%i bp) "
"does not match the length of the corresponding "
"sequence in the database (%i bp)"
% (bam_contig, bam_length, db_length))
return False
if not os.path.exists(handle.filename + '.bai') \
and not os.path.exists(swap_ext(handle.filename, '.bai')):
print_info(' - Attempting to index BAM file %r!'
% (handle.filename,))
pysam.index(handle.filename)
# Workaround for pysam < 0.9 returning list, >= 0.9 returning str
for line in "".join(pysam.idxstats(handle.filename)).split('\n'):
line = line.strip()
if not line:
continue
name, _, hits, _ = line.split('\t')
if (name == bam_contig) and not int(hits):
print_err("WARNING: Mitochondrial BAM (%r) does not contain "
"any reads aligned to contig %r; inferring an "
"phylogeny is not possible."
% (handle.filename, name))
return True
info.mt_contig = bam_contig
info.mt_length = bam_length
info.mt_padding = len(db_sequence) - min_length
return True
return True
开发者ID:MikkelSchubert,项目名称:paleomix,代码行数:49,代码来源:database.py
示例19: get_chrom_lengths
def get_chrom_lengths(path_to_bam):
'''
Uses pysam to retrieve chromosome sizes form bam.
Useful helper to use with some pybedtools functions (e.g. coverage), when a bam was mapped with custom genome not available in UCSC.
Input: path to bam file (should be indexed)
Output: dictionary.
Example output:
{'chr4': (0, 1351857), 'chr3L': (0, 24543557), 'chr2L': (0, 23011544), '*': (0, 0), 'chrX': (0, 22422827), 'chr2R': (0, 21146708), 'chr3R': (0, 27905053)}
'''
idx = pysam.idxstats(path_to_bam).splitlines()
chromsizes = {}
for element in idx:
stats = element.split("\t")
chromsizes[stats[0]] = (0, int(stats[1]))
return chromsizes
开发者ID:adomingues,项目名称:NGSpipe2go,代码行数:15,代码来源:piRNABaseTerminalBases.py
示例20: verify_chrom_in_paths
def verify_chrom_in_paths(genome_path, bamfile1, bamfile2, chrom_sizes):
"""Check whether the chromsome info overlap in bamfiles, genome path and chrom size path"""
chrom_bams = set()
chrom_genome = set()
chrom_chrom_sizes = set()
#check bam files
try:
if pysam.__version__ == '0.9.0':
chrom_bams_1 = set([el.split('\t')[0] for el in pysam.idxstats(bamfile1).split('\n')[:len(pysam.idxstats(bamfile1).split('\n'))-1]])
chrom_bams_2 = set([el.split('\t')[0] for el in pysam.idxstats(bamfile2).split('\n')[:len(pysam.idxstats(bamfile2).split('\n'))-1]])
else:
chrom_bams_1 = set(map(lambda x: x.split('\t')[0], pysam.idxstats(bamfile1)))
chrom_bams_2 = set(map(lambda x: x.split('\t')[0], pysam.idxstats(bamfile2)))
except:
return True
chrom_bams = chrom_bams_1 & chrom_bams_2
#check chrom_sizes
with open(chrom_sizes) as f:
for line in f:
line = line.split('\t')
if line[0] not in chrom_chrom_sizes:
chrom_chrom_sizes.add(line[0])
tmp = chrom_bams & chrom_chrom_sizes
if len(tmp) == 0:
return False
#check genome
for s in FastaReader(genome_path):
if s.name not in chrom_genome:
chrom_genome.add(s.name)
if s.name in tmp: #one overlap is sufficient
return True
return len(chrom_bams & chrom_genome & chrom_chrom_sizes) >= 1
开发者ID:Marvin84,项目名称:reg-gen,代码行数:36,代码来源:dpc_help.py
注:本文中的pysam.idxstats函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论