本文整理汇总了Python中pysam.sort函数的典型用法代码示例。如果您正苦于以下问题:Python sort函数的具体用法?Python sort怎么用?Python sort使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了sort函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: map_paired_reads
def map_paired_reads(pe1_path, pe2_path, genome_path, output_path):
work_dir = tempfile.mkdtemp()
genome_db = os.path.join(work_dir, "genome")
pe1_output = os.path.join(work_dir, "pe1.sai")
pe2_output = os.path.join(work_dir, "pe2.sai")
bwa_output = os.path.join(work_dir, "output.sam")
null = open("/dev/null")
subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=null)
with open(pe1_output, "w") as pe1_file:
subprocess.check_call([ "bwa", "aln", genome_db, pe1_path ], stdout=pe1_file, stderr=null)
with open(pe2_output, "w") as pe2_file:
subprocess.check_call([ "bwa", "aln", genome_db, pe2_path ], stdout=pe2_file, stderr=null)
with open(bwa_output, "w") as bwa_file:
subprocess.check_call([ "bwa", "sampe",
"-r", "@RG\tID:ILLUMINA\tSM:48_2\tPL:ILLUMINA\tLB:LIB1",
genome_db,
pe1_output, pe2_output,
pe1_path, pe2_path ], stdout=bwa_file, stderr=null)
sam_to_bam(bwa_output, bwa_output + ".bam")
pysam.sort(bwa_output + ".bam", output_path)
pysam.index(output_path + '.bam')
开发者ID:tnoorasyikin,项目名称:BESST,代码行数:25,代码来源:reads_to_ctg_map.py
示例2: convertSortAlign
def convertSortAlign(output_filename):
# Pregenerate file names for all the intermediate steps (output_filename is the output of the Bowtie2 alignment)
# Note that the file extension is not always given depending on the input conventions of the tool being called
sam_filename=output_filename+'.sam'
bam_filename=output_filename+'.bam'
sorted_filename_input=output_filename+'_sorted'
sorted_filename_output=output_filename+'_sorted.bam'
# convert sam to bam
print 'Converting {0} to {1} . . .'.format(sam_filename,bam_filename)
try:
SamtoBam(sam_filename,bam_filename)
except Exception as ex:
print "Error converting sam to bam ({0}): {1}".format(ex.errno, ex.strerror)
return False
# sort
print 'Sorting {0} -> {1}'.format(bam_filename,sorted_filename_output)
try:
pysam.sort(bam_filename,sorted_filename_input)
except Exception as ex:
print "Error sorting bam file ({0}): {1}".format(ex.errno, ex.strerror)
return False
# index
print 'Indexing {0} . . .'.format(sorted_filename_output)
try:
pysam.index(sorted_filename_output)
except Exception as ex:
print "Error indexing bam file ({0}): {1}".format(ex.errno, ex.strerror)
return False
print
print 'Done'
return True
开发者ID:phageghost,项目名称:align-sanger,代码行数:35,代码来源:alignsanger.py
示例3: map_paired_reads
def map_paired_reads(pe1_path, pe2_path, genome_path, output_path, args):
work_dir = tempfile.mkdtemp( )
genome_db = os.path.join( work_dir, "genome" )
pe1_output = os.path.join( work_dir, "pe1.sai" )
pe2_output = os.path.join( work_dir, "pe2.sai" )
bwa_output = os.path.join( work_dir, "output.sam" )
null = open( "/dev/null" ) #open("/tmp/bwa_out")#
subprocess.check_call( [ "bwa", "index", "-p", genome_db, genome_path ], stderr = null )
with open( pe1_output, "w" ) as pe1_file:
subprocess.check_call( [ "bwa", "aln", genome_db, pe1_path ], stdout = pe1_file, stderr = null )
with open( pe2_output, "w" ) as pe2_file:
subprocess.check_call( [ "bwa", "aln", genome_db, pe2_path ], stdout = pe2_file, stderr = null )
with open( bwa_output, "w" ) as bwa_file:
subprocess.check_call( [ "bwa", "sampe",
"-r", "@RG\tID:ILLUMINA\tSM:48_2\tPL:ILLUMINA\tLB:LIB1",
genome_db,
pe1_output, pe2_output,
pe1_path, pe2_path ], stdout = bwa_file, stderr = null )
if args.sam:
shutil.move(bwa_output ,output_path+'.sam')
#os.rename(bwa_output ,output_path+'.sam')
else:
sam_to_bam( bwa_output, bwa_output + ".bam" )
if args.sort:
# coordinate sort the file
pysam.sort( bwa_output + ".bam", output_path )
pysam.index(output_path+'.bam')
else:
shutil.move(bwa_output +".bam",output_path+'.bam')
开发者ID:ksahlin,项目名称:generate_assembly,代码行数:35,代码来源:align.py
示例4: sort_by_position
def sort_by_position(bam_file, dir):
## get the file prefix
prefix = ""
prefix_match = re.match(r"(.*).bam", bam_file)
try:
prefix = prefix_match.group(1)
except:
print "Existing: Invalid bam file -i %s" %(bam_file)
sys.exit(2)
# sort the bam file
bam_input = dir + bam_file
sort_bam = dir + prefix + "_sorted"
pysam.sort(bam_input, sort_bam)
sort_bam = sort_bam + ".bam"
# index the sort bam file
pysam.index(sort_bam)
print ""
print "Writing Sorted Bam File : %s" %(sort_bam)
print "Writing Index Sorted Bam File : %s.bai" %(sort_bam)
return sort_bam
开发者ID:chelseaju,项目名称:Pseudogene,代码行数:27,代码来源:bamfile_sorter.py
示例5: run
def run(self):
AbstractAnalysis.run(self) #Call base method to do some logging
localBamFile = os.path.join(self.getLocalTempDir(), "mapping.bam")
localSortedBamFile = os.path.join(self.getLocalTempDir(), "mapping.sorted")
samToBamFile(self.samFile, localBamFile)
pysam.sort(localBamFile, localSortedBamFile)
pysam.index(localSortedBamFile + ".bam")
pysam.faidx(self.referenceFastaFile)
file_header = self.readFastqFile.split(".fastq")[0].split("/")[-1] + "_" + self.referenceFastaFile.split(".fa")[0].split("/")[-1]
consensus_vcf = os.path.join(self.outputDir, file_header + "_Consensus.vcf")
consensus_fastq = os.path.join(self.outputDir, file_header + "_Consensus.fastq")
system("samtools mpileup -Q 0 -uf %s %s | bcftools view -cg - > %s" \
% (self.referenceFastaFile, localSortedBamFile + ".bam", consensus_vcf))
system("vcfutils.pl vcf2fq %s > %s" % (consensus_vcf, consensus_fastq))
system("rm -rf %s" % (self.referenceFastaFile + ".fai"))
formatted_consensus_fastq = os.path.join(self.getLocalTempDir(), "Consensus.fastq")
formatConsensusFastq(consensus_fastq, formatted_consensus_fastq)
system("mv %s %s" % (formatted_consensus_fastq, consensus_fastq))
self.finish()
开发者ID:mitenjain,项目名称:nanopore,代码行数:25,代码来源:consensus.py
示例6: check_bam
def check_bam(bam, p, make_new_index=False):
"""
Sort and index bam file
returns dictionary of chromosome names and lengths
"""
# check if sorted
test_head = pysam.AlignmentFile(bam, 'rb')
chrom_sizes = {}
p = str(p)
for i in test_head.header['SQ']:
chrom_sizes[i['SN']] = int(i['LN'])
try:
test_head.header['HD']['SO']
except KeyError:
print ' sorting bam file'
pysam.sort('[email protected]', p, bam, 'sorted.temp')
os.remove(bam)
os.rename('sorted.temp.bam', bam)
else:
if test_head.header['HD']['SO'] == 'coordinate':
pass
else:
print ' sorting bam file'
pysam.sort('[email protected]', p, bam, 'sorted.temp')
os.remove(bam)
os.rename('sorted.temp.bam', bam)
test_head.close()
# check if indexed
if '{}.bai'.format(bam) in os.listdir('.') and make_new_index is False:
pass
else:
print ' indexing bam file'
pysam.index(bam)
return chrom_sizes
开发者ID:timoast,项目名称:TEpy,代码行数:34,代码来源:tepy.py
示例7: main
def main(infile, snp_dir, max_window=MAX_WINDOW_DEFAULT,
is_paired_end=False, is_sorted=False):
name_split = infile.split(".")
if len(name_split) > 1:
pref = ".".join(name_split[:-1])
else:
pref = name_split[0]
if not is_sorted:
pysam.sort(infile, pref + ".sort")
infile = pref + ".sort"
sort_file_name = pref + ".sort.bam"
else:
sort_file_name = infile
keep_file_name = pref + ".keep.bam"
remap_name = pref + ".to.remap.bam"
remap_num_name = pref + ".to.remap.num.gz"
if is_paired_end:
fastq_names = [pref + ".remap.fq1.gz",
pref + ".remap.fq2.gz"]
else:
fastq_names = [pref + ".remap.fq.gz"]
bam_data = BamScanner(is_paired_end, max_window,
sort_file_name, keep_file_name, remap_name,
remap_num_name, fastq_names, snp_dir)
bam_data.run()
开发者ID:hobrien,项目名称:WASP,代码行数:30,代码来源:find_intersecting_snps.py
示例8: disciple
def disciple(bam_fname, bam_hdr, rg_id, long_qname_table, cigar_v2, in_queue):
"""Create a BAM file from the FASTQ lines fed to it via in_queue
:param bam_fname:
:param bam_hdr:
:param rg_id:
:param long_qname_table:
:param cigar_v2:
:param in_queue:
:return:
"""
logger.debug('Writing to {} ...'.format(bam_fname))
t0 = time.time()
fp = pysam.AlignmentFile(bam_fname, 'wb', header=bam_hdr)
ref_dict = {k['SN']: n for n, k in enumerate(bam_hdr['SQ'])}
cnt = 0
for cnt, (qname, read_data) in enumerate(iter(in_queue.get, __process_stop_code__)):
write_perfect_reads(qname, rg_id, long_qname_table, ref_dict, read_data, cigar_v2, fp)
fp.close()
t1 = time.time()
logger.debug('... {}: {} reads in {:0.2f}s ({:0.2f} t/s)'.format(bam_fname, cnt, t1 - t0, cnt/(t1 - t0)))
logger.debug('Sorting {} -> {}'.format(bam_fname, bam_fname + '.sorted'))
t0 = time.time()
pysam.sort('-m', '1G', '-o', bam_fname + '.sorted', bam_fname)
os.remove(bam_fname)
t1 = time.time()
logger.debug('... {:0.2f}s'.format(t1 - t0))
logger.debug('Shutting down thread for {}'.format(bam_fname))
开发者ID:sbg,项目名称:Mitty,代码行数:30,代码来源:god_aligner.py
示例9: miraligner
def miraligner(args):
"""
Realign BAM hits to miRBAse to get better accuracy and annotation
"""
hairpin, mirna = _download_mirbase(args)
precursors = _read_precursor(args.hairpin, args.sps)
matures = _read_mature(args.mirna, args.sps)
gtf = _read_gtf(args.gtf)
out_dts = []
for bam_fn in args.files:
sample = op.splitext(op.basename(bam_fn))[0]
if bam_fn.endswith("bam") or bam_fn.endswith("sam"):
logger.info("Reading %s" % bam_fn)
bam_fn = _sam_to_bam(bam_fn)
bam_sort_by_n = op.splitext(bam_fn)[0] + "_sort"
pysam.sort("-n", bam_fn, bam_sort_by_n)
reads = _read_bam(bam_sort_by_n + ".bam", precursors)
elif bam_fn.endswith("fasta") or bam_fn.endswith("fa") or bam_fn.endswith("fastq"):
out_file = op.join(args.out, sample + ".premirna")
bam_fn = _filter_seqs(bam_fn)
if args.miraligner:
_cmd_miraligner(bam_fn, out_file, args.sps, args.hairpin)
reads = _read_miraligner(out_file)
else:
if bam_fn.endswith("fastq"):
bam_fn = _convert_to_fasta(bam_fn)
logger.info("Aligning %s" % bam_fn)
if not file_exists(out_file):
pyMatch.Miraligner(hairpin, bam_fn, out_file, 1, 4)
reads = _read_pyMatch(out_file, precursors)
else:
raise ValueError("Format not recognized.")
if not args.miraligner:
reads = _annotate(reads, matures, precursors)
out_file = op.join(args.out, sample + ".mirna")
out_file, dt, dt_pre= _tab_output(reads, out_file, sample)
try:
vcf_file = op.join(args.out, sample + ".vcf")
if not file_exists(vcf_file):
# if True:
create_vcf(dt_pre, matures, gtf, vcf_file)
try:
import vcf
vcf.Reader(filename=vcf_file)
except Exception as e:
logger.warning(e.__doc__)
logger.warning(e.message)
except Exception as e:
# traceback.print_exc()
logger.warning(e.__doc__)
logger.warning(e.message)
if isinstance(dt, pd.DataFrame):
out_dts.append(dt)
if out_dts:
_create_counts(out_dts, args.out)
# _summarize(out_dts)
else:
print "No files analyzed!"
开发者ID:JackieMium,项目名称:seqcluster,代码行数:60,代码来源:__init__.py
示例10: main
def main():
# Read options, args.
parser = optparse.OptionParser()
(options, args) = parser.parse_args()
input_fname, output_fname = args
slots = os.getenv('GALAXY_SLOTS', 1)
pysam.sort("[email protected]%s" % slots, '-o', output_fname, '-O', 'bam', '-T', '.', input_fname)
开发者ID:ImmPortDB,项目名称:immport-galaxy,代码行数:7,代码来源:cram_to_bam.py
示例11: run_cufflinks
def run_cufflinks(org_db, num_cpus=4):
"""
run cufflinks program on mapped reads
"""
try:
subprocess.call(["cufflinks"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except:
exit("Please make sure that the `Cufflinks` binary is in your $PATH")
org_name = org_db['short_name']
print("preparing for cufflinks run for organism %s" % org_name)
min_intron_length = 20
min_isoform_frac = 0.25
max_intron_length = org_db['max_intron_len']
result_dir = org_db['read_assembly_dir']
bam_file = "%s/%s_Aligned_mmr_sortbyCoord.bam" % (org_db['read_map_dir'], org_name)
if not os.path.isfile(bam_file):
sys.stdout.write("failed to fetch sorted mmr BAM file for organism: %s, trying to get the mmr file...\n" % org_name)
bam_file = "%s/%s_Aligned_mmr.bam" % (org_db['read_map_dir'], org_name)
if not os.path.isfile(bam_file):
exit("error: failed to fetch mmr BAM file for organism %s" % org_name)
## sorting, indexing the bam file
file_prefix, ext = os.path.splitext(bam_file)
sorted_bam = "%s_sortbyCoord" % file_prefix
sys.stdout.write("trying to sort based by the coordinates with output prefix as: %s\n" % sorted_bam)
if not os.path.isfile("%s.bam" % sorted_bam):
pysam.sort(bam_file, sorted_bam)
bam_file = "%s.bam" % sorted_bam
print('using bam file from %s' % bam_file)
if not os.path.exists(bam_file + ".bai"):
pysam.index(bam_file)
## always use quiet mode to avoid problems with storing log output.
cli_cuff = "cufflinks -q --no-update-check \
-F %.2f \
-I %d \
--min-intron-length %d \
--library-type fr-unstranded \
-p %d \
-o %s \
%s" % (min_isoform_frac, max_intron_length, min_intron_length, num_cpus, result_dir, bam_file)
sys.stdout.write('\trun cufflinks as: %s \n' % cli_cuff)
try:
os.chdir(result_dir)
process = subprocess.Popen(cli_cuff, shell=True)
returncode = process.wait()
if returncode !=0:
raise Exception, "Exit status return code = %i" % returncode
except Exception, e:
print 'Error running cufflinks.\n%s' % str( e )
开发者ID:vipints,项目名称:genomeutils,代码行数:60,代码来源:transcript_assembly.py
示例12: align_to_bam_file
def align_to_bam_file(self, reference_fasta_path, query_fasta_path, output_bam_path, multiple=False, assert_record=None):
logging.debug('LastzRunner: running on reference %s and query %s' %
(reference_fasta_path, query_fasta_path))
output_sam_path = os.path.abspath(
os.path.expandvars(output_bam_path.replace('.bam', '.sam')))
output_bam_unsorted_path = os.path.abspath(
os.path.expandvars(output_bam_path + '.unsorted'))
logging.debug(
'LastzRunner: aligning with output in temporary sam file %s' %
output_sam_path)
with open(output_sam_path, 'w') as output_sam_handler:
for line in self._align(reference_fasta_path, query_fasta_path, multiple):
output_sam_handler.write(line)
logging.debug(
'LastzRunner: transforming sam into unsorted bam file %s' %
output_bam_unsorted_path)
input_sam_handler = pysam.Samfile(output_sam_path, "r")
output_bam_file = pysam.Samfile(
output_bam_unsorted_path, "wb", template=input_sam_handler)
logging.debug(
'LastzRunner: copying from sam file to bam file')
for s in input_sam_handler:
output_bam_file.write(s)
output_bam_file.close()
logging.debug('LastzRunner: sorting and indexing bam file %s' %
output_bam_path)
pysam.sort(output_bam_unsorted_path,
output_bam_path.replace('.bam', ''))
pysam.index(output_bam_path)
开发者ID:achenge07,项目名称:virana,代码行数:35,代码来源:vhom.py
示例13: makeAggregate
def makeAggregate(cells, directory, suffix, output):
"""
Create aggregate sample.
Make an aggregate bam file from a list of cells, sorts and indexes
the file for easy use in IGV. Suffix is required to prevent non
0-padded numbers matching the wrong files. Return final file name.
Parameters
----------
cells : list
List of cell names to create aggregate from.
directory : string
Directory path with the bam files from each cell.
suffix : string
String to match the end of the bam file, use to add file extension
and to anchor the extension after file numbers - this will prevent
cell_4 matching cell_4*.
output : string
String containing output file location.
"""
from glob import glob
cells = set(cells)
fileList = []
for cell in cells:
fileList.append(glob(os.path.join(directory, "*" + cell + suffix))[0])
pysam.cat("-o", output + ".bam", *fileList, catch_stdout=False)
pysam.sort(output + ".bam", output + ".sorted", catch_stdout=False)
pysam.index(output + ".sorted.bam", catch_stdout=False)
return output + ".sorted.bam"
开发者ID:mfiers,项目名称:rat,代码行数:31,代码来源:scatac.py
示例14: saveReads
def saveReads(dataHub, nameExtra=None):
if dataHub.args.save_reads:
logging.info("* Saving relevant reads *")
for i, sample in enumerate(dataHub):
outbam_path = dataHub.args.save_reads
if not outbam_path.endswith(".bam"):
outbam_path += ".bam"
if len(dataHub.samples) > 1:
logging.debug("Using i = {}".format(i))
outbam_path = outbam_path.replace(".bam", ".{}.bam".format(i))
if nameExtra is not None:
outbam_path = outbam_path.replace(".bam", ".{}.bam".format(nameExtra))
logging.info(" Outpath: {}".format(outbam_path))
# print out just the reads we're interested for use later
bam_small = pysam.Samfile(outbam_path, "wb", template=sample.bam)
for read in sample.reads:
bam_small.write(read)
for read in sample.readStatistics.reads:
bam_small.write(read)
bam_small.close()
sorted_path = outbam_path.replace(".bam", ".sorted")
pysam.sort(outbam_path, sorted_path)
pysam.index(sorted_path+".bam")
开发者ID:MMesbahU,项目名称:svviz,代码行数:29,代码来源:app.py
示例15: bwa_sampe
def bwa_sampe(pe1_path, pe2_path, genome_path, output_path):
print 'Aligning with bwa aln/sampe'
start = time()
work_dir = tempfile.mkdtemp()
genome_db = os.path.join(work_dir, "genome")
pe1_output = os.path.join(work_dir, "pe1.sai")
pe2_output = os.path.join(work_dir, "pe2.sai")
bwa_output = os.path.join(work_dir, "output.sam")
null = open("/dev/null")
subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=null)
with open(pe1_output, "w") as pe1_file:
subprocess.check_call([ "bwa", "aln", genome_db, pe1_path ], stdout=pe1_file, stderr=null)
with open(pe2_output, "w") as pe2_file:
subprocess.check_call([ "bwa", "aln", genome_db, pe2_path ], stdout=pe2_file, stderr=null)
with open(bwa_output, "w") as bwa_file:
subprocess.check_call([ "bwa", "sampe",
genome_db,
pe1_output, pe2_output,
pe1_path, pe2_path ], stdout=bwa_file, stderr=null)
elapsed = time() - start
print 'Time elapsed for bwa aln/sampe: ', elapsed
sam_to_bam(bwa_output, bwa_output + ".bam")
pysam.sort(bwa_output + ".bam", output_path)
pysam.index(output_path + '.bam')
开发者ID:jvhaarst,项目名称:BESST,代码行数:29,代码来源:reads_to_ctg_map.py
示例16: sort_output
def sort_output(outPrefix):
'''Sorts the output file by read coordinate'''
pysam.sort(outPrefix+'.originalSort.bam', outPrefix + '.coordSort')
#os.remove(outPrefix+'.originalSort.tmp.bam')
## Build the bam index for output
pysam.index(outPrefix + '.coordSort.bam')
开发者ID:friedue,项目名称:AlleleSpecific,代码行数:7,代码来源:allelicFilterOct2014.py
示例17: convert_sam_to_bam
def convert_sam_to_bam():
"""
This method should take a newly create .sam file from alignment and
- convert it to .bam
- sort .bam
- index .bam
"""
ids = generate_ids()
for id in ids:
start_time = time()
print 'converting: %s'%id
base_path = os.path.join(SAMPLE_DIR, id)
sam_path = os.path.join(base_path, id+'-bwape.sam')
bam_path = os.path.join(base_path, id+'-bwape.bam')
bam_content = pysam.view('-bS', sam_path)
bam_file = open(bam_path, 'w+')
bam_file.writelines(bam_content)
bam_file.close()
pysam.sort(bam_path, bam_path+'_sorted')
pysam.index(bam_path+'_sorted.bam')
# indexing creates file.bam.bam. Move it to file.bam
bam_call = "mv {0} {1}".format(bam_path+'_sorted.bam', bam_path)
index_call = "mv {0} {1}".format(bam_path+'_sorted.bam.bai',
bam_path+'.bam.bai')
subprocess.call(bam_call, shell=True)
subprocess.call(index_call, shell=True)
end_time = time()
print 'completed: %.3fs'%(end_time-start_time)
开发者ID:wflynny,项目名称:miseq-analysis,代码行数:31,代码来源:bam_utilities.py
示例18: bwa_mem
def bwa_mem(pe1_path, pe2_path, genome_path, threads, output_path):
print 'Aligning with bwa mem'
start = time()
work_dir = tempfile.mkdtemp()
genome_db = os.path.join(work_dir, "genome")
pe1_output = os.path.join(work_dir, "pe1.sai")
pe2_output = os.path.join(work_dir, "pe2.sai")
bwa_output = os.path.join(work_dir, "output.sam")
stderr_file = open(output_path+'.bwa.1','w')
#null = open("/dev/null")
subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=stderr_file)
with open(bwa_output, "w") as bwa_file:
subprocess.check_call([ "bwa", "mem", "-t", threads,
genome_db, pe1_path, pe2_path ],
stdout=bwa_file,
stderr=stderr_file)
elapsed = time() - start
print 'Time elapsed for bwa mem: ', elapsed
sam_to_bam(bwa_output, bwa_output + ".bam")
pysam.sort(bwa_output + ".bam", output_path)
pysam.index(output_path + '.bam')
shutil.rmtree(work_dir)
开发者ID:sidorov-si,项目名称:BESST,代码行数:25,代码来源:reads_to_ctg_map.py
示例19: extend_bam
def extend_bam(bam, type, reheader, size=0):
bam_prefix = bam.split(".bam")[0]
bam_file = pysam.Samfile(bam, 'rb')
tmp_name = bam_prefix + ".bed"
tmp_bed = open(tmp_name, 'w')
size_name = str(size)
if size == 0 : size_name = "insert"
out_name = "_".join([bam_prefix, type, size_name]) + ".bam"
out_bam = open(out_name, 'w')
#pdb.set_trace()
## Convert BAM to temporary BED
try:
print "BAM to BED..."
if type=="extend":
bamToFragmentBed(bam_file, tmp_bed, size)
elif type=="dyad":
trimToDyad(bam_file, tmp_bed, size)
except:
print "BAM to BED conversion failed."
print ">> " + ":".join(sys.exc_info()[1])
tmp_bed.close()
out_bam.close()
os.remove(tmp_name)
return
else:
print "BAM to BED conversion successful."
tmp_bed.close()
#out_bam.close()
## Convert tmp bed to bam
bedToBam(tmp_name, out_name)
## Replace header
if reheader:
cmd_args1 = ['samtools', 'view', '-h', bam]
cmd_args2 = ['samtools', 'reheader', '-', out_name]
tmp_name = bam_prefix + "_tmp"
tmp = open(tmp_name, 'w')
try:
print "Reheader..."
p1 = Popen(cmd_args1, stdout=PIPE)
p2 = Popen(cmd_args2, stdin=p1.stdout, stdout=tmp)
p2.wait()
except:
print "Failed reheader"
tmp.close()
os.remove(tmp_name)
return
else:
#os.remove(bam)
tmp.close()
#os.rename(tmp_name, out_name)
print "Sorting..."
pysam.sort(out_name, out_name + "_sort")
os.rename(out_name + "_sort.bam", out_name)
pysam.index(out_name)
开发者ID:aruncoorg,项目名称:seqAnalysis,代码行数:59,代码来源:readMidDistances.py
示例20: run_mmr
def run_mmr(org_name, read_map_dir, threads=3):
"""
a pythonic wrapper for multiple mapper resolution program
@args org_name: Organism name, example case A_thaliana
@type org_name: str
@args read_map_dir: directory where the STAR bam (aligned reads) file located
@type read_map_dir: str
@args threads: number of threads to use for the run (default: 3)
@type threads: int
"""
import pysam
try:
subprocess.call(["mmr"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except:
exit("Please make sure that the `mmr` binary is in your $PATH")
## mmr works well with bam file sorted by read id
bam_file = "%s/%s_Aligned.sortedByName.out.bam" % (read_map_dir, org_name)
if not os.path.isfile(bam_file):
sys.stdout.write(
"warning: failed to fetch read id sorted BAM file for organism: %s, trying to get the raw alignment file\n"
% org_name
)
bam_file = "%s/%s_Aligned.out.bam" % (read_map_dir, org_name) ## unsorted bam file from STAR output
if not os.path.isfile(bam_file):
exit("error: failed to fetch STAR read alignment file for %s %s\n" % (org_name, bam_file))
## sorting bam file
sorted_bam = "%s/%s_Aligned.sortedByName.out" % (read_map_dir, org_name)
if not os.path.isfile("%s.bam" % sorted_bam):
sys.stdout.write("trying to sort based by read id with output prefix as: %s\n" % sorted_bam)
pysam.sort("-n", bam_file, sorted_bam)
bam_file = "%s.bam" % sorted_bam
sys.stdout.write("using bam file from %s\n" % bam_file)
outFile = "%s/%s_Aligned_mmr.bam" % (read_map_dir, org_name)
iterations = 3
## provide a bam file sorted by read id
cli_mmr = "module load gcc; mmr -b -p -V -t %d -I %d -o %s %s" % (threads, iterations, outFile, bam_file)
try:
sys.stdout.write("\trun MMR as: %s \n" % cli_mmr)
## changing the working dir to run mmr
os.chdir(read_map_dir)
process = subprocess.Popen(cli_mmr, shell=True)
returncode = process.wait()
if returncode != 0:
raise Exception, "Exit status return code = %i" % returncode
sys.stdout.write("MMR run finished. result file stored at %s\n" % outFile)
except Exception, e:
exit("Error running MMR.\n%s" % str(e))
开发者ID:ricket1978,项目名称:genomeutils,代码行数:59,代码来源:star_align_rna.py
注:本文中的pysam.sort函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论