本文整理汇总了Python中pypeflow.data.fn函数的典型用法代码示例。如果您正苦于以下问题：Python fn函数的具体用法？Python fn怎么用？Python fn使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了fn函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: quiver_reseq
def quiver_reseq(self):
    """Run the Quiver resequencing step through SGE and write the done flag.

    Reads ``sge_option_qv``, ``big_tmpdir`` and ``SEYMOUR_HOME`` from
    ``self.config``, writes ``scripts/quiver_reseq.sh``, submits it with
    ``qsub -sync y`` and then writes/runs ``scripts/quiver_done.sh`` which
    echoes ``done`` into the ``self.Quiver_done`` file.

    Returns:
        0 when ``SEYMOUR_HOME`` is ``None`` (the step is bypassed);
        otherwise ``None``.
    """
    config = self.config
    sge_option_qv = config["sge_option_qv"]
    big_tmpdir = config["big_tmpdir"]

    try:
        os.makedirs("quiver_reseq")
    except OSError:
        # Best effort, as before: typically the directory already exists.
        pass

    SEYMOUR_HOME = config["SEYMOUR_HOME"]
    if SEYMOUR_HOME is None:
        print("SEYMOUR_HOME not set, bypass quiver consensus step")
        return 0

    job_name = "QuiverReq_" + str(uuid.uuid4())
    cwd = os.getcwd()
    input_fofn_fn = fn(self.input_fofn)

    # The first substitution used to reference the misspelled, undefined name
    # ``SYMOURE_HOME`` and therefore raised NameError before anything ran.
    quiver_script = """#!/bin/bash
export SEYMOUR_HOME=%s
. $SEYMOUR_HOME/etc/setup.sh
cd %s/quiver_reseq
cp ../CA/9-terminator/asm.ctg.fasta .
referenceUploader -c -p $PWD -n assembly -f asm.ctg.fasta --skipIndexUpdate
compareSequences.py --info --useGuidedAlign --algorithm=blasr --nproc=24 --noXML --h5mode=w --h5fn=out.cmp.h5 --minAccuracy=0.70 --minLength=200 -x -nCandidates 50 -x -minMatch 12 -x -bestn 1 -x -minPctIdentity 70.0 %s assembly/
loadPulses %s out.cmp.h5 -metrics DeletionQV,IPD,InsertionQV,PulseWidth,QualityValue,MergeQV,SubstitutionQV,DeletionTag -byread
cmph5tools.py sort out.cmp.h5 --tmp %s
variantCaller.py --algorithm quiver -j 16 --referenceFilename assembly/sequence/assembly.fasta --parameters best -o output.gff -o output.fasta -o output.fastq -q 0 -X 80 -x 5 --mapQvThreshold 0 out.cmp.h5
""" % (SEYMOUR_HOME, cwd, input_fofn_fn, input_fofn_fn, big_tmpdir)

    # Each script file is closed before the command that executes it starts.
    with open("scripts/quiver_reseq.sh", "w") as f:
        f.write(quiver_script + "\n")
    os.system( """qsub -sync y {sge_option_qv} -N {jn} -o {cwd}/sge_log -j y -S /bin/bash scripts/quiver_reseq.sh """.format(jn=job_name, cwd=cwd, sge_option_qv = sge_option_qv) )

    with open("scripts/quiver_done.sh", "w") as f:
        f.write("echo done > %s" % fn(self.Quiver_done) + "\n")
    os.system("bash scripts/quiver_done.sh")
开发者ID:lhon,项目名称:HBAR-DTK,代码行数:34,代码来源:HBAR_WF.py
示例2: build_rdb
def build_rdb(self):
    """Write ``prepare_db.sh`` for the ``raw_reads`` database and submit it.

    The generated script activates the install prefix, changes into the work
    directory, feeds every file listed in the input fofn to ``fasta2DB``,
    runs ``DBsplit`` and ``HPCdaligner`` (output redirected to
    ``run_jobs.sh``) and touches the ``rdb_build_done`` marker.  The script is
    handed to ``run_script`` and ``wait_for_file`` is called on the marker.
    """
    params = self.parameters
    fofn_path = fn(self.input_fofn)
    done_marker = self.rdb_build_done
    work_dir = params["work_dir"]
    config = params["config"]
    sge_opts = config["sge_option_da"]
    prefix = config["install_prefix"]
    min_len = config["length_cutoff"]
    daligner_opts = config["pa_HPCdaligner_option"]
    split_opts = config["pa_DBsplit_option"]

    script_fn = os.path.join(work_dir, "prepare_db.sh")
    with open(script_fn, "w") as out:
        out.write("source {install_prefix}/bin/activate\n".format(install_prefix=prefix))
        out.write("cd {work_dir}\n".format(work_dir=work_dir))
        out.write("for f in `cat {input_fofn_fn}`; do fasta2DB raw_reads $f; done\n".format(input_fofn_fn=fofn_path))
        out.write("DBsplit %s raw_reads\n" % split_opts)
        out.write("HPCdaligner %s -H%d raw_reads > run_jobs.sh\n" % (daligner_opts, min_len))
        out.write("touch {rdb_build_done}\n".format(rdb_build_done=fn(done_marker)))

    # Job name: last URL component plus a short uuid1-derived suffix.
    job_name = "%s-%s" % (self.URL.split("/")[-1], str(uuid.uuid1())[:8])
    job_data = {
        "job_name": job_name,
        "cwd": os.getcwd(),
        "sge_option": sge_opts,
        "script_fn": script_fn,
    }
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(done_marker), task=self, job_name=job_name)
开发者ID:acdarby,项目名称:FALCON,代码行数:31,代码来源:fc_run.py
示例3: query_filter
def query_filter(self):
    """Write and submit the ``qf<NNNNN>.sh`` m4-filtering script for one query.

    The script collects all ``q*_t*.m4`` files found under the mapping data
    directory into a fofn, runs ``query_m4_filtering.py`` on it (output goes
    to ``self.qf_out``) and touches ``self.job_done``.

    NOTE(review): ``config`` is not bound inside this function; it is
    presumably a module-level or enclosing-scope name -- confirm.
    """
    params = self.parameters
    out_dir = params["mapping_data_dir"]
    serial = params["q_sn"]
    script_fn = os.path.join(out_dir, "qf%05d.sh" % serial)
    qf_fofn = os.path.join(out_dir, "qf%05d_input.fofn" % (serial, ))

    prefix = config["install_prefix"]
    sge_opts = config["sge_option_qf"]
    min_len = config["length_cutoff_pr"]
    best_n = config["bestn"]

    with open(script_fn, "w") as out:
        out.write("source {install_prefix}/bin/activate\n".format(install_prefix=prefix))
        out.write("""find %s -name "q[0-9]*_t[0-9]*.m4" > %s\n""" % (out_dir, qf_fofn))
        out.write("""query_m4_filtering.py %s 1 0 %d %d %s\n""" % (qf_fofn, best_n, min_len, fn(self.qf_out)))
        out.write("""touch %s\n""" % fn(self.job_done))

    job_name = self.URL.split("/")[-1]
    job_data = {
        "job_name": job_name,
        "cwd": os.getcwd(),
        "sge_option": sge_opts,
        "script_fn": script_fn,
    }
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(self.job_done), task=self, job_name=job_name)
开发者ID:chad388,项目名称:HBAR-DTK,代码行数:26,代码来源:HBAR_WF2.py
示例4: run_daligner
def run_daligner(self):
    """Write ``rj_<job_uid>.sh`` for one daligner command and submit it.

    The script runs the daligner command under ``time``, symlinks the
    resulting ``.las`` files of every block into the matching ``../m_<NNNNN>``
    directory and only then touches the ``job_done`` marker.  An EXIT trap
    touches ``<job_done>.exit`` whenever the shell exits.
    """
    params = self.parameters
    daligner_cmd = params["daligner_cmd"]
    job_uid = params["job_uid"]
    cwd = params["cwd"]
    done_marker = self.job_done
    config = params["config"]
    sge_opts = config["sge_option_da"]
    prefix = config["install_prefix"]
    db_prefix = params["db_prefix"]
    nblock = params["nblock"]

    script_fn = os.path.join(os.path.join(cwd), "rj_%s.sh" % (job_uid))

    lines = [
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT".format(job_done=fn(done_marker)),
        "source {install_prefix}/bin/activate".format(install_prefix=prefix),
        "cd %s" % cwd,
        "hostname",
        "date",
        "time " + daligner_cmd,
    ]
    # One symlink loop per block, numbered from 1.
    lines.extend(
        """ for f in `find $PWD -wholename "*%s.%d.%s.*.*.las"`; do ln -sf $f ../m_%05d; done """ % (db_prefix, block, db_prefix, block)
        for block in xrange(1, nblock + 1)
    )
    lines.append("touch {job_done}".format(job_done=fn(done_marker)))

    with open(script_fn, "w") as out:
        out.write("\n".join(lines))

    job_data = make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_opts
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(done_marker), task=self, job_name=job_data['job_name'])
开发者ID:evolvedmicrobe,项目名称:FALCON,代码行数:35,代码来源:run.py
示例5: run_merge_task
def run_merge_task(self):
    """Write ``rp_<job_id>.sh`` wrapping the merge script and submit it.

    The wrapper activates the install prefix, changes into ``cwd``, runs the
    merge script with ``time bash`` and touches the ``job_done`` marker; an
    EXIT trap touches ``<job_done>.exit`` whenever the shell exits.
    """
    params = self.parameters
    merge_script = params["merge_script"]
    job_id = params["job_id"]
    cwd = params["cwd"]
    done_marker = self.job_done
    config = params["config"]
    sge_opts = config["sge_option_la"]
    prefix = config["install_prefix"]

    script_fn = os.path.join(os.path.join(cwd), "rp_%05d.sh" % (job_id))

    lines = [
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT".format(job_done=fn(done_marker)),
        "source {install_prefix}/bin/activate".format(install_prefix=prefix),
        "cd %s" % cwd,
        "hostname",
        "date",
        "time bash %s" % merge_script,
        "touch {job_done}".format(job_done=fn(done_marker)),
    ]
    with open(script_fn, "w") as out:
        out.write("\n".join(lines))

    job_data = make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_opts
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(done_marker), task=self, job_name=job_data['job_name'])
开发者ID:evolvedmicrobe,项目名称:FALCON,代码行数:29,代码来源:run.py
示例6: check_r_cns_task
def check_r_cns_task(self):
    """List the ``preads/out*.fa`` files in the pread fofn and touch ``cns_done``.

    The matching paths are written in sorted order, one per line.

    NOTE(review): ``rawread_dir`` is not bound inside this function; it is
    presumably provided by the enclosing or module scope -- confirm.
    """
    with open(fn(self.pread_fofn), "w") as fofn_out:
        for fasta_path in sorted(glob.glob("%s/preads/out*.fa" % rawread_dir)):
            fofn_out.write("%s\n" % fasta_path)
    os.system("touch %s" % fn(self.cns_done))
开发者ID:JinfengChen,项目名称:Pacbio,代码行数:7,代码来源:fc_run.py
示例7: gather_qm4
def gather_qm4(self):
    """Write the sorted input paths that end in ``m4`` to the ``qm4_fofn`` file.

    Every value of ``self.inputs`` is resolved with ``fn``; only paths whose
    name ends with ``"m4"`` are written, one per line.
    """
    input_paths = sorted(fn(item) for item in self.inputs.values())
    with open(fn(self.qm4_fofn), "w") as fofn_out:
        for path in input_paths:
            if not path.endswith("m4"):
                continue
            fofn_out.write("%s\n" % path)
开发者ID:PacificBiosciences,项目名称:HBAR-DTK,代码行数:7,代码来源:HBAR_WF2.py
示例8: blasr_align
def blasr_align(self):
    """Write and submit a ``blasr`` alignment script for one query/target pair.

    The script is named ``q<NNNNN>_t<NNNNN>.sh`` inside the mapping data
    directory; it activates the install prefix, runs ``blasr`` with the
    configured ``blasr_opt`` writing m4 output to ``q<NNNNN>_t<NNNNN>.m4``,
    and touches ``self.job_done``.
    """
    params = self.parameters
    query_fofn = self.query_fofn
    target_fa = self.target_fa
    target_sa = self.target_sa
    out_dir = params["mapping_data_dir"]
    pair = (params["q_sn"], params["t_sn"])
    out_fn = os.path.join(out_dir, "q%05d_t%05d.m4" % pair)
    script_fn = os.path.join(out_dir, "q%05d_t%05d.sh" % pair)

    config = params["config"]
    blasr_opt = config["blasr_opt"]
    sge_opts = config["sge_option_dm"]
    prefix = config["install_prefix"]

    blasr_cmd = """blasr {query} {target} -sa {target_sa} {blasr_opt} -noSplitSubreads -m 4 -out {out_fn}""".format(
        query=fn(query_fofn),
        target=fn(target_fa),
        target_sa=fn(target_sa),
        blasr_opt=blasr_opt,
        out_fn=out_fn,
    )

    with open(script_fn, "w") as out:
        out.write("source {install_prefix}/bin/activate\n".format(install_prefix=prefix))
        out.write(blasr_cmd + "\n")
        # The final line is written without a trailing newline.
        out.write("touch %s" % fn(self.job_done))

    job_name = self.URL.split("/")[-1]
    job_data = {
        "job_name": job_name,
        "cwd": os.getcwd(),
        "sge_option": sge_opts,
        "script_fn": script_fn,
    }
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(self.job_done), task=self, job_name=job_name)
开发者ID:chad388,项目名称:HBAR-DTK,代码行数:34,代码来源:HBAR_WF2.py
示例9: run_falcon_asm_task
def run_falcon_asm_task(self):
    """Write ``run_falcon_asm.sh`` in ``wd`` and submit it.

    The script runs ``DB2Falcon preads`` in the pread directory, lists the
    ``.las`` files into ``las.fofn``, then runs ``fc_ovlp_filter.py``,
    ``fc_ovlp_to_graph.py`` and ``fc_graph_to_contig.py`` in ``wd`` and
    touches ``self.falcon_asm_done``.
    """
    params = self.parameters
    wd = params["wd"]
    config = params["config"]
    prefix = config["install_prefix"]
    pread_dir = params["pread_dir"]
    script_fn = os.path.join(os.path.join(wd), "run_falcon_asm.sh")

    lines = [
        "source {install_prefix}/bin/activate".format(install_prefix=prefix),
        "cd %s" % pread_dir,
        "DB2Falcon preads",
        "cd %s" % wd,
        """find %s/las_files -name "*.las" > las.fofn """ % pread_dir,
    ]
    filter_opts = config["overlap_filtering_setting"]
    min_len = config["length_cutoff_pr"]
    lines += [
        """fc_ovlp_filter.py --fofn las.fofn %s --min_len %d > preads.ovl""" % (filter_opts, min_len),
        "ln -sf %s/preads4falcon.fasta ." % pread_dir,
        """fc_ovlp_to_graph.py preads.ovl --min_len %d > fc_ovlp_to_graph.log""" % min_len,
        """fc_graph_to_contig.py""",
        """touch %s\n""" % fn(self.falcon_asm_done),
    ]

    with open(script_fn, "w") as out:
        out.write("\n".join(lines))

    job_name = "%s-%s" % (self.URL.split("/")[-1], str(uuid.uuid4())[:8])
    job_data = {
        "job_name": job_name,
        "cwd": wd,
        "sge_option": config["sge_option_fc"],
        "script_fn": script_fn,
    }
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_name)
示例10: run_merge_task
def run_merge_task(self):
    """Write ``rp_<job_id>.sh`` wrapping the merge script, with logging, and submit it.

    Hostname, date and the timed merge run are appended to ``rp_<job_id>.log``
    in ``cwd``; afterwards the script touches ``self.job_done``.
    """
    params = self.parameters
    merge_script = params["merge_script"]
    job_id = params["job_id"]
    cwd = params["cwd"]
    config = params["config"]
    install_prefix = config["install_prefix"]  # read as before; not used below

    script_dir = os.path.join(cwd)
    script_fn = os.path.join(script_dir, "rp_%05d.sh" % (job_id))
    log_path = os.path.join(script_dir, "rp_%05d.log" % (job_id))

    lines = [
        "cd %s" % cwd,
        "hostname >> %s" % log_path,
        "date >> %s" % log_path,
        # `time` is wrapped in a subshell so that its report is captured by
        # the redirection into the log file as well.
        "(time bash %s) >> %s 2>&1 " % (merge_script, log_path),
        "touch %s" % fn(self.job_done),
    ]
    with open(script_fn, "w") as out:
        out.write("\n".join(lines))

    job_name = "%s-%s" % (self.URL.split("/")[-1], str(uuid.uuid4())[:8])
    job_data = {
        "job_name": job_name,
        "cwd": cwd,
        "script_fn": script_fn,
    }
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(self.job_done), task=self, job_name=job_name)
开发者ID:sagrudd,项目名称:FALCON,代码行数:31,代码来源:fc_run.py
示例11: run_daligner
def run_daligner(self):
    """Write ``rj_<job_uid>.sh`` for one daligner command, with logging, and submit it.

    Hostname, date and the timed daligner run are appended to
    ``rj_<job_uid>.log`` in ``cwd``.  The script then symlinks the ``.las``
    files of every block into the matching ``../m_<NNNNN>`` directory and
    touches ``self.job_done`` as its very last step.
    """
    params = self.parameters
    daligner_cmd = params["daligner_cmd"]
    job_uid = params["job_uid"]
    cwd = params["cwd"]
    config = params["config"]
    db_prefix = params["db_prefix"]
    nblock = params["nblock"]

    script_dir = os.path.join(cwd)
    script_fn = os.path.join(script_dir, "rj_%s.sh" % (job_uid))
    log_path = os.path.join(script_dir, "rj_%s.log" % (job_uid))

    script = []
    script.append("cd %s" % cwd)
    script.append("hostname >> %s" % log_path)
    script.append("date >> %s" % log_path)
    # `time` is wrapped in a subshell so that its report is captured by the
    # redirection into the log file as well.
    script.append("(time " + daligner_cmd + ") >> %s 2>&1 " % log_path)
    for p_id in xrange(1, nblock + 1):
        script.append(""" for f in `find $PWD -wholename "*%s.%d.%s.*.*.las"`; do ln -sf $f ../m_%05d; done """ % (db_prefix, p_id, db_prefix, p_id))
    # The done marker must be written last: it used to be touched before the
    # symlink loops, so whoever waits on it could continue while the links
    # were still being created.
    script.append("touch %s" % fn(self.job_done))

    with open(script_fn, "w") as script_file:
        script_file.write("\n".join(script))

    job_name = self.URL.split("/")[-1]
    job_name += "-" + str(uuid.uuid4())[:8]
    job_data = {"job_name": job_name,
                "cwd": cwd,
                "script_fn": script_fn}
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(self.job_done), task=self, job_name=job_name)
开发者ID:sagrudd,项目名称:FALCON,代码行数:35,代码来源:fc_run.py
示例12: create_daligner_tasks
def create_daligner_tasks(wd, db_prefix, db_file, rdb_build_done, config, pread_aln=False):
    """Create one daligner task per ``daligner`` line found in ``<wd>/run_jobs.sh``.

    The block count is taken from the ``blocks = N`` line of the database
    file (1 when the file does not exist or has no such line) and one
    ``m_<NNNNN>`` directory per block is created under ``wd``.  For every
    ``daligner`` line a ``job_<uid>`` directory is prepared with symlinks to
    the database files, and a task running ``run_daligner`` is created.

    Args:
        wd: working directory containing ``run_jobs.sh``.
        db_prefix: database name prefix used for the ``.bps``/``.idx``/``.db`` links.
        db_file: object resolved with ``fn`` to the database file path.
        rdb_build_done: passed as the ``rdb_build_done`` input of every task.
        config: configuration passed through to each task's parameters.
        pread_aln: when ``True`` the command name is replaced by ``daligner_p``.

    Returns:
        ``(tasks, tasks_out)``: the list of tasks and a dict mapping
        ``"ajob_<uid>"`` to the corresponding ``job_done`` file object.

    Raises:
        Exception: when the symlink-setup shell command exits non-zero.
    """
    tasks = []
    tasks_out = {}

    nblock = 1
    db_path = fn(db_file)
    if os.path.exists(db_path):
        with open(db_path) as f:
            for line in f:
                fields = line.strip().split()
                # Blank or short lines used to raise IndexError here.
                if len(fields) >= 3 and fields[0] == "blocks" and fields[1] == "=":
                    nblock = int(fields[2])
                    break

    for pid in xrange(1, nblock + 1):
        support.make_dirs("%s/m_%05d" % (wd, pid))

    with open(os.path.join(wd, "run_jobs.sh")) as f:
        for line in f:
            line = line.strip()
            words = line.split()
            # Skip blank lines (previously an IndexError) and non-daligner lines.
            if not words or words[0] != "daligner":
                continue

            # The job id is the first 8 hex digits of the md5 of the stripped line.
            job_uid = hashlib.md5(line).hexdigest()[:8]
            support.make_dirs(os.path.join(wd, "./job_%s" % job_uid))
            call = "cd %s/job_%s;ln -sf ../.%s.bps .; ln -sf ../.%s.idx .; ln -sf ../%s.db ." % (
                wd,
                job_uid,
                db_prefix,
                db_prefix,
                db_prefix,
            )
            rc = os.system(call)
            if rc:
                raise Exception("Failure in system call: %r -> %d" % (call, rc))

            job_done = makePypeLocalFile(os.path.abspath("%s/job_%s/job_%s_done" % (wd, job_uid, job_uid)))
            if pread_aln == True:
                words[0] = "daligner_p"
            parameters = {
                "daligner_cmd": " ".join(words),
                "cwd": os.path.join(wd, "job_%s" % job_uid),
                "job_uid": job_uid,
                "config": config,
                "nblock": nblock,
                "db_prefix": db_prefix,
            }
            make_daligner_task = PypeTask(
                inputs={"rdb_build_done": rdb_build_done},
                outputs={"job_done": job_done},
                parameters=parameters,
                TaskType=PypeThreadTaskBase,
                URL="task://localhost/d_%s_%s" % (job_uid, db_prefix),
            )
            tasks.append(make_daligner_task(run_daligner))
            tasks_out["ajob_%s" % job_uid] = job_done
    return tasks, tasks_out
开发者ID:aleksandarmihajlovic,项目名称:FALCON,代码行数:60,代码来源:run.py
示例13: run_falcon_asm_task
def run_falcon_asm_task(self):
    """Write ``run_falcon_asm.sh`` in ``wd`` and submit it.

    The script runs ``DB2Falcon -U preads`` in the pread directory, lists the
    ``.las`` files into ``las.fofn``, then runs ``fc_ovlp_filter.py``,
    ``fc_ovlp_to_graph.py`` and ``fc_graph_to_contig.py`` in ``wd`` and
    touches ``self.falcon_asm_done``; an EXIT trap touches
    ``<falcon_asm_done>.exit`` whenever the shell exits.

    NOTE(review): ``db_file`` is not bound inside this function; it is
    presumably provided by the enclosing or module scope -- confirm.
    """
    params = self.parameters
    wd = params["wd"]
    config = params["config"]
    prefix = config["install_prefix"]
    pread_dir = params["pread_dir"]
    script_fn = os.path.join(os.path.join(wd), "run_falcon_asm.sh")

    lines = [
        "set -vex",
        "trap 'touch %s.exit' EXIT" % fn(self.falcon_asm_done),
        "source {install_prefix}/bin/activate".format(install_prefix=prefix),
        "cd %s" % pread_dir,
        # Writes preads4falcon.fasta into the pread directory.
        "DB2Falcon -U preads",
        "cd %s" % wd,
        """find %s/las_files -name "*.las" > las.fofn """ % pread_dir,
    ]
    filter_opts = config["overlap_filtering_setting"]
    min_len = config["length_cutoff_pr"]
    lines += [
        """fc_ovlp_filter.py --db %s --fofn las.fofn %s --min_len %d > preads.ovl""" % (fn(db_file), filter_opts, min_len),
        "ln -sf %s/preads4falcon.fasta ." % pread_dir,
        # TODO: drop this logfile
        """fc_ovlp_to_graph.py preads.ovl --min_len %d > fc_ovlp_to_graph.log""" % min_len,
        # Writes 'p_ctg.fa' and 'a_ctg.fa'.
        """fc_graph_to_contig.py""",
        """touch %s""" % fn(self.falcon_asm_done),
    ]

    with open(script_fn, "w") as out:
        out.write("\n".join(lines))

    job_data = make_job_data(self.URL, script_fn)
    job_data["sge_option"] = config["sge_option_fc"]
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_data['job_name'])
开发者ID:evolvedmicrobe,项目名称:FALCON,代码行数:34,代码来源:run.py
示例14: build_pdb
def build_pdb(self):
    """Write ``prepare_pdb.sh`` for the ``preads`` database and submit it.

    The script loads the reads listed in the pread fofn with ``fasta2DB``,
    runs ``DBsplit`` and ``HPCdaligner`` (output redirected to
    ``run_jobs.sh``) and touches the ``pdb_build_done`` marker; an EXIT trap
    touches ``<pdb_build_done>.exit`` whenever the shell exits.
    """
    params = self.parameters
    fofn_path = fn(self.pread_fofn)
    done_marker = self.pdb_build_done
    work_dir = params["work_dir"]
    config = params["config"]
    sge_opts = config["sge_option_pda"]
    prefix = config["install_prefix"]
    min_len = config["length_cutoff_pr"]
    daligner_opts = config["ovlp_HPCdaligner_option"]
    split_opts = config["ovlp_DBsplit_option"]

    script_fn = os.path.join(work_dir, "prepare_pdb.sh")
    with open(script_fn, "w") as out:
        out.write("set -vex\n")
        out.write("trap 'touch {pdb_build_done}.exit' EXIT\n".format(pdb_build_done=fn(done_marker)))
        out.write("source {install_prefix}/bin/activate\n".format(install_prefix=prefix))
        out.write("cd {work_dir}\n".format(work_dir=work_dir))
        out.write("hostname\n")
        out.write("date\n")
        out.write("fasta2DB -v preads -f{input_fofn_fn}\n".format(input_fofn_fn=fofn_path))
        out.write("DBsplit -x%d %s preads\n" % (min_len, split_opts))
        out.write("HPCdaligner %s -H%d preads > run_jobs.sh\n" % (daligner_opts, min_len))
        out.write("touch {pdb_build_done}\n".format(pdb_build_done=fn(done_marker)))

    job_data = make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_opts
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(done_marker), task=self, job_name=job_data['job_name'])
开发者ID:evolvedmicrobe,项目名称:FALCON,代码行数:32,代码来源:run.py
示例15: split_fofn_task
def split_fofn_task(self):
    """Split the query and target FASTA fofns into chunks and touch the done marker.

    ``split_fofn`` is called first for the query fofn (prefix ``"query"``,
    chunk size ``q_chunk_size``) and then for the target fofn (prefix
    ``"target"``, chunk size ``t_chunk_size``), both writing into
    ``dist_map_dir``.
    """
    chunk_cfg = self.parameters["config"]
    query_chunk_size = chunk_cfg["q_chunk_size"]
    target_chunk_size = self.parameters["config"]["t_chunk_size"]

    split_fofn(
        fn(self.query_fa_fofn),
        self.parameters["dist_map_dir"],
        "query",
        query_chunk_size,
        incremental=True,
        allow_fraction=True,
    )
    split_fofn(
        fn(self.target_fa_fofn),
        self.parameters["dist_map_dir"],
        "target",
        target_chunk_size,
        incremental=True,
        allow_fraction=True,
    )
    os.system("touch %s" % fn(self.split_fofn_done))
开发者ID:chad388,项目名称:HBAR-DTK,代码行数:9,代码来源:HBAR_WF2.py
示例16: h5fofn_to_fasta
def h5fofn_to_fasta(self):
    """Dump FASTA files with ``h5fofn_to_fasta.py`` and build the target/query fofns.

    After the dump, the sorted ``*_t.fa`` paths are written to the target
    fofn, the sorted ``*_q.fa`` paths to the query fofn, and the
    ``fasta_dump_done`` marker is touched.  All steps run through
    ``os.system``; their exit statuses are not checked.
    """
    # Note: the configured "min_length" fills --min_seed_length; --min_length
    # itself is fixed at 500 in the command string.
    dump_cmd = "h5fofn_to_fasta.py %s %s --min_length 500 --min_seed_length %d --min_read_score %f" % (
        fn(self.input_fofn),
        self.parameters["fasta_dir"],
        self.parameters["min_length"],
        self.parameters["min_read_score"],
    )
    os.system(dump_cmd)

    target_cmd = """find %s -name "*_t.fa" | sort > %s""" % (self.parameters["fasta_dir"], fn(self.target_fa_fofn))
    os.system(target_cmd)

    query_cmd = """find %s -name "*_q.fa" | sort > %s""" % (self.parameters["fasta_dir"], fn(self.query_fa_fofn))
    os.system(query_cmd)

    os.system("touch %s" % fn(self.fasta_dump_done))
开发者ID:chad388,项目名称:HBAR-DTK,代码行数:9,代码来源:HBAR_WF2.py
示例17: get_phased_reads
def get_phased_reads(self):
    """Assign reads to a phase of each phasing block and write the result file.

    Inputs (all resolved with ``fn``):
      * ``self.q_id_map_file``: lines of ``<int read id> <read name>``.
      * ``self.vmap_file``: the first three columns joined by ``_`` form the
        variant key, the fourth column is the integer read id.
      * ``self.phased_variant_file``: only lines starting with ``V`` are
        used; column 2 is the block id, columns 4 and 5 are the variant keys
        of phase 0 and phase 1.

    For every read and every block it touches, the variants supporting each
    phase are counted; a line is written to ``self.phased_read_file`` only
    when one phase leads the other by more than one.  Output columns:
    read id, contig id, block id, phase, phase-0 count, phase-1 count, read
    name.
    """
    q_id_map_fn = fn(self.q_id_map_file)
    vmap_fn = fn(self.vmap_file)
    p_variant_fn = fn(self.phased_variant_file)

    # Was a bare ``parameters[...]``, which is not defined in this function;
    # task parameters are read from ``self.parameters``.
    ctg_id = self.parameters["ctg_id"]

    phased_read_fn = fn(self.phased_read_file)

    rid_map = {}
    with open(q_id_map_fn) as f:
        for l in f:
            l = l.strip().split()
            rid_map[int(l[0])] = l[1]

    read_to_variants = {}
    variant_to_reads = {}
    with open(vmap_fn) as f:
        for l in f:
            l = l.strip().split()
            variant = "_".join(l[:3])
            read_id = int(l[3])
            read_to_variants.setdefault(read_id, set()).add(variant)
            variant_to_reads.setdefault(variant, set()).add(read_id)

    variant_to_phase = {}
    with open(p_variant_fn) as f:
        for l in f:
            # line format example: V 1 6854 6854_A_A 6854_A_G 6854 22781
            l = l.strip().split()
            if l[0] != "V":
                continue
            pb_id = int(l[1])
            variant_to_phase[l[3]] = (pb_id, 0)
            variant_to_phase[l[4]] = (pb_id, 1)

    with open(phased_read_fn, "w") as out_f:
        for r in read_to_variants:
            vl = {}      # (block id, phase) -> number of supporting variants
            pl = set()   # block ids seen for this read
            for v in read_to_variants[r]:
                if v in variant_to_phase:
                    p = variant_to_phase[v]
                    vl[p] = vl.get(p, 0) + 1
                    pl.add(p[0])
            for p in sorted(pl):
                c0 = vl.get((p, 0), 0)
                c1 = vl.get((p, 1), 0)
                if c0 - c1 > 1:
                    phase = 0
                elif c1 - c0 > 1:
                    phase = 1
                else:
                    # Ambiguous support: the read is not reported for this block.
                    continue
                fields = (r, ctg_id, p, phase, c0, c1, rid_map[r])
                out_f.write(" ".join(str(x) for x in fields) + "\n")
开发者ID:pb-jchin,项目名称:FALCON_unzip,代码行数:57,代码来源:fc_phasing.py
示例18: build_rdb
def build_rdb(self):
    """Write ``prepare_db.sh`` to create or extend the ``raw_reads`` database and submit it.

    If ``<work_dir>/raw_reads.db`` already exists, its ``blocks = N`` line
    supplies the first block number passed to ``HPCdaligner`` and ``DBsplit``
    is skipped; otherwise the database is treated as new and the first block
    is 1.  The script computes the last block ``LB`` from ``raw_reads.db``
    (minus one when ``config["openending"]`` is ``True``), writes the
    daligner jobs to ``run_jobs.sh`` and touches the ``rdb_build_done``
    marker.
    """
    input_fofn = self.input_fofn
    input_fofn_fn = fn(input_fofn)
    rdb_build_done = self.rdb_build_done
    work_dir = self.parameters["work_dir"]
    config = self.parameters["config"]
    sge_option_da = config["sge_option_da"]
    install_prefix = config["install_prefix"]
    length_cutoff = config["length_cutoff"]
    pa_HPCdaligner_option = config["pa_HPCdaligner_option"]
    pa_DBsplit_option = config["pa_DBsplit_option"]
    openending = config["openending"]

    script_fn = os.path.join(work_dir, "prepare_db.sh")

    last_block = 1
    new_db = True
    db_path = os.path.join(work_dir, "raw_reads.db")
    if os.path.exists(db_path):
        with open(db_path) as f:
            for line in f:
                fields = line.strip().split()
                # Blank or short lines used to raise IndexError here.
                if len(fields) >= 3 and fields[0] == "blocks" and fields[1] == "=":
                    last_block = int(fields[2])
                    new_db = False
                    break

    with open(script_fn, "w") as script_file:
        script_file.write("source {install_prefix}/bin/activate\n".format(install_prefix=install_prefix))
        script_file.write("cd {work_dir}\n".format(work_dir=work_dir))
        script_file.write("hostname >> db_build.log\n")
        script_file.write("date >> db_build.log\n")
        script_file.write("for f in `cat {input_fofn_fn}`; do fasta2DB raw_reads $f; done >> db_build.log \n".format(input_fofn_fn=input_fofn_fn))
        if new_db == True:
            script_file.write("DBsplit %s raw_reads\n" % pa_DBsplit_option)
        # LB is always defined because the HPCdaligner line below uses $LB.
        if openending == True:
            script_file.write("""LB=$(cat raw_reads.db | awk '$1 == "blocks" {print $3-1}')\n""")
        else:
            script_file.write("""LB=$(cat raw_reads.db | awk '$1 == "blocks" {print $3}')\n""")
        script_file.write("HPCdaligner %s -H%d raw_reads %d-$LB > run_jobs.sh\n" % (pa_HPCdaligner_option, length_cutoff, last_block))
        script_file.write("touch {rdb_build_done}\n".format(rdb_build_done=fn(rdb_build_done)))

    job_name = self.URL.split("/")[-1]
    job_name += "-" + str(uuid.uuid4())[:8]
    job_data = {"job_name": job_name,
                "cwd": os.getcwd(),
                "sge_option": sge_option_da,
                "script_fn": script_fn}
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(fn(rdb_build_done), task=self, job_name=job_name)
开发者ID:webste01,项目名称:EMMC-Assembler,代码行数:56,代码来源:fc_run_local_fixed.py
示例19: task_run_quiver
def task_run_quiver(self):
    """Write ``cns_<ctg_id>.sh`` running the Quiver consensus for one contig and submit it.

    The script indexes the reference with ``samtools faidx``, converts the
    read SAM to BAM, aligns it with ``pbalign``, runs ``variantCaller`` to
    produce the consensus FASTA/FASTQ (a failure there only echoes a
    message) and touches the ``job_done`` marker; an EXIT trap touches
    ``<job_done>.exit`` whenever the shell exits.
    """
    ref_fasta = fn(self.ref_fasta)
    read_sam = fn(self.read_sam)
    cns_fasta = fn(self.cns_fasta)
    cns_fastq = fn(self.cns_fastq)
    job_done = fn(self.job_done)

    wd = self.parameters["wd"]
    config = self.parameters["config"]
    ctg_id = self.parameters["ctg_id"]

    smrt_bin = config["smrt_bin"]
    sge_quiver = config["sge_quiver"]
    job_type = config["job_type"]
    samtools = os.path.join(smrt_bin, "samtools")
    pbalign = os.path.join(smrt_bin, "pbalign")
    makePbi = os.path.join(smrt_bin, "makePbi")
    variantCaller = os.path.join(smrt_bin, "variantCaller")

    script_dir = os.path.join(wd)
    script_fn = os.path.join(script_dir, "cns_%s.sh" % (ctg_id))

    # The long commands are assembled from adjacent string literals, each
    # continuation starting with an explicit space.  Backslash continuations
    # inside a string literal depend on the next line's indentation for the
    # separator and otherwise fuse arguments
    # (e.g. "--minLength=50--minAnchorSize=12").
    pbalign_cmd = (
        "{pbalign} --tmpDir=/localdisk/scratch/ --nproc=24 --minAccuracy=0.75 --minLength=50"
        " --minAnchorSize=12 --maxDivergence=30 --concordant --algorithm=blasr"
        " --algorithmOptions=-useQuality --maxHits=1 --hitPolicy=random --seed=1"
        " {ctg_id}.bam {ref_fasta} aln-{ctg_id}.bam"
    ).format(pbalign=pbalign, ctg_id=ctg_id, ref_fasta=ref_fasta)
    quiver_cmd = (
        "({variantCaller} -x 5 -X 120 -q 20 -j 24 -r {ref_fasta} aln-{ctg_id}.bam"
        " -o {cns_fasta} -o {cns_fastq}) || echo quvier failed"
    ).format(variantCaller=variantCaller, ctg_id=ctg_id, ref_fasta=ref_fasta,
             cns_fasta=cns_fasta, cns_fastq=cns_fastq)

    script = []
    script.append("set -vex")
    script.append("trap 'touch {job_done}.exit' EXIT".format(job_done=job_done))
    script.append("cd %s" % wd)
    script.append("hostname")
    script.append("date")
    script.append("cd {wd}".format(wd=wd))
    script.append("{samtools} faidx {ref_fasta}".format(samtools=samtools, ref_fasta=ref_fasta))
    script.append("{samtools} view -b -S {read_sam} > {ctg_id}.bam".format(samtools=samtools, read_sam=read_sam, ctg_id=ctg_id))
    script.append(pbalign_cmd)
    # Emitted as a shell comment: the makePbi step stays disabled in the script.
    script.append("#{makePbi} --referenceFasta {ref_fasta} aln-{ctg_id}.bam".format(makePbi=makePbi, ref_fasta=ref_fasta, ctg_id=ctg_id))
    script.append(quiver_cmd)
    script.append("date")
    script.append("touch {job_done}".format(job_done=job_done))

    with open(script_fn, "w") as script_file:
        script_file.write("\n".join(script) + '\n')

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_quiver
    run_script(job_data, job_type=job_type)
    wait_for_file(job_done, task=self, job_name=job_data['job_name'])
开发者ID:pb-jchin,项目名称:FALCON_unzip,代码行数:54,代码来源:fc_quiver.py
示例20: prepare_seed_reads
def prepare_seed_reads(self):
    """Copy reads longer than ``length_cutoff`` into the seed FASTA file.

    Records are read from ``self.normalized_fasta``; each kept record is
    written as a ``>name`` header line followed by its upper-cased sequence
    on a single line.
    """
    min_len = self.config["length_cutoff"]
    reader = pbcore.io.FastaReader(fn(self.normalized_fasta))
    with open(fn(self.seed_fasta), 'w') as seed_out:
        for record in reader:
            if not len(record.sequence) > min_len:
                continue
            seed_out.write(">%s\n" % record.name)
            seed_out.write("%s\n" % record.sequence.upper())
开发者ID:lhon,项目名称:HBAR-DTK,代码行数:11,代码来源:HBAR_WF.py
注：本文中的pypeflow.data.fn函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论