本文整理汇总了Python中scilifelab.db.statusdb.ProjectSummaryConnection类的典型用法代码示例。如果您正苦于以下问题:Python ProjectSummaryConnection类的具体用法?Python ProjectSummaryConnection怎么用?Python ProjectSummaryConnection使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了ProjectSummaryConnection类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_dbcon
def test_dbcon(self):
    """Test database connection and that we get expected values.

    For each of the samples, flowcells and projects test databases, an
    entry is fetched for every item of the connection's ``name_view`` and
    individual fields of selected documents are checked.
    """
    s_con = SampleRunMetricsConnection(dbname="samples-test", username="u", password="p")
    samples = [s_con.get_entry(x) for x in s_con.name_view]
    samples_d = {x["name"]: x for x in samples}
    self.assertEqual(samples_d["1_120924_AC003CCCXX_TGACCA"]["date"], "120924")
    self.assertEqual(samples_d["1_121015_BB002BBBXX_TGACCA"]["flowcell"], "BB002BBBXX")
    self.assertEqual(samples_d["2_120924_AC003CCCXX_ACAGTG"]["entity_type"], "sample_run_metrics")
    self.assertEqual(samples_d["3_120924_AC003CCCXX_ACAGTG"]["lane"], "3")
    self.assertEqual(samples_d["4_120924_AC003CCCXX_CGTTAA"]["sequence"], "CGTTAA")
    self.assertEqual(samples_d["2_121015_BB002BBBXX_TGACCA"]["project_id"], "P002")

    fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="u", password="p")
    flowcells = [fc_con.get_entry(x) for x in fc_con.name_view]
    flowcells_d = {x["name"]: x for x in flowcells}
    self.assertEqual(flowcells_d["120924_AC003CCCXX"]["name"], "120924_AC003CCCXX")
    self.assertEqual(flowcells_d["121015_BB002BBBXX"]["name"], "121015_BB002BBBXX")
    self.assertEqual(flowcells_d["120924_AC003CCCXX"]["entity_type"], "flowcell_run_metrics")

    p_con = ProjectSummaryConnection(dbname="projects-test", username="u", password="p")
    projects = [p_con.get_entry(x) for x in p_con.name_view]
    projects_d = {x["project_name"]: x for x in projects}
    self.assertEqual(projects_d["J.Doe_00_01"]["min_m_reads_per_sample_ordered"], 0.1)
    self.assertEqual(projects_d["J.Doe_00_01"]["no_of_samples"], 2)
    self.assertEqual(
        set(projects_d["J.Doe_00_01"]["samples"].keys()), {"P001_101_index3", "P001_102", "P001_103"}
    )
    self.assertEqual(projects_d["J.Doe_00_01"]["customer_reference"], "GnuGenome")
    self.assertEqual(projects_d["J.Doe_00_02"]["min_m_reads_per_sample_ordered"], 0.2)
    # list() keeps the comparison valid where keys() returns a view
    # object instead of a list (a view never compares equal to a list).
    self.assertEqual(list(projects_d["J.Doe_00_03"]["samples"].keys()), ["3_index6"])
    self.assertIn("A", projects_d["J.Doe_00_03"]["samples"]["3_index6"]["library_prep"])
开发者ID:pombredanne,项目名称:scilifelab,代码行数:29,代码来源:test_db.py
示例2: setUpModule
def setUpModule():
    """Create test databases in local server.

    Does nothing when ``has_couchdb`` is false. Otherwise it creates every
    database in ``DATABASES`` that the server does not contain yet, syncs
    the view definitions from ``VIEWS`` into each database, and saves one
    ``ProjectSummaryDocument`` per entry of ``project_summary.yaml`` to the
    projects test database.
    """
    if not has_couchdb:
        return
    server = couchdb.Server()

    ## Create databases
    for x in DATABASES:
        if x not in server:
            LOG.info("Creating database {}".format(x))
            server.create(x)

    ## Create views for flowcells and samples
    for dbname in DATABASES:
        # VIEWS is keyed by the database label without the "-test" suffix
        dblab = dbname.replace("-test", "")
        db = server[dbname]
        for k, v in VIEWS[dblab].items():
            for title, view in v.items():
                viewdef = ViewDefinition(k, title, view)
                viewdef.sync(db)

    ## Create and upload project summary
    with open(os.path.join(filedir, "data", "config", "project_summary.yaml")) as fh:
        # NOTE(review): yaml.load can construct arbitrary objects; this reads
        # a fixture shipped with the tests. Consider yaml.safe_load if the
        # fixture contains only plain data.
        prj_sum = yaml.load(fh)
    p_con = ProjectSummaryConnection(dbname="projects-test", username="u", password="p")
    for p in prj_sum:
        prj = ProjectSummaryDocument(**p)
        p_con.save(prj, key="project_name")
开发者ID:pombredanne,项目名称:scilifelab,代码行数:27,代码来源:test_db.py
示例3: test_2_make_note
def test_2_make_note(self):
    """Make a note subset by example flowcell and project.

    One PDF note is written per sample run returned by
    ``map_srm_to_name`` for the example project and flowcell.
    """
    s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
    paragraphs = sample_note_paragraphs()
    headers = sample_note_headers()
    project = p_con.get_entry(self.examples["project"])
    samples = p_con.map_srm_to_name(self.examples["project"], fc_id=self.examples["flowcell"], use_bc_map=True)
    for k, v in samples.items():
        # Work on a copy: binding the shared ``parameters`` dict directly
        # would carry one sample's values (and "N/A" fill-ins) into the next.
        s_param = dict(parameters)
        s = s_con.get_entry(k)
        s_param.update({key: s[srm_to_parameter[key]] for key in srm_to_parameter})
        fc = "{}_{}".format(s["date"], s["flowcell"])
        s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
        s_param['avg_quality_score'] = s_con.calc_avg_qv(s["name"])
        s_param['rounded_read_count'] = round(float(s_param['rounded_read_count'])/1e6, 1) if s_param['rounded_read_count'] else None
        if project:
            # Only index into the project document once we know it exists
            s_param['customer_name'] = project['samples'][v["sample"]].get('customer_name', None)
            s_param['ordered_amount'] = p_con.get_ordered_amount(self.examples["project"])
            s_param['customer_reference'] = s_param.get('customer_reference', project['customer_reference'])
            s_param['uppnex_project_id'] = s_param.get('uppnex_project_id', project['uppnex_id'])
        else:
            s_param['customer_name'] = None
        s_param['success'] = sequencing_success(s_param, cutoffs)
        # Anything still unset is rendered as "N/A" in the note
        s_param.update({key: "N/A" for key in s_param if s_param[key] is None})
        make_note("{}.pdf".format(s["barcode_name"]), headers, paragraphs, **s_param)
开发者ID:hussius,项目名称:scilifelab,代码行数:27,代码来源:test_sample_delivery_note.py
示例4: list_projects
def list_projects(self):
    """Write a flowcell's run info and its projects' applications to stdout.

    The first output row holds the flowcell argument, instrument type and
    run mode; each following row holds a project name and its application
    ("N/A" when no project document is found). Rows are tab-separated and
    joined by newlines. Returns early, after logging a warning, when a
    required argument, the url, the flowcell document or the samplesheet
    data is missing.
    """
    if not self._check_pargs(["flowcell"]):
        return
    url = self.pargs.url or self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return
    flowcell = self.pargs.flowcell
    if not validate_fc_directory_format(flowcell):
        self.app.log.warn(
            "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(flowcell)
        )
        return

    # Flowcell id is the first and last underscore-separated field
    name_parts = flowcell.split("_")
    fcid = "_".join([name_parts[0], name_parts[-1]])

    db_kwargs = vars(self.app.pargs)
    self.log.debug("Establishing FlowcellRunMetricsConnection")
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **db_kwargs)
    self.log.debug("Establishing ProjectSummaryConnection")
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **db_kwargs)

    self.log.debug("Fetching flowcell metric document for flowcell {}".format(fcid))
    fc_doc = fc_con.get_entry(fcid)
    if fc_doc is None:
        self.log.warn("No flowcell metric document for flowcell {}".format(fcid))
        return

    self.log.debug("Fetching csv samplesheet data for flowcell {}".format(fcid))
    ssheet_data = self._get_samplesheet_sample_data(fc_doc)
    if len(ssheet_data) == 0:
        self.log.warn("No csv samplesheet data for flowcell {}".format(fcid))
        return

    self.log.debug("Fetch runParameter data for flowcell {}".format(fcid))
    run_data = self._get_run_parameter_data(fc_doc)
    if len(run_data) == 0:
        # Not fatal: the header row falls back to default values below
        self.log.warn("No runParameter data for flowcell {}".format(fcid))
    rows = [
        [flowcell, run_data.get("InstrumentType", "HiSeq2000"), run_data.get("RunMode", "High Output")]
    ]

    # Extract the project names
    project_names = {
        entry[0].replace("__", ".")
        for lane_data in ssheet_data.values()
        for entry in lane_data.values()
    }

    # Extract application for each project
    for project_name in project_names:
        self.log.debug("Fetching project data document for project {}".format(project_name))
        pdoc = p_con.get_entry(project_name)
        if pdoc is None:
            self.log.warn("No project data document for project {}".format(project_name))
            application = "N/A"
        else:
            application = pdoc.get("application", "N/A")
        rows.append([project_name, application])

    text = "\n".join("\t".join(str(cell) for cell in row) for row in rows)
    self.app._output_data["stdout"].write(text)
开发者ID:vals,项目名称:scilifelab,代码行数:59,代码来源:ext_qc.py
示例5: bcbb_configuration_from_samplesheet
def bcbb_configuration_from_samplesheet(csv_samplesheet, couch_credentials):
    """Parse an illumina csv-samplesheet and return a dictionary suitable for the bcbb-pipeline.

    The samplesheet is converted to yaml through a temporary file, and each
    multiplex entry of the loaded configuration is updated with the analysis
    settings that match the application of its project in the project
    database. Entries whose application cannot be determined get the
    'default' settings.

    :param csv_samplesheet: samplesheet handed to bcbio's ``csv2yaml``
    :param couch_credentials: keyword arguments for ``ProjectSummaryConnection``
    :returns: the configuration loaded from the converted samplesheet
    """
    tfh, yaml_file = tempfile.mkstemp('.yaml', 'samplesheet')
    os.close(tfh)
    try:
        yaml_file = bcbio.solexa.samplesheet.csv2yaml(csv_samplesheet, yaml_file)
        with open(yaml_file) as fh:
            # NOTE(review): yaml.load can construct arbitrary objects; the
            # input here is the file csv2yaml just wrote. Consider
            # yaml.safe_load if that output is plain data.
            config = yaml.load(fh)
    finally:
        # Remove the yaml file, we will write a new one later. Doing it here
        # also cleans up when the conversion or the load fails.
        if os.path.exists(yaml_file):
            os.remove(yaml_file)

    application_setup = {
        'Amplicon': {'analysis': 'Align_standard'},
        'ChIP-seq': {'analysis': 'RNA-seq'},
        'Custom capture': {'analysis': 'Align_standard_seqcap'},
        'de novo': {'analysis': 'Align_standard',
                    'genome_build': 'unknown'},
        'Exome capture': {'analysis': 'Align_standard_seqcap'},
        'Finished library': {'analysis': 'Align_standard',
                             'genome_build': 'unknown'},
        'Mate-pair': {'analysis': 'Align_standard',
                      'genome_build': 'unknown'},
        'Metagenome': {'analysis': 'Align_standard',
                       'genome_build': 'unknown'},
        'miRNA-seq': {'analysis': 'Align_standard',
                      'genome_build': 'unknown'},
        'RNA-seq (mRNA)': {'analysis': 'RNA-seq'},
        'RNA-seq (total RNA)': {'analysis': 'RNA-seq'},
        'WG re-seq': {'analysis': 'Align_standard'},
        'default': {'analysis': 'Align_standard'},
    }

    # Connect to maggie to get project application
    try:
        p_con = ProjectSummaryConnection(**couch_credentials)
    except Exception:
        # Best effort: without a connection every entry gets the default setup
        print("Can't connect to maggie to get application")
        p_con = None

    # Replace the default analysis
    ## TODO: This is an ugly hack, should be replaced by a custom config
    for lane in config:
        for plex in lane.get('multiplex', []):
            application = 'default'
            if p_con is not None:
                try:
                    project = p_con.get_entry(plex.get('sample_prj', ''))
                    if project is not None:
                        application = project.get("application", 'default').strip()
                except Exception:
                    # Lookup problems (including a non-string application)
                    # fall back to the default setup
                    application = 'default'
            setup = application_setup.get(application, application_setup['default'])
            plex.update(setup)

    return config
开发者ID:percyfal,项目名称:scilifelab,代码行数:58,代码来源:run_bcbb_pipeline.py
示例6: update
def update(self):
    """Update sample run documents of a project with project information.

    With ``project_id`` set, that id is written to every sample run of
    ``sample_prj``. With ``names`` set (a json file path or a literal
    mapping of barcode name to project sample name), the mapped names are
    written to the matching sample runs; otherwise names are looked up in
    the project database using extensive matching. Existing values are only
    overwritten after confirmation through ``query_yes_no``.
    """
    if not self._check_pargs(["sample_prj"]):
        return
    url = self.pargs.url or self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return

    s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
    samples = s_con.get_samples(sample_prj=self.pargs.sample_prj)

    if self.pargs.project_id:
        self.app.log.debug("Going to update 'project_id' to {} for sample runs with 'sample_prj' == {}".format(self.pargs.project_id, self.pargs.sample_prj))
        for sample in samples:
            if sample.get("project_id", None) is not None and not query_yes_no("'project_id':{} for sample {}; are you sure you want to overwrite?".format(sample["project_id"], sample["name"]), force=self.pargs.force):
                continue
            sample["project_id"] = self.pargs.project_id
            s_con.save(sample)

    if self.pargs.names:
        self.app.log.debug("Going to update 'project_sample_name' for sample runs with 'sample_prj' == {}".format(self.pargs.sample_prj))
        # ``names`` is either a path to a json file or a literal mapping
        if os.path.exists(self.pargs.names):
            with open(self.pargs.names) as fh:
                names_d = json.load(fh)
        else:
            names_d = ast.literal_eval(self.pargs.names)

        def barcode_of(doc):
            return doc["barcode_name"]

        # groupby only merges adjacent items, hence the sort on the same key
        groups = {
            barcode: list(members)
            for barcode, members in itertools.groupby(sorted(samples, key=barcode_of), key=barcode_of)
        }
        for barcode_name in names_d:
            sample_list = groups.get(barcode_name, None)
            if not sample_list:
                continue
            for sample in sample_list:
                if sample.get("project_sample_name", None) is not None and not query_yes_no("'project_sample_name':{} for sample {}; are you sure you want to overwrite?".format(sample["project_sample_name"], sample["name"]), force=self.pargs.force):
                    continue
                sample["project_sample_name"] = names_d[barcode_name]
                s_con.save(sample)
    else:
        self.app.log.info("Trying to use extensive matching...")
        p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
        # A project alias, when given, takes precedence over sample_prj
        project_name = self.pargs.project_alias or self.pargs.sample_prj
        for sample in samples:
            project_sample = p_con.get_project_sample(project_name, sample["barcode_name"], extensive_matching=True)
            if not project_sample:
                continue
            self.app.log.info("using mapping '{} : {}'...".format(sample["barcode_name"], project_sample["sample_name"]))
            sample["project_sample_name"] = project_sample["sample_name"]
            s_con.save(sample)
开发者ID:emmser,项目名称:scilifelab,代码行数:52,代码来源:ext_qc.py
示例7: __init__
def __init__(self, config):
    """Connect to the Google Docs QC checklist and the StatusDB project database.

    Reads the "gdocs" and "statusdb" sections of the configuration. Asserts
    that the credentials file, the QC checklist spreadsheet and its
    'Ongoing', 'Coming' and 'Finished' worksheets are available. If the
    project database connection raises ``ConnectionError``, ``self.pcon``
    is set to None.

    :param config: configuration passed on to the parent initializer
    """
    super(GDocsUpdater, self).__init__(config)

    # Connect to the Google Docs api
    gdocs_cfg = self.config.get("gdocs", {})
    credentials_path = os.path.expanduser(gdocs_cfg.get("credentials_file", ""))
    assert os.path.exists(credentials_path), "Supplied GDocs credentials file does not exist"
    self.gdcon = SpreadSheet(get_credentials(credentials_path))
    assert self.gdcon, "Could not get a SpreadSheet object, please verify gdocs credentials"

    checklist = gdocs_cfg.get("qc_checklist", None)
    assert checklist, "No QC checklist specified in configuration, please specify"
    spreadsheet = self.gdcon.get_spreadsheet(checklist)
    assert spreadsheet, "Could not locate QC checklist '{}' on Google Docs. Please make sure it exists".format(checklist)
    self.gdcon.ssheet = spreadsheet

    # Get the Ongoing, Finished and Coming worksheets
    self.ongoing = self.gdcon.get_worksheet("Ongoing")
    self.coming = self.gdcon.get_worksheet("Coming")
    self.finished = self.gdcon.get_worksheet("Finished")
    assert self.ongoing and self.coming and self.finished, "Could not get 'Ongoing', 'Finished' and 'Coming' worksheets from '{}'. Please make sure that they exist".format(checklist)

    # Get a connection to the StatusDB project database
    statusdb_cfg = self.config.get("statusdb", {})
    try:
        connection_kwargs = {
            "url": statusdb_cfg.get("url", "localhost"),
            "username": statusdb_cfg.get("user", "user"),
            "password": statusdb_cfg.get("password", "pass"),
        }
        self.pcon = ProjectSummaryConnection(**connection_kwargs)
    except ConnectionError:
        self.pcon = None
开发者ID:mayabrandi,项目名称:hugin,代码行数:29,代码来源:gdocs_updater.py
示例8: setUp
def setUp(self):
    """Store connection settings and example ids, and open the projects test database."""
    self.user, self.pw, self.url = "user", "pw", "localhost"
    self.examples = {
        "sample": "1_120924_AC003CCCXX_TGACCA",
        "flowcell": "AC003CCCXX",
        "project": "J.Doe_00_01",
    }
    connection_kwargs = {
        "dbname": "projects-test",
        "username": self.user,
        "password": self.pw,
        "url": self.url,
    }
    self.p_con = ProjectSummaryConnection(**connection_kwargs)
开发者ID:pombredanne,项目名称:scilifelab,代码行数:8,代码来源:test_db.py
示例9: upload_qc
def upload_qc(self):
    """Collect qc objects for a flowcell and save them to the databases.

    Flowcells dated before 120815 are collected with the pre-casava
    collector, later ones with the casava collector. Flowcell documents are
    saved through the flowcell connection; sample documents get their
    ``project_sample_name`` set from the project database when a match is
    found and are saved through the sample connection. Returns early, after
    logging, when an argument is missing or invalid or nothing was collected.
    """
    if not self._check_pargs(["flowcell"]):
        return
    url = self.pargs.url or self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return
    if not validate_fc_directory_format(self.pargs.flowcell):
        self.app.log.warn(
            "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(self.pargs.flowcell)
        )
        return

    runinfo_csv = os.path.join(os.path.abspath(self.pargs.flowcell), "{}.csv".format(fc_id(self.pargs.flowcell)))
    runinfo_yaml = os.path.join(os.path.abspath(self.pargs.flowcell), "run_info.yaml")
    fc_date, fc_name = fc_parts(self.pargs.flowcell)

    # The run date decides which file layout the collector expects
    if int(fc_date) < 120815:
        self.log.info("Assuming pre-casava based file structure for {}".format(fc_id(self.pargs.flowcell)))
        qc_objects = self._collect_pre_casava_qc()
    else:
        self.log.info("Assuming casava based file structure for {}".format(fc_id(self.pargs.flowcell)))
        qc_objects = self._collect_casava_qc()

    if len(qc_objects) == 0:
        self.log.info("No out-of-date qc objects for {}".format(fc_id(self.pargs.flowcell)))
        return
    self.log.info("Retrieved {} updated qc objects".format(len(qc_objects)))

    db_kwargs = vars(self.app.pargs)
    s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **db_kwargs)
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **db_kwargs)
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **db_kwargs)

    for qc_obj in qc_objects:
        if self.app.pargs.debug:
            self.log.debug("{}: {}".format(str(qc_obj), qc_obj["_id"]))
        if isinstance(qc_obj, FlowcellRunMetricsDocument):
            # NOTE(review): save() is evaluated before dry() is called, so
            # dry() receives its return value rather than a callable --
            # confirm that this matches what dry() expects.
            dry("Saving object {}".format(repr(qc_obj)), fc_con.save(qc_obj))
        if isinstance(qc_obj, SampleRunMetricsDocument):
            project_sample = p_con.get_project_sample(
                qc_obj.get("sample_prj", None), qc_obj.get("barcode_name", None), self.pargs.extensive_matching
            )
            if project_sample:
                qc_obj["project_sample_name"] = project_sample["sample_name"]
            # NOTE(review): same eager save() as for flowcell documents above
            dry("Saving object {}".format(repr(qc_obj)), s_con.save(qc_obj))
开发者ID:vals,项目名称:scilifelab,代码行数:44,代码来源:ext_qc.py
示例10: test_2_make_project_note
def test_2_make_project_note(self):
    """Make a project note subset by flowcell and project.

    Builds one table row per project sample returned by
    ``map_name_to_srm`` (skipping the "Unexpected" entry), removes
    duplicate rows and writes the note to ``<project>.pdf``. Returns
    without writing anything when the example project is not found.
    """
    s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # Copy so that this test does not modify the shared ``parameters`` dict
    param = dict(parameters)
    project = p_con.get_entry(self.examples["project"])
    if not project:
        print("No project named {}".format(self.examples["project"]))
        return
    ordered_amount = p_con.get_ordered_amount(self.examples["project"])

    ## Start collecting the data
    sample_table = []
    sample_list = project['samples']
    param.update({key: project.get(ps_to_parameter[key], None) for key in ps_to_parameter})
    samples = p_con.map_name_to_srm(self.examples["project"], check_consistency=True, use_bc_map=True)
    all_passed = True
    for k, v in samples.items():
        if k == "Unexpected":
            continue
        project_sample = sample_list[k]
        vals = {x: project_sample.get(prjs_to_table[x], None) for x in prjs_to_table}
        vals['MOrdered'] = ordered_amount
        # list() because keys() cannot be indexed where it returns a view
        vals['BarcodeSeq'] = s_con.get_entry(list(v.keys())[0], "sequence")
        ## Set status
        vals['Status'] = set_status(vals) if vals['Status'] is None else vals['Status']
        vals.update({key: "N/A" for key in vals if vals[key] is None})
        if vals['Status'] in ("N/A", "NP"):
            all_passed = False
        sample_table.append([vals[key] for key in table_keys])
    if all_passed:
        param["finished"] = 'Project finished.'

    # Sorting puts equal rows next to each other so groupby can drop duplicates
    sample_table.sort()
    sample_table = [row for row, _ in itertools.groupby(sample_table)]
    sample_table.insert(0, ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status'])
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}.pdf".format(self.examples["project"]), headers, paragraphs, **param)
开发者ID:hussius,项目名称:scilifelab,代码行数:43,代码来源:test_project_status_note.py
示例11: application_qc
def application_qc(project_name=None, flowcell=None, application=None,
                   username=None, password=None, url=None,
                   sampledb="samples", projectdb="projects", **kw):
    """Perform application specific qc on a project.

    When the project has a summary document, its application is translated
    through ``APPLICATION_MAP``; if it is not listed there, the
    ``application`` argument is used instead. When there is no summary
    document, qc data is collected from the sample runs alone and the
    ``application`` argument is required.

    :param project_name: project name
    :param flowcell: flowcell identifier
    :param application: application for which to perform qc
    :param username: database username
    :param password: database password
    :param url: database url
    :param sampledb: samples database name
    :param projectdb: project database name
    :returns: dict with 'stdout' and 'stderr' StringIO buffers; one line per
        qc_data entry is written to 'stdout' after the header. Returned
        as created when no application can be determined.
    """
    LOG.debug("Doing application qc for project {}, flowcell {}".format(project_name, flowcell))
    output_data = {'stdout': StringIO(), 'stderr': StringIO()}
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
    s_con = SampleRunMetricsConnection(dbname=sampledb, username=username, password=password, url=url)
    prj_summary = p_con.get_entry(project_name)

    if prj_summary is not None:
        # Fetched only here: a project-based lookup is pointless when the
        # project summary is missing, and one call is enough.
        qc_data = get_qc_data(project_name, p_con, s_con, flowcell)
        prj_application = prj_summary.get("application")
        if prj_application in APPLICATION_MAP:
            application = APPLICATION_MAP[prj_application]
        elif not application:
            # Report the application found in the project summary; the
            # ``application`` argument is empty in this branch.
            LOG.warn("No such application {}. Please use the application option (available choices {})".format(prj_application, ",".join(QC_CUTOFF.keys())))
            return output_data
    else:
        LOG.info("No such project {} in project summary. Trying to get qc data anyway.".format(project_name))
        if not application:
            LOG.warn("No application provided. Please use the application option (available choices {})".format(",".join(QC_CUTOFF.keys())))
            return output_data
        qc_data = _get_sample_qc_data(project_name, application, s_con, flowcell)

    output_data = _qc_info_header(project_name, application, output_data)
    # items() instead of iteritems(), which not every dict provides
    for k, v in sorted(qc_data.items()):
        y = [str(x) for x in assess_qc(v, application)]
        output_data["stdout"].write("".join(y) + "\n")
    return output_data
开发者ID:Galithil,项目名称:scilifelab,代码行数:43,代码来源:qc.py
示例12: test_3_sample_map
def test_3_sample_map(self):
    """Test getting a sample mapping"""
    p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
    sample_map = p_con.map_name_to_srm(self.examples["project"], use_ps_map=False, check_consistency=True)
    # Call form of print: same output for a single argument, and valid
    # where print is a function rather than a statement.
    print(sample_map)
开发者ID:hussius,项目名称:scilifelab,代码行数:5,代码来源:test_project_status_note.py
示例13: test_4_srm_map
def test_4_srm_map(self):
    """Test getting a sample mapping from srm to project samples"""
    p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
    samples = p_con.map_srm_to_name(self.examples["project"], fc_id=self.examples["flowcell"], use_ps_map=False, check_consistency=True)
    # Call form of print: same output for a single argument, and valid
    # where print is a function rather than a statement.
    print(samples)
开发者ID:hussius,项目名称:scilifelab,代码行数:5,代码来源:test_project_status_note.py
示例14: raw_data
def raw_data(self):
if not self._check_pargs(["project"]):
return
# if necessary, reformat flowcell identifier
if self.pargs.flowcell:
self.pargs.flowcell = self.pargs.flowcell.split("_")[-1]
# get the uid and gid to use for destination files
uid = os.getuid()
gid = os.getgid()
if self.pargs.group is not None and len(self.pargs.group) > 0:
gid = grp.getgrnam(group).gr_gid
self.log.debug("Connecting to project database")
p_con = ProjectSummaryConnection(**vars(self.pargs))
assert p_con, "Could not get connection to project databse"
self.log.debug("Connecting to samples database")
s_con = SampleRunMetricsConnection(**vars(self.pargs))
assert s_con, "Could not get connection to samples databse"
# Fetch the Uppnex project to deliver to
if not self.pargs.uppmax_project:
self.pargs.uppmax_project = p_con.get_entry(self.pargs.project, "uppnex_id")
if not self.pargs.uppmax_project:
self.log.error("Uppmax project was not specified and could not be fetched from project database")
return
# Extract the list of samples and runs associated with the project and sort them
samples = sorted(s_con.get_samples(fc_id=self.pargs.flowcell, sample_prj=self.pargs.project), key=lambda k: (k.get('project_sample_name','NA'), k.get('flowcell','NA'), k.get('lane','NA')))
# Setup paths and verify parameters
self._meta.production_root = self.app.config.get("production", "root")
self._meta.root_path = self._meta.production_root
proj_base_dir = os.path.join(self._meta.root_path, self.pargs.project)
assert os.path.exists(self._meta.production_root), "No such directory {}; check your production config".format(self._meta.production_root)
assert os.path.exists(proj_base_dir), "No project {} in production path {}".format(self.pargs.project,self._meta.root_path)
try:
self._meta.uppnex_project_root = self.app.config.get("deliver", "uppnex_project_root")
except Exception as e:
self.log.warn("{}, will use '/proj' as uppnext_project_root".format(e))
self._meta.uppnex_project_root = '/proj'
try:
self._meta.uppnex_delivery_dir = self.app.config.get("deliver", "uppnex_project_delivery_path")
except Exception as e:
self.log.warn("{}, will use 'INBOX' as uppnext_project_delivery_path".format(e))
self._meta.uppnex_delivery_dir = 'INBOX'
destination_root = os.path.join(self._meta.uppnex_project_root,self.pargs.uppmax_project,self._meta.uppnex_delivery_dir)
assert os.path.exists(destination_root), "Delivery destination folder {} does not exist".format(destination_root)
destination_root = os.path.join(destination_root,self.pargs.project)
# If interactively select, build a list of samples to skip
if self.pargs.interactive:
to_process = []
for sample in samples:
sname = sample.get("project_sample_name")
index = sample.get("sequence")
fcid = sample.get("flowcell")
lane = sample.get("lane")
date = sample.get("date")
self.log.info("Sample: {}, Barcode: {}, Flowcell: {}, Lane: {}, Started on: {}".format(sname,
index,
fcid,
lane,
date))
if query_yes_no("Deliver sample?", default="no"):
to_process.append(sample)
samples = to_process
# Find uncompressed fastq
uncompressed = self._find_uncompressed_fastq_files(proj_base_dir,samples)
if len(uncompressed) > 0:
self.log.warn("The following samples have uncompressed *.fastq files that cannot be delivered: {}".format(",".join(uncompressed)))
if not query_yes_no("Continue anyway?", default="no"):
return
self.log.info("Will deliver data for {} samples from project {} to {}".format(len(samples),self.pargs.project,destination_root))
if not query_yes_no("Continue?"):
return
# Get the list of files to transfer and the destination
self.log.debug("Gathering list of files to copy")
to_copy = self.get_file_copy_list(proj_base_dir,
destination_root,
samples)
# Make sure that transfer will be with rsync
if not self.pargs.rsync:
self.log.warn("Files must be transferred using rsync")
if not query_yes_no("Do you wish to continue delivering using rsync?", default="yes"):
return
self.pargs.rsync = True
# Process each sample run
for id, files in to_copy.items():
# get the sample database object
[sample] = [s for s in samples if s.get('_id') == id]
#.........这里部分代码省略.........
开发者ID:guillermo-carrasco,项目名称:scilifelab,代码行数:101,代码来源:deliver.py
示例15: TestDbConnection
class TestDbConnection(unittest.TestCase):
    """Tests for the sample run and project summary connections against the test databases."""

    def setUp(self):
        """Store connection settings and example ids, and open the projects test database."""
        self.user = "user"
        self.pw = "pw"
        self.url = "localhost"
        self.examples = {"sample": "1_120924_AC003CCCXX_TGACCA", "flowcell": "AC003CCCXX", "project": "J.Doe_00_01"}
        self.p_con = ProjectSummaryConnection(
            dbname="projects-test", username=self.user, password=self.pw, url=self.url
        )

    def _sample_con(self):
        """Return a new connection to the samples test database."""
        return SampleRunMetricsConnection(
            dbname="samples-test", username=self.user, password=self.pw, url=self.url
        )

    def test_connection(self):
        """Test database connection"""
        sample_con = self._sample_con()
        self.assertEqual(sample_con.url_string, "http://{}:5984".format(self.url))

    def test_get_flowcell(self):
        """Test getting a flowcell for a given sample"""
        sample_con = self._sample_con()
        fc = sample_con.get_entry(self.examples["sample"], "flowcell")
        self.assertEqual(str(fc), self.examples["flowcell"])

    def test_get_sample_ids(self):
        """Test getting sample ids given flowcell and sample_prj"""
        sample_con = self._sample_con()
        sample_ids = sample_con.get_sample_ids(fc_id=self.examples["flowcell"])
        LOG.info("Number of samples before subsetting: " + str(len(sample_ids)))
        self.assertEqual(len(sample_ids), 4)
        sample_ids = sample_con.get_sample_ids(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
        LOG.info("Number of samples after subsetting: " + str(len(sample_ids)))
        self.assertEqual(len(sample_ids), 2)

    def test_get_samples(self):
        """Test getting samples given flowcell and sample_prj."""
        sample_con = self._sample_con()
        samples = sample_con.get_samples(fc_id=self.examples["flowcell"])
        LOG.info("Selecting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 4)
        samples = sample_con.get_samples(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
        LOG.info("Selecting on flowcell, subsetting on project: " + str(len(samples)))
        self.assertEqual(len(samples), 2)
        samples = sample_con.get_samples(sample_prj=self.examples["project"])
        LOG.info("Selecting on project: " + str(len(samples)))
        self.assertEqual(len(samples), 3)
        samples = sample_con.get_samples(sample_prj=self.examples["project"], fc_id=self.examples["flowcell"])
        LOG.info("Selecting on project, subsetting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 2)

    def test_get_samples_wrong_info(self):
        """Test getting samples when either flowcell or project id information is wrong"""
        sample_con = self._sample_con()
        samples = sample_con.get_samples(sample_prj="bogusproject", fc_id=self.examples["flowcell"])
        LOG.info("Selecting on bogus project, subsetting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 0)

    def test_get_project_sample_ids(self):
        """Test getting project sample ids"""
        sample_con = self._sample_con()
        sample_ids = sample_con.get_sample_ids(sample_prj=self.examples["project"])
        sample_names = [sample_con.db.get(x)["name"] for x in sample_ids]
        self.assertEqual(
            set(sample_names),
            {"1_120924_AC003CCCXX_TGACCA", "2_120924_AC003CCCXX_ACAGTG", "1_121015_BB002BBBXX_TGACCA"},
        )

    def test_get_latest_library_prep(self):
        """Test getting latest library prep"""
        # Temporarily add a "B" prep to one sample of the project
        prj = self.p_con.get_entry("J.Doe_00_01")
        prj["samples"]["P001_102"]["library_prep"]["B"] = {"sample_run_metrics": {"2_120924_AC003CCCXX_TTGGAA": None}}
        self.p_con.save(prj)
        try:
            preps = self.p_con.get_latest_library_prep(project_name=self.examples["project"])
            srm = [x for l in preps.values() for x in l]
            # Make sure A prep not in list
            self.assertNotIn("2_120924_AC003CCCXX_ACAGTG", srm)
            # Make sure B prep in list
            self.assertIn("2_120924_AC003CCCXX_TTGGAA", srm)
        finally:
            # Reset data even when an assertion fails, so the extra prep
            # does not stay in the database for the other tests
            prj = self.p_con.get_entry("J.Doe_00_01")
            del prj["samples"]["P001_102"]["library_prep"]["B"]
            self.p_con.save(prj)
开发者ID:pombredanne,项目名称:scilifelab,代码行数:94,代码来源:test_db.py
示例16: sample_status_note
def sample_status_note(project_name=None, flowcell=None, username=None, password=None, url=None,
ordered_million_reads=None, uppnex_id=None, customer_reference=None, bc_count=None,
project_alias=[], projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
phix=None, **kw):
"""Make a sample status note. Used keywords:
:param project_name: project name
:param flowcell: flowcell id
:param username: db username
:param password: db password
:param url: db url
:param ordered_million_reads: number of ordered reads in millions
:param uppnex_id: the uppnex id
:param customer_reference: customer project name
:param project_alias: project alias name
:param phix: phix error rate
"""
# Cutoffs
cutoffs = {
"phix_err_cutoff" : 2.0,
"qv_cutoff" : 30,
}
# parameters
parameters = {
"project_name" : None,
"start_date" : None,
"FC_id" : None,
"scilifelab_name" : None,
"rounded_read_count" : None,
"phix_error_rate" : None,
"avg_quality_score" : None,
"success" : None,
"run_mode":None,
}
# key mapping from sample_run_metrics to parameter keys
srm_to_parameter = {"project_name":"sample_prj", "FC_id":"flowcell",
"scilifelab_name":"barcode_name", "start_date":"date", "rounded_read_count":"bc_count"}
LOG.debug("got parameters {}".format(parameters))
output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}
if not _assert_flowcell_format(flowcell):
LOG.warn("Wrong flowcell format {}; skipping. Please use the flowcell id (format \"[A-Z0-9]+XX\")".format(flowcell) )
return output_data
output_data = _update_sample_output_data(output_data, cutoffs)
# Connect and run
s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
# Set up paragraphs
paragraphs = sample_note_paragraphs()
headers = sample_note_headers()
# Get project
project = p_con.get_entry(project_name)
if not project:
LOG.warn("No such project '{}'".format(project_name))
return output_data
# Set samples list
sample_run_list = _set_sample_run_list(project_name, flowcell, project_alias, s_con)
if len(sample_run_list) == 0:
LOG.warn("No samples for project '{}', flowcell '{}'. Maybe there are no sample run metrics in statusdb?".format(project_name, flowcell))
return output_data
# Set options
ordered_million_reads = _literal_eval_option(ordered_million_reads)
bc_count = _literal_eval_option(bc_count)
phix = _literal_eval_option(phix)
# Count number of times a sample has been run on a flowcell; if several, make lane-specific reports
sample_count = Counter([x.get("barcode_name") for x in sample_run_list])
# Loop samples and collect information
s_param_out = []
for s in sample_run_list:
s_param = {}
LOG.debug("working on sample '{}', sample run metrics name '{}', id '{}'".format(s.get("barcode_name", None), s.get("name", None), s.get("_id", None)))
s_param.update(parameters)
s_param.update({key:s[srm_to_parameter[key]] for key in srm_to_parameter.keys()})
fc = "{}_{}".format(s.get("date"), s.get("flowcell"))
# Get instrument
try:
s_param.update(instrument[fc_con.get_instrument(str(fc))])
except:
LOG.warn("Failed to set instrument and software versions for flowcell {} in report due to missing RunInfo -> Instrument field in statusdb. Either rerun 'pm qc update-qc' or search-and-replace 'NN' in the sample report.".format(fc))
s_param.update(instrument['default'])
# Get run mode
s_param["run_mode"] = fc_con.get_run_mode(str(fc))
s_param.update(software_versions)
s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
if phix:
s_param["phix_error_rate"] = _get_phix_error
|
请发表评论