• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python statusdb.ProjectSummaryConnection类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 scilifelab.db.statusdb.ProjectSummaryConnection 的典型用法代码示例。如果您正苦于以下问题：Python ProjectSummaryConnection 类的具体用法？Python ProjectSummaryConnection 怎么用？Python ProjectSummaryConnection 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了ProjectSummaryConnection类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_dbcon

 def test_dbcon(self):
     """Check that the sample, flowcell and project test databases hold the expected values."""
     # Sample run metrics: fetch every document listed in the name view
     # and index the documents by their "name" field.
     sample_con = SampleRunMetricsConnection(dbname="samples-test", username="u", password="p")
     sample_docs = []
     for key in sample_con.name_view:
         sample_docs.append(sample_con.get_entry(key))
     samples_by_name = dict((doc["name"], doc) for doc in sample_docs)
     sample_expectations = (
         ("1_120924_AC003CCCXX_TGACCA", "date", "120924"),
         ("1_121015_BB002BBBXX_TGACCA", "flowcell", "BB002BBBXX"),
         ("2_120924_AC003CCCXX_ACAGTG", "entity_type", "sample_run_metrics"),
         ("3_120924_AC003CCCXX_ACAGTG", "lane", "3"),
         ("4_120924_AC003CCCXX_CGTTAA", "sequence", "CGTTAA"),
         ("2_121015_BB002BBBXX_TGACCA", "project_id", "P002"),
     )
     for name, field, expected in sample_expectations:
         self.assertEqual(samples_by_name[name][field], expected)

     # Flowcell run metrics, indexed the same way.
     flowcell_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="u", password="p")
     flowcell_docs = []
     for key in flowcell_con.name_view:
         flowcell_docs.append(flowcell_con.get_entry(key))
     flowcells_by_name = dict((doc["name"], doc) for doc in flowcell_docs)
     flowcell_expectations = (
         ("120924_AC003CCCXX", "name", "120924_AC003CCCXX"),
         ("121015_BB002BBBXX", "name", "121015_BB002BBBXX"),
         ("120924_AC003CCCXX", "entity_type", "flowcell_run_metrics"),
     )
     for name, field, expected in flowcell_expectations:
         self.assertEqual(flowcells_by_name[name][field], expected)

     # Project summaries are indexed by "project_name" rather than "name".
     project_con = ProjectSummaryConnection(dbname="projects-test", username="u", password="p")
     project_docs = []
     for key in project_con.name_view:
         project_docs.append(project_con.get_entry(key))
     projects_by_name = dict((doc["project_name"], doc) for doc in project_docs)

     first_project = projects_by_name["J.Doe_00_01"]
     self.assertEqual(first_project["min_m_reads_per_sample_ordered"], 0.1)
     self.assertEqual(first_project["no_of_samples"], 2)
     expected_samples = set(["P001_101_index3", "P001_102", "P001_103"])
     self.assertEqual(set(first_project["samples"].keys()), expected_samples)
     self.assertEqual(first_project["customer_reference"], "GnuGenome")

     self.assertEqual(projects_by_name["J.Doe_00_02"]["min_m_reads_per_sample_ordered"], 0.2)

     third_project_samples = projects_by_name["J.Doe_00_03"]["samples"]
     self.assertEqual(third_project_samples.keys(), ["3_index6"])
     self.assertIn("A", third_project_samples["3_index6"]["library_prep"])
开发者ID:pombredanne,项目名称:scilifelab,代码行数:29,代码来源:test_db.py


示例2: setUpModule

def setUpModule():
    """Create the test databases, their views and the project summary fixtures.

    Returns immediately when ``has_couchdb`` is false. Otherwise every
    database named in ``DATABASES`` is created on the default
    ``couchdb.Server()`` if it is missing, the views in ``VIEWS`` are synced
    into it, and each entry of ``data/config/project_summary.yaml`` is saved
    to the "projects-test" database keyed on ``project_name``.
    """
    if not has_couchdb:
        return
    server = couchdb.Server()
    ## Create databases
    for dbname in DATABASES:
        if dbname not in server:
            LOG.info("Creating database {}".format(dbname))
            server.create(dbname)
    ## Create views for flowcells and samples
    for dbname in DATABASES:
        # VIEWS is keyed on the database name without the "-test" suffix
        dblab = dbname.replace("-test", "")
        db = server[dbname]
        for design, views in VIEWS[dblab].items():
            for title, view in views.items():
                ViewDefinition(design, title, view).sync(db)

    ## Create and upload project summary
    with open(os.path.join(filedir, "data", "config", "project_summary.yaml")) as fh:
        # NOTE(review): yaml.load can construct arbitrary Python objects; the
        # fixture is a repository file, but consider yaml.safe_load if it
        # contains plain data only.
        prj_sum = yaml.load(fh)
    p_con = ProjectSummaryConnection(dbname="projects-test", username="u", password="p")
    for p in prj_sum:
        prj = ProjectSummaryDocument(**p)
        p_con.save(prj, key="project_name")
开发者ID:pombredanne,项目名称:scilifelab,代码行数:27,代码来源:test_db.py


示例3: test_2_make_note

    def test_2_make_note(self):
        """Make a note subset by example flowcell and project.

        One PDF note named after the sample's ``barcode_name`` is written
        for every sample run that ``map_srm_to_name`` returns for the example
        project and flowcell.
        """
        s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
        fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
        p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
        paragraphs = sample_note_paragraphs()
        headers = sample_note_headers()
        project = p_con.get_entry(self.examples["project"])
        samples = p_con.map_srm_to_name(self.examples["project"], fc_id=self.examples["flowcell"], use_bc_map=True)
        for srm_id, mapping in samples.items():
            # Work on a copy: updating the shared ``parameters`` template in
            # place would leak values from one sample into the next.
            s_param = dict(parameters)
            s = s_con.get_entry(srm_id)
            s_param.update({key: s[srm_to_parameter[key]] for key in srm_to_parameter})
            fc = "{}_{}".format(s["date"], s["flowcell"])
            s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
            s_param['avg_quality_score'] = s_con.calc_avg_qv(s["name"])
            s_param['rounded_read_count'] = round(float(s_param['rounded_read_count'])/1e6,1) if s_param['rounded_read_count'] else None

            if project:
                # Only dereference the project document once we know it exists
                s_param['customer_name'] = project['samples'][mapping["sample"]].get('customer_name', None)
                s_param['ordered_amount'] = p_con.get_ordered_amount(self.examples["project"])
                s_param['customer_reference'] = s_param.get('customer_reference', project['customer_reference'])
                s_param['uppnex_project_id'] = s_param.get('uppnex_project_id', project['uppnex_id'])
            else:
                s_param['customer_name'] = None
            s_param['success'] = sequencing_success(s_param, cutoffs)
            # Missing values are rendered as "N/A" in the note
            s_param.update({key: "N/A" for key, value in s_param.items() if value is None})
            make_note("{}.pdf".format(s["barcode_name"]), headers, paragraphs, **s_param)
开发者ID:hussius,项目名称:scilifelab,代码行数:27,代码来源:test_sample_delivery_note.py


示例4: list_projects

    def list_projects(self):
        """Write the run setup and per-project application of a flowcell to stdout.

        The first output row holds the flowcell argument, the instrument type
        and the run mode; each following row holds a project name and its
        application ("N/A" when no project document is found). Columns are
        tab separated and rows newline separated.

        Returns early, after logging a warning, when the ``flowcell`` argument
        is missing, no database url is configured, the flowcell name is
        malformed, no flowcell document exists or it has no samplesheet data.
        """
        if not self._check_pargs(["flowcell"]):
            return

        url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
        if not url:
            self.app.log.warn("Please provide a valid url: got {}".format(url))
            return
        if not validate_fc_directory_format(self.pargs.flowcell):
            self.app.log.warn(
                "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(self.pargs.flowcell)
            )
            return

        # The database id is built from the first and last "_"-separated parts
        s = self.pargs.flowcell.split("_")
        fcid = "_".join([s[0], s[-1]])

        self.log.debug("Establishing FlowcellRunMetricsConnection")
        fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
        self.log.debug("Establishing ProjectSummaryConnection")
        p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))

        self.log.debug("Fetching flowcell metric document for flowcell {}".format(fcid))
        fc = fc_con.get_entry(fcid)
        if fc is None:
            self.log.warn("No flowcell metric document for flowcell {}".format(fcid))
            return

        self.log.debug("Fetching csv samplesheet data for flowcell {}".format(fcid))
        ssheet_data = self._get_samplesheet_sample_data(fc)
        if not ssheet_data:
            self.log.warn("No csv samplesheet data for flowcell {}".format(fcid))
            return

        self.log.debug("Fetch runParameter data for flowcell {}".format(fcid))
        # Missing run parameters are not fatal: fall back to the defaults below
        run_data = self._get_run_parameter_data(fc) or {}
        if not run_data:
            self.log.warn("No runParameter data for flowcell {}".format(fcid))

        out_data = [
            [self.pargs.flowcell, run_data.get("InstrumentType", "HiSeq2000"), run_data.get("RunMode", "High Output")]
        ]

        # Extract the project names
        projects = {proj[0].replace("__", ".") for data in ssheet_data.values() for proj in data.values()}

        # Extract application for each project
        for project in projects:
            self.log.debug("Fetching project data document for project {}".format(project))
            pdoc = p_con.get_entry(project)
            if pdoc is None:
                self.log.warn("No project data document for project {}".format(project))
                pdoc = {}

            application = pdoc.get("application", "N/A")
            out_data.append([project, application])

        self.app._output_data["stdout"].write("\n".join("\t".join(str(r) for r in row) for row in out_data))
开发者ID:vals,项目名称:scilifelab,代码行数:59,代码来源:ext_qc.py


示例5: bcbb_configuration_from_samplesheet

def bcbb_configuration_from_samplesheet(csv_samplesheet, couch_credentials):
    """Parse an illumina csv-samplesheet and return a configuration for the bcbb-pipeline.

    The samplesheet is converted to YAML through a temporary file, which is
    removed again even when conversion or parsing fails. For every multiplex
    entry, the analysis settings are chosen from the project's application as
    stored in the project database, falling back to the 'default' settings
    when the database or the project is unavailable.

    :param csv_samplesheet: path to the csv samplesheet
    :param couch_credentials: keyword arguments passed to ProjectSummaryConnection

    :returns: the parsed configuration with the analysis settings filled in
    """
    tfh, yaml_file = tempfile.mkstemp('.yaml', 'samplesheet')
    os.close(tfh)
    try:
        yaml_file = bcbio.solexa.samplesheet.csv2yaml(csv_samplesheet, yaml_file)
        with open(yaml_file) as fh:
            # NOTE(review): yaml.load can construct arbitrary Python objects;
            # the file is generated locally by csv2yaml, but consider
            # yaml.safe_load.
            config = yaml.load(fh)
    finally:
        # Remove the yaml file, we will write a new one later
        if os.path.exists(yaml_file):
            os.remove(yaml_file)

    application_setup = {
        'Amplicon': {'analysis': 'Align_standard'},
        'ChIP-seq': {'analysis': 'RNA-seq'},
        'Custom capture': {'analysis': 'Align_standard_seqcap'},
        'de novo': {'analysis': 'Align_standard',
                    'genome_build': 'unknown'},
        'Exome capture': {'analysis': 'Align_standard_seqcap'},
        'Finished library': {'analysis': 'Align_standard',
                             'genome_build': 'unknown'},
        'Mate-pair': {'analysis': 'Align_standard',
                      'genome_build': 'unknown'},
        'Metagenome': {'analysis': 'Align_standard',
                       'genome_build': 'unknown'},
        'miRNA-seq': {'analysis': 'Align_standard',
                      'genome_build': 'unknown'},
        'RNA-seq (mRNA)': {'analysis': 'RNA-seq'},
        'RNA-seq (total RNA)': {'analysis': 'RNA-seq'},
        'WG re-seq': {'analysis': 'Align_standard'},
        'default': {'analysis': 'Align_standard'},
    }

    # Connect to maggie to get project application
    try:
        p_con = ProjectSummaryConnection(**couch_credentials)
    except Exception:
        # Best effort: without a connection every entry gets the defaults
        print("Can't connect to maggie to get application")
        p_con = None

    # Replace the default analysis
    ## TODO: This is an ugly hack, should be replaced by a custom config
    for lane in config:
        for plex in lane.get('multiplex', []):
            application = 'default'
            if p_con is not None:
                try:
                    project = p_con.get_entry(plex.get('sample_prj', ''))
                    if project is not None:
                        application = project.get("application", 'default').strip()
                except Exception:
                    # A failed lookup must not abort the whole configuration
                    application = 'default'
            setup = application_setup.get(application, application_setup['default'])
            for key, val in setup.items():
                plex[key] = val

    return config
开发者ID:percyfal,项目名称:scilifelab,代码行数:58,代码来源:run_bcbb_pipeline.py


示例6: update

    def update(self):
        """Update 'project_id' and/or 'project_sample_name' on the sample runs of a project.

        Requires the ``sample_prj`` argument and a database url. With
        ``project_id`` set, that id is saved on every sample run. With
        ``names`` set (a JSON file path or a dict literal mapping barcode
        name to sample name), the mapped name is saved on the matching runs.
        Existing values are only overwritten after confirmation.

        NOTE(review): the extensive-matching branch is the ``else`` of the
        ``names`` check, so it also runs when only ``project_id`` is given.
        """
        if not self._check_pargs(["sample_prj"]):
            return
        url = self.pargs.url or self.app.config.get("db", "url")
        if not url:
            self.app.log.warn("Please provide a valid url: got {}".format(url))
            return

        s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
        samples = s_con.get_samples(sample_prj=self.pargs.sample_prj)

        if self.pargs.project_id:
            self.app.log.debug("Going to update 'project_id' to {} for sample runs with 'sample_prj' == {}".format(self.pargs.project_id, self.pargs.sample_prj))
            for run in samples:
                already_set = run.get("project_id", None) is not None
                # Ask before replacing an existing value; skip the run on "no"
                if already_set and not query_yes_no("'project_id':{} for sample {}; are you sure you want to overwrite?".format(run["project_id"], run["name"]), force=self.pargs.force):
                    continue
                run["project_id"] = self.pargs.project_id
                s_con.save(run)

        if self.pargs.names:
            self.app.log.debug("Going to update 'project_sample_name' for sample runs with 'sample_prj' == {}".format(self.pargs.sample_prj))
            # The argument is either a path to a JSON file or a dict literal
            if os.path.exists(self.pargs.names):
                with open(self.pargs.names) as fh:
                    names_d = json.load(fh)
            else:
                names_d = ast.literal_eval(self.pargs.names)
            # Collect the sample runs per barcode name, keeping their order
            runs_by_barcode = {}
            for run in samples:
                runs_by_barcode.setdefault(run["barcode_name"], []).append(run)
            for barcode_name in names_d:
                for run in runs_by_barcode.get(barcode_name, None) or []:
                    already_set = run.get("project_sample_name", None) is not None
                    if already_set and not query_yes_no("'project_sample_name':{} for sample {}; are you sure you want to overwrite?".format(run["project_sample_name"], run["name"]), force=self.pargs.force):
                        continue
                    run["project_sample_name"] = names_d[barcode_name]
                    s_con.save(run)
            return

        self.app.log.info("Trying to use extensive matching...")
        p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
        # A project alias, when given, replaces the sample project name
        project_name = self.pargs.project_alias or self.pargs.sample_prj
        for run in samples:
            project_sample = p_con.get_project_sample(project_name, run["barcode_name"], extensive_matching=True)
            if not project_sample:
                continue
            self.app.log.info("using mapping '{} : {}'...".format(run["barcode_name"], project_sample["sample_name"]))
            run["project_sample_name"] = project_sample["sample_name"]
            s_con.save(run)
开发者ID:emmser,项目名称:scilifelab,代码行数:52,代码来源:ext_qc.py


示例7: __init__

 def __init__(self, config):
     """Connect to the QC checklist on Google Docs and to the StatusDB project database.

     Reads the "gdocs" and "statusdb" sections of the configuration. Raises
     AssertionError when the credentials file, the spreadsheet connection,
     the configured checklist or one of its three worksheets is missing.
     ``self.pcon`` is set to None when the database connection raises
     ConnectionError.
     """
     super(GDocsUpdater, self).__init__(config)

     # Connect to the Google Docs api
     gdocs_cfg = self.config.get("gdocs", {})
     credentials_path = os.path.expanduser(gdocs_cfg.get("credentials_file", ""))
     assert os.path.exists(credentials_path), "Supplied GDocs credentials file does not exist"
     self.gdcon = SpreadSheet(get_credentials(credentials_path))
     assert self.gdcon, "Could not get a SpreadSheet object, please verify gdocs credentials"
     checklist = gdocs_cfg.get("qc_checklist", None)
     assert checklist, "No QC checklist specified in configuration, please specify"
     spreadsheet = self.gdcon.get_spreadsheet(checklist)
     assert spreadsheet, "Could not locate QC checklist '{}' on Google Docs. Please make sure it exists".format(checklist)
     self.gdcon.ssheet = spreadsheet

     # Get the Ongoing, Finished and Coming worksheets
     for attribute, title in (("ongoing", "Ongoing"), ("coming", "Coming"), ("finished", "Finished")):
         setattr(self, attribute, self.gdcon.get_worksheet(title))
     assert all((self.ongoing, self.coming, self.finished)), "Could not get 'Ongoing', 'Finished' and 'Coming' worksheets from '{}'. Please make sure that they exist".format(checklist)

     # Get a connection to the StatusDB project database
     statusdb_cfg = self.config.get("statusdb", {})
     connection_args = {
         "url": statusdb_cfg.get("url", "localhost"),
         "username": statusdb_cfg.get("user", "user"),
         "password": statusdb_cfg.get("password", "pass"),
     }
     try:
         self.pcon = ProjectSummaryConnection(**connection_args)
     except ConnectionError:
         self.pcon = None
开发者ID:mayabrandi,项目名称:hugin,代码行数:29,代码来源:gdocs_updater.py


示例8: setUp

 def setUp(self):
     """Store the test credentials and example ids, and connect to "projects-test"."""
     self.user, self.pw, self.url = "user", "pw", "localhost"
     self.examples = dict(
         sample="1_120924_AC003CCCXX_TGACCA",
         flowcell="AC003CCCXX",
         project="J.Doe_00_01",
     )
     connection_args = dict(dbname="projects-test", username=self.user, password=self.pw, url=self.url)
     self.p_con = ProjectSummaryConnection(**connection_args)
开发者ID:pombredanne,项目名称:scilifelab,代码行数:8,代码来源:test_db.py


示例9: upload_qc

    def upload_qc(self):
        """Collect the qc objects of a flowcell and save them to the databases.

        Requires the ``flowcell`` argument, a database url and a flowcell
        name accepted by ``validate_fc_directory_format``. Flowcells dated
        before 120815 are collected with the pre-casava collector, later
        ones with the casava collector. Sample run documents get their
        'project_sample_name' from the project database when a matching
        project sample is found.
        """
        if not self._check_pargs(["flowcell"]):
            return
        url = self.pargs.url or self.app.config.get("db", "url")
        if not url:
            self.app.log.warn("Please provide a valid url: got {}".format(url))
            return
        if not validate_fc_directory_format(self.pargs.flowcell):
            self.app.log.warn(
                "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(self.pargs.flowcell)
            )
            return

        flowcell_dir = os.path.abspath(self.pargs.flowcell)
        runinfo_csv = os.path.join(flowcell_dir, "{}.csv".format(fc_id(self.pargs.flowcell)))
        runinfo_yaml = os.path.join(flowcell_dir, "run_info.yaml")
        fc_date, fc_name = fc_parts(self.pargs.flowcell)
        if int(fc_date) >= 120815:
            self.log.info("Assuming casava based file structure for {}".format(fc_id(self.pargs.flowcell)))
            qc_objects = self._collect_casava_qc()
        else:
            self.log.info("Assuming pre-casava based file structure for {}".format(fc_id(self.pargs.flowcell)))
            qc_objects = self._collect_pre_casava_qc()

        if len(qc_objects) == 0:
            self.log.info("No out-of-date qc objects for {}".format(fc_id(self.pargs.flowcell)))
            return
        self.log.info("Retrieved {} updated qc objects".format(len(qc_objects)))

        s_con = SampleRunMetricsConnection(dbname=self.app.config.get("db", "samples"), **vars(self.app.pargs))
        fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
        p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
        for qc_obj in qc_objects:
            if self.app.pargs.debug:
                self.log.debug("{}: {}".format(str(qc_obj), qc_obj["_id"]))
            # NOTE(review): save() is evaluated here, before dry() is called,
            # so dry() receives its return value - confirm this is intended.
            if isinstance(qc_obj, FlowcellRunMetricsDocument):
                dry("Saving object {}".format(repr(qc_obj)), fc_con.save(qc_obj))
            if isinstance(qc_obj, SampleRunMetricsDocument):
                matched = p_con.get_project_sample(
                    qc_obj.get("sample_prj", None), qc_obj.get("barcode_name", None), self.pargs.extensive_matching
                )
                if matched:
                    qc_obj["project_sample_name"] = matched["sample_name"]
                dry("Saving object {}".format(repr(qc_obj)), s_con.save(qc_obj))
开发者ID:vals,项目名称:scilifelab,代码行数:44,代码来源:ext_qc.py


示例10: test_2_make_project_note

 def test_2_make_project_note(self):
     """Make a project note subset by flowcell and project.

     A PDF named after the example project is written, holding one table
     row per distinct project sample. Prints a message and returns when the
     project is not found in the project database.
     """
     s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
     fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
     p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
     paragraphs = project_note_paragraphs()
     headers = project_note_headers()
     # Work on a copy so the shared ``parameters`` template is not modified
     param = dict(parameters)
     project = p_con.get_entry(self.examples["project"])
     if not project:
         print("No project named {}".format(self.examples["project"]))
         return
     ordered_amount = p_con.get_ordered_amount(self.examples["project"])

     ## Start collecting the data
     sample_table = []
     sample_list = project['samples']
     param.update({key: project.get(ps_to_parameter[key], None) for key in ps_to_parameter})
     samples = p_con.map_name_to_srm(self.examples["project"], check_consistency=True, use_bc_map=True)
     all_passed = True
     for sample_name, srm_map in samples.items():
         if sample_name == "Unexpected":
             continue
         project_sample = sample_list[sample_name]
         vals = {col: project_sample.get(prjs_to_table[col], None) for col in prjs_to_table}
         vals['MOrdered'] = ordered_amount
         # The barcode sequence is taken from the first mapped sample run
         vals['BarcodeSeq'] = s_con.get_entry(list(srm_map.keys())[0], "sequence")

         ## Set status
         vals['Status'] = set_status(vals) if vals['Status'] is None else vals['Status']
         vals.update({col: "N/A" for col, value in vals.items() if value is None})
         if vals['Status'] == "N/A" or vals['Status'] == "NP":
             all_passed = False
         sample_table.append([vals[col] for col in table_keys])
     if all_passed:
         param["finished"] = 'Project finished.'
     # Sorting makes identical rows adjacent so groupby can drop duplicates
     sample_table.sort()
     sample_table = [row for row, _ in itertools.groupby(sample_table)]
     sample_table.insert(0, ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status'])
     paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
     make_note("{}.pdf".format(self.examples["project"]), headers, paragraphs, **param)
开发者ID:hussius,项目名称:scilifelab,代码行数:43,代码来源:test_project_status_note.py


示例11: application_qc

def application_qc(project_name=None, flowcell=None, application=None,
                   username=None, password=None, url=None,
                   sampledb="samples", projectdb="projects", **kw):
    """Perform application specific qc on a project.

    When the project has a summary document whose application is a key of
    ``APPLICATION_MAP``, the mapped application is used; otherwise the
    ``application`` argument is required. Without a summary document the qc
    data is collected from the sample database alone.

    :param project_name: project name
    :param flowcell: flowcell identifier
    :param application: application for which to perform qc
    :param username: database username
    :param password: database password
    :param url: database url
    :param sampledb: samples database name
    :param projectdb: project database name

    :returns: dictionary with 'stdout' and 'stderr' StringIO objects; it is
      returned unmodified, after a warning, when no application is available
    """
    LOG.debug("Doing application qc for project {}, flowcell {}".format(project_name, flowcell))

    output_data = {'stdout': StringIO(), 'stderr': StringIO()}
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
    s_con = SampleRunMetricsConnection(dbname=sampledb, username=username, password=password, url=url)
    prj_summary = p_con.get_entry(project_name)

    if prj_summary is not None:
        # Fetch the qc data once, and only after the project is known to exist
        qc_data = get_qc_data(project_name, p_con, s_con, flowcell)
        prj_application = prj_summary.get("application")
        if prj_application in APPLICATION_MAP:
            application = APPLICATION_MAP[prj_application]
        elif not application:
            # Report the unmapped application stored in the project summary
            LOG.warn("No such application {}. Please use the application option (available choices {})".format(prj_application, ",".join(QC_CUTOFF.keys())))
            return output_data
    else:
        LOG.info("No such project {} in project summary. Trying to get qc data anyway.".format(project_name))
        if not application:
            LOG.warn("No application provided. Please use the application option (available choices {})".format(",".join(QC_CUTOFF.keys())))
            return output_data
        qc_data = _get_sample_qc_data(project_name, application, s_con, flowcell)

    output_data = _qc_info_header(project_name, application, output_data)
    for _, sample_qc in sorted(qc_data.items()):
        fields = [str(x) for x in assess_qc(sample_qc, application)]
        output_data["stdout"].write("".join(fields) + "\n")
    return output_data
开发者ID:Galithil,项目名称:scilifelab,代码行数:43,代码来源:qc.py


示例12: test_3_sample_map

 def test_3_sample_map(self):
     """Fetch and print the name-to-sample-run mapping of the example project."""
     connection = ProjectSummaryConnection(url=self.url, username=self.user, password=self.pw)
     project_name = self.examples["project"]
     mapping = connection.map_name_to_srm(project_name, check_consistency=True, use_ps_map=False)
     print(mapping)
开发者ID:hussius,项目名称:scilifelab,代码行数:5,代码来源:test_project_status_note.py


示例13: test_4_srm_map

 def test_4_srm_map(self):
     """Fetch and print the sample-run-to-name mapping for the example project and flowcell."""
     connection = ProjectSummaryConnection(url=self.url, username=self.user, password=self.pw)
     mapping = connection.map_srm_to_name(
         self.examples["project"],
         fc_id=self.examples["flowcell"],
         check_consistency=True,
         use_ps_map=False,
     )
     print(mapping)
开发者ID:hussius,项目名称:scilifelab,代码行数:5,代码来源:test_project_status_note.py


示例14: raw_data

    def raw_data(self):
        if not self._check_pargs(["project"]):
            return

        # if necessary, reformat flowcell identifier
        if self.pargs.flowcell:
            self.pargs.flowcell = self.pargs.flowcell.split("_")[-1]

        # get the uid and gid to use for destination files
        uid = os.getuid()
        gid = os.getgid()
        if self.pargs.group is not None and len(self.pargs.group) > 0:
            gid = grp.getgrnam(group).gr_gid

        self.log.debug("Connecting to project database")
        p_con = ProjectSummaryConnection(**vars(self.pargs))
        assert p_con, "Could not get connection to project databse"
        self.log.debug("Connecting to samples database")
        s_con = SampleRunMetricsConnection(**vars(self.pargs))
        assert s_con, "Could not get connection to samples databse"

        # Fetch the Uppnex project to deliver to
        if not self.pargs.uppmax_project:
            self.pargs.uppmax_project = p_con.get_entry(self.pargs.project, "uppnex_id")
            if not self.pargs.uppmax_project:
                self.log.error("Uppmax project was not specified and could not be fetched from project database")
                return

        # Extract the list of samples and runs associated with the project and sort them
        samples = sorted(s_con.get_samples(fc_id=self.pargs.flowcell, sample_prj=self.pargs.project), key=lambda k: (k.get('project_sample_name','NA'), k.get('flowcell','NA'), k.get('lane','NA')))

        # Setup paths and verify parameters
        self._meta.production_root = self.app.config.get("production", "root")
        self._meta.root_path = self._meta.production_root
        proj_base_dir = os.path.join(self._meta.root_path, self.pargs.project)
        assert os.path.exists(self._meta.production_root), "No such directory {}; check your production config".format(self._meta.production_root)
        assert os.path.exists(proj_base_dir), "No project {} in production path {}".format(self.pargs.project,self._meta.root_path)

        try:
            self._meta.uppnex_project_root = self.app.config.get("deliver", "uppnex_project_root")
        except Exception as e:
            self.log.warn("{}, will use '/proj' as uppnext_project_root".format(e))
            self._meta.uppnex_project_root = '/proj'

        try:
            self._meta.uppnex_delivery_dir = self.app.config.get("deliver", "uppnex_project_delivery_path")
        except Exception as e:
            self.log.warn("{}, will use 'INBOX' as uppnext_project_delivery_path".format(e))
            self._meta.uppnex_delivery_dir = 'INBOX'

        destination_root = os.path.join(self._meta.uppnex_project_root,self.pargs.uppmax_project,self._meta.uppnex_delivery_dir)
        assert os.path.exists(destination_root), "Delivery destination folder {} does not exist".format(destination_root)
        destination_root = os.path.join(destination_root,self.pargs.project)

        # If interactively select, build a list of samples to skip
        if self.pargs.interactive:
            to_process = []
            for sample in samples:
                sname = sample.get("project_sample_name")
                index = sample.get("sequence")
                fcid = sample.get("flowcell")
                lane = sample.get("lane")
                date = sample.get("date")
                self.log.info("Sample: {}, Barcode: {}, Flowcell: {}, Lane: {}, Started on: {}".format(sname,
                                                                                                           index,
                                                                                                           fcid,
                                                                                                           lane,
                                                                                                           date))
                if query_yes_no("Deliver sample?", default="no"):
                    to_process.append(sample)
            samples = to_process

        # Find uncompressed fastq
        uncompressed = self._find_uncompressed_fastq_files(proj_base_dir,samples)
        if len(uncompressed) > 0:
            self.log.warn("The following samples have uncompressed *.fastq files that cannot be delivered: {}".format(",".join(uncompressed)))
            if not query_yes_no("Continue anyway?", default="no"):
                return

        self.log.info("Will deliver data for {} samples from project {} to {}".format(len(samples),self.pargs.project,destination_root))
        if not query_yes_no("Continue?"):
            return

        # Get the list of files to transfer and the destination
        self.log.debug("Gathering list of files to copy")
        to_copy = self.get_file_copy_list(proj_base_dir,
                                          destination_root,
                                          samples)

        # Make sure that transfer will be with rsync
        if not self.pargs.rsync:
            self.log.warn("Files must be transferred using rsync")
            if not query_yes_no("Do you wish to continue delivering using rsync?", default="yes"):
                return
            self.pargs.rsync = True

        # Process each sample run
        for id, files in to_copy.items():
            # get the sample database object
            [sample] = [s for s in samples if s.get('_id') == id]
#.........这里部分代码省略.........
开发者ID:guillermo-carrasco,项目名称:scilifelab,代码行数:101,代码来源:deliver.py


示例15: TestDbConnection

class TestDbConnection(unittest.TestCase):
    """Tests for the statusdb connection classes against the test databases.

    The tests query the "samples-test" and "projects-test" databases using the
    credentials and host set in :meth:`setUp`, and compare the results with
    the example sample, flowcell and project stored in ``self.examples``.
    """

    def setUp(self):
        """Store the connection settings and open the project connection."""
        self.user = "user"
        self.pw = "pw"
        self.url = "localhost"
        self.examples = {"sample": "1_120924_AC003CCCXX_TGACCA", "flowcell": "AC003CCCXX", "project": "J.Doe_00_01"}
        self.p_con = ProjectSummaryConnection(
            dbname="projects-test", username=self.user, password=self.pw, url=self.url
        )

    def _sample_connection(self):
        """Return a new connection to the "samples-test" database."""
        return SampleRunMetricsConnection(
            dbname="samples-test", username=self.user, password=self.pw, url=self.url
        )

    def test_connection(self):
        """Test database connection"""
        sample_con = self._sample_connection()
        self.assertEqual(sample_con.url_string, "http://{}:5984".format(self.url))

    def test_get_flowcell(self):
        """Test getting a flowcell for a given sample"""
        sample_con = self._sample_connection()
        fc = sample_con.get_entry(self.examples["sample"], "flowcell")
        self.assertEqual(str(fc), self.examples["flowcell"])

    def test_get_sample_ids(self):
        """Test getting sample ids given flowcell and sample_prj"""
        sample_con = self._sample_connection()
        sample_ids = sample_con.get_sample_ids(fc_id=self.examples["flowcell"])
        LOG.info("Number of samples before subsetting: " + str(len(sample_ids)))
        self.assertEqual(len(sample_ids), 4)
        sample_ids = sample_con.get_sample_ids(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
        LOG.info("Number of samples after subsetting: " + str(len(sample_ids)))
        self.assertEqual(len(sample_ids), 2)

    def test_get_samples(self):
        """Test getting samples given flowcell and sample_prj."""
        sample_con = self._sample_connection()

        samples = sample_con.get_samples(fc_id=self.examples["flowcell"])
        LOG.info("Selecting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 4)
        samples = sample_con.get_samples(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
        LOG.info("Selecting on flowcell, subsetting on project: " + str(len(samples)))
        self.assertEqual(len(samples), 2)

        samples = sample_con.get_samples(sample_prj=self.examples["project"])
        LOG.info("Selecting on project: " + str(len(samples)))
        self.assertEqual(len(samples), 3)
        samples = sample_con.get_samples(sample_prj=self.examples["project"], fc_id=self.examples["flowcell"])
        LOG.info("Selecting on project, subsetting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 2)

    def test_get_samples_wrong_info(self):
        """Test getting samples when either flowcell or project id information is wrong"""
        sample_con = self._sample_connection()

        samples = sample_con.get_samples(sample_prj="bogusproject", fc_id=self.examples["flowcell"])
        LOG.info("Selecting on bogus project, subsetting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 0)

    def test_get_project_sample_ids(self):
        """Test getting project sample ids"""
        sample_con = self._sample_connection()
        sample_ids = sample_con.get_sample_ids(sample_prj=self.examples["project"])
        sample_names = [sample_con.db.get(x)["name"] for x in sample_ids]
        self.assertEqual(
            set(sample_names),
            {"1_120924_AC003CCCXX_TGACCA", "2_120924_AC003CCCXX_ACAGTG", "1_121015_BB002BBBXX_TGACCA"},
        )

    def test_get_latest_library_prep(self):
        """Test getting latest library prep

        A library prep "B" is temporarily added to sample P001_102 of the
        example project and saved; the document is restored afterwards.
        """
        project_name = self.examples["project"]
        prj = self.p_con.get_entry(project_name)
        prj["samples"]["P001_102"]["library_prep"]["B"] = {"sample_run_metrics": {"2_120924_AC003CCCXX_TTGGAA": None}}
        self.p_con.save(prj)
        try:
            preps = self.p_con.get_latest_library_prep(project_name=project_name)
            srm = [x for l in preps.values() for x in l]
            # Make sure A prep not in list
            self.assertNotIn("2_120924_AC003CCCXX_ACAGTG", srm)
            # Make sure B prep in list
            self.assertIn("2_120924_AC003CCCXX_TTGGAA", srm)
        finally:
            # Reset data even when an assertion above fails, so the temporary
            # prep does not leak into later tests or later runs.
            prj = self.p_con.get_entry(project_name)
            prj["samples"]["P001_102"]["library_prep"].pop("B", None)
            self.p_con.save(prj)
开发者ID:pombredanne,项目名称:scilifelab,代码行数:94,代码来源:test_db.py


示例16: sample_status_note

def sample_status_note(project_name=None, flowcell=None, username=None, password=None, url=None,
                       ordered_million_reads=None, uppnex_id=None, customer_reference=None, bc_count=None,
                       project_alias=[], projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                       phix=None, **kw):
    """Make a sample status note. Used keywords:

    :param project_name: project name
    :param flowcell: flowcell id
    :param username: db username
    :param password: db password
    :param url: db url
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param bc_count: option value, parsed with _literal_eval_option
    :param project_alias: project alias name
    :param projectdb: name of the project summary database
    :param samplesdb: name of the sample run metrics database
    :param flowcelldb: name of the flowcell run metrics database
    :param phix: phix error rate
    :param kw: additional keyword arguments; not used in the visible code

    :returns: on the early exits visible here, a dict with 'stdout',
        'stderr' and 'debug' StringIO objects

    NOTE(review): project_alias has a mutable default ([]); the visible code
    only passes it on to _set_sample_run_list. The function body is cut off
    in this excerpt, so the final return value is not shown.
    """
    # Cutoffs
    cutoffs = {
        "phix_err_cutoff" : 2.0,
        "qv_cutoff" : 30,
        }
    
    # parameters
    # Template of per-sample report fields; copied into s_param for every sample run below.
    parameters = {
        "project_name" : None,
        "start_date" : None,
        "FC_id" : None,
        "scilifelab_name" : None,
        "rounded_read_count" : None,
        "phix_error_rate" : None,
        "avg_quality_score" : None,
        "success" : None,
        "run_mode":None,
        }
    # key mapping from sample_run_metrics to parameter keys
    srm_to_parameter = {"project_name":"sample_prj", "FC_id":"flowcell", 
                        "scilifelab_name":"barcode_name", "start_date":"date", "rounded_read_count":"bc_count"}
    
    # At this point the template still holds only None values.
    LOG.debug("got parameters {}".format(parameters))
    output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}
    # Bail out with the empty output buffers when the flowcell id fails the format check.
    if not _assert_flowcell_format(flowcell):
        LOG.warn("Wrong flowcell format {}; skipping. Please use the flowcell id (format \"[A-Z0-9]+XX\")".format(flowcell) )
        return output_data
    output_data = _update_sample_output_data(output_data, cutoffs)

    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)

    # Set up paragraphs
    paragraphs = sample_note_paragraphs()
    headers = sample_note_headers()

    # Get project
    project = p_con.get_entry(project_name)
    if not project:
        LOG.warn("No such project '{}'".format(project_name))
        return output_data

    # Set samples list
    sample_run_list = _set_sample_run_list(project_name, flowcell, project_alias, s_con)
    if len(sample_run_list) == 0:
        LOG.warn("No samples for project '{}', flowcell '{}'. Maybe there are no sample run metrics in statusdb?".format(project_name, flowcell))
        return output_data
    
    # Set options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    bc_count = _literal_eval_option(bc_count)
    phix = _literal_eval_option(phix)

    # Count number of times a sample has been run on a flowcell; if several, make lane-specific reports
    sample_count = Counter([x.get("barcode_name") for x in sample_run_list])

    # Loop samples and collect information
    s_param_out = []
    for s in sample_run_list:
        s_param = {}
        LOG.debug("working on sample '{}', sample run metrics name '{}', id '{}'".format(s.get("barcode_name", None), s.get("name", None), s.get("_id", None)))
        # Start from the template, then fill in the fields that map directly
        # onto sample run metrics keys (s[...] is indexed, so every mapped key
        # is expected to be present in the sample run document).
        s_param.update(parameters)
        s_param.update({key:s[srm_to_parameter[key]] for key in srm_to_parameter.keys()})
        # Flowcell run name is built as "<date>_<flowcell>".
        fc = "{}_{}".format(s.get("date"), s.get("flowcell"))
        # Get instrument
        # Any failure in the lookup falls back to the 'default' instrument entry.
        try:
            s_param.update(instrument[fc_con.get_instrument(str(fc))])
        except:
            LOG.warn("Failed to set instrument and software versions for flowcell {} in report due to missing RunInfo -> Instrument field in statusdb. Either rerun 'pm qc update-qc' or search-and-replace 'NN' in the sample report.".format(fc))
            s_param.update(instrument['default'])
        # Get run mode
        s_param["run_mode"] = fc_con.get_run_mode(str(fc))
        s_param.update(software_versions)
        s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
        # NOTE(review): the excerpt is cut off on the next statement; as shown it
        # binds the name _get_phix_error itself rather than the result of a call.
        if phix:
            s_param["phix_error_rate"] = _get_phix_error 

鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python statusdb.SampleRunMetricsConnection类代码示例 发布时间:2022-05-27
下一篇:
Python lib.cmov_window函数代码示例 发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap