• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python common.timesofar函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中utils.common.timesofar函数的典型用法代码示例。如果您正苦于以下问题:Python timesofar函数的具体用法?Python timesofar怎么用?Python timesofar使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了timesofar函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: get_genome_in_bit

def get_genome_in_bit(chr_fa_folder):
    '''Encode each chromosome fasta sequence into a bitarray and return
       them in a dictionary keyed by chromosome name ('1'..'22', 'X', 'Y', 'MT').

       chr_fa_folder is the folder holding the gzipped fasta files, one per
       chromosome, named chr<i>.fa.gz  (e.g. chr1.fa.gz).

       fasta files can be downloaded from NCBI FTP site:

       ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh37.p13/Primary_Assembly/assembled_chromosomes/FASTA/
    '''
    chromosomes = [str(num) for num in range(1, 23)]
    chromosomes.extend(['X', 'Y', 'MT'])
    genome_bits = {}
    started_all = time.time()
    for chrom in chromosomes:
        started_one = time.time()
        base_name = 'chr{}.fa.gz'.format(chrom)
        print("Loading {}...".format(base_name), end='')
        with open_anyfile(os.path.join(chr_fa_folder, base_name)) as fasta:
            fasta.readline()   # the first line is the header, not sequence
            encoded = bitarray()
            for raw_line in fasta:
                encoded += nuc_to_bit(raw_line.rstrip('\n'))
            genome_bits[chrom] = encoded
        print("done.[{}]".format(timesofar(started_one)))
    print('='*20)
    print("Finished. [{}]".format(timesofar(started_all)))

    return genome_bits
开发者ID:bainscou,项目名称:myvariant.info,代码行数:33,代码来源:validate.py


示例2: redo_parse_gbff

def redo_parse_gbff(path):
    '''Re-run only the parsing step for the 'entrez' source and record the
       outcome in src_dump.

       Call this manually when main() broke during parsing and the parsing
       needs to be restarted after a fix.  path is handed to parse_gbff.
    '''
    src_dump = get_src_dump()

    started = time.time()
    # Taken immediately after the start mark: nothing is downloaded here.
    download_elapsed = timesofar(started)
    parse_started = time.time()
    # flag the record as "parsing" before the work begins
    src_dump.update({'_id': 'entrez'}, {'$set': {'status': 'parsing'}})
    parse_gbff(path)
    parsing_elapsed = timesofar(parse_started)
    total_elapsed = timesofar(started)

    # flag the record as finished and store the timings
    timings = {
        'download': download_elapsed,
        'parsing': parsing_elapsed,
        'total': total_elapsed
    }
    final_state = {
        'status': 'success',
        'time': timings,
        'pending_to_upload': True    # a flag to trigger data uploading
    }
    src_dump.update({'_id': 'entrez'}, {'$set': final_state})
开发者ID:putmantime,项目名称:mygeneinfo_gh,代码行数:29,代码来源:dl_entrez.py


示例3: doc_feeder

    def doc_feeder(self, index_type=None, index_name=None, step=10000, verbose=True, query=None, scroll='10m', **kwargs):
        '''Yield, one at a time, the documents produced by helpers.scan for query.

        index_type / index_name fall back to self.ES_INDEX_TYPE /
        self.ES_INDEX_NAME when not given.
        step is passed to the scan as "size" and is also the interval (in
        documents) at which progress is printed when verbose is true.
        scroll and any extra **kwargs are forwarded to helpers.scan.
        '''
        conn = self.conn
        index_name = index_name or self.ES_INDEX_NAME
        doc_type = index_type or self.ES_INDEX_TYPE

        n = self.count(query=query)['count']
        cnt = 0
        t0 = time.time()
        if verbose:
            print('\ttotal docs: {}'.format(n))

        def _percent(done):
            # A reported count of 0 would otherwise raise ZeroDivisionError
            # in the progress lines; treat "nothing to do" as complete.
            return done * 100. / n if n else 100.

        _kwargs = kwargs.copy()
        _kwargs.update(dict(size=step, index=index_name, doc_type=doc_type))
        res = helpers.scan(conn, query=query, scroll=scroll, **_kwargs)
        t1 = time.time()
        for doc in res:
            if verbose and cnt % step == 0:
                if cnt != 0:
                    # close the progress line of the batch that just ended
                    print('done.[%.1f%%,%s]' % (_percent(cnt), timesofar(t1)))
                print('\t{}-{}...'.format(cnt+1, min(cnt+step, n)), end='')
                t1 = time.time()
            yield doc
            cnt += 1
        if verbose:
            print('done.[%.1f%%,%s]' % (_percent(cnt), timesofar(t1)))
            print("Finished! [{}]".format(timesofar(t0)))
开发者ID:putmantime,项目名称:mygeneinfo_gh,代码行数:26,代码来源:es.py


示例4: load_contig

def load_contig(contig):
    '''Save one cadd contig into the "cadd" mongodb collection.

       Documents come from fetch_generator(tabix, contig) and are inserted
       in batches of 100; progress is printed every 100000 documents.
    '''
    # if CADD_INPUT == "exome":
    # CADD_INPUT = exome
    tabix = pysam.Tabixfile(whole_genome)
    cadd_coll = get_src_db()["cadd"]
    started = time.time()
    batch_size = 100
    total = 0
    pending = []
    for record in fetch_generator(tabix, contig):
        pending.append(record)
        total += 1
        if len(pending) == batch_size:
            cadd_coll.insert(pending, manipulate=False, check_keys=False, w=0)
            pending = []
        if total % 100000 == 0:
            print(total, timesofar(started))
    # flush whatever is left over from the last, partial batch
    if pending:
        cadd_coll.insert(pending, manipulate=False, check_keys=False, w=0)
    print("successfully loaded cadd chromosome %s into mongodb" % contig)
    print("total docs: {}; total time: {}".format(total, timesofar(started)))
开发者ID:mjuchler,项目名称:myvariant.info,代码行数:25,代码来源:cadd_parser.py


示例5: doc_feeder

def doc_feeder(collection, step=1000, s=None, e=None, inbatch=False, query=None, batch_callback=None, fields=None):
    '''A iterator for returning docs in a collection, with batch query.

       collection is either a collection name (looked up in get_src_db())
       or a collection object exposing find().
       additional filter query can be passed via "query", e.g.,
       doc_feeder(collection, query={'taxid': {'$in': [9606, 10090, 10116]}})
       s / e are the number of documents to skip and the end offset; they
       default to 0 and the number of matching documents.
       With inbatch=True lists of up to "step" docs are yielded instead of
       single docs.
       batch_callback is a callback function as fn(cnt, t), called after every batch
       fields is optional parameter passed to find to restrict fields to return.
    '''
    if isinstance(collection, str):
        # Only a collection name needs a database handle to resolve it.
        cur = get_src_db()[collection].find(query, fields)
    else:
        # query/fields are passed positionally so the call does not depend on
        # the keyword names used by a particular driver version.
        cur = collection.find(query, fields)
    n = cur.count()
    s = s or 0
    e = e or n
    print('Retrieving {} documents from database "{}".'.format(n, collection))
    t0 = time.time()
    doc_li = []
    cnt = 0
    t1 = time.time()

    def _percent(done):
        # Avoid ZeroDivisionError when nothing matches.
        return done * 100. / n if n else 100.

    try:
        if s:
            cur.skip(s)
            cnt = s
            print("Skipping {} documents.".format(s))
        if e:
            cur.limit(e - s)
        cur.batch_size(step)
        print("Processing {}-{} documents...".format(cnt + 1, min(cnt + step, e)), end='')
        for doc in cur:
            if inbatch:
                doc_li.append(doc)
            else:
                yield doc
            cnt += 1
            if cnt % step == 0:
                if inbatch:
                    yield doc_li
                    doc_li = []
                print('Done.[%.1f%%,%s]' % (_percent(cnt), timesofar(t1)))
                if batch_callback:
                    batch_callback(cnt, time.time()-t1)
                if cnt < e:
                    t1 = time.time()
                    print("Processing {}-{} documents...".format(cnt + 1, min(cnt + step, e)), end='')
        if inbatch and doc_li:
            #Important: need to yield the last batch here
            yield doc_li

        print('Done.[%.1f%%,%s]' % (_percent(cnt), timesofar(t1)))
        print("=" * 20)
        print('Finished.[total time: {}]'.format(timesofar(t0)))
    finally:
        # always release the cursor, even if the consumer stops early
        cur.close()
开发者ID:bainscou,项目名称:myvariant.info,代码行数:56,代码来源:mongo.py


示例6: two_docs_iterator

def two_docs_iterator(b1, b2, id_list, step=10000):
    t0 = time.time()
    n = len(id_list)
    for i in range(0, n, step):
        t1 = time.time()
        print "Processing %d-%d documents..." % (i + 1, min(i + step, n)),
        _ids = id_list[i:i+step]
        iter1 = b1.mget_from_ids(_ids, asiter=True)
        iter2 = b2.mget_from_ids(_ids, asiter=True)
        for doc1, doc2 in zip(iter1, iter2):
            yield doc1, doc2
        print 'Done.[%.1f%%,%s]' % (i*100./n, timesofar(t1))
    print "="*20
    print 'Finished.[total time: %s]' % timesofar(t0)
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:14,代码来源:diff.py


示例7: main

    def main(self, index, collection, diff_filepath, validate=False, wait=60):
        '''Apply the diff stored at diff_filepath to the ES index.

        The diff object (loaded with loadobj) is read for the keys 'source',
        'add', 'delete' and 'update'.  The three action lists built by
        self.add / self.delete / self.update are sent with bulk(), in that
        order, and each step is timed.

        When validate is true, the method sleeps "wait" seconds, copies the
        indexed docs carrying the "collection" field into a temporary
        collection, compares it with the source collection via
        diff_collections and returns that result.  Otherwise returns None.
        '''
        self._index = index
        self._esi._index = index
        diff = loadobj(diff_filepath)
        source_collection = diff['source']
        add_list = self.add(source_collection, diff['add'])
        delete_list = self.delete(collection, diff['delete'])
        update_list = self.update(diff['update'])
        t00 = time()
        print('Adding new {} docs...'.format(len(diff['add'])))
        t0 = time()
        bulk(self._es, add_list)
        print("Done. [{}]".format(timesofar(t0)))
        print('Deleting {} docs'.format(len(diff['delete'])))
        t0 = time()
        bulk(self._es, delete_list)
        print("Done. [{}]".format(timesofar(t0)))
        print('Updating {} docs'.format(len(diff['update'])))
        t0 = time()
        bulk(self._es, update_list)
        print("Done. [{}]".format(timesofar(t0)))
        print("="*20)
        print("Finished! [{}]".format(timesofar(t00)))
        if validate:
            print('Waiting {}s to let ES to finish...'.format(wait), end="")
            sleep(wait)
            print("Done.")
            print("Validating...")
            t0 = time()
            # match every document that has the "collection" field
            q = {
                "query": {
                    "constant_score": {
                        "filter": {
                            "exists": {
                                "field": collection
                            }
                        }
                    }
                }
            }
            data = self._esi.doc_feeder(query=q, _source=collection)
            temp_collection = collection + '_temp_' + get_random_string()
            # make sure the scratch collection starts out empty
            self._src[temp_collection].drop()
            load_source(temp_collection, src_data=data)
            c1 = get_backend(source_collection, 'mongodb')
            c2 = get_backend(temp_collection, 'mongodb')
            diff_result = diff_collections(c1, c2, use_parallel=False)
            self._src[temp_collection].drop()
            # report the elapsed time, not the raw start timestamp
            print("Done. [{}]".format(timesofar(t0)))
            return diff_result
开发者ID:dmcgoldrick,项目名称:myvariant.info,代码行数:50,代码来源:es_sync.py


示例8: apply_changes

    def apply_changes(self, changes, verify=True, noconfirm=False):
        '''Apply the add / delete / update lists in changes to the ES target.

        changes is read for the keys 'source', 'timestamp', 'add', 'delete'
        and 'update'.  When verify is true pre_verify_changes runs first.
        Unless noconfirm is true the user is asked to confirm; any answer
        other than 'Y' prints "Aborted." and returns -1.  Otherwise the
        method returns None after target.finalize().
        '''
        if verify:
            self.pre_verify_changes(changes)

        if not noconfirm and ask('\nContinue to apply changes?') != 'Y':
            print("Aborted.")
            return -1

        batch_size = self.step
        source_col = get_target_db()[changes['source']]
        src = GeneDocMongoDBBackend(source_col)
        target = GeneDocESBackend(self)
        stamp = changes['timestamp']

        def _add_docs(ids):
            # Copy the docs with these ids from src to target, batch by
            # batch, stamping each doc with the change-set timestamp.
            done = 0
            for id_batch in iter_n(ids, batch_size):
                batch_started = time.time()
                docs = src.mget_from_ids(id_batch)
                for doc in docs:
                    doc['_timestamp'] = stamp
                    done += 1
                target.insert(docs)
                print('\t{}\t{}'.format(done, timesofar(batch_started)))

        started = time.time()

        to_add = changes['add']
        if to_add:
            print("Adding {} new docs...".format(len(to_add)))
            step_started = time.time()
            _add_docs(to_add)
            print("done. [{}]".format(timesofar(step_started)))

        to_delete = changes['delete']
        if to_delete:
            print("Deleting {} discontinued docs...".format(len(to_delete)), end='')
            step_started = time.time()
            target.remove_from_ids(to_delete, step=batch_size)
            print("done. [{}]".format(timesofar(step_started)))

        to_update = changes['update']
        if to_update:
            print("Updating {} existing docs...".format(len(to_update)))
            step_started = time.time()
            # updated docs are simply re-inserted by id
            _add_docs([entry['_id'] for entry in to_update])
            print("done. [{}]".format(timesofar(step_started)))

        target.finalize()

        print("\n")
        print("Finished.", timesofar(started))
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:48,代码来源:es_sync.py


示例9: run_jobs_on_ipythoncluster

def run_jobs_on_ipythoncluster(worker, task_list, shutdown_ipengines_after_done=False):

    t0 = time.time()
    rc = Client(CLUSTER_CLIENT_JSON)
    lview = rc.load_balanced_view()
    print "\t# nodes in use: {}".format(len(lview.targets or rc.ids))
    lview.block = False

    print "\t# of tasks: {}".format(len(task_list))
    print "\tsubmitting...",
    job = lview.map_async(worker, task_list)
    print "done."
    try:
        job.wait_interactive()
    except KeyboardInterrupt:
        #handle "Ctrl-C"
        if ask("\nAbort all submitted jobs?") == 'Y':
            lview.abort()
            print "Aborted, all submitted jobs are cancelled."
        else:
            print "Aborted, but your jobs are still running on the cluster."
        return

    if len(job.result) != len(task_list):
        print "WARNING:\t# of results returned ({}) != # of tasks ({}).".format(len(job.result), len(task_list))
    print "\ttotal time: {}".format(timesofar(t0))

    if shutdown_ipengines_after_done:
        print "\tshuting down all ipengine nodes...",
        lview.shutdown()
        print 'Done.'
    return job.result
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:32,代码来源:parallel.py


示例10: merge

    def merge(self, step=100000, restart_at=0):
        t0 = time.time()
        self.validate_src_collections()
        self.log_building_start()
        try:
            if self.using_ipython_cluster:
                self._merge_ipython_cluster(step=step)
            else:
                self._merge_local(step=step, restart_at=restart_at)

            if self.target.name == 'es':
                print "Updating metadata...",
                self.update_mapping_meta()

            t1 = round(time.time() - t0, 0)
            t = timesofar(t0)
            self.log_src_build({'status': 'success',
                                'time': t,
                                'time_in_s': t1,
                                'timestamp': datetime.now()})

        finally:
            #do a simple validation here
            if getattr(self, '_stats', None):
                print "Validating..."
                target_cnt = self.target.count()
                if target_cnt == self._stats['total_genes']:
                    print "OK [total count={}]".format(target_cnt)
                else:
                    print "Warning: total count of gene documents does not match [{}, should be {}]".format(target_cnt, self._stats['total_genes'])

            if self.merge_logging:
                sys.stdout.close()
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:33,代码来源:builder.py


示例11: load_x

def load_x(idx, fieldname, cvt_fn=None):
    '''Build a {gene_id: {fieldname: value(s)}} mapping from idmapping_selected.tab.gz.

    idx is the column index of the target value in the data file; columns 2
    and 19 are read alongside it as GeneID and Ensembl(Gene).
    cvt_fn, if given, is applied to every non-empty target value.
    Rows without a GeneID are attached to the GeneID(s) mapped from their
    Ensembl id, or to the Ensembl id itself when no mapping exists.
    '''
    print('DATA_FOLDER: ' + DATA_FOLDER)
    DATAFILE = os.path.join(DATA_FOLDER, 'idmapping_selected.tab.gz')
    load_start(DATAFILE)
    t0 = time.time()
    xli = []
    for ld in tabfile_feeder(DATAFILE, header=1):
        ld = listitems(ld, *(2,19,idx))    # GeneID Ensembl(Gene) target_value
        xli.extend(dupline_seperator(dupline=ld, dup_sep='; '))

    # Ensembl id -> list of GeneIDs, built from rows that carry both ids
    ensembl2geneid = list2dict(list_nondup([(x[1], x[0]) for x in xli if x[0]!='' and x[1]!='']), 0, alwayslist=True)
    xli2 = []
    for entrez_id, ensembl_id, x_value in xli:
        if not x_value:
            continue
        if cvt_fn:
            x_value = cvt_fn(x_value)
        if entrez_id:
            xli2.append((entrez_id, x_value))
        elif ensembl_id:
            entrez_id = ensembl2geneid.get(ensembl_id, None)
            if entrez_id:
                for _eid in entrez_id:
                    xli2.append((_eid, x_value))
            else:
                xli2.append((ensembl_id, x_value))

    gene2x = list2dict(list_nondup(xli2), 0)

    def _wrap(value):
        # isinstance(..., list) replaces the Python 2-only types.ListType
        # check; list values are sorted, anything else is kept as is.
        return {fieldname: sorted(value) if isinstance(value, list) else value}

    gene2x = value_convert(gene2x, _wrap, traverse_list=False)
    load_done('[%d, %s]' % (len(gene2x), timesofar(t0)))

    return gene2x
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:34,代码来源:uniprot_base.py


示例12: run2

def run2():
    from databuild.esbuilder import ESIndexerBase
    esb = ESIndexerBase()
    doc_d = build(sources)
    t0 = time.time()
    esb.build_index(doc_d)
    print 'Done[%s]' % timesofar(t0)
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:7,代码来源:__init__.py


示例13: update_index

def update_index(changes, sync_src, sync_target, noconfirm=False):
    # changes['_add'] = changes['delete']
    # changes['_delete'] = changes['add']
    # changes['delete'] = changes['_delete']
    # changes['add'] = changes['_add']
    # del changes['_add']
    # del changes['_delete']

    print "\t{}\trecords will be added.".format(len(changes['add']))
    print "\t{}\trecords will be deleted.".format(len(changes['delete']))
    print "\t{}\trecords will be updated.".format(len(changes['update']))

    print
    print '\tsync_src:\t{:<45}{}'.format(sync_src.target_collection.name,
                                             sync_src.name)
    print '\tsync_target\t{:<45}{}'.format(sync_target.target_esidxer.ES_INDEX_NAME,
                                               sync_target.name)

    if noconfirm or ask("Continue?")=='Y':
        t00 = time.time()
        es_idxer = sync_target.target_esidxer

        if len(changes['add']) > 0:
            print "Adding {} new records...".format(len(changes['add']))
            t0 = time.time()
            _q = {'_id': {'$in': changes['add']}}
            for docs in doc_feeder(sync_src.target_collection, step=1000, inbatch=True, query=_q):
                es_idxer.add_docs(docs)
            print "Done. [{}]".format(timesofar(t0))

        if len(changes['delete']) > 0:
            print "Deleting {} old records...".format(len(changes['delete']))
            t0 = time.time()
            es_idxer.delete_docs(changes['delete'])
            print "Done. [{}]".format(timesofar(t0))

        if len(changes['update']) > 0:
            print "Updating {} existing records...".format(len(changes['update']))
            t0 = time.time()
            ids = [d['_id'] for d in changes['update']]
            _q = {'_id': {'$in': ids}}
            for docs in doc_feeder(sync_src.target_collection, step=1000, inbatch=True, query=_q):
                es_idxer.add_docs(docs)
            print "Done. [{}]".format(timesofar(t0))
        print '='*20
        print 'Finished. [{}]'.format(timesofar(t00))
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:46,代码来源:indexer.py


示例14: main

def main():
    '''Download and parse the entrez data into DATA_FOLDER, tracking the
       progress in the 'entrez' record of src_dump.

       stdout and stderr are redirected to a log file in DATA_FOLDER; the
       log is closed when download/parsing ends, whether or not it failed.
       The record is marked 'success' only if both steps finish.
    '''
    no_confirm = True   # set it to True for running this script automatically without intervention.

    if not os.path.exists(DATA_FOLDER):
        os.makedirs(DATA_FOLDER)
    elif not no_confirm and os.listdir(DATA_FOLDER) and ask('DATA_FOLDER (%s) is not empty. Continue?' % DATA_FOLDER) != 'Y':
        sys.exit()

    log_path = os.path.join(DATA_FOLDER, 'entrez_dump.log')
    log_f, logfile = safewfile(log_path, prompt=(not no_confirm), default='O')
    sys.stdout = LogPrint(log_f, timestamp=True)
    sys.stderr = sys.stdout

    # record that the download is starting
    src_dump = get_src_dump()
    src_dump.save({'_id': 'entrez',
                   'timestamp': timestamp,
                   'data_folder': DATA_FOLDER,
                   'logfile': logfile,
                   'status': 'downloading'})
    started = time.time()
    try:
        download(DATA_FOLDER, no_confirm=no_confirm)
        download_elapsed = timesofar(started)
        parse_started = time.time()
        # record that parsing is starting
        src_dump.update({'_id': 'entrez'}, {'$set': {'status': 'parsing'}})
        parse_gbff(DATA_FOLDER)
        parsing_elapsed = timesofar(parse_started)
        total_elapsed = timesofar(started)
    finally:
        sys.stdout.close()

    # record the successful finish together with the timings
    timings = {
        'download': download_elapsed,
        'parsing': parsing_elapsed,
        'total': total_elapsed
    }
    final_state = {
        'status': 'success',
        'time': timings,
        'pending_to_upload': True    # a flag to trigger data uploading
    }
    src_dump.update({'_id': 'entrez'}, {'$set': final_state})
开发者ID:putmantime,项目名称:mygeneinfo_gh,代码行数:46,代码来源:dl_entrez.py


示例15: load

    def load(self, genedoc_d=None, update_data=True, update_master=True, test=False, step=10000):
        '''Upload this source's docs and/or refresh its src_master record.

        With update_data, the docs (genedoc_d, or self.load_genedoc() when
        not given) are inserted into the temp collection in batches of
        "step", the collections are switched, and the optional GridFS dumps
        are written.  With update_master, a GeneDocSourceMaster record is
        saved.  When test is true, neither the inserts nor the master
        record are written.
        '''
        if not self.temp_collection:
            self.make_temp_collection()

        self.temp_collection.drop()       # drop all existing records just in case.

        if update_data:
            genedoc_d = genedoc_d or self.load_genedoc()

            print("Uploading to the DB...", end='')
            started = time.time()
            for batch in self.doc_iterator(genedoc_d, batch=True, step=step):
                if test:
                    # the iterator is still consumed, just not written
                    continue
                self.temp_collection.insert(batch, manipulate=False, check_keys=False)
            print('Done[%s]' % timesofar(started))
            self.switch_collection()

            if getattr(self, 'ENTREZ_GENEDOC_ROOT', False):
                print('Uploading "geneid_d" to GridFS...', end='')
                started = time.time()
                dump2gridfs(self.get_geneid_d(), self.__collection__ + '__geneid_d.pyobj', self.db)
                print('Done[%s]' % timesofar(started))
            if getattr(self, 'ENSEMBL_GENEDOC_ROOT', False):
                print('Uploading "mapping2entrezgene" to GridFS...', end='')
                started = time.time()
                dump2gridfs(self.get_mapping_to_entrez(), self.__collection__ + '__2entrezgene_list.pyobj', self.db)
                print('Done[%s]' % timesofar(started))

        if update_master and not test:
            # update src_master collection
            master_doc = {"_id": unicode(self.__collection__),
                          "name": unicode(self.__collection__),
                          "timestamp": datetime.datetime.now()}
            for attr in ('ENTREZ_GENEDOC_ROOT', 'ENSEMBL_GENEDOC_ROOT', 'id_type'):
                if hasattr(self, attr):
                    master_doc[attr] = getattr(self, attr)
            if hasattr(self, 'get_mapping'):
                master_doc['mapping'] = self.get_mapping()

            conn.GeneDocSourceMaster(master_doc).save()
开发者ID:putmantime,项目名称:mygeneinfo_gh,代码行数:46,代码来源:__init__.py


示例16: main

def main(daemon=False):
    running_processes = {}
    while 1:
        src_to_update_li = check_mongo()
        if src_to_update_li:
            print '\nDispatcher:  found pending jobs ', src_to_update_li
            for src_to_update in src_to_update_li:
                if src_to_update not in running_processes:
                    mark_upload_started(src_to_update)
                    p = dispatch(src_to_update)
                    src_dump.update({'_id': src_to_update}, {"$set": {"upload.pid": p.pid}})
                    p.t0 = time.time()
                    running_processes[src_to_update] = p

        jobs_finished = []
        if running_processes:
            print 'Dispatcher:  {} active job(s)'.format(len(running_processes))
            print get_process_info(running_processes)

        for src in running_processes:
            p = running_processes[src]
            returncode = p.poll()
            if returncode is not None:
                t1 = round(time.time()-p.t0, 0)
                d = {
                     'upload.returncode': returncode,
                     'upload.timestamp': datetime.now(),
                     'upload.time_in_s': t1,
                     'upload.time': timesofar(p.t0),
                     'upload.logfile': p.logfile,
                     }
                if returncode == 0:
                    print 'Dispatcher:  {} finished successfully with code {} (time: {}s)'.format(src, returncode, t1)
                    d['upload.status'] = "success"
                else:
                    print 'Dispatcher:  {} failed with code {} (time: {}s)'.format(src, returncode, t1)
                    d['upload.status'] = "failed"

                mark_upload_done(src, d)
                jobs_finished.append(src)
                p.log_f.close()
            else:
                p.log_f.flush()
        for src in jobs_finished:
            del running_processes[src]

        if running_processes:
            time.sleep(10)
        else:
            if daemon:
                #continue monitor src_dump collection
                print '\b'*50,
                for i in range(100):
                    print '\b'*2+[unichr(8212), '\\', '|', '/'][i%4],
                    time.sleep(0.1)
            else:
                break
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:57,代码来源:dispatch.py


示例17: _db_upload

    def _db_upload(self, doc_li, step=10000, verbose=True):
        '''Send doc_li to self.target_db.update in slices of "step" docs and
        print a summary of the outcome.

        The summary counts the results by their first item (True / False)
        and lists the ten most frequent messages among the failed ones.
        NOTE(review): each result appears to be a sequence whose item 0 is a
        success flag and item 2 an exception -- confirm against
        target_db.update.
        '''
        import time
        from utils.common import timesofar
        from utils.dataload import list2dict, list_itemcnt, listsort

        total = len(doc_li)
        results = []
        started = time.time()
        for start in range(0, total, step):
            stop = start + step
            results.extend(self.target_db.update(doc_li[start:stop]))
            if verbose:
                print('\t%d-%d Done [%s]...' % (start+1, min(stop, total), timesofar(started)))

        status_cnt = list2dict(list_itemcnt([item[0] for item in results]), 0)
        print("Done![%s, %d OK, %d Error]" % (timesofar(started), status_cnt.get(True, 0), status_cnt.get(False, 0)))
        failed_msgs = [item[2].args[0] for item in results if item[0] is False]
        top_errors = listsort(list_itemcnt(failed_msgs), 1, reverse=True)
        print('\n'.join(['\t%s\t%d' % pair for pair in top_errors[:10]]))
        omitted = len(top_errors) - 10
        if omitted > 0:
            print("\t%d lines omitted..." % omitted)
开发者ID:putmantime,项目名称:mygeneinfo_gh,代码行数:18,代码来源:backend.py


示例18: doc_feeder

    def doc_feeder(self, step=1000, s=None, e=None, inbatch=False, query=None, **kwargs):
        '''A iterator for returning docs in a ES index with batch query.
           additional filter query can be passed via "query", e.g.,
           doc_feeder(query='taxid:9606')
           other parameters can be passed via "**kwargs":
                fields, from, size etc.

           step is the batch size requested from ES; s is passed as "start".
           With inbatch=True each cleaned batch is yielded as a whole,
           otherwise hits are yielded one by one.
           e, when set, stops the iteration once more than e docs have been
           processed (checked only after a full batch).
        '''
        if query:
            q = StringQuery(query)
        else:
            q = MatchAllQuery()
        raw_res = None

        cnt = 0
        t0 = time.time()
        while 1:
            t1 = time.time()
            if raw_res is None:
                # first pass: open the scan/scroll search and read the total
                raw_res = self.conn.search_raw(q, self._index, self._doc_type,
                      start=s, size=step, scan=True, scroll='5m', **kwargs)
                n = raw_res['hits']['total']
                print 'Retrieving %d documents from index "%s/%s".' % (n, self._index, self._doc_type)
            else:
                # later passes: fetch the next batch using the scroll id
                raw_res = self.conn.search_scroll(raw_res._scroll_id, scroll='5m')
            hits_cnt = len(raw_res['hits']['hits'])
            if hits_cnt == 0:
                # an empty batch ends the loop
                break
            else:

                print "Processing %d-%d documents..." % (cnt+1, cnt+hits_cnt) ,
                res = self._cleaned_res(raw_res)
                if inbatch:
                    yield res
                else:
                    for hit in res:
                        yield hit
                cnt += hits_cnt
                print 'Done.[%.1f%%,%s]' % (cnt*100./n, timesofar(t1))
                if e and cnt > e:
                    break

        print "="*20
        print 'Finished.[total docs: %s, total time: %s]' % (cnt, timesofar(t0))
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:43,代码来源:__init__.py


示例19: _add_docs

 def _add_docs(ids):
     '''Fetch the docs with the given ids from src, stamp each with
        _timestamp and insert them into target, one batch at a time.

        step, src, target and _timestamp are not defined here; they come
        from the enclosing scope.
     '''
     done = 0
     for id_batch in iter_n(ids, step):
         batch_started = time.time()
         docs = src.mget_from_ids(id_batch)
         for doc in docs:
             doc['_timestamp'] = _timestamp
             done += 1
         target.insert(docs)
         # running doc count and the time spent on this batch
         print('\t{}\t{}'.format(done, timesofar(batch_started)))
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:10,代码来源:es_sync.py


示例20: main_cron

def main_cron():
    '''Download the latest ensembl mart files unless they are already there.

       Exits early when src_dump already records this (or a newer) release
       and its main data file exists.  Otherwise the files are downloaded
       into ENSEMBL_FOLDER/<mart_version>, with stdout redirected to a log
       file in that folder, and the 'ensembl' record in src_dump is updated
       before and after the download.
    '''
    no_confirm = True   # set it to True for running this script automatically without intervention.

    src_dump = get_src_dump()
    print("Checking latest mart_version:\t", end=' ')
    mart_version = chk_latest_mart_version()
    print(mart_version)

    last_dump = src_dump.find_one({'_id': 'ensembl'})
    if last_dump and 'release' in last_dump and mart_version <= last_dump['release']:
        main_file = os.path.join(last_dump['data_folder'], 'gene_ensembl__gene__main.txt')
        if os.path.exists(main_file):
            print("No newer release found. Abort now.")
            sys.exit(0)

    DATA_FOLDER = os.path.join(ENSEMBL_FOLDER, str(mart_version))
    if not os.path.exists(DATA_FOLDER):
        os.makedirs(DATA_FOLDER)
    elif not no_confirm and os.listdir(DATA_FOLDER) and ask('DATA_FOLDER (%s) is not empty. Continue?' % DATA_FOLDER) != 'Y':
        sys.exit(0)

    log_path = os.path.join(DATA_FOLDER, 'ensembl_mart_%s.log' % mart_version)
    log_f, logfile = safewfile(log_path, prompt=(not no_confirm), default='O')
    sys.stdout = LogPrint(log_f, timestamp=True)

    # record that the download is starting
    src_dump.save({'_id': 'ensembl',
                   'release': mart_version,
                   'timestamp': time.strftime('%Y%m%d'),
                   'data_folder': DATA_FOLDER,
                   'logfile': logfile,
                   'status': 'downloading'})
    started = time.time()

    # (BioMart method name, output file name), in download order
    downloads = (
        ('get_gene__main', 'gene_ensembl__gene__main.txt'),
        ('get_translation__main', 'gene_ensembl__translation__main.txt'),
        ('get_xref_entrezgene', 'gene_ensembl__xref_entrezgene__dm.txt'),
        ('get_profile', 'gene_ensembl__prot_profile__dm.txt'),
        ('get_interpro', 'gene_ensembl__prot_interpro__dm.txt'),
        ('get_pfam', 'gene_ensembl__prot_pfam__dm.txt'),
    )
    try:
        BM = BioMart()
        BM.species_li = get_all_species(mart_version)
        for method_name, out_name in downloads:
            getattr(BM, method_name)(os.path.join(DATA_FOLDER, out_name))
    finally:
        sys.stdout.close()

    # record the successful finish
    final_state = {
        'status': 'success',
        'time': timesofar(started),
        'pending_to_upload': True    # a flag to trigger data uploading
    }
    src_dump.update({'_id': 'ensembl'}, {'$set': final_state})
开发者ID:putmantime,项目名称:mygeneinfo_gh,代码行数:55,代码来源:dl_ensembl_mart.py



注:本文中的utils.common.timesofar函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python cfme_data.get函数代码示例发布时间:2022-05-26
下一篇:
Python cmd.git函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap