
Python toolz.partition_all Function Code Examples


This article collects typical usage examples of the toolz.partition_all function in Python. If you have been wondering exactly what partition_all does, how to call it, or what real-world uses look like, the curated examples below should help.



The following presents 20 code examples of the partition_all function, ordered by popularity by default.
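
As a quick orientation before the examples: partition_all(n, seq) lazily slices any iterable into tuples of at most n elements, with only the final tuple allowed to be shorter. A minimal sketch:

from toolz import partition_all

# Tuples of at most 2 elements; the last one may be shorter.
list(partition_all(2, [1, 2, 3, 4, 5]))
# -> [(1, 2), (3, 4), (5,)]

# It is lazy, so it also works on unbounded or very large streams.
squares = (i * i for i in range(10**9))
first_chunk = next(partition_all(1000, squares))  # tuple of the first 1000 squares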

Example 1: cb_filter

def cb_filter(fastq, bc1, bc2, cores, nedit):
    ''' Filters reads with non-matching barcodes
    Expects formatted fastq files.
    '''

    bc1 = set(cb.strip() for cb in bc1)
    if bc2:
        bc2 = set(cb.strip() for cb in bc2)

    if nedit == 0:
        filter_cb = partial(exact_barcode_filter, bc1=bc1, bc2=bc2)
    else:
        bc1hash = MutationHash(bc1, nedit)
        bc2hash = None
        if bc2:
            bc2hash = MutationHash(bc2, nedit)
        filter_cb = partial(correcting_barcode_filter, bc1hash=bc1hash,
                            bc2hash=bc2hash)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, stream_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_cb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Developer: roryk, Project: umis, Lines: 26, Source: umis.py
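
The two-level chunking above is a recurring idiom in the umis examples below: reads are grouped into chunks of 10,000, and the chunks are grouped into batches of `cores` so each `p.map` call hands every worker exactly one chunk. Here is a minimal self-contained sketch of the same pattern; the names `process_chunk` and `run` are illustrative, not part of umis:

import multiprocessing
import sys
from functools import partial

import toolz as tz

def process_chunk(chunk, tag=''):
    # Stand-in for a real per-chunk filter/transform.
    return [line + tag for line in chunk]

def run(lines, cores=4):
    worker = partial(process_chunk, tag='\n')
    chunks = tz.partition_all(10000, lines)      # units of work
    bigchunks = tz.partition_all(cores, chunks)  # one batch per map() call
    with multiprocessing.Pool(cores) as p:
        for bigchunk in bigchunks:
            for chunk in p.map(worker, list(bigchunk)):
                for line in chunk:
                    sys.stdout.write(line)

if __name__ == '__main__':
    run(str(i) for i in range(100000))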


Example 2: partial_reduce

def partial_reduce(func, x, split_every, keepdims=False, dtype=None, name=None):
    """Partial reduction across multiple axes.

    Parameters
    ----------
    func : function
    x : Array
    split_every : dict
        Maximum reduction block sizes in each dimension.

    Example
    -------
    Reduce across axis 0 and 2, merging a maximum of 1 block in the 0th
    dimension, and 3 blocks in the 2nd dimension:

    >>> partial_reduce(np.min, x, {0: 1, 2: 3})    # doctest: +SKIP
    """
    name = name or 'p_reduce-' + tokenize(func, x, split_every, keepdims, dtype)
    parts = [list(partition_all(split_every.get(i, 1), range(n))) for (i, n)
             in enumerate(x.numblocks)]
    keys = product(*map(range, map(len, parts)))
    out_chunks = [tuple(1 for p in partition_all(split_every[i], c)) if i
                  in split_every else c for (i, c) in enumerate(x.chunks)]
    if not keepdims:
        out_axis = [i for i in range(x.ndim) if i not in split_every]
        getter = lambda k: get(out_axis, k)
        keys = map(getter, keys)
        out_chunks = list(getter(out_chunks))
    dsk = {}
    for k, p in zip(keys, product(*parts)):
        decided = dict((i, j[0]) for (i, j) in enumerate(p) if len(j) == 1)
        dummy = dict(i for i in enumerate(p) if i[0] not in decided)
        g = lol_tuples((x.name,), range(x.ndim), decided, dummy)
        dsk[(name,) + k] = (func, g)
    return Array(merge(dsk, x.dask), name, out_chunks, dtype=dtype)
Developer: jcorbin, Project: dask, Lines: 35, Source: reductions.py
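
The role of partition_all here is easy to see in isolation: it groups consecutive block indices along each reduced axis. For an axis of 8 blocks with a split_every of 3:

from toolz import partition_all

list(partition_all(3, range(8)))
# -> [(0, 1, 2), (3, 4, 5), (6, 7)]
# i.e. three output blocks, built from 3, 3 and 2 input blocks.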


Example 3: mb_filter

def mb_filter(fastq, cores):
    ''' Filters umis with non-ACGT bases
    Expects formatted fastq files.
    '''
    filter_mb = partial(umi_filter)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_mb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Developer: vals, Project: umis, Lines: 13, Source: umis.py


Example 4: fastqtransform

def fastqtransform(transform, fastq1, fastq2, separate_cb, demuxed_cb,
                   dual_index, cores, min_length):
    ''' Transform input reads to the tagcounts compatible read layout using
    regular expressions as defined in a transform file. Outputs new format to
    stdout.
    '''
    if dual_index and separate_cb:
        read_template = '{name}:CELL_{CB1}-{CB2}:UMI_{MB}\n{seq}\n+\n{qual}\n'
    else:
        read_template = '{name}:CELL_{CB}:UMI_{MB}\n{seq}\n+\n{qual}\n'

    transform = json.load(open(transform))
    read1_regex = re.compile(transform['read1'])
    read2_regex = re.compile(transform['read2']) if fastq2 else None

    if fastq1.endswith('gz'):
        fastq1_fh = gzip.open(fastq1, 'rt')  # text-mode gzip for Python 3
    else:
        fastq1_fh = open(fastq1)

    fastq_file1 = stream_fastq(fastq1_fh)

    if fastq2:
        if fastq2.endswith('gz'):
            fastq2_fh = gzip.open(fastq2, 'rt')
        else:
            fastq2_fh = open(fastq2)

        fastq_file2 = stream_fastq(fastq2_fh)

    else:
        fastq_file2 = itertools.cycle((None,))

    transform = partial(transformer, read1_regex=read1_regex,
                        read2_regex=read2_regex, paired=fastq2)
    p = multiprocessing.Pool(cores)

    # itertools.izip existed only in Python 2; the builtin zip is lazy in Python 3.
    chunks = tz.partition_all(10000, zip(fastq_file1, fastq_file2))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(transform, list(bigchunk)):
            for read1_dict in chunk:
                if dual_index:
                    if not separate_cb:
                        read1_dict['CB'] = read1_dict['CB1'] + read1_dict['CB2']

                if demuxed_cb:
                    read1_dict['CB'] = demuxed_cb

                # Deal with spaces in read names
                read1_dict['name'] = read1_dict['name'].partition(' ')[0]
                if len(read1_dict['seq']) >= min_length:
                    sys.stdout.write(read_template.format(**read1_dict))
Developer: flying-sheep, Project: umis, Lines: 51, Source: umis.py
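
The read_template is a plain str.format pattern. Here is what one transformed record looks like; the record values are illustrative, not real data:

read_template = '{name}:CELL_{CB}:UMI_{MB}\n{seq}\n+\n{qual}\n'
rec = dict(name='@SRR000001.1', CB='ACGTACGT', MB='TTAGGC',
           seq='GATTACA', qual='IIIIIII')
print(read_template.format(**rec), end='')
# @SRR000001.1:CELL_ACGTACGT:UMI_TTAGGC
# GATTACA
# +
# IIIIIII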


Example 5: add_uid

def add_uid(fastq, cores):
    ''' Adds UID:[samplebc cellbc umi] to readname for umi-tools deduplication
    Expects formatted fastq files with correct sample and cell barcodes.
    '''

    uids = partial(append_uids)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(uids, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Developer: vals, Project: umis, Lines: 14, Source: umis.py


Example 6: iterator_to_DataFrame_chunks

def iterator_to_DataFrame_chunks(seq, chunksize=1024, **kwargs):
    seq2 = partition_all(chunksize, seq)

    if kwargs.get('add_index'):
        mkindex = _add_index
    else:
        mkindex = _ignore_index

    try:
        first, rest = next(seq2), seq2
    except StopIteration:
        def _():
            yield convert(pd.DataFrame, [], **kwargs)
    else:
        df = convert(pd.DataFrame, first, **kwargs)
        df1, n1 = mkindex(df, 0)

        def _():
            n = n1
            yield df1
            for i in rest:
                df = convert(pd.DataFrame, i, **kwargs)
                df, n = mkindex(df, n)
                yield df
    return chunks(pd.DataFrame)(_)
Developer: EGQM, Project: odo, Lines: 25, Source: convert.py


Example 7: iterator_to_DataFrame_chunks

def iterator_to_DataFrame_chunks(seq, chunksize=1024, **kwargs):
    seq2 = partition_all(chunksize, seq)

    add_index = kwargs.get('add_index', False)
    if not add_index:
        # Simple, we can dispatch to dask...
        f = lambda d: convert(pd.DataFrame, d, **kwargs)
        data = [partial(f, d) for d in seq2]
        if not data:
            data = [convert(pd.DataFrame, [], **kwargs)]
        return chunks(pd.DataFrame)(data)

    # TODO: Decide whether we should support the `add_index` flag at all.
    # If so, we need to post-process the converted DataFrame objects sequentially,
    # so we can't parallelize the process.
    try:
        first, rest = next(seq2), seq2
    except StopIteration:
        def _():
            yield convert(pd.DataFrame, [], **kwargs)
    else:
        df = convert(pd.DataFrame, first, **kwargs)
        df1, n1 = _add_index(df, 0)

        def _():
            n = n1
            yield df1
            for i in rest:
                df = convert(pd.DataFrame, i, **kwargs)
                df, n = _add_index(df, n)
                yield df
    return chunks(pd.DataFrame)(_)
Developer: jdmcbr, Project: odo, Lines: 32, Source: convert.py


Example 8: test_broken_worker_during_computation

def test_broken_worker_during_computation(c, s, a, b):
    n = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    n.start(0)

    start = time()
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < start + 5

    L = c.map(inc, range(256))
    for i in range(8):
        L = c.map(add, *zip(*partition_all(2, L)))

    from random import random
    yield gen.sleep(random() / 2)
    with ignoring(OSError):
        n.process.terminate()
    yield gen.sleep(random() / 2)
    with ignoring(OSError):
        n.process.terminate()

    result = yield c._gather(L)
    assert isinstance(result[0], int)

    yield n._close()
Developer: dask, Project: distributed, Lines: 25, Source: test_worker_failure.py
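
The `zip(*partition_all(2, L))` idiom pairs up adjacent futures so that each round of `c.map(add, ...)` halves the list; eight rounds collapse 256 futures into a single sum. A local, non-distributed sketch of the same tree reduction, assuming (as in the test) that the length is a power of two:

from operator import add
from toolz import partition_all

L = list(range(256))
while len(L) > 1:
    # partition_all(2, L) -> pairs; zip(*pairs) -> (firsts, seconds)
    L = list(map(add, *zip(*partition_all(2, L))))
assert L[0] == sum(range(256))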


Example 9: append_iterator_to_table

def append_iterator_to_table(t, rows, dshape=None, **kwargs):
    assert not isinstance(t, type)
    rows = iter(rows)

    # We see if the sequence is of tuples or dicts
    # If tuples then we coerce them to dicts
    try:
        row = next(rows)
    except StopIteration:
        return
    rows = chain([row], rows)
    if isinstance(row, (tuple, list)):
        if dshape and isinstance(dshape.measure, datashape.Record):
            names = dshape.measure.names
            if set(names) != set(discover(t).measure.names):
                raise ValueError("Column names of incoming data don't match "
                                 "column names of existing SQL table\n"
                                 "Names in SQL table: %s\n"
                                 "Names from incoming data: %s\n" %
                                 (discover(t).measure.names, names))
        else:
            names = discover(t).measure.names
        rows = (dict(zip(names, row)) for row in rows)

    engine = t.bind
    with engine.connect() as conn:
        for chunk in partition_all(1000, rows):  # TODO: 1000 is hardcoded
            conn.execute(t.insert(), chunk)

    return t
Developer: pieterdavid, Project: odo, Lines: 30, Source: sql.py
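
Batching the rows with partition_all bounds memory use on arbitrarily long iterators and amortizes per-statement overhead. A sketch of the same idea against the standard-library sqlite3; the table and data are illustrative:

import sqlite3
from toolz import partition_all

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE points (x INTEGER, y INTEGER)')

rows = ((i, i * i) for i in range(100000))  # possibly huge iterator
for batch in partition_all(1000, rows):     # never holds more than 1000 rows
    conn.executemany('INSERT INTO points VALUES (?, ?)', batch)
conn.commit()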


Example 10: into

def into(a, b, **kwargs):
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = into(a, chunk, **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a
Developer: pgnepal, Project: blaze, Lines: 8, Source: bcolz.py


Example 11: into

def into(a, b, **kwargs):
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = ctable([into(np.ndarray(0), c2) for c2 in zip(*chunk)], **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a
Developer: holdenk, Project: blaze, Lines: 8, Source: bcolz.py


Example 12: execute

def execute(file_name):
    categories = ['distinguished', 'removal_reason']
    f = load(file_name)
    batches = partition_all(200000, f)
    df, frames = peek(map(to_df, batches))
    castra = Castra('./subreddit_dumps/'+file_name+'.castra',
                    template = df, categories = categories)
    castra.extend_sequence(frames, freq = '3h')
Developer: JherezTaylor, Project: Datamining-Reddit, Lines: 8, Source: make_subreddit_castra.py


Example 13: cb_filter

def cb_filter(fastq, bc1, bc2, cores):
    ''' Filters reads with non-matching barcodes
    Expects formatted fastq files.
    '''

    bc1 = set(cb.strip() for cb in bc1)
    if bc2:
        bc2 = set(cb.strip() for cb in bc2)

    filter_cb = partial(cb_filterer, bc1=bc1, bc2=bc2)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, stream_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_cb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Developer: Teichlab, Project: umis, Lines: 18, Source: umis.py


Example 14: sb_filter

def sb_filter(fastq, bc, cores, nedit):
    ''' Filters reads with non-matching sample barcodes
    Expects formatted fastq files.
    '''
    barcodes = set(sb.strip() for sb in bc)
    if nedit == 0:
        filter_sb = partial(exact_sample_filter2, barcodes=barcodes)
    else:
        barcodehash = MutationHash(barcodes, nedit)
        filter_sb = partial(correcting_sample_filter2, barcodehash=barcodehash)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_sb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read)
Developer: vals, Project: umis, Lines: 18, Source: umis.py


Example 15: into

def into(a, b, **kwargs):
    kwargs = keyfilter(carray_keywords.__contains__, kwargs)
    chunks = partition_all(1024, b)
    chunk = next(chunks)
    a = into(a, chunk, **kwargs)
    for chunk in chunks:
        a.append(list(zip(*chunk)))
    a.flush()
    return a
Developer: leolujuyi, Project: blaze, Lines: 9, Source: bcolz.py


Example 16: main

def main():
    images = sorted(f for f in listdir('images/') if f.endswith('.JPG'))
    captions = image_captions()
    content = list(zip(images, captions, count(1)))
    step = 6
    size = len(content) // step
    for i, img_cap_idx_list in enumerate(partition_all(step, content)):
        create_slide(i, size, img_cap_idx_list)
    write_app_cache()
Developer: baharev, Project: baharev.github.io, Lines: 9, Source: create_slides.py


Example 17: partition

def partition(grouper, sequence, npartitions, p, nelements=2**20):
    """ Partition a bag along a grouper, store partitions on disk """
    for block in partition_all(nelements, sequence):
        d = groupby(grouper, block)
        d2 = defaultdict(list)
        for k, v in d.items():
            d2[abs(hash(k)) % npartitions].extend(v)
        p.append(d2)
    return p
Developer: kerrywatson1, Project: dask, Lines: 9, Source: core.py
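
Here partition_all caps how much of the sequence is grouped in memory at once; each block is grouped by key and fanned out into `npartitions` buckets by hash. A sketch of the same idea with plain in-memory dicts standing in for the on-disk store `p` (the name `hash_partition` is illustrative):

from collections import defaultdict
from toolz import groupby, partition_all

def hash_partition(grouper, sequence, npartitions, nelements=4):
    buckets = defaultdict(list)  # stands in for the on-disk store
    for block in partition_all(nelements, sequence):
        for k, v in groupby(grouper, block).items():
            buckets[abs(hash(k)) % npartitions].extend(v)
    return buckets

parts = hash_partition(lambda x: x % 3, range(20), npartitions=2)
# Every element with the same key lands in the same bucket.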


Example 18: iterator_to_numpy_chunks

def iterator_to_numpy_chunks(seq, chunksize=1024, **kwargs):
    seq2 = partition_all(chunksize, seq)
    first, rest = next(seq2), seq2
    x = convert(np.ndarray, first, **kwargs)
    def _():
        yield x
        for i in rest:
            yield convert(np.ndarray, i, **kwargs)
    return chunks(np.ndarray)(_)
Developer: MoherX, Project: odo, Lines: 9, Source: convert.py
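
The line `first, rest = next(seq2), seq2` is worth noting: it peels off one chunk eagerly, so the first array (and hence its dtype) exists immediately, while the remaining chunks stay lazy because `seq2` has already advanced past the first chunk. A minimal sketch of that peel-then-stream idiom:

import numpy as np
from toolz import partition_all

chunks = partition_all(4, (float(i) for i in range(10)))
first, rest = next(chunks), chunks    # iterator has advanced past `first`
x = np.array(first)                   # eager: dtype/shape known up front
tail = [np.array(c) for c in rest]    # remaining chunks, converted on demand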


Example 19: iter_arrays

def iter_arrays(self, arrays_per_chunk=None):
    """Iterates over the arrays in this store."""
    if arrays_per_chunk is None:
        for key in range(self.journal().numarrays()):
            yield self.get([key])
    elif arrays_per_chunk <= 0:
        raise ValueError('arrays_per_chunk must be None or bigger than 0, it is %r' % arrays_per_chunk)
    else:
        for segments in partition_all(arrays_per_chunk, range(self.journal().numarrays())):
            yield self.get(segments)
Developer: sdvillal, Project: jagged, Lines: 10, Source: base.py


Example 20: parallel_rebin

def parallel_rebin(K, mz_axis, imzb):
    mz_axis_chunks = list(partition_all(K, mz_axis))
    # create dask array manually using tasks
    tasks = {('x', i, 0, 0): (get_mz_images, mz_chunk, imzb) for i, mz_chunk in enumerate(mz_axis_chunks)}
    chunks_mz = [len(c) for c in mz_axis_chunks]
    chunks_x = (imzb.height,)
    chunks_y = (imzb.width,)
    arr = da.Array(tasks, 'x', chunks=(chunks_mz, chunks_x, chunks_y), dtype=float)
    print(arr.shape)  # print() call: the original source used the Python 2 print statement
    return arr
Developer: SpatialMetabolomics, Project: ims-simulator, Lines: 10, Source: rebin_dataset.py



Note: the toolz.partition_all examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their developers; copyright remains with the original authors, and any use or redistribution must follow each project's license. Please do not repost without permission.

