Python compat.guid Function Code Examples


This article collects typical usage examples of the Python pyarrow.compat.guid function. If you are wondering what guid does, how to call it, or what real-world code using it looks like, the hand-picked snippets below may help.



A total of 20 code examples of the guid function are shown below, sorted by popularity by default.
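Before walking through the examples, a quick sketch of what guid provides may help: in the pyarrow versions these snippets come from, pyarrow.compat.guid simply returns a random hexadecimal UUID string, and the tests use it to build unique, collision-free file and directory names. The helper unique_path below is purely illustrative and not part of pyarrow, and the module pyarrow.compat itself has been removed in newer releases.

import os

from pyarrow.compat import guid  # note: removed in newer pyarrow releases


def unique_path(base_dir, suffix=''):
    # Illustrative helper (not part of pyarrow): guid() returns a
    # 32-character hex string (a uuid4 without dashes), so appending it
    # yields a temporary file or directory name that will not collide.
    return os.path.join(base_dir, guid() + suffix)


print(guid())                           # e.g. '3f1c9a...' (32 hex characters)
print(unique_path('/tmp', '.parquet'))  # e.g. '/tmp/3f1c9a....parquet'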

Example 1: test_native_file_TextIOWrapper

def test_native_file_TextIOWrapper(tmpdir):
    data = (u'foooo\n'
            u'barrr\n'
            u'bazzz\n')

    path = os.path.join(str(tmpdir), guid())
    with open(path, 'wb') as f:
        f.write(data.encode('utf-8'))

    with TextIOWrapper(pa.OSFile(path, mode='rb')) as fil:
        assert fil.readable()
        res = fil.read()
        assert res == data
    assert fil.closed

    with TextIOWrapper(pa.OSFile(path, mode='rb')) as fil:
        # Iteration works
        lines = list(fil)
        assert ''.join(lines) == data

    # Writing
    path2 = os.path.join(str(tmpdir), guid())
    with TextIOWrapper(pa.OSFile(path2, mode='wb')) as fil:
        assert fil.writable()
        fil.write(data)

    with TextIOWrapper(pa.OSFile(path2, mode='rb')) as fil:
        res = fil.read()
        assert res == data
Author: sunchao | Project: arrow | Lines: 29 | Source: test_io.py


Example 2: test_native_file_modes

def test_native_file_modes(tmpdir):
    path = os.path.join(str(tmpdir), guid())
    with open(path, 'wb') as f:
        f.write(b'foooo')

    with pa.OSFile(path, mode='r') as f:
        assert f.mode == 'rb'

    with pa.OSFile(path, mode='rb') as f:
        assert f.mode == 'rb'

    with pa.OSFile(path, mode='w') as f:
        assert f.mode == 'wb'

    with pa.OSFile(path, mode='wb') as f:
        assert f.mode == 'wb'

    with open(path, 'wb') as f:
        f.write(b'foooo')

    with pa.memory_map(path, 'r') as f:
        assert f.mode == 'rb'

    with pa.memory_map(path, 'r+') as f:
        assert f.mode == 'rb+'

    with pa.memory_map(path, 'r+b') as f:
        assert f.mode == 'rb+'
Author: NonVolatileComputing | Project: arrow | Lines: 28 | Source: test_io.py


Example 3: test_dataset_read_pandas

def test_dataset_read_pandas(tmpdir):
    import pyarrow.parquet as pq

    nfiles = 5
    size = 5

    dirpath = tmpdir.join(guid()).strpath
    os.mkdir(dirpath)

    test_data = []
    frames = []
    paths = []
    for i in range(nfiles):
        df = _test_dataframe(size, seed=i)
        df.index = np.arange(i * size, (i + 1) * size)
        df.index.name = 'index'

        path = pjoin(dirpath, '{0}.parquet'.format(i))

        table = pa.Table.from_pandas(df)
        _write_table(table, path)
        test_data.append(table)
        frames.append(df)
        paths.append(path)

    dataset = pq.ParquetDataset(dirpath)
    columns = ['uint8', 'strings']
    result = dataset.read_pandas(columns=columns).to_pandas()
    expected = pd.concat([x[columns] for x in frames])

    tm.assert_frame_equal(result, expected)
Author: NonVolatileComputing | Project: arrow | Lines: 31 | Source: test_parquet.py


Example 4: test_read_multiple_parquet_files

    def test_read_multiple_parquet_files(self):
        import pyarrow.parquet as pq

        nfiles = 10
        size = 5

        tmpdir = pjoin(self.tmp_path, 'multi-parquet-' + guid())

        self.hdfs.mkdir(tmpdir)

        test_data = []
        paths = []
        for i in range(nfiles):
            df = test_parquet._test_dataframe(size, seed=i)

            df['index'] = np.arange(i * size, (i + 1) * size)

            # Hack so that we don't have a dtype cast in v1 files
            df['uint32'] = df['uint32'].astype(np.int64)

            path = pjoin(tmpdir, '{0}.parquet'.format(i))

            table = pa.Table.from_pandas(df, preserve_index=False)
            with self.hdfs.open(path, 'wb') as f:
                pq.write_table(table, f)

            test_data.append(table)
            paths.append(path)

        result = self.hdfs.read_parquet(tmpdir)
        expected = pa.concat_tables(test_data)

        pdt.assert_frame_equal(result.to_pandas()
                               .sort_values(by='index').reset_index(drop=True),
                               expected.to_pandas())
Author: NonVolatileComputing | Project: arrow | Lines: 35 | Source: test_hdfs.py


Example 5: test_native_file_raises_ValueError_after_close

def test_native_file_raises_ValueError_after_close(tmpdir):
    path = os.path.join(str(tmpdir), guid())
    with open(path, 'wb') as f:
        f.write(b'foooo')

    with pa.OSFile(path, mode='rb') as os_file:
        assert not os_file.closed
    assert os_file.closed

    with pa.memory_map(path, mode='rb') as mmap_file:
        assert not mmap_file.closed
    assert mmap_file.closed

    files = [os_file,
             mmap_file]

    methods = [('tell', ()),
               ('seek', (0,)),
               ('size', ()),
               ('flush', ()),
               ('readable', ()),
               ('writable', ()),
               ('seekable', ())]

    for f in files:
        for method, args in methods:
            with pytest.raises(ValueError):
                getattr(f, method)(*args)
Author: sunchao | Project: arrow | Lines: 28 | Source: test_io.py


Example 6: test_read_multiple_parquet_files

    def test_read_multiple_parquet_files(self):

        tmpdir = pjoin(self.tmp_path, 'multi-parquet-' + guid())

        self.hdfs.mkdir(tmpdir)

        expected = self._write_multiple_hdfs_pq_files(tmpdir)
        result = self.hdfs.read_parquet(tmpdir)

        pdt.assert_frame_equal(result.to_pandas()
                               .sort_values(by='index').reset_index(drop=True),
                               expected.to_pandas())
Author: CodingCat | Project: arrow | Lines: 12 | Source: test_hdfs.py


Example 7: sample_disk_data

def sample_disk_data(request):
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
    data = arr.tobytes()[:SIZE]

    path = guid()
    with open(path, 'wb') as f:
        f.write(data)

    def teardown():
        _try_delete(path)
    request.addfinalizer(teardown)
    return path, data
Author: kiril-me | Project: arrow | Lines: 13 | Source: test_io.py


Example 8: test_read_multiple_parquet_files_with_uri

    def test_read_multiple_parquet_files_with_uri(self):
        import pyarrow.parquet as pq

        tmpdir = pjoin(self.tmp_path, 'multi-parquet-uri-' + guid())

        self.hdfs.mkdir(tmpdir)

        expected = self._write_multiple_hdfs_pq_files(tmpdir)
        path = _get_hdfs_uri(tmpdir)
        result = pq.read_table(path)

        pdt.assert_frame_equal(result.to_pandas()
                               .sort_values(by='index').reset_index(drop=True),
                               expected.to_pandas())
Author: CodingCat | Project: arrow | Lines: 14 | Source: test_hdfs.py


Example 9: s3_example

def s3_example():
    access_key = os.environ['PYARROW_TEST_S3_ACCESS_KEY']
    secret_key = os.environ['PYARROW_TEST_S3_SECRET_KEY']
    bucket_name = os.environ['PYARROW_TEST_S3_BUCKET']

    import s3fs
    fs = s3fs.S3FileSystem(key=access_key, secret=secret_key)

    test_dir = guid()

    bucket_uri = 's3://{0}/{1}'.format(bucket_name, test_dir)
    fs.mkdir(bucket_uri)
    yield fs, bucket_uri
    fs.rm(bucket_uri, recursive=True)
Author: NonVolatileComputing | Project: arrow | Lines: 14 | Source: test_parquet.py


Example 10: test_dataset_read_pandas_common_metadata

def test_dataset_read_pandas_common_metadata(tmpdir):
    # ARROW-1103
    import pyarrow.parquet as pq

    nfiles = 5
    size = 5

    dirpath = tmpdir.join(guid()).strpath
    os.mkdir(dirpath)

    test_data = []
    frames = []
    paths = []
    for i in range(nfiles):
        df = _test_dataframe(size, seed=i)
        df.index = pd.Index(np.arange(i * size, (i + 1) * size))
        df.index.name = 'index'

        path = pjoin(dirpath, '{0}.parquet'.format(i))

        df_ex_index = df.reset_index(drop=True)
        df_ex_index['index'] = df.index
        table = pa.Table.from_pandas(df_ex_index,
                                     preserve_index=False)

        # Obliterate metadata
        table = table.replace_schema_metadata(None)
        assert table.schema.metadata is None

        _write_table(table, path)
        test_data.append(table)
        frames.append(df)
        paths.append(path)

    # Write _metadata common file
    table_for_metadata = pa.Table.from_pandas(df)
    pq.write_metadata(table_for_metadata.schema,
                      pjoin(dirpath, '_metadata'))

    dataset = pq.ParquetDataset(dirpath)
    columns = ['uint8', 'strings']
    result = dataset.read_pandas(columns=columns).to_pandas()
    expected = pd.concat([x[columns] for x in frames])

    tm.assert_frame_equal(result, expected)
Author: NonVolatileComputing | Project: arrow | Lines: 45 | Source: test_parquet.py


Example 11: test_os_file_writer

def test_os_file_writer(tmpdir):
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
    data = arr.tobytes()[:SIZE]

    path = os.path.join(str(tmpdir), guid())
    with open(path, 'wb') as f:
        f.write(data)

    # Truncates file
    f2 = pa.OSFile(path, mode='w')
    f2.write('foo')

    with pa.OSFile(path) as f3:
        assert f3.size() == 3

    with pytest.raises(IOError):
        f2.read(5)
Author: sunchao | Project: arrow | Lines: 18 | Source: test_io.py


Example 12: test_read_write_parquet_files_with_uri

    def test_read_write_parquet_files_with_uri(self):
        import pyarrow.parquet as pq

        tmpdir = pjoin(self.tmp_path, 'uri-parquet-' + guid())
        self.hdfs.mkdir(tmpdir)
        path = _get_hdfs_uri(pjoin(tmpdir, 'test.parquet'))

        size = 5
        df = test_parquet._test_dataframe(size, seed=0)
        # Hack so that we don't have a dtype cast in v1 files
        df['uint32'] = df['uint32'].astype(np.int64)
        table = pa.Table.from_pandas(df, preserve_index=False)

        pq.write_table(table, path)

        result = pq.read_table(path).to_pandas()

        pdt.assert_frame_equal(result, df)
Author: CodingCat | Project: arrow | Lines: 18 | Source: test_hdfs.py


Example 13: test_memory_map_resize

def test_memory_map_resize(tmpdir):
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype(np.uint8)
    data1 = arr.tobytes()[:(SIZE // 2)]
    data2 = arr.tobytes()[(SIZE // 2):]

    path = os.path.join(str(tmpdir), guid())

    mmap = pa.create_memory_map(path, SIZE / 2)
    mmap.write(data1)

    mmap.resize(SIZE)
    mmap.write(data2)

    mmap.close()

    with open(path, 'rb') as f:
        assert f.read() == arr.tobytes()
Author: rok | Project: arrow | Lines: 18 | Source: test_io.py


Example 14: test_memory_map_writer

def test_memory_map_writer():
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
    data = arr.tobytes()[:SIZE]

    path = guid()
    try:
        with open(path, 'wb') as f:
            f.write(data)

        f = io.MemoryMappedFile(path, mode='r+w')

        f.seek(10)
        f.write('peekaboo')
        assert f.tell() == 18

        f.seek(10)
        assert f.read(8) == b'peekaboo'

        f2 = io.MemoryMappedFile(path, mode='r+w')

        f2.seek(10)
        f2.write(b'booapeak')
        f2.seek(10)

        f.seek(10)
        assert f.read(8) == b'booapeak'

        # Does not truncate file
        f3 = io.MemoryMappedFile(path, mode='w')
        f3.write('foo')

        with io.MemoryMappedFile(path) as f4:
            assert f4.size() == SIZE

        with pytest.raises(IOError):
            f3.read(5)

        f.seek(0)
        assert f.read(3) == b'foo'
    finally:
        _try_delete(path)
Author: kiril-me | Project: arrow | Lines: 42 | Source: test_io.py


Example 15: test_os_file_writer

def test_os_file_writer():
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
    data = arr.tobytes()[:SIZE]

    path = guid()
    try:
        with open(path, 'wb') as f:
            f.write(data)

        # Truncates file
        f2 = io.OSFile(path, mode='w')
        f2.write('foo')

        with io.OSFile(path) as f3:
            assert f3.size() == 3

        with pytest.raises(IOError):
            f2.read(5)
    finally:
        _try_delete(path)
Author: kiril-me | Project: arrow | Lines: 21 | Source: test_io.py


Example 16: test_memory_map_writer

def test_memory_map_writer(tmpdir):
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
    data = arr.tobytes()[:SIZE]

    path = os.path.join(str(tmpdir), guid())
    with open(path, 'wb') as f:
        f.write(data)

    f = pa.memory_map(path, mode='r+b')

    f.seek(10)
    f.write('peekaboo')
    assert f.tell() == 18

    f.seek(10)
    assert f.read(8) == b'peekaboo'

    f2 = pa.memory_map(path, mode='r+b')

    f2.seek(10)
    f2.write(b'booapeak')
    f2.seek(10)

    f.seek(10)
    assert f.read(8) == b'booapeak'

    # Does not truncate file
    f3 = pa.memory_map(path, mode='w')
    f3.write('foo')

    with pa.memory_map(path) as f4:
        assert f4.size() == SIZE

    with pytest.raises(IOError):
        f3.read(5)

    f.seek(0)
    assert f.read(3) == b'foo'
Author: sunchao | Project: arrow | Lines: 39 | Source: test_io.py


Example 17: _visit_level

    def _visit_level(base_dir, level, part_keys):
        name, values = partition_spec[level]
        for value in values:
            this_part_keys = part_keys + [(name, value)]

            level_dir = pjoin(base_dir, '{0}={1}'.format(name, value))
            fs.mkdir(level_dir)

            if level == DEPTH - 1:
                # Generate example data
                file_path = pjoin(level_dir, guid())

                filtered_df = _filter_partition(df, this_part_keys)
                part_table = pa.Table.from_pandas(filtered_df)
                with fs.open(file_path, 'wb') as f:
                    _write_table(part_table, f)
                assert fs.exists(file_path)

                _touch(pjoin(level_dir, '_SUCCESS'))
            else:
                _visit_level(level_dir, level + 1, this_part_keys)
                _touch(pjoin(level_dir, '_SUCCESS'))
Author: giantwhale | Project: arrow | Lines: 22 | Source: test_parquet.py


Example 18: test_ignore_private_directories

def test_ignore_private_directories(tmpdir):
    import pyarrow.parquet as pq

    nfiles = 10
    size = 5

    dirpath = tmpdir.join(guid()).strpath
    os.mkdir(dirpath)

    test_data = []
    paths = []
    for i in range(nfiles):
        df = _test_dataframe(size, seed=i)
        path = pjoin(dirpath, '{0}.parquet'.format(i))

        test_data.append(_write_table(df, path))
        paths.append(path)

    # private directory
    os.mkdir(pjoin(dirpath, '_impala_staging'))

    dataset = pq.ParquetDataset(dirpath)
    assert set(paths) == set(x.path for x in dataset.pieces)
Author: NonVolatileComputing | Project: arrow | Lines: 23 | Source: test_parquet.py


Example 19: random_path

def random_path():
    return 'feather_{}'.format(guid())
Author: giantwhale | Project: arrow | Lines: 2 | Source: test_feather.py


Example 20: write_to_dataset

def write_to_dataset(table, root_path, partition_cols=None,
                     filesystem=None, preserve_index=True, **kwargs):
    """
    Wrapper around parquet.write_table for writing a Table to
    Parquet format by partitions.
    For each combination of partition columns and values,
    subdirectories are created in the following
    manner:

    root_dir/
      group1=value1
        group2=value1
          <uuid>.parquet
        group2=value2
          <uuid>.parquet
      group1=valueN
        group2=value1
          <uuid>.parquet
        group2=valueN
          <uuid>.parquet

    Parameters
    ----------
    table : pyarrow.Table
    root_path : string,
        The root directory of the dataset
    filesystem : FileSystem, default None
        If nothing passed, paths assumed to be found in the local on-disk
        filesystem
    partition_cols : list,
        Column names by which to partition the dataset
        Columns are partitioned in the order they are given
    preserve_index : bool,
        Parameter for instantiating Table; preserve pandas index or not.
    **kwargs : dict, kwargs for write_table function.
    """
    from pyarrow import (
        Table,
        compat
    )

    if filesystem is None:
        fs = _get_fs_from_path(root_path)
    else:
        fs = _ensure_filesystem(filesystem)

    _mkdir_if_not_exists(fs, root_path)

    if partition_cols is not None and len(partition_cols) > 0:
        df = table.to_pandas()
        partition_keys = [df[col] for col in partition_cols]
        data_df = df.drop(partition_cols, axis='columns')
        data_cols = df.columns.drop(partition_cols)
        if len(data_cols) == 0:
            raise ValueError("No data left to save outside partition columns")
        for keys, subgroup in data_df.groupby(partition_keys):
            if not isinstance(keys, tuple):
                keys = (keys,)
            subdir = "/".join(
                ["{colname}={value}".format(colname=name, value=val)
                 for name, val in zip(partition_cols, keys)])
            subtable = Table.from_pandas(subgroup,
                                         preserve_index=preserve_index)
            prefix = "/".join([root_path, subdir])
            _mkdir_if_not_exists(fs, prefix)
            outfile = compat.guid() + ".parquet"
            full_path = "/".join([prefix, outfile])
            with fs.open(full_path, 'wb') as f:
                write_table(subtable, f, **kwargs)
    else:
        outfile = compat.guid() + ".parquet"
        full_path = "/".join([root_path, outfile])
        with fs.open(full_path, 'wb') as f:
            write_table(table, f, **kwargs)
Author: sunchao | Project: arrow | Lines: 74 | Source: parquet.py
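To make the partitioned layout described in the write_to_dataset docstring concrete, here is a hedged usage sketch. The column names, values, and the example_dataset path are made up for illustration; the function is exposed as pyarrow.parquet.write_to_dataset in the pyarrow versions these examples come from.

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Illustrative data: 'group1' and 'group2' become directory levels,
# while the remaining column is stored inside the parquet files.
df = pd.DataFrame({
    'group1': ['a', 'a', 'b', 'b'],
    'group2': [1, 2, 1, 2],
    'value': [10.0, 20.0, 30.0, 40.0],
})
table = pa.Table.from_pandas(df)

# Writes a tree such as:
#   example_dataset/group1=a/group2=1/<uuid>.parquet
#   example_dataset/group1=a/group2=2/<uuid>.parquet
#   ...
# where each <uuid> file name comes from compat.guid().
pq.write_to_dataset(table, root_path='example_dataset',
                    partition_cols=['group1', 'group2'])

# The partitioned tree can then be read back as a single table.
restored = pq.ParquetDataset('example_dataset').read().to_pandas()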



Note: The pyarrow.compat.guid examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their original developers; copyright remains with those authors, and any distribution or reuse should follow the corresponding project's license. Please do not repost without permission.

