This article collects typical usage examples of the Java class org.apache.parquet.column.ColumnDescriptor. If you have been wondering what ColumnDescriptor is, how to use it, or want to see it in real code, the curated class examples below should help.
The ColumnDescriptor class belongs to the org.apache.parquet.column package. Twenty code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java samples.
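As a quick orientation before the examples: a ColumnDescriptor describes one leaf (primitive) column of a Parquet schema — its dotted path, primitive type, and maximum repetition/definition levels. The minimal sketch below (the schema string and class name are invented for illustration) shows the usual way to obtain descriptors from a MessageType rather than constructing them by hand:

import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class ColumnDescriptorIntro {
  public static void main(String[] args) {
    // A hypothetical two-column schema, parsed from the Parquet schema DSL.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message contact { required binary name; optional group phone { optional binary number; } }");

    // Every leaf (primitive) field in the schema yields one ColumnDescriptor.
    for (ColumnDescriptor column : schema.getColumns()) {
      System.out.println(String.join(".", column.getPath())
          + " type=" + column.getType()
          + " maxDef=" + column.getMaxDefinitionLevel()
          + " maxRep=" + column.getMaxRepetitionLevel());
    }

    // A single descriptor can also be looked up by its path.
    ColumnDescriptor number = schema.getColumnDescription(new String[] {"phone", "number"});
    System.out.println("max definition level of phone.number: " + number.getMaxDefinitionLevel());
  }
}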
Example 1: addColumn
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
public void addColumn(ColumnDescriptor descriptor, ColumnChunkMetaData metaData) throws IOException {
  final FSDataInputStream in;
  if (useSingleStream) {
    if (streams.isEmpty()) {
      in = fs.open(path);
      streams.add(in);
    } else {
      in = streams.get(0);
    }
    in.seek(metaData.getStartingPos());
    columns.put(descriptor, new SingleStreamColumnChunkIncPageReader(metaData, descriptor, in));
  } else {
    // create a new stream per column
    in = fs.open(path);
    streams.add(in);
    in.seek(metaData.getStartingPos());
    columns.put(descriptor, new ColumnChunkIncPageReader(metaData, descriptor, in));
  }
}
Developer: dremio, Project: dremio-oss, Lines: 20, Source: ColumnChunkIncReadStore.java
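Example 1 seeks a shared or per-column FSDataInputStream to each column chunk's starting offset before handing it to a page reader. As a hedged, standalone sketch (the file path and class name are placeholders), those offsets come straight out of the file footer via stock parquet-mr APIs, the same footer-reading calls used in Examples 4 and 8:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;

public class ColumnChunkOffsets {
  public static void main(String[] args) throws Exception {
    Path file = new Path("/tmp/example.parquet"); // placeholder path
    ParquetMetadata footer = ParquetFileReader.readFooter(
        new Configuration(), file, ParquetMetadataConverter.NO_FILTER);
    for (BlockMetaData rowGroup : footer.getBlocks()) {
      for (ColumnChunkMetaData chunk : rowGroup.getColumns()) {
        // getStartingPos() is the offset Example 1 seeks to before reading pages.
        System.out.println(chunk.getPath() + " starts at byte " + chunk.getStartingPos());
      }
    }
  }
}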
Example 2: createGlobalDictionaries
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
/**
 * Builds global dictionaries for a Parquet table's BINARY and FIXED_LEN_BYTE_ARRAY columns.
 * Removes existing dictionaries if present and creates new ones.
 * @param fs filesystem
 * @param tableDir root directory of the table that holds the parquet files
 * @param bufferAllocator memory allocator
 * @return GlobalDictionariesInfo holding the dictionary version, the root path, and the mapping from columns to their dictionary files.
 * @throws IOException
 */
public static GlobalDictionariesInfo createGlobalDictionaries(FileSystem fs, Path tableDir, BufferAllocator bufferAllocator) throws IOException {
  final FileStatus[] statuses = fs.listStatus(tableDir, PARQUET_FILES_FILTER);
  final Map<ColumnDescriptor, Path> globalDictionaries = Maps.newHashMap();
  final Map<ColumnDescriptor, List<Dictionary>> allDictionaries = readLocalDictionaries(fs, statuses, bufferAllocator);
  final long dictionaryVersion = getDictionaryVersion(fs, tableDir) + 1;
  final Path tmpDictionaryRootDir = createTempRootDir(fs, tableDir, dictionaryVersion);
  logger.debug("Building global dictionaries for columns {} with version {}", allDictionaries.keySet(), dictionaryVersion);
  // Sort all local dictionaries and write them to a file, with an index if needed
  for (Map.Entry<ColumnDescriptor, List<Dictionary>> entry : allDictionaries.entrySet()) {
    final ColumnDescriptor columnDescriptor = entry.getKey();
    final Path dictionaryFile = dictionaryFilePath(tmpDictionaryRootDir, columnDescriptor);
    logger.debug("Creating a new global dictionary for {} with version {}", columnDescriptor.toString(), dictionaryVersion);
    createDictionaryFile(fs, dictionaryFile, columnDescriptor, entry.getValue(), null, bufferAllocator);
    globalDictionaries.put(columnDescriptor, dictionaryFile);
  }
  final Path finalDictionaryRootDir = createDictionaryVersionedRootPath(fs, tableDir, dictionaryVersion, tmpDictionaryRootDir);
  return new GlobalDictionariesInfo(globalDictionaries, finalDictionaryRootDir, dictionaryVersion);
}
Developer: dremio, Project: dremio-oss, Lines: 29, Source: GlobalDictionaryBuilder.java
Example 3: main
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
public static void main(String[] args) {
  try (final BufferAllocator bufferAllocator = new RootAllocator(SabotConfig.getMaxDirectMemory())) {
    final Path tableDir = new Path(args[0]);
    final FileSystem fs = tableDir.getFileSystem(new Configuration());
    if (fs.exists(tableDir) && fs.isDirectory(tableDir)) {
      Map<ColumnDescriptor, Path> dictionaryEncodedColumns = createGlobalDictionaries(fs, tableDir, bufferAllocator).getColumnsToDictionaryFiles();
      long version = getDictionaryVersion(fs, tableDir);
      Path dictionaryRootDir = getDictionaryVersionedRootPath(fs, tableDir, version);
      for (ColumnDescriptor columnDescriptor : dictionaryEncodedColumns.keySet()) {
        final VectorContainer data = readDictionary(fs, dictionaryRootDir, columnDescriptor, bufferAllocator);
        System.out.println("Dictionary for column [" + columnDescriptor.toString() + "] size " + data.getRecordCount());
        BatchPrinter.printBatch(data);
        data.clear();
      }
    }
  } catch (IOException ioe) {
    logger.error("Failed ", ioe);
  }
}
Developer: dremio, Project: dremio-oss, Lines: 20, Source: GlobalDictionaryBuilder.java
Example 4: readDictionaries
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
/**
 * Returns the dictionaries for all binary columns in the given Parquet file, which must contain a single row group.
 * @param fs filesystem object.
 * @param filePath parquet file to scan
 * @param codecFactory codec factory used to obtain decompressors for dictionary pages
 * @return pair of (dictionaries found for binary columns, set of binary columns that are not dictionary encoded).
 * @throws IOException
 */
public static Pair<Map<ColumnDescriptor, Dictionary>, Set<ColumnDescriptor>> readDictionaries(FileSystem fs, Path filePath, CodecFactory codecFactory) throws IOException {
  final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(fs.getConf(), filePath, ParquetMetadataConverter.NO_FILTER);
  if (parquetMetadata.getBlocks().size() > 1) {
    throw new IOException(
        format("Global dictionaries can only be built on a parquet file with a single row group, found %d row groups for file %s",
            parquetMetadata.getBlocks().size(), filePath));
  }
  final BlockMetaData rowGroupMetadata = parquetMetadata.getBlocks().get(0);
  final Map<ColumnPath, ColumnDescriptor> columnDescriptorMap = Maps.newHashMap();
  for (ColumnDescriptor columnDescriptor : parquetMetadata.getFileMetaData().getSchema().getColumns()) {
    columnDescriptorMap.put(ColumnPath.get(columnDescriptor.getPath()), columnDescriptor);
  }
  final Set<ColumnDescriptor> columnsToSkip = Sets.newHashSet(); // columns found in the parquet file that are not dictionary encoded
  final Map<ColumnDescriptor, Dictionary> dictionaries = Maps.newHashMap();
  try (final FSDataInputStream in = fs.open(filePath)) {
    for (ColumnChunkMetaData columnChunkMetaData : rowGroupMetadata.getColumns()) {
      if (isBinaryType(columnChunkMetaData.getType())) {
        final ColumnDescriptor column = columnDescriptorMap.get(columnChunkMetaData.getPath());
        // if the first page is dictionary encoded then load the dictionary, otherwise skip this column
        final PageHeaderWithOffset pageHeader = columnChunkMetaData.getPageHeaders().get(0);
        if (PageType.DICTIONARY_PAGE == pageHeader.getPageHeader().getType()) {
          dictionaries.put(column, readDictionary(in, column, pageHeader, codecFactory.getDecompressor(columnChunkMetaData.getCodec())));
        } else {
          columnsToSkip.add(column);
        }
      }
    }
  }
  return new ImmutablePair<>(dictionaries, columnsToSkip);
}
Developer: dremio, Project: dremio-oss, Lines: 40, Source: LocalDictionariesReader.java
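Once readDictionaries has produced its Map<ColumnDescriptor, Dictionary>, each dictionary can be walked by id. A minimal sketch in the same snippet style (the method name is invented; 'dictionary' stands in for one entry of that map):

import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Dictionary;

// Sketch: print every entry of a BINARY column's dictionary.
// 'dictionary' would come from the map returned by readDictionaries(...).
static void dumpBinaryDictionary(ColumnDescriptor column, Dictionary dictionary) {
  // Dictionary ids are dense, running from 0 to getMaxId() inclusive.
  for (int id = 0; id <= dictionary.getMaxId(); id++) {
    System.out.println(column + " [" + id + "] = "
        + dictionary.decodeToBinary(id).toStringUsingUTF8());
  }
}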
Example 5: convertColumnDescriptor
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
/**
 * Converts a {@link ColumnDescriptor} to a {@link SchemaPath}, and converts any Parquet LOGICAL LIST to something
 * the execution engine can understand (removes the extra 'list' and 'element' fields from the name).
 */
private static SchemaPath convertColumnDescriptor(final MessageType schema, final ColumnDescriptor columnDescriptor) {
  List<String> path = Lists.newArrayList(columnDescriptor.getPath());
  // walk the path and find all logical lists
  int index = 0;
  Type type = schema;
  while (!type.isPrimitive()) { // don't bother checking the last element in the path as it is a primitive type
    type = type.asGroupType().getType(path.get(index));
    if (type.getOriginalType() == OriginalType.LIST && LogicalListL1Converter.isSupportedSchema(type.asGroupType())) {
      // remove 'list'
      type = type.asGroupType().getType(path.get(index + 1));
      path.remove(index + 1);
      // remove 'element'
      type = type.asGroupType().getType(path.get(index + 1));
      path.remove(index + 1);
    }
    index++;
  }
  String[] schemaColDesc = new String[path.size()];
  path.toArray(schemaColDesc);
  return SchemaPath.getCompoundPath(schemaColDesc);
}
Developer: dremio, Project: dremio-oss, Lines: 28, Source: ParquetRowiseReader.java
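To see the 'list' and 'element' wrappers that Example 5 strips, it helps to print the raw column path of a LOGICAL LIST schema. A small illustrative sketch (the schema and class name are made up, following the standard three-level LIST layout):

import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class LogicalListPath {
  public static void main(String[] args) {
    // Standard 3-level LIST encoding: group (LIST) -> repeated group list -> element
    MessageType schema = MessageTypeParser.parseMessageType(
        "message doc { optional group tags (LIST) { repeated group list { optional binary element; } } }");
    for (ColumnDescriptor column : schema.getColumns()) {
      // Prints tags.list.element -- Example 5 would reduce this to just 'tags'.
      System.out.println(String.join(".", column.getPath()));
    }
  }
}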
Example 6: ColumnReader
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
protected ColumnReader(DeprecatedParquetVectorizedReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement) throws ExecutionSetupException {
  this.parentReader = parentReader;
  this.columnDescriptor = descriptor;
  this.columnChunkMetaData = columnChunkMetaData;
  this.isFixedLength = fixedLength;
  this.schemaElement = schemaElement;
  this.valueVec = v;
  this.pageReader = (parentReader.getSingleStream() != null) ?
      new DeprecatedSingleStreamPageReader(this, parentReader.getSingleStream(), parentReader.getHadoopPath(), columnChunkMetaData) :
      new PageReader(this, parentReader.getFileSystem(), parentReader.getHadoopPath(), columnChunkMetaData);
  if (columnDescriptor.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
    if (columnDescriptor.getType() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
      dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8;
    } else if (columnDescriptor.getType() == PrimitiveTypeName.INT96
        && (valueVec instanceof TimeStampMilliVector || valueVec instanceof NullableTimeStampMilliVector)) {
      // if an INT96 column is being read as a timestamp, this truncates the time format used by Impala;
      // dataTypeLengthInBits is only ever used when computing offsets into the destination vector, so it
      // needs to be set to the bit width of the resulting Arrow type (usually this matches the input length)
      dataTypeLengthInBits = 64;
    } else {
      dataTypeLengthInBits = DeprecatedParquetVectorizedReader.getTypeLengthInBits(columnDescriptor.getType());
    }
  }
}
Developer: dremio, Project: dremio-oss, Lines: 27, Source: ColumnReader.java
Example 7: testLocalDictionaries
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
@Test
public void testLocalDictionaries() throws IOException {
  try (final BufferAllocator bufferAllocator = new RootAllocator(SabotConfig.getMaxDirectMemory())) {
    final CodecFactory codecFactory = CodecFactory.createDirectCodecFactory(fs.getConf(), new ParquetDirectByteBufferAllocator(bufferAllocator), 0);
    Pair<Map<ColumnDescriptor, Dictionary>, Set<ColumnDescriptor>> dictionaries1 =
        LocalDictionariesReader.readDictionaries(fs, new Path(tableDirPath, "phonebook1.parquet"), codecFactory);
    Pair<Map<ColumnDescriptor, Dictionary>, Set<ColumnDescriptor>> dictionaries2 =
        LocalDictionariesReader.readDictionaries(fs, new Path(tableDirPath, "phonebook2.parquet"), codecFactory);
    Pair<Map<ColumnDescriptor, Dictionary>, Set<ColumnDescriptor>> dictionaries3 =
        LocalDictionariesReader.readDictionaries(fs, new Path(tableDirPath, "phonebook3.parquet"), codecFactory);
    Pair<Map<ColumnDescriptor, Dictionary>, Set<ColumnDescriptor>> dictionaries4 =
        LocalDictionariesReader.readDictionaries(fs, new Path(partitionDirPath, "phonebook4.parquet"), codecFactory);
    assertEquals(2, dictionaries1.getKey().size()); // name and kind have dictionaries
    assertEquals(1, dictionaries2.getKey().size());
    assertEquals(1, dictionaries3.getKey().size());
    assertEquals(1, dictionaries4.getKey().size());
    assertEquals(0, dictionaries1.getValue().size());
    assertEquals(1, dictionaries2.getValue().size()); // skip name
    assertEquals(1, dictionaries3.getValue().size()); // skip name
    assertEquals(1, dictionaries4.getValue().size()); // skip name
  }
}
Developer: dremio, Project: dremio-oss, Lines: 25, Source: TestGlobalDictionaryBuilder.java
Example 8: read
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
// Reads every page of every column in the file. Note this takes a file name parameter,
// so it is a plain helper method; JUnit test methods cannot take parameters.
public void read(String fileName) throws IOException {
  Path path = new Path(fileName);
  Configuration conf = new Configuration();
  conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
  ParquetMetadata metadata = ParquetFileReader.readFooter(conf, path, NO_FILTER);
  ParquetFileReader reader = new ParquetFileReader(conf, metadata.getFileMetaData(), path, metadata.getBlocks(), metadata.getFileMetaData().getSchema().getColumns());
  PageReadStore pageReadStore;
  PageReader pageReader;
  DataPage page;
  while ((pageReadStore = reader.readNextRowGroup()) != null) {
    for (ColumnDescriptor cd : metadata.getFileMetaData().getSchema().getColumns()) {
      pageReader = pageReadStore.getPageReader(cd);
      page = pageReader.readPage();
    }
  }
}
Developer: dbiir, Project: RealtimeAnalysis, Lines: 20, Source: ParquetFileReaderTest.java
Example 9: loadParquetSchema
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
/**
 * Scan the Parquet footer, then map each Parquet column to the list of columns
 * we want to read. Track those to be read.
 */
private void loadParquetSchema() {
  // TODO - figure out how to deal with this better once we add nested reading; note also where this map is used below
  // store a map from column name to converted types if they are non-null
  Map<String, SchemaElement> schemaElements = ParquetReaderUtility.getColNameToSchemaElementMapping(footer);
  // loop to add up the length of the fixed width columns and build the schema
  for (ColumnDescriptor column : footer.getFileMetaData().getSchema().getColumns()) {
    ParquetColumnMetadata columnMetadata = new ParquetColumnMetadata(column);
    columnMetadata.resolveDrillType(schemaElements, options);
    if (!fieldSelected(columnMetadata.field)) {
      continue;
    }
    selectedColumnMetadata.add(columnMetadata);
  }
}
Developer: axbaretto, Project: drill, Lines: 21, Source: ParquetSchema.java
Example 10: initialize
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
public void initialize(FileMetaData parquetFileMetadata,
    Path file, List<BlockMetaData> blocks, Configuration configuration)
    throws IOException {
  // initialize a ReadContext for this file
  Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData();
  ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
      configuration, toSetMultiMap(fileMetadata), fileSchema));
  this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy());
  this.requestedSchema = readContext.getRequestedSchema();
  this.fileSchema = parquetFileMetadata.getSchema();
  this.file = file;
  this.columnCount = requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration, fileMetadata, fileSchema, readContext);
  this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  reader = new ParquetFileReader(configuration, parquetFileMetadata, file, blocks, columns);
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total);
  LOG.info("RecordReader initialized will read a total of " + total + " records.");
}
Developer: apache, Project: tajo, Lines: 24, Source: InternalParquetRecordReader.java
Example 11: initialize
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
public void initialize(MessageType fileSchema,
    FileMetaData parquetFileMetadata,
    Path file, List<BlockMetaData> blocks, Configuration configuration)
    throws IOException {
  // initialize a ReadContext for this file
  Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData();
  ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
      configuration, toSetMultiMap(fileMetadata), fileSchema));
  this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy());
  this.requestedSchema = readContext.getRequestedSchema();
  this.fileSchema = fileSchema;
  this.file = file;
  this.columnCount = requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration, fileMetadata, fileSchema, readContext);
  this.strictTypeChecking = true;
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  reader = new ParquetFileReader(configuration, parquetFileMetadata, file, blocks, columns);
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  Log.info("RecordReader initialized will read a total of " + total + " records.");
}
Developer: h2oai, Project: h2o-3, Lines: 24, Source: H2OInternalParquetReader.java
Example 12: showDetails
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  String name = Strings.repeat(".", depth) + type.getName();
  OriginalType otype = type.getOriginalType();
  Repetition rep = type.getRepetition();
  PrimitiveTypeName ptype = type.getPrimitiveTypeName();
  out.format("%s: %s %s", name, rep, ptype);
  if (otype != null) out.format(" O:%s", otype);
  if (container != null) {
    cpath.add(type.getName());
    String[] paths = cpath.toArray(new String[cpath.size()]);
    cpath.remove(cpath.size() - 1);
    ColumnDescriptor desc = container.getColumnDescription(paths);
    int defl = desc.getMaxDefinitionLevel();
    int repl = desc.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", repl, defl);
  }
  out.println();
}
Developer: apache, Project: parquet-mr, Lines: 23, Source: MetadataUtils.java
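Example 12 prints R (max repetition level) and D (max definition level) per column. The following sketch (the schema and class name are invented) shows how those levels fall out of optional and repeated fields:

import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class LevelsDemo {
  public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message m {"
        + " required int64 id;"                             // R:0 D:0 -- required, top level
        + " optional binary name;"                          // R:0 D:1 -- one optional level
        + " repeated group event { optional int32 code; }"  // code: R:1 D:2
        + "}");
    for (ColumnDescriptor c : schema.getColumns()) {
      System.out.println(String.join(".", c.getPath())
          + " R:" + c.getMaxRepetitionLevel()
          + " D:" + c.getMaxDefinitionLevel());
    }
  }
}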
Example 13: newValuesWriter
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
@Override
public ValuesWriter newValuesWriter(ColumnDescriptor descriptor) {
  switch (descriptor.getType()) {
    case BOOLEAN:
      return getBooleanValuesWriter();
    case FIXED_LEN_BYTE_ARRAY:
      return getFixedLenByteArrayValuesWriter(descriptor);
    case BINARY:
      return getBinaryValuesWriter(descriptor);
    case INT32:
      return getInt32ValuesWriter(descriptor);
    case INT64:
      return getInt64ValuesWriter(descriptor);
    case INT96:
      return getInt96ValuesWriter(descriptor);
    case DOUBLE:
      return getDoubleValuesWriter(descriptor);
    case FLOAT:
      return getFloatValuesWriter(descriptor);
    default:
      throw new IllegalArgumentException("Unknown type " + descriptor.getType());
  }
}
Developer: apache, Project: parquet-mr, Lines: 24, Source: DefaultV2ValuesWriterFactory.java
Example 14: dictionaryWriter
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
static DictionaryValuesWriter dictionaryWriter(ColumnDescriptor path, ParquetProperties properties, Encoding dictPageEncoding, Encoding dataPageEncoding) {
  switch (path.getType()) {
    case BOOLEAN:
      throw new IllegalArgumentException("no dictionary encoding for BOOLEAN");
    case BINARY:
      return new DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case INT32:
      return new DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case INT64:
      return new DictionaryValuesWriter.PlainLongDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case INT96:
      return new DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), 12, dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case DOUBLE:
      return new DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case FLOAT:
      return new DictionaryValuesWriter.PlainFloatDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case FIXED_LEN_BYTE_ARRAY:
      return new DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), path.getTypeLength(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    default:
      throw new IllegalArgumentException("Unknown type " + path.getType());
  }
}
Developer: apache, Project: parquet-mr, Lines: 23, Source: DefaultValuesWriterFactory.java
Example 15: ColumnReaderImpl
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
/**
 * Creates a reader for triplets.
 * @param path the descriptor for the corresponding column
 * @param pageReader the underlying store to read from
 * @param converter the converter that receives decoded values
 * @param writerVersion the version of the writer that produced the file
 */
public ColumnReaderImpl(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter, ParsedVersion writerVersion) {
  this.path = checkNotNull(path, "path");
  this.pageReader = checkNotNull(pageReader, "pageReader");
  this.converter = checkNotNull(converter, "converter");
  this.writerVersion = writerVersion;
  DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
  if (dictionaryPage != null) {
    try {
      this.dictionary = dictionaryPage.getEncoding().initDictionary(path, dictionaryPage);
      if (converter.hasDictionarySupport()) {
        converter.setDictionary(dictionary);
      }
    } catch (IOException e) {
      throw new ParquetDecodingException("could not decode the dictionary for " + path, e);
    }
  } else {
    this.dictionary = null;
  }
  this.totalValueCount = pageReader.getTotalValueCount();
  if (totalValueCount <= 0) {
    throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0");
  }
  consume();
}
Developer: apache, Project: parquet-mr, Lines: 30, Source: ColumnReaderImpl.java
Example 16: ColumnWriteStoreV2
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
public ColumnWriteStoreV2(
    MessageType schema,
    PageWriteStore pageWriteStore,
    ParquetProperties props) {
  this.props = props;
  this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
  Map<ColumnDescriptor, ColumnWriterV2> mcolumns = new TreeMap<ColumnDescriptor, ColumnWriterV2>();
  for (ColumnDescriptor path : schema.getColumns()) {
    PageWriter pageWriter = pageWriteStore.getPageWriter(path);
    mcolumns.put(path, new ColumnWriterV2(path, pageWriter, props));
  }
  this.columns = unmodifiableMap(mcolumns);
  this.writers = this.columns.values();
  this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
}
Developer: apache, Project: parquet-mr, Lines: 17, Source: ColumnWriteStoreV2.java
Example 17: test
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
@Test
public void test() throws IOException {
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnDescriptor col = new ColumnDescriptor(path, PrimitiveTypeName.INT64, 2, 2);
  LongStatistics stats = new LongStatistics();
  PageWriter pageWriter = memPageStore.getPageWriter(col);
  pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
  pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
  pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
  pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
  PageReader pageReader = memPageStore.getPageReader(col);
  long totalValueCount = pageReader.getTotalValueCount();
  System.out.println(totalValueCount);
  int total = 0;
  do {
    DataPage readPage = pageReader.readPage();
    total += readPage.getValueCount();
    System.out.println(readPage);
    // TODO: assert
  } while (total < totalValueCount);
}
Developer: apache, Project: parquet-mr, Lines: 22, Source: TestMemPageStore.java
Example 18: testMemColumn
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
@Test
public void testMemColumn() throws Exception {
  MessageType schema = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  ColumnDescriptor path = schema.getColumnDescription(new String[] {"foo", "bar"});
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  columnWriter.write(42L, 0, 0);
  memColumnsStore.flush();
  ColumnReader columnReader = getColumnReader(memPageStore, path, schema);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42, columnReader.getLong());
    columnReader.consume();
  }
}
Developer: apache, Project: parquet-mr, Lines: 19, Source: TestMemColumn.java
Example 19: testMemColumnBinary
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
@Test
public void testMemColumnBinary() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required binary bar; } }");
  String[] col = new String[]{"foo", "bar"};
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  columnWriter.write(Binary.fromString("42"), 0, 0);
  memColumnsStore.flush();
  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals("42", columnReader.getBinary().toStringUsingUTF8());
    columnReader.consume();
  }
}
Developer: apache, Project: parquet-mr, Lines: 23, Source: TestMemColumn.java
Example 20: testMemColumnSeveralPages
import org.apache.parquet.column.ColumnDescriptor; // import the required package/class
@Test
public void testMemColumnSeveralPages() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  String[] col = new String[]{"foo", "bar"};
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  for (int i = 0; i < 2000; i++) {
    columnWriter.write(42L, 0, 0);
  }
  memColumnsStore.flush();
  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42, columnReader.getLong());
    columnReader.consume();
  }
}
Developer: apache, Project: parquet-mr, Lines: 24, Source: TestMemColumn.java
Note: the org.apache.parquet.column.ColumnDescriptor examples in this article are collected from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by various developers; copyright remains with the original authors, and any redistribution or use should follow the corresponding project's license. Do not reproduce without permission.