本文整理汇总了Java中org.apache.parquet.format.converter.ParquetMetadataConverter类的典型用法代码示例。如果您正苦于以下问题:Java ParquetMetadataConverter类的具体用法?Java ParquetMetadataConverter怎么用?Java ParquetMetadataConverter使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
ParquetMetadataConverter类属于org.apache.parquet.format.converter包,在下文中一共展示了ParquetMetadataConverter类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: ParquetReadOptions
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Package-private constructor; instances are normally built via the Builder.
 * Copies every option onto its field and wraps the properties map so it
 * cannot be mutated after construction.
 */
ParquetReadOptions(boolean useSignedStringMinMax,
                   boolean useStatsFilter,
                   boolean useDictionaryFilter,
                   boolean useRecordFilter,
                   FilterCompat.Filter recordFilter,
                   ParquetMetadataConverter.MetadataFilter metadataFilter,
                   CompressionCodecFactory codecFactory,
                   ByteBufferAllocator allocator,
                   Map<String, String> properties) {
  // boolean switches controlling which filtering strategies are enabled
  this.useSignedStringMinMax = useSignedStringMinMax;
  this.useStatsFilter = useStatsFilter;
  this.useDictionaryFilter = useDictionaryFilter;
  this.useRecordFilter = useRecordFilter;
  // filter objects and I/O collaborators
  this.recordFilter = recordFilter;
  this.metadataFilter = metadataFilter;
  this.codecFactory = codecFactory;
  this.allocator = allocator;
  // read-only view keeps the options object effectively immutable
  this.properties = Collections.unmodifiableMap(properties);
}
开发者ID:apache,项目名称:parquet-mr,代码行数:20,代码来源:ParquetReadOptions.java
示例2: test
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
@Override
public void test() throws IOException {
  // Read the footer unfiltered so every row group in the file is visited.
  Configuration conf = new Configuration();
  ParquetMetadata footer = ParquetFileReader.readFooter(conf,
      super.fsPath, ParquetMetadataConverter.NO_FILTER);
  ParquetFileReader fileReader = new ParquetFileReader(conf,
      footer.getFileMetaData(),
      super.fsPath,
      footer.getBlocks(),
      footer.getFileMetaData().getSchema().getColumns());
  // Validate page-level statistics for each row group in turn.
  PageStatsValidator statsValidator = new PageStatsValidator();
  for (PageReadStore rowGroup = fileReader.readNextRowGroup();
       rowGroup != null;
       rowGroup = fileReader.readNextRowGroup()) {
    statsValidator.validate(footer.getFileMetaData().getSchema(), rowGroup);
  }
}
开发者ID:apache,项目名称:parquet-mr,代码行数:19,代码来源:TestStatistics.java
示例3: getParquetSchema
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Returns the Parquet schema of {@code source} as a string.
 * Throws if the file's content is not detected as Parquet.
 */
private String getParquetSchema(String source) throws IOException {
  try (SeekableInput in = openSeekable(source)) {
    Formats.Format detected = Formats.detectFormat((InputStream) in);
    in.seek(0); // rewind: format detection consumed bytes from the stream
    if (detected == Formats.Format.PARQUET) {
      return new ParquetFileReader(
          getConf(), qualifiedPath(source), ParquetMetadataConverter.NO_FILTER)
          .getFileMetaData().getSchema().toString();
    }
    throw new IllegalArgumentException(String.format(
        "Could not get a Parquet schema for format %s: %s", detected, source));
  }
}
开发者ID:apache,项目名称:parquet-mr,代码行数:18,代码来源:SchemaCommand.java
示例4: readDictionaries
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Return dictionary per row group for all binary columns in given parquet file.
 * @param fs filesystem object.
 * @param filePath parquet file to scan
 * @return pair of dictionaries found for binary fields and list of binary fields which are not dictionary encoded.
 * @throws IOException
 */
public static Pair<Map<ColumnDescriptor, Dictionary>, Set<ColumnDescriptor>> readDictionaries(FileSystem fs, Path filePath, CodecFactory codecFactory) throws IOException {
  final ParquetMetadata footer = ParquetFileReader.readFooter(fs.getConf(), filePath, ParquetMetadataConverter.NO_FILTER);
  // Global dictionaries are only defined for single-row-group files.
  if (footer.getBlocks().size() > 1) {
    throw new IOException(
        format("Global dictionaries can only be built on a parquet file with a single row group, found %d row groups for file %s",
            footer.getBlocks().size(), filePath));
  }
  final BlockMetaData rowGroup = footer.getBlocks().get(0);
  // Index schema columns by path so each chunk can be matched to its descriptor.
  final Map<ColumnPath, ColumnDescriptor> descriptorByPath = Maps.newHashMap();
  for (ColumnDescriptor descriptor : footer.getFileMetaData().getSchema().getColumns()) {
    descriptorByPath.put(ColumnPath.get(descriptor.getPath()), descriptor);
  }
  final Map<ColumnDescriptor, Dictionary> dictionaries = Maps.newHashMap();
  // binary columns present in the file whose first page is not dictionary encoded
  final Set<ColumnDescriptor> columnsToSkip = Sets.newHashSet();
  try (final FSDataInputStream in = fs.open(filePath)) {
    for (ColumnChunkMetaData chunk : rowGroup.getColumns()) {
      if (!isBinaryType(chunk.getType())) {
        continue; // only binary columns carry dictionaries we care about
      }
      final ColumnDescriptor column = descriptorByPath.get(chunk.getPath());
      // if first page is dictionary encoded then load dictionary, otherwise skip this column.
      final PageHeaderWithOffset firstPage = chunk.getPageHeaders().get(0);
      if (PageType.DICTIONARY_PAGE == firstPage.getPageHeader().getType()) {
        dictionaries.put(column, readDictionary(in, column, firstPage, codecFactory.getDecompressor(chunk.getCodec())));
      } else {
        columnsToSkip.add(column);
      }
    }
  }
  return new ImmutablePair<>(dictionaries, columnsToSkip);
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:40,代码来源:LocalDictionariesReader.java
示例5: run
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Prints the file path, creator, key/value properties, schema, and per
 * row-group details of a single Parquet file to the console.
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
  Preconditions.checkArgument(targets != null && targets.size() >= 1,
      "A Parquet file is required.");
  Preconditions.checkArgument(targets.size() == 1,
      "Cannot process multiple Parquet files.");

  String source = targets.get(0);
  ParquetMetadata footer = ParquetFileReader.readFooter(
      getConf(), qualifiedPath(source), ParquetMetadataConverter.NO_FILTER);

  console.info("\nFile path: {}", source);
  console.info("Created by: {}", footer.getFileMetaData().getCreatedBy());

  Map<String, String> keyValues = footer.getFileMetaData().getKeyValueMetaData();
  if (keyValues == null || keyValues.isEmpty()) {
    console.info("Properties: (none)");
  } else {
    console.info("Properties:");
    // pad keys to the longest key width so values line up in a column
    String format = " %" + maxSize(keyValues.keySet()) + "s: %s";
    for (Map.Entry<String, String> entry : keyValues.entrySet()) {
      console.info(String.format(format, entry.getKey(), entry.getValue()));
    }
  }

  MessageType schema = footer.getFileMetaData().getSchema();
  console.info("Schema:\n{}", schema);

  List<BlockMetaData> rowGroups = footer.getBlocks();
  int groupCount = rowGroups.size();
  for (int i = 0; i < groupCount; i++) {
    printRowGroup(console, i, rowGroups.get(i), schema);
  }
  console.info("");
  return 0;
}
开发者ID:apache,项目名称:parquet-mr,代码行数:39,代码来源:ParquetMetadataCommand.java
示例6: getParquetFileMetadata
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Builds Dremio-side metadata for a single Parquet file: per-row-group start
 * offset, compressed length, row count, host affinity, plus per-column type
 * info and a compact statistics summary.
 *
 * @param file status of the Parquet file to summarize
 * @return collected file metadata covering all row groups
 * @throws IOException if the footer cannot be read
 */
private ParquetFileMetadata getParquetFileMetadata(FileStatus file) throws IOException {
  final ParquetMetadata metadata;
  metadata = SingletonParquetFooterCache.readFooter(fs, file, ParquetMetadataConverter.NO_FILTER);
  MessageType schema = metadata.getFileMetaData().getSchema();

  // Map each column path to its Parquet OriginalType (logical type); used
  // below when building per-column type metadata.
  Map<SchemaPath, OriginalType> originalTypeMap = Maps.newHashMap();
  // NOTE(review): this bare getPaths() call discards its result — looks like
  // a leftover; confirm it has no intended side effect before removing.
  schema.getPaths();
  for (String[] path : schema.getPaths()) {
    originalTypeMap.put(SchemaPath.getCompoundPath(path), getOriginalType(schema, path, 0));
  }

  List<RowGroupMetadata> rowGroupMetadataList = Lists.newArrayList();

  // Corrupt-date detection is run over all columns (star column).
  ArrayList<SchemaPath> ALL_COLS = new ArrayList<>();
  ALL_COLS.add(AbstractRecordReader.STAR_COLUMN);
  boolean autoCorrectCorruptDates = formatConfig.autoCorrectCorruptDates;
  ParquetReaderUtility.DateCorruptionStatus containsCorruptDates = ParquetReaderUtility.detectCorruptDates(metadata, ALL_COLS, autoCorrectCorruptDates);
  if(logger.isDebugEnabled()){
    logger.debug(containsCorruptDates.toString());
  }

  final Map<ColumnTypeMetadata.Key, ColumnTypeMetadata> columnTypeInfo = Maps.newHashMap();
  for (BlockMetaData rowGroup : metadata.getBlocks()) {
    List<ColumnMetadata> columnMetadataList = Lists.newArrayList();
    long length = 0;  // accumulated total size of this row group's column chunks
    for (ColumnChunkMetaData col : rowGroup.getColumns()) {
      ColumnMetadata columnMetadata;
      // Statistics are only usable when present and non-empty.
      boolean statsAvailable = (col.getStatistics() != null && !col.getStatistics().isEmpty());
      Statistics<?> stats = col.getStatistics();
      String[] columnName = col.getPath().toArray();
      SchemaPath columnSchemaName = SchemaPath.getCompoundPath(columnName);
      ColumnTypeMetadata columnTypeMetadata =
          new ColumnTypeMetadata(columnName, col.getType(), originalTypeMap.get(columnSchemaName));
      columnTypeInfo.put(new ColumnTypeMetadata.Key(columnTypeMetadata.name), columnTypeMetadata);
      if (statsAvailable) {
        // Write stats only if minVal==maxVal. Also, we then store only maxVal
        Object mxValue = null;
        if (stats.genericGetMax() != null && stats.genericGetMin() != null &&
            stats.genericGetMax().equals(stats.genericGetMin())) {
          mxValue = stats.genericGetMax();
          // Re-encode DATE stats written by writers known to produce corrupt dates.
          if (containsCorruptDates == ParquetReaderUtility.DateCorruptionStatus.META_SHOWS_CORRUPTION
              && columnTypeMetadata.originalType == OriginalType.DATE) {
            mxValue = ParquetReaderUtility.autoCorrectCorruptedDate((Integer) mxValue);
          }
        }
        columnMetadata =
            new ColumnMetadata(columnTypeMetadata.name, mxValue, stats.getNumNulls());
      } else {
        // No statistics: record the column with unknown value and null count.
        columnMetadata = new ColumnMetadata(columnTypeMetadata.name,null, null);
      }
      columnMetadataList.add(columnMetadata);
      length += col.getTotalSize();
    }
    RowGroupMetadata rowGroupMeta =
        new RowGroupMetadata(rowGroup.getStartingPos(), length, rowGroup.getRowCount(),
            getHostAffinity(file, rowGroup.getStartingPos(), length), columnMetadataList);
    rowGroupMetadataList.add(rowGroupMeta);
  }
  return new ParquetFileMetadata(file, file.getLen(), rowGroupMetadataList, columnTypeInfo);
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:68,代码来源:Metadata.java
示例7: getColNameToSchemaElementMapping
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Builds a lookup from schema element name to the Thrift {@link SchemaElement}
 * for a Parquet footer. The footer is converted to its Thrift form via
 * {@link ParquetMetadataConverter#toParquetMetadata} to obtain the flat list
 * of schema elements.
 *
 * @param footer footer metadata of the Parquet file
 * @return map keyed by element name
 */
public static Map<String, SchemaElement> getColNameToSchemaElementMapping(ParquetMetadata footer) {
  // Declare against the Map interface rather than the HashMap implementation.
  Map<String, SchemaElement> schemaElements = new HashMap<>();
  FileMetaData fileMetaData = new ParquetMetadataConverter().toParquetMetadata(ParquetFileWriter.CURRENT_VERSION, footer);
  // NOTE: put() overwrites on duplicate names, so for nested schemas with
  // repeated element names the last occurrence wins — presumably acceptable
  // for the flat-schema callers; verify if nested schemas are in scope.
  for (SchemaElement se : fileMetaData.getSchema()) {
    schemaElements.put(se.getName(), se);
  }
  return schemaElements;
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:9,代码来源:ParquetReaderUtility.java
示例8: getFooter
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Returns the footer for {@code file}, re-reading it only when the requested
 * file differs from the one cached by the previous call.
 */
public ParquetMetadata getFooter(FileSystemWrapper fs, Path file) {
  // footer == null guards the first call, before lastFile is initialized
  final boolean cacheMiss = footer == null || !lastFile.equals(file);
  if (cacheMiss) {
    try {
      footer = readFooter(fs, file, ParquetMetadataConverter.NO_FILTER);
    } catch (IOException ioe) {
      throw new RuntimeException("Failed to read parquet footer for file " + file, ioe);
    }
    lastFile = file;
  }
  return footer;
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:12,代码来源:SingletonParquetFooterCache.java
示例9: readFooter
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Reads the footer of {@code file} by first resolving its {@code FileStatus}
 * and delegating to the status-based overload.
 */
public static ParquetMetadata readFooter(final FileSystem fs, final Path file, ParquetMetadataConverter.MetadataFilter filter) throws IOException {
  final FileStatus status = fs.getFileStatus(file);
  return readFooter(fs, status, filter);
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:4,代码来源:SingletonParquetFooterCache.java
示例10: getBatchSchema
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Derives the dataset's batch schema by sampling the first file of the
 * selection: reads its footer, scans up to 100 records through a throwaway
 * allocator/mutator, and returns the schema built from the sample container.
 */
@Override
public BatchSchema getBatchSchema(final FileSelection selection, final FileSystemWrapper fs) {
  final SabotContext context = ((ParquetFormatPlugin)formatPlugin).getContext();
  try (
      // temporary allocator, operator context and mutator used only for sampling
      BufferAllocator sampleAllocator = context.getAllocator().newChildAllocator("sample-alloc", 0, Long.MAX_VALUE);
      OperatorContextImpl operatorContext = new OperatorContextImpl(context.getConfig(), sampleAllocator, context.getOptionManager(), 1000);
      SampleMutator mutator = new SampleMutator(context)
  ){
    final Optional<FileStatus> firstFileO = selection.getFirstFile();
    if(!firstFileO.isPresent()) {
      throw UserException.dataReadError().message("Unable to find any files for datasets.").build(logger);
    }
    final FileStatus firstFile = firstFileO.get();
    // Read the first file's footer and decide how possibly-corrupt dates are handled.
    final ParquetMetadata footer = ParquetFileReader.readFooter(fsPlugin.getFsConf(), firstFile, ParquetMetadataConverter.NO_FILTER);
    final ParquetReaderUtility.DateCorruptionStatus dateStatus = ParquetReaderUtility.detectCorruptDates(footer, GroupScan.ALL_COLUMNS,
        ((ParquetFormatPlugin)formatPlugin).getConfig().autoCorrectCorruptDates);
    final boolean readInt96AsTimeStamp = operatorContext.getOptions().getOption(PARQUET_READER_INT96_AS_TIMESTAMP).bool_val;
    final ImplicitFilesystemColumnFinder finder = new ImplicitFilesystemColumnFinder(context.getOptionManager(), fs, GroupScan.ALL_COLUMNS);
    // Wrap the row-wise reader so implicit filesystem columns appear in the sample.
    try(RecordReader reader =
        new AdditionalColumnsRecordReader(
            new ParquetRowiseReader(operatorContext, footer, 0, firstFile.getPath().toString(), GroupScan.ALL_COLUMNS, fs, dateStatus, readInt96AsTimeStamp, true),
            finder.getImplicitFieldsForSample(selection)
        )) {
      reader.setup(mutator);
      mutator.allocate(100);
      //TODO DX-3873: remove the next() call here. We need this for now since we don't populate inner list types until next.
      reader.next();
      mutator.getContainer().buildSchema(BatchSchema.SelectionVectorMode.NONE);
      return mutator.getContainer().getSchema();
    }
  } catch (Exception e) {
    // broad catch: any sampling failure is surfaced as a runtime error
    throw new RuntimeException(e);
  }
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:39,代码来源:ParquetFormatDatasetAccessor.java
示例11: ParquetRowReader
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Opens {@code filePath}, reads its footer, initializes the read context from
 * the supplied {@link ReadSupport}, and prepares a file reader for the file's
 * row groups. Also counts the total number of records to be read.
 */
public ParquetRowReader(Configuration configuration, Path filePath, ReadSupport<T> readSupport) throws IOException
{
  this.filePath = filePath;

  ParquetMetadata footer = ParquetFileReader.readFooter(configuration, filePath, ParquetMetadataConverter.NO_FILTER);
  FileMetaData fileMetadata = footer.getFileMetaData();
  List<BlockMetaData> blocks = footer.getBlocks();
  this.fileSchema = fileMetadata.getSchema();

  // Let the ReadSupport choose the requested schema and record materialization.
  Map<String, String> keyValueMetadata = fileMetadata.getKeyValueMetaData();
  ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
      configuration, toSetMultiMap(keyValueMetadata), fileSchema));
  this.columnIOFactory = new ColumnIOFactory(fileMetadata.getCreatedBy());
  this.requestedSchema = readContext.getRequestedSchema();
  this.recordConverter = readSupport.prepareForRead(
      configuration, fileMetadata.getKeyValueMetaData(), fileSchema, readContext);

  reader = new ParquetFileReader(configuration, fileMetadata, filePath, blocks, requestedSchema.getColumns());

  // Sum row counts over all row groups to know how many records will be read.
  long rowCount = 0;
  for (BlockMetaData block : blocks) {
    rowCount += block.getRowCount();
  }
  this.total = rowCount;

  this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, rowCount);
  logger.info("ParquetRowReader initialized will read a total of " + rowCount + " records.");
}
开发者ID:CyberAgent,项目名称:embulk-input-parquet_hadoop,代码行数:32,代码来源:ParquetRowReader.java
示例12: load
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Loads the configured Parquet file into an in-memory {@code ITable}.
 * Reads the footer, materializes every row group record-by-record into
 * appendable columns, then seals the columns into a Table.
 *
 * @return the loaded table
 * @throws RuntimeException wrapping any {@link IOException} from the read
 */
public ITable load() {
  try {
    Configuration conf = new Configuration();
    // Run against the local filesystem with Hadoop security disabled.
    System.setProperty("hadoop.home.dir", "/");
    conf.set("hadoop.security.authentication", "simple");
    conf.set("hadoop.security.authorization", "false");

    Path path = new Path(this.filename);
    ParquetMetadata md = ParquetFileReader.readFooter(conf, path,
        ParquetMetadataConverter.NO_FILTER);
    MessageType schema = md.getFileMetaData().getSchema();
    IAppendableColumn[] cols = this.createColumns(md);
    MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);

    // FIX: the reader was previously never closed (resource leak);
    // try-with-resources guarantees it is released even on error.
    try (ParquetFileReader r = new ParquetFileReader(conf, path, md)) {
      PageReadStore pages;
      while (null != (pages = r.readNextRowGroup())) {
        final long rows = pages.getRowCount();
        RecordReader<Group> recordReader = columnIO.getRecordReader(
            pages, new GroupRecordConverter(schema));
        for (int i = 0; i < rows; i++) {
          Group g = recordReader.read();
          // use the schema local consistently instead of re-fetching it
          appendGroup(cols, g, schema.getColumns());
        }
      }
    }
    for (IAppendableColumn c : cols)
      c.seal();
    return new Table(cols);
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
开发者ID:vmware,项目名称:hillview,代码行数:33,代码来源:ParquetReader.java
示例13: parseChunk
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Parses the Parquet row groups whose centers fall inside the given chunk,
 * writing parsed records into {@code dout}. Chunks that contain no row-group
 * center are skipped so each row group is parsed exactly once.
 */
@Override
protected final ParseWriter parseChunk(int cidx, ParseReader din, ParseWriter dout) {
  if (!(din instanceof FVecParseReader)) {
    // TODO: Should we modify the interface to expose the underlying chunk for non-streaming parsers?
    throw new IllegalStateException("We only accept parser readers backed by a Vec (no streaming support!).");
  }
  Chunk chunk = ((FVecParseReader) din).getChunk();
  Vec vec = chunk.vec();

  // extract metadata, we want to read only the row groups that have centers in this chunk
  long chunkStart = chunk.start();
  ParquetMetadataConverter.MetadataFilter chunkFilter = ParquetMetadataConverter.range(
      chunkStart, chunkStart + chunk.len());
  ParquetMetadata metadata = VecParquetReader.readFooter(_metadata, chunkFilter);
  if (metadata.getBlocks().isEmpty()) {
    Log.trace("Chunk #", cidx, " doesn't contain any Parquet block center.");
    return dout;
  }
  Log.info("Processing ", metadata.getBlocks().size(), " blocks of chunk #", cidx);

  VecParquetReader reader = new VecParquetReader(vec, metadata, dout, _setup.getColumnTypes());
  try {
    // drain every record; read() returns null once the assigned blocks are exhausted
    while (reader.read() != null) {
      // records are written into dout by the reader itself
    }
  } catch (IOException e) {
    throw new RuntimeException("Failed to parse records", e);
  }

  long skipped = reader.getInvalidRecordCount();
  if (skipped > 0)
    dout.addError(new ParseWriter.ParseErr("Some records were corrupt (skipped " + skipped + ").", cidx, -1, chunk.start()));
  return dout;
}
开发者ID:h2oai,项目名称:h2o-3,代码行数:32,代码来源:ParquetParser.java
示例14: correctTypeConversions
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Overrides unsupported type conversions/mappings specified by the user.
 * @param vec byte vec holding binary parquet data
 * @param requestedTypes user-specified target types
 * @return corrected types
 */
public static byte[] correctTypeConversions(ByteVec vec, byte[] requestedTypes) {
  final byte[] footerBytes = VecParquetReader.readFooterAsBytes(vec);
  final ParquetMetadata footer = VecParquetReader.readFooter(footerBytes, ParquetMetadataConverter.NO_FILTER);
  // derive baseline types from the file schema, then reconcile with the request
  final byte[] guessedTypes = roughGuessTypes(footer.getFileMetaData().getSchema());
  return correctTypeConversions(guessedTypes, requestedTypes);
}
开发者ID:h2oai,项目名称:h2o-3,代码行数:13,代码来源:ParquetParser.java
示例15: readFooter
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Reads the footer of {@code file} with a metadata converter configured from
 * the supplied read options, delegating to the converter-taking overload.
 */
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f) throws IOException {
  return readFooter(file, options, f, new ParquetMetadataConverter(options));
}
开发者ID:apache,项目名称:parquet-mr,代码行数:5,代码来源:ParquetFileReader.java
示例16: ParquetFileReader
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Legacy constructor that reads row groups from a file whose footer has
 * already been parsed.
 *
 * @param configuration the Hadoop conf
 * @param fileMetaData fileMetaData for parquet file
 * @param filePath path of the parquet file to open
 * @param blocks the blocks to read
 * @param columns the columns to read (their path)
 * @throws IOException if the file can not be opened
 * @deprecated retained for compatibility only
 */
@Deprecated
public ParquetFileReader(
    Configuration configuration, FileMetaData fileMetaData,
    Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException {
  this.converter = new ParquetMetadataConverter(configuration);
  this.file = HadoopInputFile.fromPath(filePath, configuration);
  this.fileMetaData = fileMetaData;
  this.f = file.newStream();
  // read options derived solely from the Hadoop configuration
  this.options = HadoopReadOptions.builder(configuration).build();
  // keep only the row groups that pass the configured row-group filters
  this.blocks = filterRowGroups(blocks);
  // index requested columns by their path for later chunk lookups
  for (ColumnDescriptor col : columns) {
    paths.put(ColumnPath.get(col.getPath()), col);
  }
}
开发者ID:apache,项目名称:parquet-mr,代码行数:22,代码来源:ParquetFileReader.java
示例17: mergeMetadataFiles
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Given a list of metadata files, merge them into a single ParquetMetadata.
 * Requires that the schemas be compatible, and the extraMetadata be exactly equal.
 * @deprecated metadata files are not recommended and will be removed in 2.0.0
 */
@Deprecated
public static ParquetMetadata mergeMetadataFiles(List<Path> files, Configuration conf) throws IOException {
  Preconditions.checkArgument(!files.isEmpty(), "Cannot merge an empty list of metadata");

  GlobalMetaData mergedGlobal = null;
  final List<BlockMetaData> allBlocks = new ArrayList<>();
  for (Path metadataPath : files) {
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, metadataPath, ParquetMetadataConverter.NO_FILTER);
    // fold each file's metadata into the accumulated global metadata
    mergedGlobal = mergeInto(footer.getFileMetaData(), mergedGlobal, true);
    allBlocks.addAll(footer.getBlocks());
  }

  // collapse GlobalMetaData into a single FileMetaData, which will throw if they are not compatible
  return new ParquetMetadata(mergedGlobal.merge(), allBlocks);
}
开发者ID:apache,项目名称:parquet-mr,代码行数:23,代码来源:ParquetFileWriter.java
示例18: getMetadataFilter
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Returns the metadata filter applied when the file footer is read.
 */
public ParquetMetadataConverter.MetadataFilter getMetadataFilter() {
  return metadataFilter;
}
开发者ID:apache,项目名称:parquet-mr,代码行数:4,代码来源:ParquetReadOptions.java
示例19: withRange
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * Restricts footer reading to the byte range [start, end) via a range
 * metadata filter; returns this builder for chaining.
 */
public Builder withRange(long start, long end) {
  final ParquetMetadataConverter.MetadataFilter rangeFilter =
      ParquetMetadataConverter.range(start, end);
  this.metadataFilter = rangeFilter;
  return this;
}
开发者ID:apache,项目名称:parquet-mr,代码行数:5,代码来源:ParquetReadOptions.java
示例20: testMergeMetadataFiles
import org.apache.parquet.format.converter.ParquetMetadataConverter; //导入依赖的package包/类
/**
 * End-to-end check of metadata-file merging: writes test files along with
 * their _metadata and _common_metadata summaries, merges each pair, and
 * verifies block counts, schemas, and key/value metadata of the results.
 */
@Test
public void testMergeMetadataFiles() throws Exception {
  WrittenFileInfo info = writeFiles(false);

  // Read back all four footers without filtering.
  ParquetMetadata commonMeta1 = ParquetFileReader.readFooter(info.conf, info.commonMetaPath1, ParquetMetadataConverter.NO_FILTER);
  ParquetMetadata commonMeta2 = ParquetFileReader.readFooter(info.conf, info.commonMetaPath2, ParquetMetadataConverter.NO_FILTER);
  ParquetMetadata meta1 = ParquetFileReader.readFooter(info.conf, info.metaPath1, ParquetMetadataConverter.NO_FILTER);
  ParquetMetadata meta2 = ParquetFileReader.readFooter(info.conf, info.metaPath2, ParquetMetadataConverter.NO_FILTER);

  // Common-metadata files carry no row groups; metadata files do.
  assertTrue(commonMeta1.getBlocks().isEmpty());
  assertTrue(commonMeta2.getBlocks().isEmpty());
  assertEquals(commonMeta1.getFileMetaData().getSchema(), commonMeta2.getFileMetaData().getSchema());
  assertFalse(meta1.getBlocks().isEmpty());
  assertFalse(meta2.getBlocks().isEmpty());
  assertEquals(meta1.getFileMetaData().getSchema(), meta2.getFileMetaData().getSchema());
  assertEquals(commonMeta1.getFileMetaData().getKeyValueMetaData(), commonMeta2.getFileMetaData().getKeyValueMetaData());
  assertEquals(meta1.getFileMetaData().getKeyValueMetaData(), meta2.getFileMetaData().getKeyValueMetaData());

  // test file serialization
  Path mergedOut = new Path(new File(temp.getRoot(), "merged_meta").getAbsolutePath());
  Path mergedCommonOut = new Path(new File(temp.getRoot(), "merged_common_meta").getAbsolutePath());
  ParquetFileWriter.writeMergedMetadataFile(Arrays.asList(info.metaPath1, info.metaPath2), mergedOut, info.conf);
  ParquetFileWriter.writeMergedMetadataFile(Arrays.asList(info.commonMetaPath1, info.commonMetaPath2), mergedCommonOut, info.conf);

  ParquetMetadata mergedMeta = ParquetFileReader.readFooter(info.conf, mergedOut, ParquetMetadataConverter.NO_FILTER);
  ParquetMetadata mergedCommonMeta = ParquetFileReader.readFooter(info.conf, mergedCommonOut, ParquetMetadataConverter.NO_FILTER);

  // ideally we'd assert equality here, but BlockMetaData and it's references don't implement equals
  assertEquals(meta1.getBlocks().size() + meta2.getBlocks().size(), mergedMeta.getBlocks().size());
  assertTrue(mergedCommonMeta.getBlocks().isEmpty());
  assertEquals(meta1.getFileMetaData().getSchema(), mergedMeta.getFileMetaData().getSchema());
  assertEquals(commonMeta1.getFileMetaData().getSchema(), mergedCommonMeta.getFileMetaData().getSchema());
  assertEquals(meta1.getFileMetaData().getKeyValueMetaData(), mergedMeta.getFileMetaData().getKeyValueMetaData());
  assertEquals(commonMeta1.getFileMetaData().getKeyValueMetaData(), mergedCommonMeta.getFileMetaData().getKeyValueMetaData());
}
开发者ID:apache,项目名称:parquet-mr,代码行数:41,代码来源:TestMergeMetadataFiles.java
注:本文中的org.apache.parquet.format.converter.ParquetMetadataConverter类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论