本文整理汇总了Java中org.apache.parquet.schema.OriginalType类的典型用法代码示例。如果您正苦于以下问题:Java OriginalType类的具体用法?Java OriginalType怎么用?Java OriginalType使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
OriginalType类属于org.apache.parquet.schema包,在下文中一共展示了OriginalType类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: convertColumnDescriptor
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Translates a {@link ColumnDescriptor} into a {@link SchemaPath}, flattening any parquet
 * LOGICAL LIST encountered on the way so the execution engine sees a plain path (the
 * synthetic 'list' and 'element' segments are dropped from the name).
 */
private static SchemaPath convertColumnDescriptor(final MessageType schema, final ColumnDescriptor columnDescriptor) {
  final List<String> segments = Lists.newArrayList(columnDescriptor.getPath());
  Type current = schema;
  int pos = 0;
  // Walk the path top-down; the last element is a primitive and needs no inspection.
  while (!current.isPrimitive()) {
    current = current.asGroupType().getType(segments.get(pos));
    if (current.getOriginalType() == OriginalType.LIST
        && LogicalListL1Converter.isSupportedSchema(current.asGroupType())) {
      // Drop the synthetic 'list' segment ...
      current = current.asGroupType().getType(segments.get(pos + 1));
      segments.remove(pos + 1);
      // ... and the synthetic 'element' segment beneath it.
      current = current.asGroupType().getType(segments.get(pos + 1));
      segments.remove(pos + 1);
    }
    pos++;
  }
  return SchemaPath.getCompoundPath(segments.toArray(new String[segments.size()]));
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:28,代码来源:ParquetRowiseReader.java
示例2: newFieldConverter
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Picks a converter for a single parquet field, handling the legacy "bare repeated"
 * layout as a special case.
 */
private Converter newFieldConverter(Type parquetType, ParentContainerUpdater updater)
{
    // A repeated field that is neither contained by a `LIST`- or `MAP`-annotated group nor
    // annotated by `LIST` or `MAP` stands for a required list of required elements whose
    // element type is the field's own type.
    final boolean bareRepeated = parquetType.isRepetition(Type.Repetition.REPEATED)
            && parquetType.getOriginalType() != OriginalType.LIST;
    if (!bareRepeated) {
        return newConverter(parquetType, updater);
    }
    return parquetType.isPrimitive()
            ? new RepeatedPrimitiveConverter(parquetType, updater)
            : new RepeatedGroupConverter(parquetType, updater);
}
开发者ID:CyberAgent,项目名称:embulk-input-parquet_hadoop,代码行数:18,代码来源:ParquetValueConverter.java
示例3: convertLogicalType
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Maps an avro logical-type name to the matching parquet {@link OriginalType},
 * or {@code null} when the name is absent or unsupported by this pre-1.9 converter.
 */
private OriginalType convertLogicalType(String logicalType) {
  if (logicalType == null) {
    return null;
  }
  if (LOGICAL_TYPE_DECIMAL.equals(logicalType)) {
    return OriginalType.DECIMAL;
  }
  if (LOGICAL_TYPE_DATE.equals(logicalType)) {
    return OriginalType.DATE;
  }
  if (LOGICAL_TYPE_TIME_MILLIS.equals(logicalType)) {
    return OriginalType.TIME_MILLIS;
  }
  if (LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType)) {
    return OriginalType.TIMESTAMP_MILLIS;
  }
  // TIME_MICROS / TIMESTAMP_MICROS are intentionally not handled by this converter.
  return null;
}
开发者ID:streamsets,项目名称:datacollector,代码行数:19,代码来源:AvroSchemaConverterLogicalTypesPre19.java
示例4: convertOriginalType
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Encodes a parquet {@link OriginalType} annotation as avro logical-type properties,
 * or {@code null} when the annotation is absent or has no avro counterpart here.
 */
private Map<String, String> convertOriginalType(OriginalType annotation, DecimalMetadata meta) {
  if (annotation == null) {
    return null;
  }
  if (annotation == OriginalType.DECIMAL) {
    // Decimal carries precision/scale alongside the logical-type name.
    return ImmutableMap.of(
        LOGICAL_TYPE, LOGICAL_TYPE_DECIMAL,
        LOGICAL_PROP_PRECISION, Integer.toString(meta.getPrecision()),
        LOGICAL_PROP_SCALE, Integer.toString(meta.getScale())
    );
  }
  if (annotation == OriginalType.DATE) {
    return ImmutableMap.of(LOGICAL_TYPE, LOGICAL_TYPE_DATE);
  }
  if (annotation == OriginalType.TIME_MILLIS) {
    return ImmutableMap.of(LOGICAL_TYPE, LOGICAL_TYPE_TIME_MILLIS);
  }
  if (annotation == OriginalType.TIMESTAMP_MILLIS) {
    return ImmutableMap.of(LOGICAL_TYPE, LOGICAL_TYPE_TIMESTAMP_MILLIS);
  }
  // MICROS variants (and any other annotation) are unsupported by this pre-1.9 converter.
  return null;
}
开发者ID:streamsets,项目名称:datacollector,代码行数:26,代码来源:AvroSchemaConverterLogicalTypesPre19.java
示例5: newConverter
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Chooses a primitive converter for column {@code colIdx} based on the H2O vector type
 * and the parquet column's physical/original type.
 */
private PrimitiveConverter newConverter(int colIdx, byte vecType, PrimitiveType parquetType) {
  switch (vecType) {
    case Vec.T_NUM:
      return new NumberConverter(colIdx, _writer);
    case Vec.T_BAD:
    case Vec.T_CAT:
    case Vec.T_STR:
    case Vec.T_UUID:
    case Vec.T_TIME: {
      final boolean timestampLike =
          OriginalType.TIMESTAMP_MILLIS.equals(parquetType.getOriginalType())
              || parquetType.getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96);
      if (timestampLike) {
        return new TimestampConverter(colIdx, _writer);
      }
      // Dictionary decoding is only enabled for string-like annotations.
      final OriginalType originalType = parquetType.getOriginalType();
      final boolean dictSupport = originalType == OriginalType.UTF8 || originalType == OriginalType.ENUM;
      return new StringConverter(_writer, colIdx, dictSupport);
    }
    default:
      throw new UnsupportedOperationException("Unsupported type " + vecType);
  }
}
开发者ID:h2oai,项目名称:h2o-3,代码行数:20,代码来源:ChunkConverter.java
示例6: showDetails
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Prints one line describing a primitive column: dotted depth prefix, repetition,
 * physical type, optional original type, and (when a containing message is given)
 * its max repetition/definition levels.
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  out.format("%s: %s %s",
      Strings.repeat(".", depth) + type.getName(),
      type.getRepetition(),
      type.getPrimitiveTypeName());
  final OriginalType originalType = type.getOriginalType();
  if (originalType != null) {
    out.format(" O:%s", originalType);
  }
  if (container != null) {
    // Temporarily extend the shared path to resolve this column's descriptor.
    cpath.add(type.getName());
    final ColumnDescriptor desc = container.getColumnDescription(cpath.toArray(new String[cpath.size()]));
    cpath.remove(cpath.size() - 1);
    out.format(" R:%d D:%d", desc.getMaxRepetitionLevel(), desc.getMaxDefinitionLevel());
  }
  out.println();
}
开发者ID:apache,项目名称:parquet-mr,代码行数:23,代码来源:MetadataUtils.java
示例7: testIDs
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
// Verifies that field ids ("= N") survive parsing and a toString/re-parse round trip,
// regardless of whitespace placement around the '=' sign.
@Test
public void testIDs() {
    // Deliberately varies the spacing around '=' in each field declaration.
    String message =
        "message Message {\n" +
        " required binary string (UTF8) = 6;\n" +
        " required int32 i=1;\n" +
        " required binary s2= 3;\n" +
        " required binary s3 =4;\n" +
        "}\n";
    MessageType parsed = parseMessageType(message);
    // Expected schema built programmatically with the same ids.
    MessageType expected = buildMessage()
        .required(BINARY).as(OriginalType.UTF8).id(6).named("string")
        .required(INT32).id(1).named("i")
        .required(BINARY).id(3).named("s2")
        .required(BINARY).id(4).named("s3")
        .named("Message");
    assertEquals(expected, parsed);
    // Round trip: printing and re-parsing must preserve the ids as well.
    MessageType reparsed = parseMessageType(parsed.toString());
    assertEquals(expected, reparsed);
}
开发者ID:apache,项目名称:parquet-mr,代码行数:23,代码来源:TestParquetParser.java
示例8: testIgnoreStatsWithSignedSortOrder
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Binary (UTF8) columns were historically written with a signed byte-wise sort order,
 * so the converter must refuse to trust their min/max statistics when min != max.
 */
@Test
public void testIgnoreStatsWithSignedSortOrder() {
  final BinaryStatistics written = new BinaryStatistics();
  written.incrementNumNulls();
  written.updateStats(Binary.fromString("A"));
  written.incrementNumNulls();
  written.updateStats(Binary.fromString("z"));
  written.incrementNumNulls();

  final PrimitiveType stringColumn = Types.required(PrimitiveTypeName.BINARY)
      .as(OriginalType.UTF8).named("b");
  final Statistics readBack = new ParquetMetadataConverter().fromParquetStatistics(
      Version.FULL_VERSION,
      StatsHelper.V1.toParquetStatistics(written),
      stringColumn);
  Assert.assertTrue("Stats should be empty: " + readBack, readBack.isEmpty());
}
开发者ID:apache,项目名称:parquet-mr,代码行数:20,代码来源:TestParquetMetadataConverter.java
示例9: testStillUseStatsWithSignedSortOrderIfSingleValue
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Even under the untrusted signed sort order, statistics remain usable when the column
 * holds a single distinct value (min == max), so they must not be dropped in that case.
 */
private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
  final BinaryStatistics written = new BinaryStatistics();
  written.incrementNumNulls();
  written.updateStats(Binary.fromString("A"));
  written.incrementNumNulls();
  written.updateStats(Binary.fromString("A"));
  written.incrementNumNulls();

  final PrimitiveType stringColumn =
      Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  final Statistics roundTripped = new ParquetMetadataConverter().fromParquetStatistics(
      Version.FULL_VERSION,
      ParquetMetadataConverter.toParquetStatistics(written),
      stringColumn);
  Assert.assertFalse("Stats should not be empty: " + roundTripped, roundTripped.isEmpty());
  Assert.assertArrayEquals("min == max: " + roundTripped,
      roundTripped.getMaxBytes(), roundTripped.getMinBytes());
}
开发者ID:apache,项目名称:parquet-mr,代码行数:19,代码来源:TestParquetMetadataConverter.java
示例10: testV2OnlyStats
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
// Exercises statistics that are only representable in the V2 format: each pair of values
// straddles the signed/unsigned boundary for its type, so V1 signed comparison would
// order them incorrectly.
@Test
public void testV2OnlyStats() {
  // Unsigned ints: max positive signed value vs. the value just past it.
  testV2OnlyStats(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named(""),
      0x7F,
      0x80);
  testV2OnlyStats(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named(""),
      0x7FFF,
      0x8000);
  testV2OnlyStats(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named(""),
      0x7FFFFFFF,
      0x80000000);
  testV2OnlyStats(Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""),
      0x7FFFFFFFFFFFFFFFL,
      0x8000000000000000L);
  // Decimals backed by BINARY and FIXED_LEN_BYTE_ARRAY.
  testV2OnlyStats(Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""),
      new BigInteger("-765875"),
      new BigInteger("876856"));
  testV2OnlyStats(
      Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14).as(OriginalType.DECIMAL).precision(7)
          .named(""),
      new BigInteger("-6769643"),
      new BigInteger("9864675"));
}
开发者ID:apache,项目名称:parquet-mr,代码行数:24,代码来源:TestParquetMetadataConverter.java
示例11: testV2StatsEqualMinMax
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
// Exercises the min == max single-value case for each annotated type: such stats stay
// valid in any sort order, so every helper call uses the same value for min and max.
@Test
public void testV2StatsEqualMinMax() {
  testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_8).named(""),
      93,
      93);
  testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_16).named(""),
      -5892,
      -5892);
  testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT32).as(OriginalType.UINT_32).named(""),
      234998934,
      234998934);
  testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT64).as(OriginalType.UINT_64).named(""),
      -2389943895984985L,
      -2389943895984985L);
  // Decimal-annotated BINARY and FIXED_LEN_BYTE_ARRAY columns.
  testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.DECIMAL).precision(6).named(""),
      new BigInteger("823749"),
      new BigInteger("823749"));
  testV2StatsEqualMinMax(
      Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(14).as(OriginalType.DECIMAL).precision(7)
          .named(""),
      new BigInteger("-8752832"),
      new BigInteger("-8752832"));
  // INT96 carries no original type annotation.
  testV2StatsEqualMinMax(Types.optional(PrimitiveTypeName.INT96).named(""),
      new BigInteger("81032984"),
      new BigInteger("81032984"));
}
开发者ID:apache,项目名称:parquet-mr,代码行数:27,代码来源:TestParquetMetadataConverter.java
示例12: testMapOriginalType
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * A hive {@code map<string,string>} column must convert to an optional group annotated
 * MAP that contains exactly one repeated group named "map" annotated MAP_KEY_VALUE.
 */
@Test
public void testMapOriginalType() throws Exception {
  final String hiveColumnTypes = "map<string,string>";
  final String hiveColumnNames = "mapCol";
  final MessageType converted = HiveSchemaConverter.convert(
      createHiveColumnsFrom(hiveColumnNames),
      createHiveTypeInfoFrom(hiveColumnTypes));

  // Top level: a single optional field named mapCol annotated MAP.
  assertEquals(1, converted.getFieldCount());
  final org.apache.parquet.schema.Type mapField = converted.getFields().get(0);
  assertEquals("mapCol", mapField.getName());
  assertEquals(OriginalType.MAP, mapField.getOriginalType());
  assertEquals(Repetition.OPTIONAL, mapField.getRepetition());

  // Second level: one repeated group named "map" annotated MAP_KEY_VALUE.
  assertEquals(1, mapField.asGroupType().getFieldCount());
  final org.apache.parquet.schema.Type keyValueGroup = mapField.asGroupType().getFields().get(0);
  assertEquals("map", keyValueGroup.getName());
  assertEquals(OriginalType.MAP_KEY_VALUE, keyValueGroup.getOriginalType());
  assertEquals(Repetition.REPEATED, keyValueGroup.getRepetition());
}
开发者ID:apache,项目名称:parquet-mr,代码行数:22,代码来源:TestHiveSchemaConverter.java
示例13: testListsOfPrimitive
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Converts a LIST group for every combination of list repetition, element repetition and
 * primitive element type; conversion must not throw. INT96 is skipped (not implemented).
 */
@Test
public void testListsOfPrimitive() throws Exception {
  for (Type.Repetition listRepetition : Type.Repetition.values()) {
    for (Type.Repetition elementRepetition : Type.Repetition.values()) {
      for (PrimitiveType.PrimitiveTypeName elementType : PrimitiveType.PrimitiveTypeName.values()) {
        if (elementType == PrimitiveType.PrimitiveTypeName.INT96) {
          continue; // INT96 is NYI
        }
        final Types.PrimitiveBuilder<PrimitiveType> element = Types.primitive(elementType, elementRepetition);
        if (elementType == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
          element.length(1); // fixed-length binaries require an explicit length
        }
        final GroupType listType = Types.buildGroup(listRepetition)
            .addField(element.named("b"))
            .as(OriginalType.LIST)
            .named("a");
        pigSchemaConverter.convertField(listType); // no exceptions, please
      }
    }
  }
}
开发者ID:apache,项目名称:parquet-mr,代码行数:17,代码来源:TestPigSchemaConverter.java
示例14: convertLogicalType
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Translates an avro {@link LogicalType} to the matching parquet {@link OriginalType},
 * or {@code null} when no annotation applies.
 */
private OriginalType convertLogicalType(LogicalType logicalType) {
  if (logicalType == null) {
    return null;
  }
  if (logicalType instanceof LogicalTypes.Decimal) {
    return OriginalType.DECIMAL;
  }
  if (logicalType instanceof LogicalTypes.Date) {
    return OriginalType.DATE;
  }
  if (logicalType instanceof LogicalTypes.TimeMillis) {
    return OriginalType.TIME_MILLIS;
  }
  if (logicalType instanceof LogicalTypes.TimeMicros) {
    return OriginalType.TIME_MICROS;
  }
  if (logicalType instanceof LogicalTypes.TimestampMillis) {
    return OriginalType.TIMESTAMP_MILLIS;
  }
  if (logicalType instanceof LogicalTypes.TimestampMicros) {
    return OriginalType.TIMESTAMP_MICROS;
  }
  // Any other avro logical type has no parquet annotation here.
  return null;
}
开发者ID:apache,项目名称:parquet-mr,代码行数:19,代码来源:AvroSchemaConverter.java
示例15: convertOriginalType
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Reverse mapping: translates a parquet {@link OriginalType} annotation back into an
 * avro {@link LogicalType}, or {@code null} when no avro equivalent exists.
 */
private LogicalType convertOriginalType(OriginalType annotation, DecimalMetadata meta) {
  if (annotation == null) {
    return null;
  }
  switch (annotation) {
    case DECIMAL:
      // Precision/scale travel in the decimal metadata, not the annotation itself.
      return LogicalTypes.decimal(meta.getPrecision(), meta.getScale());
    case DATE:
      return LogicalTypes.date();
    case TIME_MILLIS:
      return LogicalTypes.timeMillis();
    case TIME_MICROS:
      return LogicalTypes.timeMicros();
    case TIMESTAMP_MILLIS:
      return LogicalTypes.timestampMillis();
    case TIMESTAMP_MICROS:
      return LogicalTypes.timestampMicros();
    default:
      return null;
  }
}
开发者ID:apache,项目名称:parquet-mr,代码行数:21,代码来源:AvroSchemaConverter.java
示例16: groupConverterFromArrowSchema
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Builds a group converter driven by the arrow schema rather than the parquet schema.
 * Unions add their children directly to the parent; lists are cross-checked against the
 * parquet schema and then delegated to {@code defaultGroupConverter}.
 *
 * @param nameForChild name the child writer is created under
 * @param fieldName field name used to look up the arrow field
 * @param groupType the parquet group type being converted
 * @param c columns selected for reading
 */
Converter groupConverterFromArrowSchema(String nameForChild, String fieldName, GroupType groupType, Collection<SchemaPath> c) {
  final Field arrowField = Schema.findField(arrowSchema, fieldName);
  final ArrowTypeID arrowTypeType = arrowField.getType().getTypeID();
  final List<Field> arrowChildren = arrowField.getChildren();
  if (arrowTypeType == ArrowTypeID.Union) {
    // if it's a union we will add the children directly to the parent
    return new UnionGroupConverter(mutator, getWriterProvider(), groupType, c, options, arrowChildren, nameForChild, containsCorruptedDates, readInt96AsTimeStamp);
  } else if (arrowTypeType == ArrowTypeID.List) {
    // Make sure the parquet schema matches the arrow schema, then delegate handling the
    // logical list to defaultGroupConverter(). The %s-template overload defers building
    // the message string until the check actually fails.
    Preconditions.checkState(groupType.getOriginalType() == OriginalType.LIST,
        "parquet schema doesn't match the arrow schema for LIST %s", nameForChild);
  }
  return defaultGroupConverter(mutator, groupType, nameForChild, c, arrowChildren);
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:15,代码来源:ParquetGroupConverter.java
示例17: defaultGroupConverter
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Default conversion for a parquet group: logical LIST groups get a dedicated list
 * converter, everything else becomes a struct written through a {@link MapWriter}.
 */
Converter defaultGroupConverter(OutputMutator mutator, GroupType groupType, final String nameForChild,
    Collection<SchemaPath> c, List<Field> arrowSchema) {
  if (groupType.getOriginalType() == OriginalType.LIST && LogicalListL1Converter.isSupportedSchema(groupType)) {
    return new LogicalListL1Converter(
        nameForChild,
        mutator,
        getWriterProvider(),
        groupType,
        c,
        options,
        arrowSchema,
        containsCorruptedDates,
        readInt96AsTimeStamp);
  }

  List<Field> childFields = arrowSchema;
  final MapWriter map;
  if (groupType.isRepetition(REPEATED)) {
    if (childFields != null) {
      //TODO assert this should never occur at this level
      // The only parquet writer that emits an arrow schema writes repeated fields solely
      // as part of a LOGICAL LIST, so repeated-with-arrow-schema should only be seen by
      // the LogicalList converter.
      childFields = handleRepeatedField(childFields, groupType);
    }
    map = list(nameForChild).map();
  } else {
    map = getWriterProvider().map(nameForChild);
  }
  return new StructGroupConverter(mutator, map, groupType, c, options, childFields, containsCorruptedDates, readInt96AsTimeStamp);
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:32,代码来源:ParquetGroupConverter.java
示例18: getOriginalType
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Descends through the schema along {@code path} starting at {@code depth} until a
 * primitive leaf is reached, and returns that leaf's original (logical) type annotation.
 */
private OriginalType getOriginalType(Type type, String[] path, int depth) {
  Type current = type;
  int level = depth;
  // Iterative descent; each step resolves the next path segment inside the group.
  while (!current.isPrimitive()) {
    current = ((GroupType) current).getType(path[level]);
    level++;
  }
  return current.getOriginalType();
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:8,代码来源:Metadata.java
示例19: getParquetFileMetadata
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * Reads the footer of {@code file} and summarizes it: per-column type information (with
 * original/logical type annotations) plus, per row group, column statistics, byte length
 * and host affinity.
 *
 * @param file the parquet file to summarize
 * @return the collected {@link ParquetFileMetadata}
 * @throws IOException if the footer cannot be read
 */
private ParquetFileMetadata getParquetFileMetadata(FileStatus file) throws IOException {
  final ParquetMetadata metadata =
      SingletonParquetFooterCache.readFooter(fs, file, ParquetMetadataConverter.NO_FILTER);
  final MessageType schema = metadata.getFileMetaData().getSchema();

  // Resolve the original (logical) type for every leaf column up front.
  // (The previous version also called schema.getPaths() once with the result discarded.)
  final Map<SchemaPath, OriginalType> originalTypeMap = Maps.newHashMap();
  for (String[] path : schema.getPaths()) {
    originalTypeMap.put(SchemaPath.getCompoundPath(path), getOriginalType(schema, path, 0));
  }

  final List<RowGroupMetadata> rowGroupMetadataList = Lists.newArrayList();
  // Date-corruption detection scans all columns (star column).
  final ArrayList<SchemaPath> allColumns = new ArrayList<>();
  allColumns.add(AbstractRecordReader.STAR_COLUMN);
  final boolean autoCorrectCorruptDates = formatConfig.autoCorrectCorruptDates;
  final ParquetReaderUtility.DateCorruptionStatus containsCorruptDates =
      ParquetReaderUtility.detectCorruptDates(metadata, allColumns, autoCorrectCorruptDates);
  if (logger.isDebugEnabled()) {
    logger.debug(containsCorruptDates.toString());
  }

  final Map<ColumnTypeMetadata.Key, ColumnTypeMetadata> columnTypeInfo = Maps.newHashMap();
  for (BlockMetaData rowGroup : metadata.getBlocks()) {
    final List<ColumnMetadata> columnMetadataList = Lists.newArrayList();
    long length = 0;
    for (ColumnChunkMetaData col : rowGroup.getColumns()) {
      final Statistics<?> stats = col.getStatistics();
      final boolean statsAvailable = stats != null && !stats.isEmpty();
      final String[] columnName = col.getPath().toArray();
      final SchemaPath columnSchemaName = SchemaPath.getCompoundPath(columnName);
      final ColumnTypeMetadata columnTypeMetadata =
          new ColumnTypeMetadata(columnName, col.getType(), originalTypeMap.get(columnSchemaName));
      columnTypeInfo.put(new ColumnTypeMetadata.Key(columnTypeMetadata.name), columnTypeMetadata);

      final ColumnMetadata columnMetadata;
      if (statsAvailable) {
        // Write stats only if minVal==maxVal. Also, we then store only maxVal.
        Object mxValue = null;
        if (stats.genericGetMax() != null && stats.genericGetMin() != null
            && stats.genericGetMax().equals(stats.genericGetMin())) {
          mxValue = stats.genericGetMax();
          // Older writers stored corrupt DATE values; fix them when the footer says so.
          if (containsCorruptDates == ParquetReaderUtility.DateCorruptionStatus.META_SHOWS_CORRUPTION
              && columnTypeMetadata.originalType == OriginalType.DATE) {
            mxValue = ParquetReaderUtility.autoCorrectCorruptedDate((Integer) mxValue);
          }
        }
        columnMetadata = new ColumnMetadata(columnTypeMetadata.name, mxValue, stats.getNumNulls());
      } else {
        columnMetadata = new ColumnMetadata(columnTypeMetadata.name, null, null);
      }
      columnMetadataList.add(columnMetadata);
      length += col.getTotalSize();
    }
    rowGroupMetadataList.add(
        new RowGroupMetadata(rowGroup.getStartingPos(), length, rowGroup.getRowCount(),
            getHostAffinity(file, rowGroup.getStartingPos(), length), columnMetadataList));
  }
  return new ParquetFileMetadata(file, file.getLen(), rowGroupMetadataList, columnTypeInfo);
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:68,代码来源:Metadata.java
示例20: checkForPartitionColumn
import org.apache.parquet.schema.OriginalType; //导入依赖的package包/类
/**
 * When reading the very first footer, any column is a potential partition column. So for the first footer, we check
 * every column to see if it is single valued, and if so, add it to the list of potential partition columns. For the
 * remaining footers, we will not find any new partition columns, but we may discover that what was previously a
 * potential partition column now no longer qualifies, so it needs to be removed from the list.
 *
 * @param fileMetadata metadata of the footer currently being examined
 * @param columnMetadata metadata of the column to check
 * @param first whether this is the very first footer
 * @param rowCount row count used by the single-value check
 * @return whether column is a potential partition column
 */
private boolean checkForPartitionColumn(ParquetFileMetadata fileMetadata, ColumnMetadata columnMetadata, boolean first, long rowCount) {
  final SchemaPath schemaPath = SchemaPath.getCompoundPath(columnMetadata.getName());
  // The synthetic update column is always treated as a partition column.
  if (schemaPath.getAsUnescapedPath().equals(UPDATE_COLUMN)) {
    return true;
  }
  final PrimitiveTypeName primitiveType = fileMetadata.getPrimitiveType(columnMetadata.getName());
  final OriginalType originalType = fileMetadata.getOriginalType(columnMetadata.getName());

  if (first) {
    // First footer: a single-valued column becomes a candidate and its type is recorded.
    if (!hasSingleValue(columnMetadata, rowCount)) {
      return false;
    }
    columnTypeMap.put(schemaPath, getType(primitiveType, originalType));
    return true;
  }

  // Subsequent footers can only disqualify previously discovered candidates.
  if (!columnTypeMap.containsKey(schemaPath)) {
    return false;
  }
  // Disqualify when no longer single-valued or when the type disagrees across footers.
  if (!hasSingleValue(columnMetadata, rowCount)
      || !getType(primitiveType, originalType).equals(columnTypeMap.get(schemaPath))) {
    columnTypeMap.remove(schemaPath);
    return false;
  }
  return true;
}
开发者ID:dremio,项目名称:dremio-oss,代码行数:39,代码来源:ParquetGroupScanUtils.java
注:本文中的org.apache.parquet.schema.OriginalType类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论