本文整理汇总了Java中org.apache.parquet.schema.MessageTypeParser类的典型用法代码示例。如果您正苦于以下问题：Java MessageTypeParser类的具体用法？Java MessageTypeParser怎么用？Java MessageTypeParser使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
MessageTypeParser类属于org.apache.parquet.schema包,在下文中一共展示了MessageTypeParser类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: testToMessageType
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Checks that {@code ThriftSchemaConverter} turns {@code AddressBook.class}
 * into the Parquet schema written out textually below.
 */
@Test
public void testToMessageType() throws Exception {
  final String expectedSchemaText =
      "message ParquetSchema {\n" +
      " optional group persons (LIST) = 1 {\n" +
      " repeated group persons_tuple {\n" +
      " required group name = 1 {\n" +
      " optional binary first_name (UTF8) = 1;\n" +
      " optional binary last_name (UTF8) = 2;\n" +
      " }\n" +
      " optional int32 id = 2;\n" +
      " optional binary email (UTF8) = 3;\n" +
      " optional group phones (LIST) = 4 {\n" +
      " repeated group phones_tuple {\n" +
      " optional binary number (UTF8) = 1;\n" +
      " optional binary type (ENUM) = 2;\n" +
      " }\n" +
      " }\n" +
      " }\n" +
      " }\n" +
      "}";

  // Convert first, then parse the reference text, and compare the two schemas.
  final MessageType actualSchema = new ThriftSchemaConverter().convert(AddressBook.class);
  final MessageType expectedSchema = MessageTypeParser.parseMessageType(expectedSchemaText);
  assertEquals(expectedSchema, actualSchema);
}
开发者ID:apache,项目名称:parquet-mr,代码行数:26,代码来源:TestThriftSchemaConverter.java
示例2: testTimeAnnotations
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Parses a schema carrying DATE, TIME_MILLIS, TIMESTAMP_MILLIS and INTERVAL
 * annotations and checks it against the same schema assembled with the
 * {@code Types} builder, both directly and after a {@code toString()} round trip.
 */
@Test
public void testTimeAnnotations() {
  final String schemaText = "message TimeMessage {" +
      " required int32 date (DATE);" +
      " required int32 time (TIME_MILLIS);" +
      " required int64 timestamp (TIMESTAMP_MILLIS);" +
      " required FIXED_LEN_BYTE_ARRAY(12) interval (INTERVAL);" +
      "}\n";
  final MessageType fromText = MessageTypeParser.parseMessageType(schemaText);

  final MessageType built = Types.buildMessage()
      .required(INT32).as(DATE).named("date")
      .required(INT32).as(TIME_MILLIS).named("time")
      .required(INT64).as(TIMESTAMP_MILLIS).named("timestamp")
      .required(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("interval")
      .named("TimeMessage");
  assertEquals(built, fromText);

  // The printed form of the parsed schema must parse back to the same schema.
  final MessageType roundTripped = MessageTypeParser.parseMessageType(fromText.toString());
  assertEquals(built, roundTripped);
}
开发者ID:apache,项目名称:parquet-mr,代码行数:22,代码来源:TestParquetParser.java
示例3: testEmbeddedAnnotations
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Parses a schema with JSON and BSON annotated binary fields and checks it
 * against the builder-made equivalent, directly and after a
 * {@code toString()} round trip.
 */
@Test
public void testEmbeddedAnnotations() {
  final String schemaText = "message EmbeddedMessage {" +
      " required binary json (JSON);" +
      " required binary bson (BSON);" +
      "}\n";
  final MessageType fromText = MessageTypeParser.parseMessageType(schemaText);

  final MessageType built = Types.buildMessage()
      .required(BINARY).as(JSON).named("json")
      .required(BINARY).as(BSON).named("bson")
      .named("EmbeddedMessage");
  assertEquals(built, fromText);

  // The printed form of the parsed schema must parse back to the same schema.
  final MessageType roundTripped = MessageTypeParser.parseMessageType(fromText.toString());
  assertEquals(built, roundTripped);
}
开发者ID:apache,项目名称:parquet-mr,代码行数:18,代码来源:TestParquetParser.java
示例4: testMemColumn
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes one long value to the {@code foo.bar} column through a
 * {@code MemPageStore}-backed write store, then reads the column back and
 * checks the levels and the value of every entry the reader reports.
 */
@Test
public void testMemColumn() throws Exception {
  MessageType schema = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  ColumnDescriptor path = schema.getColumnDescription(new String[] {"foo", "bar"});
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  columnWriter.write(42L, 0, 0);
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, schema);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    // Expected value goes first so a failure reports "expected"/"actual" correctly.
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42L, columnReader.getLong());
    columnReader.consume();
  }
}
开发者ID:apache,项目名称:parquet-mr,代码行数:19,代码来源:TestMemColumn.java
示例5: testMemColumnBinary
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes one binary value ("42") to the {@code foo.bar} column through a
 * {@code MemPageStore}-backed write store, then reads the column back and
 * checks the levels and the value of every entry the reader reports.
 */
@Test
public void testMemColumnBinary() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required binary bar; } }");
  String[] col = new String[]{"foo", "bar"};
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  columnWriter.write(Binary.fromString("42"), 0, 0);
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    // Expected value goes first so a failure reports "expected"/"actual" correctly.
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals("42", columnReader.getBinary().toStringUsingUTF8());
    columnReader.consume();
  }
}
开发者ID:apache,项目名称:parquet-mr,代码行数:23,代码来源:TestMemColumn.java
示例6: testMemColumnSeveralPages
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes the long value 42 two thousand times to the {@code foo.bar} column
 * through a {@code MemPageStore}-backed write store, then reads the column
 * back and checks the levels and the value of every entry the reader reports.
 */
@Test
public void testMemColumnSeveralPages() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  String[] col = new String[]{"foo", "bar"};
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  for (int i = 0; i < 2000; i++) {
    columnWriter.write(42L, 0, 0);
  }
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    // Expected value goes first so a failure reports "expected"/"actual" correctly.
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42L, columnReader.getLong());
    columnReader.consume();
  }
}
开发者ID:apache,项目名称:parquet-mr,代码行数:24,代码来源:TestMemColumn.java
示例7: testOneOfEach
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Builds a single group that sets fields {@code a} through {@code h} of the
 * {@code oneOfEach} schema and passes it to {@code testSchema}.
 */
@Test
public void testOneOfEach() {
  final MessageType oneOfEachSchema = MessageTypeParser.parseMessageType(oneOfEach);
  final GroupFactory factory = new SimpleGroupFactory(oneOfEachSchema);

  // One appended value per field, each of a different Java type.
  final Group record = factory.newGroup()
      .append("a", 1L)
      .append("b", 2)
      .append("c", 3.0f)
      .append("d", 4.0d)
      .append("e", true)
      .append("f", Binary.fromString("6"))
      .append("g", new NanoTime(1234, System.currentTimeMillis() * 1000))
      .append("h", Binary.fromString("abc"));

  testSchema(oneOfEachSchema, Arrays.asList(record));
}
开发者ID:apache,项目名称:parquet-mr,代码行数:17,代码来源:TestColumnIO.java
示例8: runMapReduceJob
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Clears the Parquet and output paths, configures {@code jobConf} as a
 * map-only job that reads text and writes through
 * {@code DeprecatedParquetOutputFormat}, runs it, and stores the result in
 * {@code mapRedJob}.
 *
 * @param codec compression codec handed to {@code DeprecatedParquetOutputFormat}
 */
private void runMapReduceJob(CompressionCodecName codec) throws IOException, ClassNotFoundException, InterruptedException {
  // Remove anything left at the target locations (recursive delete).
  final FileSystem fs = parquetPath.getFileSystem(conf);
  fs.delete(parquetPath, true);
  fs.delete(outputPath, true);

  // Input side: plain text, no reducers.
  jobConf.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(jobConf, inputPath);
  jobConf.setNumReduceTasks(0);

  // Output side: Parquet groups written with the requested codec and schema.
  jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
  DeprecatedParquetOutputFormat.setCompression(jobConf, codec);
  DeprecatedParquetOutputFormat.setOutputPath(jobConf, parquetPath);
  DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, GroupWriteSupport.class);
  GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), jobConf);

  jobConf.setMapperClass(DeprecatedMapper.class);
  mapRedJob = JobClient.runJob(jobConf);
}
开发者ID:apache,项目名称:parquet-mr,代码行数:21,代码来源:DeprecatedOutputFormatTest.java
示例9: run
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes 50,000 three-field records ({@code id}, {@code content},
 * {@code int64_field}) to an uncompressed Parquet file at a hard-coded
 * local path.
 *
 * <p>An {@link IOException} from opening, writing or closing the file is
 * printed to standard error and not rethrown, so callers never see a failure.
 */
public void run() {
  Configuration conf = new Configuration();
  int blockSize = 1024;
  int pageSize = 1024;
  int dictionaryPageSize = 512;
  boolean enableDictionary = false;
  boolean validating = false;
  // NOTE(review): developer-machine path is hard-coded; left unchanged to preserve behavior.
  Path basePath = new Path("file:///Users/Jelly/Developer/test");
  MessageType schema = MessageTypeParser.parseMessageType("message test {" +
      "required binary id; " +
      "required binary content; " +
      "required int64 int64_field; " +
      "}");
  GroupWriteSupport writeSupport = new GroupWriteSupport();
  // setSchema is static; call it on the class rather than through the instance.
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
  try {
    ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
        basePath,
        writeSupport,
        CompressionCodecName.UNCOMPRESSED,
        blockSize, pageSize, dictionaryPageSize,
        enableDictionary,
        validating,
        ParquetProperties.WriterVersion.PARQUET_2_0,
        conf);
    try {
      for (int i = 0; i < 50000; i++) {
        parquetWriter.write(groupFactory.newGroup()
            .append("id", "10")
            .append("content", "test" + i)
            .append("int64_field", (long) i));
      }
    } finally {
      // Close even when a write fails so the writer is not leaked.
      parquetWriter.close();
    }
  } catch (IOException e) {
    // Best-effort: report the failure and return, as the original did.
    e.printStackTrace();
  }
}
开发者ID:dbiir,项目名称:RealtimeAnalysis,代码行数:39,代码来源:ParquetWriterThread.java
示例10: writeParquetFile
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes every record in {@code data} to a Parquet file using the schema
 * parsed from {@code rawSchema}.
 *
 * @param rawSchema         textual Parquet message type, parsed with {@code MessageTypeParser}
 * @param outputParquetFile destination file; its URI is used as the writer path
 * @param data              records to write, in iteration order
 * @throws IOException declared by the method; presumably raised by the writer on I/O failure
 */
private static void writeParquetFile(String rawSchema, File outputParquetFile, List<EventRecord> data)
  throws IOException
{
  Path path = new Path(outputParquetFile.toURI());
  MessageType schema = MessageTypeParser.parseMessageType(rawSchema);
  ParquetPOJOWriter writer = new ParquetPOJOWriter(path, schema, EventRecord.class, true);
  try {
    for (EventRecord eventRecord : data) {
      writer.write(eventRecord);
    }
  } finally {
    // Close even when a write fails so the writer is not leaked.
    writer.close();
  }
}
开发者ID:apache,项目名称:apex-malhar,代码行数:12,代码来源:ParquetFilePOJOReaderTest.java
示例11: testTajoToParquetConversion
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Converts {@code tajoSchema} with {@code TajoSchemaConverter} and asserts
 * that the result prints identically to the schema parsed from
 * {@code schemaString}.
 *
 * @param tajoSchema   schema to convert
 * @param schemaString textual Parquet message type the conversion should match
 */
private void testTajoToParquetConversion(
    Schema tajoSchema, String schemaString) throws Exception {
  TajoSchemaConverter converter = new TajoSchemaConverter();
  MessageType schema = converter.convert(tajoSchema);
  MessageType expected = MessageTypeParser.parseMessageType(schemaString);
  // The message names the conversion input (tajoSchema), not its output;
  // the output already appears as the "actual" value in the failure report.
  assertEquals("converting " + tajoSchema + " to " + schemaString,
      expected.toString(), schema.toString());
}
开发者ID:apache,项目名称:tajo,代码行数:9,代码来源:TestSchemaConverter.java
示例12: testParquetToTajoConversion
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Parses {@code schemaString}, converts the result with
 * {@code TajoSchemaConverter}, and asserts that it prints identically to
 * {@code tajoSchema}.
 *
 * @param tajoSchema   schema the conversion should produce
 * @param schemaString textual Parquet message type to convert
 */
private void testParquetToTajoConversion(
    Schema tajoSchema, String schemaString) throws Exception {
  final TajoSchemaConverter schemaConverter = new TajoSchemaConverter();
  final MessageType parquetSchema = MessageTypeParser.parseMessageType(schemaString);
  final Schema converted = schemaConverter.convert(parquetSchema);

  final String failureMessage = "converting " + schemaString + " to " + tajoSchema;
  assertEquals(failureMessage, tajoSchema.toString(), converted.toString());
}
开发者ID:apache,项目名称:tajo,代码行数:9,代码来源:TestSchemaConverter.java
示例13: testIntAnnotations
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Parses a schema carrying the signed and unsigned integer annotations
 * (INT_8 through UINT_64) and checks it against the builder-made equivalent,
 * directly and after a {@code toString()} round trip.
 */
@Test
public void testIntAnnotations() {
  final String schemaText = "message IntMessage {" +
      " required int32 i8 (INT_8);" +
      " required int32 i16 (INT_16);" +
      " required int32 i32 (INT_32);" +
      " required int64 i64 (INT_64);" +
      " required int32 u8 (UINT_8);" +
      " required int32 u16 (UINT_16);" +
      " required int32 u32 (UINT_32);" +
      " required int64 u64 (UINT_64);" +
      "}\n";
  final MessageType fromText = MessageTypeParser.parseMessageType(schemaText);

  final MessageType built = Types.buildMessage()
      .required(INT32).as(INT_8).named("i8")
      .required(INT32).as(INT_16).named("i16")
      .required(INT32).as(INT_32).named("i32")
      .required(INT64).as(INT_64).named("i64")
      .required(INT32).as(UINT_8).named("u8")
      .required(INT32).as(UINT_16).named("u16")
      .required(INT32).as(UINT_32).named("u32")
      .required(INT64).as(UINT_64).named("u64")
      .named("IntMessage");
  assertEquals(built, fromText);

  // The printed form of the parsed schema must parse back to the same schema.
  final MessageType roundTripped = MessageTypeParser.parseMessageType(fromText.toString());
  assertEquals(built, roundTripped);
}
开发者ID:apache,项目名称:parquet-mr,代码行数:30,代码来源:TestParquetParser.java
示例14: test
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes {@code rows} binary values with {@code ColumnWriterV2}, cutting a
 * page after every 1000th value, then checks the page totals and replays the
 * pages through {@code ColumnReaderImpl} into a {@code ValidatingConverter}.
 */
@Test
public void test() throws Exception {
  final MessageType schema = MessageTypeParser.parseMessageType("message test { required binary foo; }");
  final ColumnDescriptor col = schema.getColumns().get(0);
  final MemPageWriter pageWriter = new MemPageWriter();
  final ParquetProperties props = ParquetProperties.builder()
      .withDictionaryPageSize(1024).withWriterVersion(PARQUET_2_0)
      .withPageSize(2048).build();
  final ColumnWriterV2 writer = new ColumnWriterV2(col, pageWriter, props);

  // Write phase: values cycle through "bar0".."bar9".
  for (int i = 0; i < rows; i++) {
    writer.write(Binary.fromString("bar" + (i % 10)), 0, 0);
    if (i % 1000 == 999) {
      writer.writePage(i);
    }
  }
  writer.writePage(rows);
  writer.finalizeColumnChunk();

  // Page totals must add up to the number of rows written.
  final List<DataPage> pages = pageWriter.getPages();
  int totalValues = 0;
  int totalRows = 0;
  for (DataPage page : pages) {
    totalValues += page.getValueCount();
    final DataPageV2 v2Page = (DataPageV2) page;
    totalRows += v2Page.getRowCount();
  }
  assertEquals(rows, totalRows);
  assertEquals(rows, totalValues);

  // Read phase: every value is at levels 0/0 and is passed to the converter.
  final MemPageReader pageReader = new MemPageReader((long) rows, pages.iterator(), pageWriter.getDictionaryPage());
  final ValidatingConverter converter = new ValidatingConverter();
  final ColumnReader reader = new ColumnReaderImpl(col, pageReader, converter, VersionParser.parse(Version.FULL_VERSION));
  for (int i = 0; i < rows; i++) {
    assertEquals(0, reader.getCurrentRepetitionLevel());
    assertEquals(0, reader.getCurrentDefinitionLevel());
    reader.writeCurrentValueToConverter();
    reader.consume();
  }
  assertEquals(rows, converter.count);
}
开发者ID:apache,项目名称:parquet-mr,代码行数:38,代码来源:TestColumnReaderImpl.java
示例15: testOptional
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes {@code rows} nulls to an optional binary column with
 * {@code ColumnWriterV2}, cutting a page after every 1000th entry, then
 * checks the page totals and that reading them back leaves the
 * {@code ValidatingConverter} count at zero.
 */
@Test
public void testOptional() throws Exception {
  final MessageType schema = MessageTypeParser.parseMessageType("message test { optional binary foo; }");
  final ColumnDescriptor col = schema.getColumns().get(0);
  final MemPageWriter pageWriter = new MemPageWriter();
  final ParquetProperties props = ParquetProperties.builder()
      .withDictionaryPageSize(1024).withWriterVersion(PARQUET_2_0)
      .withPageSize(2048).build();
  final ColumnWriterV2 writer = new ColumnWriterV2(col, pageWriter, props);

  // Write phase: nulls only.
  for (int i = 0; i < rows; i++) {
    writer.writeNull(0, 0);
    if (i % 1000 == 999) {
      writer.writePage(i);
    }
  }
  writer.writePage(rows);
  writer.finalizeColumnChunk();

  // Page totals must add up to the number of rows written.
  final List<DataPage> pages = pageWriter.getPages();
  int totalValues = 0;
  int totalRows = 0;
  for (DataPage page : pages) {
    totalValues += page.getValueCount();
    final DataPageV2 v2Page = (DataPageV2) page;
    totalRows += v2Page.getRowCount();
  }
  assertEquals(rows, totalRows);
  assertEquals(rows, totalValues);

  // Read phase: entries are consumed without being passed to the converter.
  final MemPageReader pageReader = new MemPageReader((long) rows, pages.iterator(), pageWriter.getDictionaryPage());
  final ValidatingConverter converter = new ValidatingConverter();
  final ColumnReader reader = new ColumnReaderImpl(col, pageReader, converter, VersionParser.parse(Version.FULL_VERSION));
  for (int i = 0; i < rows; i++) {
    assertEquals(0, reader.getCurrentRepetitionLevel());
    assertEquals(0, reader.getCurrentDefinitionLevel());
    reader.consume();
  }
  assertEquals(0, converter.count);
}
开发者ID:apache,项目名称:parquet-mr,代码行数:37,代码来源:TestColumnReaderImpl.java
示例16: testRequiredOfRequired
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Builds one record for a schema with a required group {@code foo} holding a
 * required int64 {@code bar}, and passes it to {@code testSchema}.
 */
@Test
public void testRequiredOfRequired() {
  final MessageType reqreqSchema = MessageTypeParser.parseMessageType(
      "message Document {\n"
      + " required group foo {\n"
      + " required int64 bar;\n"
      + " }\n"
      + "}\n");
  final GroupFactory factory = new SimpleGroupFactory(reqreqSchema);

  // Document with foo.bar = 2.
  final Group document = factory.newGroup();
  final Group foo = document.addGroup("foo");
  foo.append("bar", 2L);

  testSchema(reqreqSchema, Arrays.asList(document));
}
开发者ID:apache,项目名称:parquet-mr,代码行数:16,代码来源:TestColumnIO.java
示例17: getParquetInputSplit
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Builds a {@code ParquetInputSplit} covering this object's row groups.
 *
 * <p>The split length is the summed total size of every column chunk whose
 * path is contained in the requested schema. The split starts at the
 * {@code hdfsBlock} offset and ends where the last row group ends.
 *
 * @param fileStatus          supplies the file path of the split
 * @param requestedSchema     textual Parquet message type naming the wanted columns
 * @param readSupportMetadata not read by this method
 * @return the assembled split
 * @throws IOException declared by the method; no visible statement throws it
 */
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema, Map<String, String> readSupportMetadata) throws IOException {
  final MessageType projection = MessageTypeParser.parseMessageType(requestedSchema);

  // Sum the sizes of only the column chunks that the projection selects.
  long projectedBytes = 0;
  for (BlockMetaData rowGroup : this.getRowGroups()) {
    final List<ColumnChunkMetaData> chunks = rowGroup.getColumns();
    for (ColumnChunkMetaData chunk : chunks) {
      final boolean selected = projection.containsPath(chunk.getPath().toArray());
      if (selected) {
        projectedBytes += chunk.getTotalSize();
      }
    }
  }

  // NOTE(review): looks like this assumes at least one row group (index count - 1) — confirm.
  final BlockMetaData tail = this.getRowGroups().get(this.getRowGroupCount() - 1);
  final long end = tail.getStartingPos() + tail.getTotalByteSize();

  // Starting position of each row group, in order.
  final long[] offsets = new long[this.getRowGroupCount()];
  for (int idx = 0; idx < offsets.length; idx++) {
    offsets[idx] = this.getRowGroups().get(idx).getStartingPos();
  }

  return new ParquetInputSplit(
      fileStatus.getPath(),
      hdfsBlock.getOffset(),
      end,
      projectedBytes,
      hdfsBlock.getHosts(),
      offsets
  );
}
开发者ID:apache,项目名称:parquet-mr,代码行数:31,代码来源:ParquetInputFormat.java
示例18: end
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Sums the total size of every column chunk in {@code blocks} whose path is
 * contained in the requested schema.
 *
 * @param blocks          row groups to scan
 * @param requestedSchema textual Parquet message type naming the wanted columns
 * @return the accumulated size of the selected column chunks
 */
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
  final MessageType projection = MessageTypeParser.parseMessageType(requestedSchema);
  long total = 0;
  for (BlockMetaData rowGroup : blocks) {
    final List<ColumnChunkMetaData> chunks = rowGroup.getColumns();
    for (ColumnChunkMetaData chunk : chunks) {
      final boolean selected = projection.containsPath(chunk.getPath().toArray());
      if (selected) {
        total += chunk.getTotalSize();
      }
    }
  }
  return total;
}
开发者ID:apache,项目名称:parquet-mr,代码行数:15,代码来源:ParquetInputSplit.java
示例19: testInitWithoutSpecifyingRequestSchema
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Calls {@code GroupReadSupport.init} with empty key/value metadata and a
 * fresh {@code Configuration}, and checks that the requested schema in the
 * returned context equals the file schema.
 */
@Test
public void testInitWithoutSpecifyingRequestSchema() throws Exception {
  GroupReadSupport s = new GroupReadSupport();
  Configuration configuration = new Configuration();
  Map<String, String> keyValueMetaData = new HashMap<String, String>();
  MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);
  ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);
  // Expected value goes first so a failure reports "expected"/"actual" correctly.
  assertEquals(fileSchema, context.getRequestedSchema());
}
开发者ID:apache,项目名称:parquet-mr,代码行数:11,代码来源:GroupReadSupportTest.java
示例20: setUp
import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Fixture setup: fills {@code blocks} with ten blocks made by
 * {@code newBlock(start, 10)} for start = 0, 10, ..., 90, parses a one-column
 * schema, and wraps it in a {@code FileMetaData} with empty key/value metadata.
 */
@Before
public void setUp() {
  blocks = new ArrayList<BlockMetaData>();
  for (int start = 0; start < 100; start += 10) {
    blocks.add(newBlock(start, 10));
  }

  schema = MessageTypeParser.parseMessageType("message doc { required binary foo; }");
  final HashMap<String, String> noMetadata = new HashMap<String, String>();
  fileMetaData = new FileMetaData(schema, noMetadata, "parquet-mr");
}
开发者ID:apache,项目名称:parquet-mr,代码行数:10,代码来源:TestInputFormat.java
注：本文中的org.apache.parquet.schema.MessageTypeParser类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论