• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Java MessageTypeParser类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Java中org.apache.parquet.schema.MessageTypeParser的典型用法代码示例。如果您正苦于以下问题:Java MessageTypeParser类的具体用法?Java MessageTypeParser怎么用?Java MessageTypeParser使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



MessageTypeParser类属于org.apache.parquet.schema包,在下文中一共展示了MessageTypeParser类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: testToMessageType

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Converts the Thrift {@code AddressBook} class with {@link ThriftSchemaConverter} and compares
 * the result with the schema obtained by parsing the textual definition below.
 */
@Test
public void testToMessageType() throws Exception {
  final String expectedSchemaText =
      "message ParquetSchema {\n"
      + "  optional group persons (LIST) = 1 {\n"
      + "    repeated group persons_tuple {\n"
      + "      required group name = 1 {\n"
      + "        optional binary first_name (UTF8) = 1;\n"
      + "        optional binary last_name (UTF8) = 2;\n"
      + "      }\n"
      + "      optional int32 id = 2;\n"
      + "      optional binary email (UTF8) = 3;\n"
      + "      optional group phones (LIST) = 4 {\n"
      + "        repeated group phones_tuple {\n"
      + "          optional binary number (UTF8) = 1;\n"
      + "          optional binary type (ENUM) = 2;\n"
      + "        }\n"
      + "      }\n"
      + "    }\n"
      + "  }\n"
      + "}";

  // Run the conversion first, then parse the reference text for the comparison.
  final ThriftSchemaConverter converter = new ThriftSchemaConverter();
  final MessageType actualSchema = converter.convert(AddressBook.class);
  final MessageType expectedSchema = MessageTypeParser.parseMessageType(expectedSchemaText);

  assertEquals(expectedSchema, actualSchema);
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:26,代码来源:TestThriftSchemaConverter.java


示例2: testTimeAnnotations

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Parses a schema using the DATE, TIME_MILLIS, TIMESTAMP_MILLIS and INTERVAL annotations,
 * compares it with the same schema assembled through {@code Types.buildMessage()}, and then
 * checks that re-parsing the printed schema gives an equal result.
 */
@Test
public void testTimeAnnotations() {
  final String schemaText =
      "message TimeMessage {"
      + "  required int32 date (DATE);"
      + "  required int32 time (TIME_MILLIS);"
      + "  required int64 timestamp (TIMESTAMP_MILLIS);"
      + "  required FIXED_LEN_BYTE_ARRAY(12) interval (INTERVAL);"
      + "}\n";

  final MessageType fromText = MessageTypeParser.parseMessageType(schemaText);

  final MessageType fromBuilder = Types.buildMessage()
      .required(INT32).as(DATE).named("date")
      .required(INT32).as(TIME_MILLIS).named("time")
      .required(INT64).as(TIMESTAMP_MILLIS).named("timestamp")
      .required(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("interval")
      .named("TimeMessage");
  assertEquals(fromBuilder, fromText);

  // Round trip: print the parsed schema and parse that text again.
  final String printed = fromText.toString();
  assertEquals(fromBuilder, MessageTypeParser.parseMessageType(printed));
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:22,代码来源:TestParquetParser.java


示例3: testEmbeddedAnnotations

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Parses a schema whose binary fields carry the JSON and BSON annotations, compares it with the
 * builder-made equivalent, and checks that the printed schema parses back to an equal result.
 */
@Test
public void testEmbeddedAnnotations() {
  final String schemaText =
      "message EmbeddedMessage {"
      + "  required binary json (JSON);"
      + "  required binary bson (BSON);"
      + "}\n";

  final MessageType fromText = MessageTypeParser.parseMessageType(schemaText);

  final MessageType fromBuilder = Types.buildMessage()
      .required(BINARY).as(JSON).named("json")
      .required(BINARY).as(BSON).named("bson")
      .named("EmbeddedMessage");
  assertEquals(fromBuilder, fromText);

  // Round trip: print the parsed schema and parse that text again.
  final String printed = fromText.toString();
  assertEquals(fromBuilder, MessageTypeParser.parseMessageType(printed));
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:18,代码来源:TestParquetParser.java


示例4: testMemColumn

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes a single {@code long} value to the column {@code foo.bar} through an in-memory page
 * store, then reads the column back and checks the levels and the value of every entry.
 */
@Test
public void testMemColumn() throws Exception {
  MessageType schema = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  ColumnDescriptor path = schema.getColumnDescription(new String[] {"foo", "bar"});
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  // Upper-case suffix: a lower-case 'l' is easily misread as the digit 1.
  columnWriter.write(42L, 0, 0);
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, schema);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the right value as "expected".
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42L, columnReader.getLong());
    columnReader.consume();
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:19,代码来源:TestMemColumn.java


示例5: testMemColumnBinary

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes a single binary value ({@code "42"}) to the column {@code foo.bar} through an in-memory
 * page store, then reads the column back and checks the levels and the value of every entry.
 */
@Test
public void testMemColumnBinary() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required binary bar; } }");
  String[] col = new String[]{"foo", "bar"};
  MemPageStore memPageStore = new MemPageStore(10);

  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);

  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  columnWriter.write(Binary.fromString("42"), 0, 0);
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the right value as "expected".
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals("42", columnReader.getBinary().toStringUsingUTF8());
    columnReader.consume();
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:23,代码来源:TestMemColumn.java


示例6: testMemColumnSeveralPages

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes 2000 {@code long} values to the column {@code foo.bar} through an in-memory page store,
 * then reads the column back and checks the levels and the value of every entry.
 */
@Test
public void testMemColumnSeveralPages() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  String[] col = new String[]{"foo", "bar"};
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);

  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  for (int i = 0; i < 2000; i++) {
    // Upper-case suffix: a lower-case 'l' is easily misread as the digit 1.
    columnWriter.write(42L, 0, 0);
  }
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the right value as "expected".
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42L, columnReader.getLong());
    columnReader.consume();
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:24,代码来源:TestMemColumn.java


示例7: testOneOfEach

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Builds one group that sets fields {@code a} through {@code h} of the {@code oneOfEach} schema
 * and hands the schema plus that single group to {@code testSchema}.
 */
@Test
public void testOneOfEach() {
  final MessageType schema = MessageTypeParser.parseMessageType(oneOfEach);
  final GroupFactory groupFactory = new SimpleGroupFactory(schema);

  final Group sample = groupFactory.newGroup()
      .append("a", 1L)
      .append("b", 2)
      .append("c", 3.0f)
      .append("d", 4.0d)
      .append("e", true)
      .append("f", Binary.fromString("6"))
      .append("g", new NanoTime(1234, System.currentTimeMillis() * 1000))
      .append("h", Binary.fromString("abc"));

  testSchema(schema, Arrays.asList(sample));
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:17,代码来源:TestColumnIO.java


示例8: runMapReduceJob

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Deletes any previous Parquet and output directories, configures {@code jobConf} as a map-only
 * job that reads text input and writes Parquet with the given codec, and runs it.
 *
 * @param codec compression codec passed to {@code DeprecatedParquetOutputFormat.setCompression}
 */
private void runMapReduceJob(CompressionCodecName codec) throws IOException, ClassNotFoundException, InterruptedException {
  // Start from a clean slate: recursively remove earlier results.
  final FileSystem fs = parquetPath.getFileSystem(conf);
  fs.delete(parquetPath, true);
  fs.delete(outputPath, true);

  // Input side: plain text, no reducers.
  jobConf.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(jobConf, inputPath);
  jobConf.setNumReduceTasks(0);

  // Output side: Parquet written through the group write support.
  jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
  DeprecatedParquetOutputFormat.setCompression(jobConf, codec);
  DeprecatedParquetOutputFormat.setOutputPath(jobConf, parquetPath);
  DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, GroupWriteSupport.class);
  GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), jobConf);

  jobConf.setMapperClass(DeprecatedMapper.class);
  mapRedJob = JobClient.runJob(jobConf);
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:21,代码来源:DeprecatedOutputFormatTest.java


示例9: run

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes 50,000 sample rows ({@code id}, {@code content}, {@code int64_field}) to a Parquet file
 * at a fixed local path, uncompressed and with dictionary encoding and validation turned off.
 *
 * <p>An {@link IOException} raised while opening, writing or closing the file is printed to
 * standard error and not rethrown.
 */
public void run() {
    Configuration conf = new Configuration();
    int blockSize = 1 * 1024;
    int pageSize = 1 * 1024;
    int dictionaryPageSize = 512;
    boolean enableDictionary = false;
    boolean validating = false;
    Path basePath = new Path("file:///Users/Jelly/Developer/test");
    MessageType schema = MessageTypeParser.parseMessageType("message test {" +
            "required binary id; " +
            "required binary content; " +
            "required int64 int64_field; " +
            "}");
    GroupWriteSupport writeSupport = new GroupWriteSupport();
    // setSchema is a static method; call it on the class rather than through an instance.
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);

    // try-with-resources closes the writer even when a write fails part-way through,
    // so the file handle is not leaked. The explicit type argument replaces a raw type.
    try (ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
            basePath,
            writeSupport,
            CompressionCodecName.UNCOMPRESSED,
            blockSize, pageSize, dictionaryPageSize,
            enableDictionary,
            validating,
            ParquetProperties.WriterVersion.PARQUET_2_0,
            conf)) {
        for (int i = 0; i < 50000; i++) {
            parquetWriter.write(groupFactory.newGroup()
                    .append("id", "10")
                    .append("content", "test" + i)
                    // Primitive long avoids allocating a boxed Long per row.
                    .append("int64_field", (long) i));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
开发者ID:dbiir,项目名称:RealtimeAnalysis,代码行数:39,代码来源:ParquetWriterThread.java


示例10: writeParquetFile

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes every record in {@code data} to a Parquet file at {@code outputParquetFile}.
 *
 * @param rawSchema textual Parquet schema, parsed with {@code MessageTypeParser}
 * @param outputParquetFile destination file; its URI is used as the writer's path
 * @param data records to write, in iteration order
 * @throws IOException if creating the writer, writing a record, or closing the writer fails
 */
private static void writeParquetFile(String rawSchema, File outputParquetFile, List<EventRecord> data)
  throws IOException
{
  Path path = new Path(outputParquetFile.toURI());
  MessageType schema = MessageTypeParser.parseMessageType(rawSchema);
  ParquetPOJOWriter writer = new ParquetPOJOWriter(path, schema, EventRecord.class, true);
  try {
    for (EventRecord eventRecord : data) {
      writer.write(eventRecord);
    }
  } finally {
    // Close in finally so the writer is released even when a write throws.
    writer.close();
  }
}
 
开发者ID:apache,项目名称:apex-malhar,代码行数:12,代码来源:ParquetFilePOJOReaderTest.java


示例11: testTajoToParquetConversion

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Converts {@code tajoSchema} to a Parquet schema and asserts that its string form equals the
 * string form of the schema parsed from {@code schemaString}.
 *
 * @param tajoSchema Tajo schema to convert
 * @param schemaString textual Parquet schema the conversion result is compared against
 */
private void testTajoToParquetConversion(
    Schema tajoSchema, String schemaString) throws Exception {
  final TajoSchemaConverter schemaConverter = new TajoSchemaConverter();
  final MessageType actual = schemaConverter.convert(tajoSchema);
  final MessageType reference = MessageTypeParser.parseMessageType(schemaString);

  final String failureMessage = "converting " + actual + " to " + schemaString;
  assertEquals(failureMessage, reference.toString(), actual.toString());
}
 
开发者ID:apache,项目名称:tajo,代码行数:9,代码来源:TestSchemaConverter.java


示例12: testParquetToTajoConversion

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Parses {@code schemaString} as a Parquet schema, converts it to a Tajo schema and asserts that
 * its string form equals the string form of {@code tajoSchema}.
 *
 * @param tajoSchema Tajo schema the conversion result is compared against
 * @param schemaString textual Parquet schema to convert
 */
private void testParquetToTajoConversion(
    Schema tajoSchema, String schemaString) throws Exception {
  final TajoSchemaConverter schemaConverter = new TajoSchemaConverter();
  final MessageType parquetSchema = MessageTypeParser.parseMessageType(schemaString);
  final Schema actual = schemaConverter.convert(parquetSchema);

  final String failureMessage = "converting " + schemaString + " to " + tajoSchema;
  assertEquals(failureMessage, tajoSchema.toString(), actual.toString());
}
 
开发者ID:apache,项目名称:tajo,代码行数:9,代码来源:TestSchemaConverter.java


示例13: testIntAnnotations

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Parses a schema using the signed and unsigned integer annotations (INT_8 through UINT_64),
 * compares it with the builder-made equivalent, and checks that the printed schema parses back
 * to an equal result.
 */
@Test
public void testIntAnnotations() {
  final String schemaText =
      "message IntMessage {"
      + "  required int32 i8 (INT_8);"
      + "  required int32 i16 (INT_16);"
      + "  required int32 i32 (INT_32);"
      + "  required int64 i64 (INT_64);"
      + "  required int32 u8 (UINT_8);"
      + "  required int32 u16 (UINT_16);"
      + "  required int32 u32 (UINT_32);"
      + "  required int64 u64 (UINT_64);"
      + "}\n";

  final MessageType fromText = MessageTypeParser.parseMessageType(schemaText);

  final MessageType fromBuilder = Types.buildMessage()
      .required(INT32).as(INT_8).named("i8")
      .required(INT32).as(INT_16).named("i16")
      .required(INT32).as(INT_32).named("i32")
      .required(INT64).as(INT_64).named("i64")
      .required(INT32).as(UINT_8).named("u8")
      .required(INT32).as(UINT_16).named("u16")
      .required(INT32).as(UINT_32).named("u32")
      .required(INT64).as(UINT_64).named("u64")
      .named("IntMessage");
  assertEquals(fromBuilder, fromText);

  // Round trip: print the parsed schema and parse that text again.
  final String printed = fromText.toString();
  assertEquals(fromBuilder, MessageTypeParser.parseMessageType(printed));
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:30,代码来源:TestParquetParser.java


示例14: test

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes {@code rows} values of a required binary column through a {@code ColumnWriterV2}, then
 * reads the resulting pages back with a {@code ColumnReaderImpl} and checks the row count, the
 * value count, the levels of every entry and the number of values seen by the converter.
 * ({@code rows} is declared outside this method.)
 */
@Test
public void test() throws Exception {
  MessageType schema = MessageTypeParser.parseMessageType("message test { required binary foo; }");
  ColumnDescriptor col = schema.getColumns().get(0);
  MemPageWriter pageWriter = new MemPageWriter();
  ColumnWriterV2 columnWriterV2 = new ColumnWriterV2(col, pageWriter,
      ParquetProperties.builder()
          .withDictionaryPageSize(1024).withWriterVersion(PARQUET_2_0)
          .withPageSize(2048).build());
  // Values cycle through "bar0" .. "bar9"; a page is cut after every 1000th value.
  for (int i = 0; i < rows; i++) {
    columnWriterV2.write(Binary.fromString("bar" + i % 10), 0, 0);
    if ((i + 1) % 1000 == 0) {
      // NOTE(review): the loop index i is passed here while the final call below passes rows;
      // the meaning of this argument is not visible in this snippet, so confirm it.
      columnWriterV2.writePage(i);
    }
  }
  // Write out whatever is still pending after the loop, then finish the column chunk.
  columnWriterV2.writePage(rows);
  columnWriterV2.finalizeColumnChunk();
  List<DataPage> pages = pageWriter.getPages();
  // Sum the per-page counts so they can be compared with the number of values written.
  int valueCount = 0;
  int rowCount = 0;
  for (DataPage dataPage : pages) {
    valueCount += dataPage.getValueCount();
    // The cast fails unless every page is a DataPageV2; presumably guaranteed by the
    // PARQUET_2_0 writer version chosen above, which should be verified.
    rowCount += ((DataPageV2)dataPage).getRowCount();
  }
  assertEquals(rows, rowCount);
  assertEquals(rows, valueCount);
  MemPageReader pageReader = new MemPageReader((long)rows, pages.iterator(), pageWriter.getDictionaryPage());
  ValidatingConverter converter = new ValidatingConverter();
  ColumnReader columnReader = new ColumnReaderImpl(col, pageReader, converter, VersionParser.parse(Version.FULL_VERSION));
  // Read every entry back: both levels must be 0, and each value is pushed to the converter.
  for (int i = 0; i < rows; i++) {
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    columnReader.writeCurrentValueToConverter();
    columnReader.consume();
  }
  // One value was handed to the converter per loop iteration above.
  assertEquals(rows, converter.count);
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:38,代码来源:TestColumnReaderImpl.java


示例15: testOptional

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Writes {@code rows} nulls to an optional binary column through a {@code ColumnWriterV2}, then
 * reads the resulting pages back with a {@code ColumnReaderImpl} and checks the row count, the
 * value count and the levels of every entry. No value is handed to the converter, so its count
 * is expected to stay at 0. ({@code rows} is declared outside this method.)
 */
@Test
public void testOptional() throws Exception {
  MessageType schema = MessageTypeParser.parseMessageType("message test { optional binary foo; }");
  ColumnDescriptor col = schema.getColumns().get(0);
  MemPageWriter pageWriter = new MemPageWriter();
  ColumnWriterV2 columnWriterV2 = new ColumnWriterV2(col, pageWriter,
      ParquetProperties.builder()
          .withDictionaryPageSize(1024).withWriterVersion(PARQUET_2_0)
          .withPageSize(2048).build());
  // Every entry is a null; a page is cut after every 1000th entry.
  for (int i = 0; i < rows; i++) {
    columnWriterV2.writeNull(0, 0);
    if ((i + 1) % 1000 == 0) {
      // NOTE(review): the loop index i is passed here while the final call below passes rows;
      // the meaning of this argument is not visible in this snippet, so confirm it.
      columnWriterV2.writePage(i);
    }
  }
  // Write out whatever is still pending after the loop, then finish the column chunk.
  columnWriterV2.writePage(rows);
  columnWriterV2.finalizeColumnChunk();
  List<DataPage> pages = pageWriter.getPages();
  // Sum the per-page counts so they can be compared with the number of entries written.
  int valueCount = 0;
  int rowCount = 0;
  for (DataPage dataPage : pages) {
    valueCount += dataPage.getValueCount();
    // The cast fails unless every page is a DataPageV2; presumably guaranteed by the
    // PARQUET_2_0 writer version chosen above, which should be verified.
    rowCount += ((DataPageV2)dataPage).getRowCount();
  }
  assertEquals(rows, rowCount);
  assertEquals(rows, valueCount);
  MemPageReader pageReader = new MemPageReader((long)rows, pages.iterator(), pageWriter.getDictionaryPage());
  ValidatingConverter converter = new ValidatingConverter();
  ColumnReader columnReader = new ColumnReaderImpl(col, pageReader, converter, VersionParser.parse(Version.FULL_VERSION));
  // Walk over every entry checking its levels; values are never pushed to the converter here.
  for (int i = 0; i < rows; i++) {
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    columnReader.consume();
  }
  // Nothing was written to the converter in the loop above.
  assertEquals(0, converter.count);
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:37,代码来源:TestColumnReaderImpl.java


示例16: testRequiredOfRequired

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Builds one group for a schema with a required group {@code foo} holding a required int64
 * {@code bar}, and hands the schema plus that single group to {@code testSchema}.
 */
@Test
public void testRequiredOfRequired() {
  final String schemaText =
      "message Document {\n"
      + "  required group foo {\n"
      + "    required int64 bar;\n"
      + "  }\n"
      + "}\n";
  final MessageType schema = MessageTypeParser.parseMessageType(schemaText);

  final GroupFactory groupFactory = new SimpleGroupFactory(schema);
  final Group document = groupFactory.newGroup();
  document.addGroup("foo").append("bar", 2L);

  testSchema(schema, Arrays.asList(document));
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:16,代码来源:TestColumnIO.java


示例17: getParquetInputSplit

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Builds a {@code ParquetInputSplit} for this object's row groups.
 *
 * <p>The split length is the summed total size of every column chunk whose path is contained in
 * the requested schema. The split end is the starting position of the last row group plus its
 * total byte size. The split start and hosts are taken from {@code hdfsBlock}.
 *
 * @param fileStatus file whose path the split refers to
 * @param requestedSchema textual Parquet schema naming the requested columns
 * @param readSupportMetadata not read by this method
 * @return the split covering this object's row groups
 */
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema, Map<String, String> readSupportMetadata) throws IOException {
  final MessageType projection = MessageTypeParser.parseMessageType(requestedSchema);

  // Add up the size of every column chunk that belongs to the requested schema.
  long requestedBytes = 0;
  for (BlockMetaData rowGroup : getRowGroups()) {
    for (ColumnChunkMetaData chunk : rowGroup.getColumns()) {
      if (!projection.containsPath(chunk.getPath().toArray())) {
        continue;
      }
      requestedBytes += chunk.getTotalSize();
    }
  }

  // The split ends where the last row group ends.
  final BlockMetaData finalRowGroup = getRowGroups().get(getRowGroupCount() - 1);
  final long splitEnd = finalRowGroup.getStartingPos() + finalRowGroup.getTotalByteSize();

  // Record the starting position of each row group, in order.
  final long[] offsets = new long[getRowGroupCount()];
  for (int index = 0; index < offsets.length; index++) {
    offsets[index] = getRowGroups().get(index).getStartingPos();
  }

  return new ParquetInputSplit(
      fileStatus.getPath(),
      hdfsBlock.getOffset(),
      splitEnd,
      requestedBytes,
      hdfsBlock.getHosts(),
      offsets);
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:31,代码来源:ParquetInputFormat.java


示例18: end

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Sums the total size of every column chunk in {@code blocks} whose path is contained in the
 * requested schema.
 *
 * @param blocks row groups whose column chunks are inspected
 * @param requestedSchema textual Parquet schema naming the requested columns
 * @return the summed total size of the matching column chunks
 */
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
  final MessageType projection = MessageTypeParser.parseMessageType(requestedSchema);

  long total = 0;
  for (BlockMetaData rowGroup : blocks) {
    for (ColumnChunkMetaData chunk : rowGroup.getColumns()) {
      // Chunks outside the requested schema do not contribute.
      if (!projection.containsPath(chunk.getPath().toArray())) {
        continue;
      }
      total += chunk.getTotalSize();
    }
  }
  return total;
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:15,代码来源:ParquetInputSplit.java


示例19: testInitWithoutSpecifyingRequestSchema

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Initialises a {@code GroupReadSupport} with an empty configuration and empty key/value
 * metadata, and checks that the requested schema of the resulting read context equals the file
 * schema.
 */
@Test
public void testInitWithoutSpecifyingRequestSchema() throws Exception {
  GroupReadSupport s = new GroupReadSupport();
  Configuration configuration = new Configuration();
  Map<String, String> keyValueMetaData = new HashMap<String, String>();
  MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);

  ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);
  // JUnit's assertEquals takes (expected, actual); keeping that order makes a failure
  // report the file schema as the expected value.
  assertEquals(fileSchema, context.getRequestedSchema());
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:11,代码来源:GroupReadSupportTest.java


示例20: setUp

import org.apache.parquet.schema.MessageTypeParser; //导入依赖的package包/类
/**
 * Resets the fixture before each test: fills {@code blocks} with ten blocks created by
 * {@code newBlock}, parses a one-column schema and builds the file metadata from it.
 */
@Before
public void setUp() {
  blocks = new ArrayList<BlockMetaData>();
  int blockIndex = 0;
  while (blockIndex < 10) {
    blocks.add(newBlock(blockIndex * 10, 10));
    blockIndex++;
  }

  schema = MessageTypeParser.parseMessageType("message doc { required binary foo; }");

  // The file carries no key/value metadata.
  final HashMap<String, String> noKeyValueMetaData = new HashMap<String, String>();
  fileMetaData = new FileMetaData(schema, noKeyValueMetaData, "parquet-mr");
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:10,代码来源:TestInputFormat.java



注:本文中的org.apache.parquet.schema.MessageTypeParser类示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Java RequestInfoProto类代码示例发布时间:2022-05-22
下一篇:
Java CBCBlockCipherMac类代码示例发布时间:2022-05-22
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap