This article collects typical usage examples of the Java class org.apache.parquet.hadoop.example.GroupWriteSupport. If you are unsure what GroupWriteSupport is for or how to use it, the curated class examples below should help.
GroupWriteSupport belongs to the org.apache.parquet.hadoop.example package. Twenty code examples of the class are presented below, ordered by popularity by default.
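Before the individual examples, a minimal end-to-end sketch may help orient you: register the schema on a Hadoop Configuration with GroupWriteSupport.setSchema, write a few Group rows, then read them back with GroupReadSupport. This is an illustrative composite of the patterns in Examples 5 and 6 below, not taken from any single project; the class name, schema, and file path are assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.example.GroupReadSupport;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class GroupWriteSupportQuickStart {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { required int32 id; required binary name (UTF8); }"); // assumed schema
    // GroupWriteSupport reads the schema from the Configuration at write time
    GroupWriteSupport.setSchema(schema, conf);
    Path file = new Path("/tmp/quickstart.parquet"); // illustrative path
    ParquetWriter<Group> writer = ExampleParquetWriter.builder(file).withConf(conf).build();
    SimpleGroupFactory factory = new SimpleGroupFactory(schema);
    try {
      for (int i = 0; i < 3; i++) {
        writer.write(factory.newGroup().append("id", i).append("name", "row-" + i));
      }
    } finally {
      writer.close();
    }
    // GroupReadSupport discovers the schema from the file footer
    ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file).withConf(conf).build();
    try {
      for (Group g = reader.read(); g != null; g = reader.read()) {
        System.out.println(g.getInteger("id", 0) + " " + g.getBinary("name", 0).toStringUsingUTF8());
      }
    } finally {
      reader.close();
    }
  }
}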
Example 1: generateEmptyWithSchema
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
static File generateEmptyWithSchema(File parentDir, String filename) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema); // unused here: no rows are written
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, false, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  writer.close(); // close immediately: the file contains only the schema, no rows
  return f;
}
Developer: h2oai | Project: h2o-3 | Lines: 21 | Source: ParseTestParquet.java
Example 2: generateSparseParquetFile
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
static File generateSparseParquetFile(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { optional int32 int32_field; optional binary string_field (UTF8); required int32 row; optional int32 int32_field2; } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      // only every 10th row carries values in the optional fields
      if (i % 10 == 0) { g = g.append("int32_field", i); }
      if (i % 10 == 0) { g = g.append("string_field", "CAT_" + (i % 10)); }
      if (i % 10 == 0) { g = g.append("int32_field2", i); }
      writer.write(g.append("row", i));
    }
  } finally {
    writer.close();
  }
  return f;
}
Developer: h2oai | Project: h2o-3 | Lines: 24 | Source: ParseTestParquet.java
Example 3: generateParquetFileWithNullCharacters
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
static File generateParquetFileWithNullCharacters(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { optional binary cat_field (UTF8); } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      // row 66 gets a value with an embedded NUL character
      String value = i == 66 ? "CAT_0_weird\0" : "CAT_" + (i % 10);
      writer.write(g.append("cat_field", value));
    }
  } finally {
    writer.close();
  }
  return f;
}
Developer: h2oai | Project: h2o-3 | Lines: 22 | Source: ParseTestParquet.java
Example 4: runMapReduceJob
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
private void runMapReduceJob(CompressionCodecName codec) throws IOException, ClassNotFoundException, InterruptedException {
  final FileSystem fileSystem = parquetPath.getFileSystem(conf);
  fileSystem.delete(parquetPath, true);
  fileSystem.delete(outputPath, true);
  {
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, inputPath);
    jobConf.setNumReduceTasks(0);
    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
    DeprecatedParquetOutputFormat.setCompression(jobConf, codec);
    DeprecatedParquetOutputFormat.setOutputPath(jobConf, parquetPath);
    DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, GroupWriteSupport.class);
    GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), jobConf);
    jobConf.setMapperClass(DeprecatedMapper.class);
    mapRedJob = JobClient.runJob(jobConf);
  }
}
Developer: apache | Project: parquet-mr | Lines: 21 | Source: DeprecatedOutputFormatTest.java
Example 5: prepareFile
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@BeforeClass
public static void prepareFile() throws IOException {
  cleanup();
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
      .withWriterVersion(PARQUET_1_0)
      .withCompressionCodec(GZIP)
      .withRowGroupSize(1024 * 1024)
      .withPageSize(1024)
      .enableDictionaryEncoding()
      .withDictionaryPageSize(2 * 1024)
      .withConf(conf)
      .build();
  writeData(f, writer);
}
Developer: apache | Project: parquet-mr | Lines: 18 | Source: DictionaryFilterTest.java
Example 6: readFile
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public static List<Group> readFile(File f, Filter filter) throws IOException {
  Configuration conf = new Configuration();
  GroupWriteSupport.setSchema(schema, conf);
  ParquetReader<Group> reader =
      ParquetReader.builder(new GroupReadSupport(), new Path(f.getAbsolutePath()))
          .withConf(conf)
          .withFilter(filter)
          .build();
  Group current;
  List<Group> users = new ArrayList<Group>();
  current = reader.read();
  while (current != null) {
    users.add(current);
    current = reader.read();
  }
  return users;
}
Developer: apache | Project: parquet-mr | Lines: 22 | Source: PhoneBookWriter.java
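Example 6 accepts a Filter but does not show how one is built. Below is a hedged usage sketch: it constructs an equality predicate with parquet-mr's filter2 API and wraps it with FilterCompat before calling readFile. The file path, column name ("name"), and matched value are assumptions for illustration.

import java.io.File;
import java.util.List;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.io.api.Binary;

public class ReadFileUsage {
  public static void main(String[] args) throws Exception {
    // match rows whose "name" column equals "alice" (column and value are assumptions)
    FilterPredicate predicate = FilterApi.eq(FilterApi.binaryColumn("name"), Binary.fromString("alice"));
    List<Group> matches = PhoneBookWriter.readFile(
        new File("/tmp/phonebook.parquet"), FilterCompat.get(predicate)); // illustrative path
    System.out.println(matches.size() + " matching rows");
  }
}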
Example 7: writeAndTest
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public static void writeAndTest(WriteContext context) throws IOException {
  // Create the configuration, and then apply the schema to our configuration.
  Configuration configuration = new Configuration();
  GroupWriteSupport.setSchema(context.schema, configuration);
  GroupWriteSupport groupWriteSupport = new GroupWriteSupport();
  // Create the writer properties
  final int blockSize = context.blockSize;
  final int pageSize = context.pageSize;
  final int dictionaryPageSize = pageSize;
  final boolean enableDictionary = context.enableDictionary;
  final boolean enableValidation = context.enableValidation;
  ParquetProperties.WriterVersion writerVersion = context.version;
  CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
  ParquetWriter<Group> writer = new ParquetWriter<Group>(context.fsPath,
      groupWriteSupport, codec, blockSize, pageSize, dictionaryPageSize,
      enableDictionary, enableValidation, writerVersion, configuration);
  context.write(writer);
  writer.close();
  context.test();
  context.path.delete();
}
Developer: apache | Project: parquet-mr | Lines: 27 | Source: TestStatistics.java
Example 8: run
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public void run() {
  Configuration conf = new Configuration();
  int blockSize = 1 * 1024;
  int pageSize = 1 * 1024;
  int dictionaryPageSize = 512;
  boolean enableDictionary = false;
  boolean validating = false;
  Path basePath = new Path("file:///Users/Jelly/Developer/test");
  MessageType schema = MessageTypeParser.parseMessageType("message test {" +
      "required binary id; " +
      "required binary content; " +
      "required int64 int64_field; " +
      "}");
  GroupWriteSupport writeSupport = new GroupWriteSupport();
  GroupWriteSupport.setSchema(schema, conf); // setSchema is static; register the schema on the conf
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
  try {
    ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
        basePath,
        writeSupport,
        CompressionCodecName.UNCOMPRESSED,
        blockSize, pageSize, dictionaryPageSize,
        enableDictionary,
        validating,
        ParquetProperties.WriterVersion.PARQUET_2_0,
        conf);
    for (int i = 0; i < 50000; i++) {
      parquetWriter.write(groupFactory.newGroup()
          .append("id", "10")
          .append("content", "test" + i)
          .append("int64_field", Long.valueOf(i)));
    }
    parquetWriter.close();
  } catch (IOException e) {
    e.printStackTrace();
  }
}
Developer: dbiir | Project: RealtimeAnalysis | Lines: 39 | Source: ParquetWriterThread.java
Example 9: test
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@Test
public void test() throws IOException {
  Type name = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, "name");
  Type age = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT32, "age");
  Type score = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.DOUBLE, "score");
  Type student = new MessageType("student", Arrays.asList(name, age, score));
  MessageType schema = new MessageType("student", student);
  int blockSize = 256 * 1024 * 1024;
  int pageSize = 6 * 1024;
  int dictionaryPageSize = 512;
  boolean enableDictionary = false;
  boolean validating = false;
  GroupWriteSupport groupWriteSupport = new GroupWriteSupport();
  SimpleGroupFactory simpleGroupFactory = new SimpleGroupFactory(schema);
  Configuration conf = new Configuration();
  conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
  Path path = new Path("hdfs://127.0.0.1:9000/student.parquet");
  GroupWriteSupport.setSchema(schema, conf); // setSchema is static; register the schema on the conf
  ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
      path,
      groupWriteSupport,
      CompressionCodecName.UNCOMPRESSED,
      blockSize,
      pageSize,
      dictionaryPageSize,
      enableDictionary,
      validating,
      ParquetProperties.WriterVersion.PARQUET_2_0,
      conf);
  parquetWriter.close(); // the original snippet never closed the writer; without close() no footer is written
}
Developer: dbiir | Project: RealtimeAnalysis | Lines: 36 | Source: ParquetWriterTest.java
Example 10: initWriter
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public static ParquetWriter<Group> initWriter(MessageType schema, String fileName) throws IOException {
  GroupWriteSupport.setSchema(schema, conf);
  ParquetWriter<Group> writer =
      new ParquetWriter<Group>(initFile(fileName),
          ParquetFileWriter.Mode.OVERWRITE,
          new GroupWriteSupport(),
          CompressionCodecName.SNAPPY,
          1024,
          1024,
          512,
          true,  // enable dictionary encoding
          false,
          ParquetProperties.WriterVersion.PARQUET_1_0, conf
      );
  return writer;
}
Developer: axbaretto | Project: drill | Lines: 20 | Source: ParquetSimpleTestFileGenerator.java
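One possible way to call Example 10's initWriter, sketched under assumptions: the snippet would live in the same ParquetSimpleTestFileGenerator class (so the static conf and the initFile helper are in scope), and the schema string and file name are made up for illustration.

MessageType schema = MessageTypeParser.parseMessageType(
    "message simple { required int32 id; optional binary name (UTF8); } "); // assumed schema
ParquetWriter<Group> writer = initWriter(schema, "simple.parquet"); // assumed file name
SimpleGroupFactory factory = new SimpleGroupFactory(schema);
try {
  // write a single illustrative row
  writer.write(factory.newGroup().append("id", 1).append("name", "first"));
} finally {
  writer.close();
}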
Example 11: generateParquetFile
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
static File generateParquetFile(File parentDir, String filename, int nrows, Date date) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      writer.write(fact.newGroup()
          .append("int32_field", 32 + i)
          .append("int64_field", 64L + i)
          .append("float_field", 1.0f + i)
          .append("double_field", 2.0d + i)
          .append("timestamp_field", date.getTime() + (i * 117)));
    }
  } finally {
    writer.close();
  }
  return f;
}
Developer: h2oai | Project: h2o-3 | Lines: 32 | Source: ParseTestParquet.java
Example 12: testWriteReadStatisticsAllNulls
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@Test
public void testWriteReadStatisticsAllNulls() throws Exception {
  // this test assumes statistics will be read
  Assume.assumeTrue(!shouldIgnoreStatistics(Version.FULL_VERSION, BINARY));
  File testFile = temp.newFile();
  testFile.delete();
  writeSchema = "message example {\n" +
      "required binary content (UTF8);\n" +
      "}";
  Path path = new Path(testFile.toURI());
  MessageType schema = MessageTypeParser.parseMessageType(writeSchema);
  Configuration configuration = new Configuration();
  configuration.setBoolean("parquet.strings.signed-min-max.enabled", true);
  GroupWriteSupport.setSchema(schema, configuration);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(path, configuration, new GroupWriteSupport());
  Group r1 = new SimpleGroup(schema); // all fields left unset, so "content" is null
  writer.write(r1);
  writer.close();
  ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, path);
  // assert the statistics object is not empty
  org.apache.parquet.column.statistics.Statistics stats = readFooter.getBlocks().get(0).getColumns().get(0).getStatistics();
  assertFalse("is empty: " + stats, stats.isEmpty());
  // assert the number of nulls is correct for the first block
  assertEquals("nulls: " + stats, 1, stats.getNumNulls());
}
Developer: apache | Project: parquet-mr | Lines: 34 | Source: TestParquetFileWriter.java
Example 13: writeFile
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
private static void writeFile(File out, Configuration conf, boolean useSchema2) throws IOException {
  if (!useSchema2) {
    GroupWriteSupport.setSchema(schema, conf);
  } else {
    GroupWriteSupport.setSchema(schema2, conf);
  }
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  Map<String, String> extraMetaData = new HashMap<String, String>();
  extraMetaData.put("schema_num", useSchema2 ? "2" : "1");
  ParquetWriter<Group> writer = ExampleParquetWriter
      .builder(new Path(out.getAbsolutePath()))
      .withConf(conf)
      .withExtraMetaData(extraMetaData)
      .build();
  for (int i = 0; i < 1000; i++) {
    Group g = f.newGroup()
        .append("binary_field", "test" + i)
        .append("int32_field", i)
        .append("int64_field", (long) i)
        .append("boolean_field", i % 2 == 0)
        .append("float_field", (float) i)
        .append("double_field", (double) i)
        .append("flba_field", "foo");
    if (!useSchema2) {
      g = g.append("int96_field", Binary.fromConstantByteArray(new byte[12]));
    }
    writer.write(g);
  }
  writer.close();
}
Developer: apache | Project: parquet-mr | Lines: 36 | Source: TestMergeMetadataFiles.java
Example 14: setUp
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@Before
public void setUp() throws Exception {
  parquetOutputFormat = new ParquetOutputFormat(new GroupWriteSupport());
  GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), conf);
  expectedPoolSize = Math.round((double)
      ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax() *
      MemoryManager.DEFAULT_MEMORY_POOL_RATIO);
  long rowGroupSize = expectedPoolSize / 2;
  conf.setLong(ParquetOutputFormat.BLOCK_SIZE, rowGroupSize);
  // the memory manager is not initialized until a writer is created
  createWriter(0).close(null);
}
Developer: apache | Project: parquet-mr | Lines: 16 | Source: TestMemoryManager.java
Example 15: writeToFile
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public static void writeToFile(File f, List<User> users) throws IOException {
  Configuration conf = new Configuration();
  GroupWriteSupport.setSchema(schema, conf);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getAbsolutePath()), conf, new GroupWriteSupport());
  for (User u : users) {
    writer.write(groupFromUser(u));
  }
  writer.close();
}
Developer: apache | Project: parquet-mr | Lines: 11 | Source: PhoneBookWriter.java
Example 16: write
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@Test
public void write() {
  ParquetWriter<Group> writer = null;
  BufferedReader bufferedReader = null;
  Configuration configuration = new Configuration();
  try {
    GroupWriteSupport groupWriteSupport = new GroupWriteSupport();
    GroupWriteSupport.setSchema(schema, configuration);
    writer = new ParquetWriter<Group>(new Path(PARQUETPATH), groupWriteSupport,
        ParquetWriter.DEFAULT_COMPRESSION_CODEC_NAME,
        ParquetWriter.DEFAULT_BLOCK_SIZE,
        ParquetWriter.DEFAULT_PAGE_SIZE,
        ParquetWriter.DEFAULT_PAGE_SIZE,
        ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
        ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED,
        ParquetProperties.WriterVersion.PARQUET_1_0, configuration);
    GroupFactory groupFactory = new SimpleGroupFactory(schema);
    bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(CLASSPATH_PATH + "financeNews.json")));
    String readline = null;
    while ((readline = bufferedReader.readLine()) != null) {
      Map map = JSON.parseObject(readline, Map.class);
      System.out.println(map);
      Group group = groupFactory.newGroup()
          .append("htitle", String.valueOf(map.get("htitle")))
          .append("keywords", String.valueOf(map.get("keywords")))
          .append("description", String.valueOf(map.get("description")))
          .append("url", String.valueOf(map.get("url")))
          .append("sumary", String.valueOf(map.get("sumary")))
          .append("content", String.valueOf(map.get("content")))
          .append("logo", String.valueOf(map.get("logo")))
          .append("title", String.valueOf(map.get("title")))
          .append("pubDate", String.valueOf(map.get("pubDate")))
          .append("mediaName", String.valueOf(map.get("mediaName")))
          .append("mediaUrl", String.valueOf(map.get("mediaUrl")))
          .append("category", String.valueOf(map.get("category")))
          .append("type", String.valueOf(map.get("type")));
      writer.write(group);
    }
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    // close resources even if an exception interrupts the loop (the original closed them inside the try block)
    try {
      if (writer != null) { writer.close(); }
      if (bufferedReader != null) { bufferedReader.close(); }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
Developer: mumuhadoop | Project: mumu-parquet | Lines: 44 | Source: JMHBenchmarkTest.java
Example 17: generateData
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public void generateData(Path outFile, Configuration configuration, ParquetProperties.WriterVersion version,
    int blockSize, int pageSize, int fixedLenByteArraySize, CompressionCodecName codec, int nRows)
    throws IOException {
  if (exists(configuration, outFile)) {
    System.out.println("File already exists " + outFile);
    return;
  }
  System.out.println("Generating data @ " + outFile);
  MessageType schema = parseMessageType(
      "message test { "
          + "required binary binary_field; "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required boolean boolean_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required fixed_len_byte_array(" + fixedLenByteArraySize + ") flba_field; "
          + "required int96 int96_field; "
          + "} ");
  GroupWriteSupport.setSchema(schema, configuration);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(outFile, new GroupWriteSupport(), codec, blockSize,
      pageSize, DICT_PAGE_SIZE, true, false, version, configuration);
  // generate some data for the fixed len byte array field
  char[] chars = new char[fixedLenByteArraySize];
  Arrays.fill(chars, '*');
  for (int i = 0; i < nRows; i++) {
    writer.write(
        f.newGroup()
            .append("binary_field", randomUUID().toString())
            .append("int32_field", i)
            .append("int64_field", 64L)
            .append("boolean_field", true)
            .append("float_field", 1.0f)
            .append("double_field", 2.0d)
            .append("flba_field", new String(chars))
            .append("int96_field", Binary.fromConstantByteArray(new byte[12])));
  }
  writer.close();
}
Developer: apache | Project: parquet-mr | Lines: 48 | Source: DataGenerator.java
Example 18: test
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@Test
public void test() throws Exception {
  Configuration conf = new Configuration();
  Path root = new Path("target/tests/TestParquetWriter/");
  FileSystem fs = root.getFileSystem(conf);
  if (fs.exists(root)) {
    fs.delete(root, true);
  }
  fs.mkdirs(root);
  MessageType schema = parseMessageType(
      "message test { "
          + "required binary binary_field; "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required boolean boolean_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required fixed_len_byte_array(3) flba_field; "
          + "required int96 int96_field; "
          + "optional binary null_field; "
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  // expected encoding of binary_field per cardinality (modulo) and writer version
  Map<String, Encoding> expected = new HashMap<String, Encoding>();
  expected.put("10-" + PARQUET_1_0, PLAIN_DICTIONARY);
  expected.put("1000-" + PARQUET_1_0, PLAIN);
  expected.put("10-" + PARQUET_2_0, RLE_DICTIONARY);
  expected.put("1000-" + PARQUET_2_0, DELTA_BYTE_ARRAY);
  for (int modulo : asList(10, 1000)) {
    for (WriterVersion version : WriterVersion.values()) {
      Path file = new Path(root, version.name() + "_" + modulo);
      ParquetWriter<Group> writer = new ParquetWriter<Group>(
          file,
          new GroupWriteSupport(),
          UNCOMPRESSED, 1024, 1024, 512, true, false, version, conf);
      for (int i = 0; i < 1000; i++) {
        writer.write(
            f.newGroup()
                .append("binary_field", "test" + (i % modulo))
                .append("int32_field", 32)
                .append("int64_field", 64L)
                .append("boolean_field", true)
                .append("float_field", 1.0f)
                .append("double_field", 2.0d)
                .append("flba_field", "foo")
                .append("int96_field", Binary.fromConstantByteArray(new byte[12])));
      }
      writer.close();
      ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file).withConf(conf).build();
      for (int i = 0; i < 1000; i++) {
        Group group = reader.read();
        assertEquals("test" + (i % modulo), group.getBinary("binary_field", 0).toStringUsingUTF8());
        assertEquals(32, group.getInteger("int32_field", 0));
        assertEquals(64L, group.getLong("int64_field", 0));
        assertEquals(true, group.getBoolean("boolean_field", 0));
        assertEquals(1.0f, group.getFloat("float_field", 0), 0.001);
        assertEquals(2.0d, group.getDouble("double_field", 0), 0.001);
        assertEquals("foo", group.getBinary("flba_field", 0).toStringUsingUTF8());
        assertEquals(Binary.fromConstantByteArray(new byte[12]), group.getInt96("int96_field", 0));
        assertEquals(0, group.getFieldRepetitionCount("null_field"));
      }
      reader.close();
      ParquetMetadata footer = readFooter(conf, file, NO_FILTER);
      for (BlockMetaData blockMetaData : footer.getBlocks()) {
        for (ColumnChunkMetaData column : blockMetaData.getColumns()) {
          if (column.getPath().toDotString().equals("binary_field")) {
            String key = modulo + "-" + version;
            Encoding expectedEncoding = expected.get(key);
            assertTrue(
                key + ":" + column.getEncodings() + " should contain " + expectedEncoding,
                column.getEncodings().contains(expectedEncoding));
          }
        }
      }
    }
  }
}
Developer: apache | Project: parquet-mr | Lines: 80 | Source: TestParquetWriterNewPage.java
Example 19: test
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@Test
public void test() throws Exception {
  Configuration conf = new Configuration();
  Path root = new Path("target/tests/TestParquetWriter/");
  enforceEmptyDir(conf, root);
  MessageType schema = parseMessageType(
      "message test { "
          + "required binary binary_field; "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required boolean boolean_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required fixed_len_byte_array(3) flba_field; "
          + "required int96 int96_field; "
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  // expected encoding of binary_field per cardinality (modulo) and writer version
  Map<String, Encoding> expected = new HashMap<String, Encoding>();
  expected.put("10-" + PARQUET_1_0, PLAIN_DICTIONARY);
  expected.put("1000-" + PARQUET_1_0, PLAIN);
  expected.put("10-" + PARQUET_2_0, RLE_DICTIONARY);
  expected.put("1000-" + PARQUET_2_0, DELTA_BYTE_ARRAY);
  for (int modulo : asList(10, 1000)) {
    for (WriterVersion version : WriterVersion.values()) {
      Path file = new Path(root, version.name() + "_" + modulo);
      ParquetWriter<Group> writer = new ParquetWriter<Group>(
          file,
          new GroupWriteSupport(),
          UNCOMPRESSED, 1024, 1024, 512, true, false, version, conf);
      for (int i = 0; i < 1000; i++) {
        writer.write(
            f.newGroup()
                .append("binary_field", "test" + (i % modulo))
                .append("int32_field", 32)
                .append("int64_field", 64L)
                .append("boolean_field", true)
                .append("float_field", 1.0f)
                .append("double_field", 2.0d)
                .append("flba_field", "foo")
                .append("int96_field", Binary.fromConstantByteArray(new byte[12])));
      }
      writer.close();
      ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file).withConf(conf).build();
      for (int i = 0; i < 1000; i++) {
        Group group = reader.read();
        assertEquals("test" + (i % modulo), group.getBinary("binary_field", 0).toStringUsingUTF8());
        assertEquals(32, group.getInteger("int32_field", 0));
        assertEquals(64L, group.getLong("int64_field", 0));
        assertEquals(true, group.getBoolean("boolean_field", 0));
        assertEquals(1.0f, group.getFloat("float_field", 0), 0.001);
        assertEquals(2.0d, group.getDouble("double_field", 0), 0.001);
        assertEquals("foo", group.getBinary("flba_field", 0).toStringUsingUTF8());
        assertEquals(Binary.fromConstantByteArray(new byte[12]),
            group.getInt96("int96_field", 0));
      }
      reader.close();
      ParquetMetadata footer = readFooter(conf, file, NO_FILTER);
      for (BlockMetaData blockMetaData : footer.getBlocks()) {
        for (ColumnChunkMetaData column : blockMetaData.getColumns()) {
          if (column.getPath().toDotString().equals("binary_field")) {
            String key = modulo + "-" + version;
            Encoding expectedEncoding = expected.get(key);
            assertTrue(
                key + ":" + column.getEncodings() + " should contain " + expectedEncoding,
                column.getEncodings().contains(expectedEncoding));
          }
        }
      }
      assertEquals("Object model property should be example",
          "example", footer.getFileMetaData().getKeyValueMetaData()
              .get(ParquetWriter.OBJECT_MODEL_NAME_PROP));
    }
  }
}
Developer: apache | Project: parquet-mr | Lines: 76 | Source: TestParquetWriter.java
Example 20: configure
import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public void configure(JobConf job) {
  // recover the schema that was registered on the JobConf with GroupWriteSupport.setSchema(...)
  factory = new SimpleGroupFactory(GroupWriteSupport.getSchema(job));
}
Developer: apache | Project: parquet-mr | Lines: 4 | Source: DeprecatedOutputFormatTest.java
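Example 20 shows only the configure() hook of an old-API (mapred) mapper. For context, here is a hedged sketch of what such a mapper might look like when paired with DeprecatedParquetOutputFormat from Example 4: the class name and the single "line" field are assumptions, but the Void key / Group value signature matches what the output format expects.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.example.GroupWriteSupport;

public class GroupEmittingMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Void, Group> {
  private SimpleGroupFactory factory;

  @Override
  public void configure(JobConf job) {
    // the schema was registered on the JobConf with GroupWriteSupport.setSchema(...)
    factory = new SimpleGroupFactory(GroupWriteSupport.getSchema(job));
  }

  @Override
  public void map(LongWritable key, Text value, OutputCollector<Void, Group> out, Reporter reporter)
      throws IOException {
    // field name "line" is an assumption; a real schema defines its own fields
    out.collect(null, factory.newGroup().append("line", value.toString()));
  }
}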
Note: the org.apache.parquet.hadoop.example.GroupWriteSupport examples in this article were collected from open-source projects on GitHub, MSDocs, and similar code and documentation platforms. Copyright of the source code remains with the original authors; consult each project's license before redistributing or reusing the code. Do not republish without permission.