本文整理汇总了Java中parquet.hadoop.ParquetOutputFormat类的典型用法代码示例。如果您正苦于以下问题:Java ParquetOutputFormat类的具体用法?Java ParquetOutputFormat怎么用?Java ParquetOutputFormat使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
ParquetOutputFormat类属于parquet.hadoop包,在下文中一共展示了ParquetOutputFormat类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: writeAvro
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Writes a Flink DataSet of Avro {@code Person} records to Parquet files under
 * {@code outputPath}, using snappy compression and dictionary encoding.
 *
 * @param data       (Void, Person) tuples to persist; the Void key is required by the Hadoop API
 * @param outputPath target directory for the Parquet output
 * @throws IOException if the Hadoop job cannot be created
 */
public static void writeAvro(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
// Set up the Hadoop OUTPUT format (the original comment said "Input Format" — fixed).
Job job = Job.getInstance();
// Wrap the Avro-aware Parquet output format so Flink can drive it.
HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new AvroParquetOutputFormat(), job);
FileOutputFormat.setOutputPath(job, new Path(outputPath));
// The writer needs the Avro schema up front to build the Parquet file schema.
AvroParquetOutputFormat.setSchema(job, Person.getClassSchema());
ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
ParquetOutputFormat.setEnableDictionary(job, true);
// Output & Execute
data.output(hadoopOutputFormat);
}
开发者ID:FelixNeutatz,项目名称:parquet-flinktacular,代码行数:17,代码来源:ParquetAvroExample.java
示例2: writeThrift
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Writes a Flink DataSet of Thrift {@code Person} structs to Parquet files under
 * {@code outputPath}, using snappy compression and dictionary encoding.
 *
 * @param data       (Void, Person) tuples to persist; the Void key is required by the Hadoop API
 * @param outputPath target directory for the Parquet output
 * @throws IOException if the Hadoop job cannot be created
 */
public static void writeThrift(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
// Set up the Hadoop OUTPUT format (the original comment said "Input Format" — fixed).
Job job = Job.getInstance();
// Wrap the Thrift-aware Parquet output format so Flink can drive it.
HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new ParquetThriftOutputFormat(), job);
FileOutputFormat.setOutputPath(job, new Path(outputPath));
ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
ParquetOutputFormat.setEnableDictionary(job, true);
// The writer derives the Parquet schema from the Thrift class.
ParquetThriftOutputFormat.setThriftClass(job, Person.class);
// Output & Execute
data.output(hadoopOutputFormat);
}
开发者ID:FelixNeutatz,项目名称:parquet-flinktacular,代码行数:18,代码来源:ParquetThriftExample.java
示例3: newPhysicalProperties
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Builds the default physical storage options for the given store type.
 * Unknown types yield an empty Options instance.
 *
 * @param type the storage format of the table
 * @return Options pre-populated with the defaults for that format
 */
public static Options newPhysicalProperties(CatalogProtos.StoreType type) {
  final Options props = new Options();
  // Note: kept as an if/else chain (not a switch) so a null type simply
  // falls through and returns empty options instead of throwing.
  if (type == CatalogProtos.StoreType.CSV) {
    props.put(CSVFILE_DELIMITER, DEFAULT_FIELD_DELIMITER);
  } else if (type == CatalogProtos.StoreType.RCFILE) {
    props.put(RCFILE_SERDE, DEFAULT_BINARY_SERDE);
  } else if (type == CatalogProtos.StoreType.SEQUENCEFILE) {
    props.put(SEQUENCEFILE_SERDE, DEFAULT_TEXT_SERDE);
    props.put(SEQUENCEFILE_DELIMITER, DEFAULT_FIELD_DELIMITER);
  } else if (type == CatalogProtos.StoreType.PARQUET) {
    // Parquet defaults are keyed by the standard ParquetOutputFormat property names.
    props.put(ParquetOutputFormat.BLOCK_SIZE, PARQUET_DEFAULT_BLOCK_SIZE);
    props.put(ParquetOutputFormat.PAGE_SIZE, PARQUET_DEFAULT_PAGE_SIZE);
    props.put(ParquetOutputFormat.COMPRESSION, PARQUET_DEFAULT_COMPRESSION_CODEC_NAME);
    props.put(ParquetOutputFormat.ENABLE_DICTIONARY, PARQUET_DEFAULT_IS_DICTIONARY_ENABLED);
    props.put(ParquetOutputFormat.VALIDATION, PARQUET_DEFAULT_IS_VALIDATION_ENABLED);
  }
  return props;
}
开发者ID:gruter,项目名称:tajo-cdh,代码行数:20,代码来源:StorageUtil.java
示例4: setVersion
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Selects the Parquet writer format version for files written by this job.
 * Anything other than an explicit VERSION_1_0 uses the 2.0 writer.
 *
 * @param version requested writer version
 * @throws Exception if the shim classloader invocation fails
 */
@Override
public void setVersion( VERSION version ) throws Exception {
  inClassloader( () -> {
    final ParquetProperties.WriterVersion writerVersion;
    switch ( version ) {
      case VERSION_1_0:
        writerVersion = ParquetProperties.WriterVersion.PARQUET_1_0;
        break;
      default:
        // VERSION_2_0 and any future enum value fall back to the 2.0 writer.
        writerVersion = ParquetProperties.WriterVersion.PARQUET_2_0;
        break;
    }
    job.getConfiguration().set( ParquetOutputFormat.WRITER_VERSION, writerVersion.toString() );
  } );
}
开发者ID:pentaho,项目名称:pentaho-hadoop-shims,代码行数:19,代码来源:PentahoParquetOutputFormat.java
示例5: setCompression
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Maps the shim-level compression choice onto the Parquet codec and applies it
 * to the job. Unrecognized values fall back to UNCOMPRESSED.
 *
 * @param comp requested compression type
 * @throws Exception if the shim classloader invocation fails
 */
@Override
public void setCompression( COMPRESSION comp ) throws Exception {
  inClassloader( () -> {
    final CompressionCodecName codecName;
    switch ( comp ) {
      case SNAPPY:
        codecName = CompressionCodecName.SNAPPY;
        break;
      case GZIP:
        codecName = CompressionCodecName.GZIP;
        break;
      case LZO:
        codecName = CompressionCodecName.LZO;
        break;
      default:
        // NONE and any future enum value write uncompressed pages.
        codecName = CompressionCodecName.UNCOMPRESSED;
        break;
    }
    ParquetOutputFormat.setCompression( job, codecName );
  } );
}
开发者ID:pentaho,项目名称:pentaho-hadoop-shims,代码行数:22,代码来源:PentahoParquetOutputFormat.java
示例6: RecordWriterWrapper
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Bridges the new-API ParquetRecordWriter into the old mapred RecordWriter API.
 *
 * @param realOutputFormat mapreduce-API output format that creates the real writer
 * @param fs               file system (unused; kept to match the mapred factory signature)
 * @param conf             job configuration, supplies the compression codec
 * @param name             base name of the output file
 * @param progress         progress reporter (unused)
 * @throws IOException if the underlying writer cannot be created, or wrapping an interruption
 */
public RecordWriterWrapper(ParquetOutputFormat<V> realOutputFormat,
    FileSystem fs, JobConf conf, String name, Progressable progress) throws IOException {
  CompressionCodecName codec = getCodec(conf);
  String extension = codec.getExtension() + ".parquet";
  Path file = getDefaultWorkFile(conf, name, extension);
  try {
    realWriter = (ParquetRecordWriter<V>) realOutputFormat.getRecordWriter(conf, file, codec);
  } catch (InterruptedException e) {
    // Restore the thread's interrupt status. The original code called
    // Thread.interrupted(), which CLEARS the flag and loses the interruption.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  }
}
开发者ID:grokcoder,项目名称:pbase,代码行数:15,代码来源:DeprecatedParquetOutputFormat.java
示例7: shouldUseParquetFlagToSetCodec
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Verifies that the parquet.compression configuration flag drives codec
 * selection for both the mapreduce and the mapred APIs.
 *
 * @param codecNameStr  codec name to put into the configuration
 * @param expectedCodec codec CodecConfig should resolve it to
 * @throws IOException if the Hadoop job cannot be created
 */
public void shouldUseParquetFlagToSetCodec(String codecNameStr, CompressionCodecName expectedCodec) throws IOException {
  //Test mapreduce API
  Job job = Job.getInstance();  // replaces deprecated `new Job()`
  Configuration conf = job.getConfiguration();
  conf.set(ParquetOutputFormat.COMPRESSION, codecNameStr);
  TaskAttemptContext task = ContextUtil.newTaskAttemptContext(conf, new TaskAttemptID(new TaskID(new JobID("test", 1), false, 1), 1));
  // JUnit convention is assertEquals(expected, actual); the original had the
  // arguments swapped, which produces misleading failure messages.
  Assert.assertEquals(expectedCodec, CodecConfig.from(task).getCodec());
  //Test mapred API
  JobConf jobConf = new JobConf();
  jobConf.set(ParquetOutputFormat.COMPRESSION, codecNameStr);
  Assert.assertEquals(expectedCodec, CodecConfig.from(jobConf).getCodec());
}
开发者ID:grokcoder,项目名称:pbase,代码行数:15,代码来源:CodecConfigTest.java
示例8: writeProtobuf
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Writes a Flink DataSet of Protobuf {@code Person} messages to Parquet files
 * under {@code outputPath}, using snappy compression and dictionary encoding.
 *
 * @param data       (Void, Person) tuples to persist; the Void key is required by the Hadoop API
 * @param outputPath target directory for the Parquet output
 * @throws IOException if the Hadoop job cannot be created
 */
public static void writeProtobuf(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
  Job job = Job.getInstance();

  // Bridge the Parquet protobuf output format into Flink's Hadoop compatibility layer.
  HadoopOutputFormat parquetFormat = new HadoopOutputFormat(new ProtoParquetOutputFormat(), job);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  // Writer configuration: message class, snappy compression, dictionary encoding.
  ProtoParquetOutputFormat.setProtobufClass(job, Person.class);
  ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
  ParquetOutputFormat.setEnableDictionary(job, true);

  data.output(parquetFormat);
}
开发者ID:FelixNeutatz,项目名称:parquet-flinktacular,代码行数:16,代码来源:ParquetProtobufExample.java
示例9: ParquetAppender
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Creates a new ParquetAppender, reading every writer knob (block size, page
 * size, codec, dictionary encoding, validation) from the table metadata.
 *
 * @param conf Configuration properties.
 * @param schema The table schema.
 * @param meta The table metadata; must carry the standard ParquetOutputFormat option keys.
 * @param path The path of the Parquet file to write to.
 * @throws IOException if the parent appender cannot be initialized.
 */
public ParquetAppender(Configuration conf, Schema schema, TableMeta meta,
    Path path) throws IOException {
  super(conf, schema, meta, path);
  // All settings come from table metadata, keyed by the parquet.hadoop
  // ParquetOutputFormat property names.
  this.blockSize = Integer.parseInt(meta.getOption(ParquetOutputFormat.BLOCK_SIZE));
  this.pageSize = Integer.parseInt(meta.getOption(ParquetOutputFormat.PAGE_SIZE));
  this.compressionCodecName = CompressionCodecName.fromConf(meta.getOption(ParquetOutputFormat.COMPRESSION));
  this.enableDictionary = Boolean.parseBoolean(meta.getOption(ParquetOutputFormat.ENABLE_DICTIONARY));
  this.validating = Boolean.parseBoolean(meta.getOption(ParquetOutputFormat.VALIDATION));
}
开发者ID:gruter,项目名称:tajo-cdh,代码行数:23,代码来源:ParquetAppender.java
示例10: PentahoParquetOutputFormat
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Creates the Pentaho Parquet output format with job-summary files and
 * dictionary encoding disabled by default.
 *
 * @throws Exception if the Hadoop job cannot be created inside the shim classloader
 */
public PentahoParquetOutputFormat() throws Exception {
  logger.info( "We are initializing parquet output format" );
  inClassloader( () -> {
    ConfigurationProxy hadoopConfiguration = new ConfigurationProxy();
    job = Job.getInstance( hadoopConfiguration );
    // _SUCCESS / summary metadata files are not wanted for PDI output.
    job.getConfiguration().set( ParquetOutputFormat.ENABLE_JOB_SUMMARY, "false" );
    ParquetOutputFormat.setEnableDictionary( job, false );
  } );
}
开发者ID:pentaho,项目名称:pentaho-hadoop-shims,代码行数:13,代码来源:PentahoParquetOutputFormat.java
示例11: setOutputFile
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Points the output format at the target file, optionally replacing an
 * existing one.
 *
 * @param file     target file path
 * @param override true to delete a pre-existing file, false to fail when one exists
 * @throws Exception FileAlreadyExistsException when the file exists and override is false
 */
@Override
public void setOutputFile( String file, boolean override ) throws Exception {
  inClassloader( () -> {
    outputFile = new Path( file );
    FileSystem fs = FileSystem.get( outputFile.toUri(), job.getConfiguration() );
    if ( fs.exists( outputFile ) ) {
      // Guard clause: refuse to clobber unless the caller asked for it.
      if ( !override ) {
        throw new FileAlreadyExistsException( file );
      }
      fs.delete( outputFile, true );
    }
    // Hadoop's output path is the parent directory; the file name is applied later.
    ParquetOutputFormat.setOutputPath( job, outputFile.getParent() );
  } );
}
开发者ID:pentaho,项目名称:pentaho-hadoop-shims,代码行数:16,代码来源:PentahoParquetOutputFormat.java
示例12: recordWriterCreateFileWithData
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Writes one two-field row through the Pentaho Parquet record writer and
 * verifies the produced Parquet file reads back with two columns per row.
 */
@Test
public void recordWriterCreateFileWithData() throws Exception {
  // Build a writer backed by the native Parquet output format.
  WriteSupport support =
      new PentahoParquetWriteSupport( ParquetUtils.createSchema( ValueMetaInterface.TYPE_INTEGER ) );
  ParquetOutputFormat nativeParquetOutputFormat = new ParquetOutputFormat<>( support );
  ParquetRecordWriter<RowMetaAndData> recordWriter =
      (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter( task );
  PentahoParquetRecordWriter writer = new PentahoParquetRecordWriter( recordWriter, task );

  // One row with two string fields.
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "Name" ) );
  rowMeta.addValueMeta( new ValueMetaString( "Age" ) );
  row.setRowMeta( rowMeta );
  row.setData( new Object[] { "Alex", "87" } );
  writer.write( row );
  recordWriter.close( task );

  // Files.walk returns an open stream of paths; close it with
  // try-with-resources (the original leaked the directory handles).
  try ( java.util.stream.Stream<java.nio.file.Path> paths = Files.walk( Paths.get( tempFile.toString() ) ) ) {
    paths.filter( Files::isRegularFile )
        .forEach( ( f ) -> {
          String file = f.toString();
          if ( file.endsWith( "parquet" ) ) {
            IPentahoInputFormat.IPentahoRecordReader recordReader =
                readCreatedParquetFile( Paths.get( file ).toUri().toString() );
            recordReader.forEach(
                rowMetaAndData -> Assert.assertTrue( rowMetaAndData.size() == 2 ) );
          }
        } );
  }
}
开发者ID:pentaho,项目名称:pentaho-hadoop-shims,代码行数:37,代码来源:PentahoParquetRecordWriterTest.java
示例13: recordWriterCreateFileWithoutData
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Writes one all-null row through the Pentaho Parquet record writer and
 * verifies a non-empty Parquet file is still produced.
 */
@Test
public void recordWriterCreateFileWithoutData() throws Exception {
  // Build a writer backed by the native Parquet output format.
  WriteSupport support =
      new PentahoParquetWriteSupport( ParquetUtils.createSchema( ValueMetaInterface.TYPE_INTEGER ) );
  ParquetOutputFormat nativeParquetOutputFormat = new ParquetOutputFormat<>( support );
  ParquetRecordWriter<RowMetaAndData> recordWriter =
      (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter( task );
  PentahoParquetRecordWriter writer = new PentahoParquetRecordWriter( recordWriter, task );

  // One row with both fields null.
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "Name" ) );
  rowMeta.addValueMeta( new ValueMetaString( "Age" ) );
  row.setRowMeta( rowMeta );
  row.setData( new Object[] { null, null } );
  writer.write( row );
  recordWriter.close( task );

  // Files.walk returns an open stream of paths; close it with
  // try-with-resources (the original leaked the directory handles).
  try ( java.util.stream.Stream<java.nio.file.Path> paths = Files.walk( Paths.get( tempFile.toString() ) ) ) {
    paths.filter( Files::isRegularFile )
        .forEach( ( f ) -> {
          String file = f.toString();
          if ( file.endsWith( "parquet" ) ) {
            try {
              Assert.assertTrue( Files.size( Paths.get( file ) ) > 0 );
            } catch ( IOException e ) {
              // Fail the test instead of swallowing the error with
              // printStackTrace(), which let the test pass silently.
              throw new java.io.UncheckedIOException( e );
            }
          }
        } );
  }
}
开发者ID:pentaho,项目名称:pentaho-hadoop-shims,代码行数:38,代码来源:PentahoParquetRecordWriterTest.java
示例14: setWriteSupportClass
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Registers the WriteSupport implementation that converts records to Parquet.
 *
 * @param configuration     job configuration to update
 * @param writeSupportClass WriteSupport subclass to use for writing
 */
public static void setWriteSupportClass(Configuration configuration, Class<?> writeSupportClass) {
    final String className = writeSupportClass.getName();
    configuration.set(ParquetOutputFormat.WRITE_SUPPORT_CLASS, className);
}
开发者ID:grokcoder,项目名称:pbase,代码行数:4,代码来源:DeprecatedParquetOutputFormat.java
示例15: setBlockSize
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Sets the Parquet row-group (block) size.
 *
 * @param configuration job configuration to update
 * @param blockSize     row-group size in bytes
 */
public static void setBlockSize(Configuration configuration, int blockSize) {
    configuration.setInt(ParquetOutputFormat.BLOCK_SIZE, blockSize);
}
开发者ID:grokcoder,项目名称:pbase,代码行数:4,代码来源:DeprecatedParquetOutputFormat.java
示例16: setPageSize
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Sets the Parquet data page size.
 *
 * @param configuration job configuration to update
 * @param pageSize      page size in bytes
 */
public static void setPageSize(Configuration configuration, int pageSize) {
    configuration.setInt(ParquetOutputFormat.PAGE_SIZE, pageSize);
}
开发者ID:grokcoder,项目名称:pbase,代码行数:4,代码来源:DeprecatedParquetOutputFormat.java
示例17: setCompression
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Sets the compression codec used for new Parquet files.
 *
 * @param configuration job configuration to update
 * @param compression   codec to apply (stored by its enum name)
 */
public static void setCompression(Configuration configuration, CompressionCodecName compression) {
    final String codecName = compression.name();
    configuration.set(ParquetOutputFormat.COMPRESSION, codecName);
}
开发者ID:grokcoder,项目名称:pbase,代码行数:4,代码来源:DeprecatedParquetOutputFormat.java
示例18: setEnableDictionary
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Enables or disables Parquet dictionary encoding.
 *
 * @param configuration    job configuration to update
 * @param enableDictionary true to enable dictionary encoding
 */
public static void setEnableDictionary(Configuration configuration, boolean enableDictionary) {
    configuration.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, enableDictionary);
}
开发者ID:grokcoder,项目名称:pbase,代码行数:4,代码来源:DeprecatedParquetOutputFormat.java
示例19: isParquetCompressionSet
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Reports whether a Parquet compression codec has been configured explicitly.
 *
 * @param conf configuration to inspect
 * @return true when the Parquet compression property is present
 */
public static boolean isParquetCompressionSet(Configuration conf) {
    final String codec = conf.get(ParquetOutputFormat.COMPRESSION);
    return codec != null;
}
开发者ID:grokcoder,项目名称:pbase,代码行数:4,代码来源:CodecConfig.java
示例20: getParquetCompressionCodec
import parquet.hadoop.ParquetOutputFormat; //导入依赖的package包/类
/**
 * Resolves the configured Parquet compression codec.
 *
 * @param configuration configuration to inspect
 * @return the configured codec, or UNCOMPRESSED when none is set
 */
public static CompressionCodecName getParquetCompressionCodec(Configuration configuration) {
    final String codecName = configuration.get(ParquetOutputFormat.COMPRESSION, UNCOMPRESSED.name());
    return CompressionCodecName.fromConf(codecName);
}
开发者ID:grokcoder,项目名称:pbase,代码行数:4,代码来源:CodecConfig.java
注:本文中的parquet.hadoop.ParquetOutputFormat类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论