This article collects typical usage examples of the Java class org.apache.hadoop.hive.ql.io.RCFile. If you are wondering what the RCFile class does, how to use it, or are looking for working examples, the curated code samples below may help.
The RCFile class belongs to the org.apache.hadoop.hive.ql.io package. This article presents 16 code examples of the RCFile class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
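Before the individual examples, here is a minimal write/read round-trip sketch that condenses the patterns used below. It assumes a local FileSystem and a hypothetical /tmp/sample.rc output path; it is an illustrative sketch, not code taken from the projects listed in this article.
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.io.LongWritable;

public class RCFileRoundTrip {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        Path file = new Path("/tmp/sample.rc"); // hypothetical output path
        int columnCount = 3;

        // Writer side: the column count must be declared in the Configuration
        // before the writer is constructed.
        conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnCount);
        fs.delete(file, true);
        RCFile.Writer writer = new RCFile.Writer(fs, conf, file);
        BytesRefArrayWritable row = new BytesRefArrayWritable(columnCount);
        for (int c = 0; c < columnCount; c++) {
            byte[] cell = ("col-" + c).getBytes(StandardCharsets.UTF_8);
            row.set(c, new BytesRefWritable(cell, 0, cell.length));
        }
        writer.append(row);
        writer.close();

        // Reader side: iterate rows and read the per-column byte buffers.
        RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
        LongWritable rowId = new LongWritable();
        BytesRefArrayWritable readRow = new BytesRefArrayWritable();
        while (reader.next(rowId)) {
            reader.getCurrentRow(readRow);
            // readRow.get(c) holds the raw bytes of column c for the current row
        }
        reader.close();
    }
}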
Example 1: getSampleData
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
@Override
public SampleDataRecord getSampleData(Path path) throws IOException {
    SampleDataRecord sampleDataRecord = null;
    List<Object> sampleData = null;
    if (!fs.exists(path)) {
        LOG.error("File path: " + path.toUri().getPath() + " does not exist in HDFS");
    } else {
        try {
            RCFile.Reader reader = new RCFile.Reader(fs, path, fs.getConf());
            sampleData = getSampleData(reader);
            sampleDataRecord = new SampleDataRecord(path.toUri().getPath(), sampleData);
        } catch (Exception e) {
            LOG.error("path: {} content is not in RCFile format", path.toUri().getPath());
            LOG.info("exception:", e);
        }
    }
    return sampleDataRecord;
}
Author: thomas-young-2013, Project: wherehowsX, Lines of code: 19, Source file: RCFileAnalyzer.java
Example 2: createRCFile
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
private void createRCFile(final String fileName, final int numRecords,
        final int maxColumns) throws IOException {
    // Write the RC file
    SequenceFile.Metadata metadata = getMetadataForRCFile();
    Configuration conf = new Configuration();
    conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
    Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
    RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null,
            metadata, null);
    for (int row = 0; row < numRecords; row++) {
        BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
        dataWrite.resetValid(maxColumns);
        for (int column = 0; column < maxColumns; column++) {
            Text sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:" + column);
            ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
            sampleText.write(dataOutput);
            dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
        }
        rcFileWriter.append(dataWrite);
    }
    rcFileWriter.close();
}
Author: cloudera, Project: cdk, Lines of code: 23, Source file: ReadRCFileTest.java
Example 3: getSchema
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
@Override
public DatasetJsonRecord getSchema(Path path) throws IOException {
    DatasetJsonRecord record = null;
    if (!fs.exists(path)) {
        LOG.error("file path: {} does not exist in HDFS", path);
    } else {
        try {
            RCFile.Reader reader = new RCFile.Reader(fs, path, fs.getConf());
            Map<Text, Text> meta = reader.getMetadata().getMetadata();
            // rcfile column number
            int columnNumber = Integer.parseInt(meta.get(new Text(COLUMN_NUMBER_KEY)).toString());
            FileStatus status = fs.getFileStatus(path);
            String schemaString = getRCFileSchema(columnNumber);
            String storage = STORAGE_TYPE;
            String abstractPath = path.toUri().getPath();
            String codec = "rc.codec";
            record = new DatasetJsonRecord(schemaString, abstractPath, status.getModificationTime(),
                    status.getOwner(), status.getGroup(),
                    status.getPermission().toString(), codec, storage, "");
            LOG.info("rc file : {} schema is {}", path.toUri().getPath(), schemaString);
        } catch (Exception e) {
            LOG.error("path: {} content is not in RCFile format", path.toUri().getPath());
            LOG.info("exception:", e);
        }
    }
    return record;
}
Author: thomas-young-2013, Project: wherehowsX, Lines of code: 28, Source file: RCFileAnalyzer.java
Example 4: createRCFileWriter
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
protected RCFile.Writer createRCFileWriter(TaskAttemptContext job,
        Text columnMetadata) throws IOException {
    Configuration conf = job.getConfiguration();
    // override the compression codec if one is set
    String codecOverride = conf.get(COMPRESSION_CODEC_CONF);
    if (codecOverride != null) {
        conf.setBoolean("mapred.output.compress", true);
        conf.set("mapred.output.compression.codec", codecOverride);
    }
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
    }
    Metadata metadata = null;
    String ext = conf.get(EXTENSION_OVERRIDE_CONF, DEFAULT_EXTENSION);
    Path file = getDefaultWorkFile(job, ext.equalsIgnoreCase("none") ? null : ext);
    LOG.info("writing to rcfile " + file.toString());
    return new RCFile.Writer(file.getFileSystem(conf), conf, file, job, metadata, codec);
}
Author: sigmoidanalytics, Project: spork-streaming, Lines of code: 28, Source file: HiveRCOutputFormat.java
Example 5: writeRCFileTest
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;
    resetRandomGenerators();
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes, columnCount);
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();
    return rowsWritten;
}
Author: sigmoidanalytics, Project: spork-streaming, Lines of code: 29, Source file: TestHiveColumnarLoader.java
Example 6: writeRCFileTest
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    for (int i = 0; i < rowCount; i++) {
        bytes.resetValid(columnRandom.length);
        for (int j = 0; j < columnRandom.length; j++) {
            columnRandom[j] = "Sample value".getBytes();
            bytes.get(j).set(columnRandom[j], 0, columnRandom[j].length);
        }
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();
    return rowsWritten;
}
Author: sigmoidanalytics, Project: spork-streaming, Lines of code: 33, Source file: TestHiveColumnarStorage.java
Example 7: createRCFileWriter
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
protected RCFile.Writer createRCFileWriter(TaskAttemptContext job,
        Text columnMetadata) throws IOException {
    Configuration conf = job.getConfiguration();
    // override the compression codec if one is set
    String codecOverride = conf.get(COMPRESSION_CODEC_CONF);
    if (codecOverride != null) {
        conf.setBoolean(MRConfiguration.OUTPUT_COMPRESS, true);
        conf.set(MRConfiguration.OUTPUT_COMPRESSION_CODEC, codecOverride);
    }
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
    }
    Metadata metadata = null;
    String ext = conf.get(EXTENSION_OVERRIDE_CONF, DEFAULT_EXTENSION);
    Path file = getDefaultWorkFile(job, ext.equalsIgnoreCase("none") ? null : ext);
    LOG.info("writing to rcfile " + file.toString());
    return new RCFile.Writer(file.getFileSystem(conf), conf, file, job, metadata, codec);
}
Author: sigmoidanalytics, Project: spork, Lines of code: 28, Source file: HiveRCOutputFormat.java
Example 8: doProcess
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
@Override
protected boolean doProcess(Record record, InputStream in) throws IOException {
    Path attachmentPath = getAttachmentPath(record);
    SingleStreamFileSystem fs = new SingleStreamFileSystem(in, attachmentPath);
    RCFile.Reader reader = null;
    try {
        reader = new RCFile.Reader(fs, attachmentPath, conf);
        Record template = record.copy();
        removeAttachments(template);
        template.put(Fields.ATTACHMENT_MIME_TYPE, OUTPUT_MEDIA_TYPE);
        if (includeMetaData) {
            SequenceFile.Metadata metadata = reader.getMetadata();
            if (metadata != null) {
                template.put(RC_FILE_META_DATA, metadata);
            }
        }
        switch (readMode) {
        case row:
            return readRowWise(reader, template);
        case column:
            return readColumnWise(reader, template);
        default:
            throw new IllegalStateException();
        }
    } catch (IOException e) {
        throw new MorphlineRuntimeException("IOException while processing attachment "
                + attachmentPath.getName(), e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
Author: cloudera, Project: cdk, Lines of code: 35, Source file: ReadRCFileBuilder.java
Example 9: readRowWise
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
private boolean readRowWise(final RCFile.Reader reader, final Record record)
        throws IOException {
    LongWritable rowID = new LongWritable();
    while (true) {
        boolean next;
        try {
            next = reader.next(rowID);
        } catch (EOFException ex) {
            // We have hit EOF of the stream
            break;
        }
        if (!next) {
            break;
        }
        incrementNumRecords();
        Record outputRecord = record.copy();
        BytesRefArrayWritable rowBatchBytes = new BytesRefArrayWritable();
        rowBatchBytes.resetValid(columns.size());
        reader.getCurrentRow(rowBatchBytes);
        // Read all the configured columns and set them in the output record
        for (RCFileColumn rcColumn : columns) {
            BytesRefWritable columnBytes = rowBatchBytes.get(rcColumn.getInputField());
            outputRecord.put(rcColumn.getOutputField(), updateColumnValue(rcColumn, columnBytes));
        }
        // pass record to next command in chain:
        if (!getChild().process(outputRecord)) {
            return false;
        }
    }
    return true;
}
Author: cloudera, Project: cdk, Lines of code: 38, Source file: ReadRCFileBuilder.java
Example 10: readColumnWise
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
private boolean readColumnWise(RCFile.Reader reader, Record record) throws IOException {
    for (RCFileColumn rcColumn : columns) {
        reader.sync(0);
        reader.resetBuffer();
        while (true) {
            boolean next;
            try {
                next = reader.nextBlock();
            } catch (EOFException ex) {
                // We have hit EOF of the stream
                break;
            }
            if (!next) {
                break;
            }
            BytesRefArrayWritable rowBatchBytes = reader.getColumn(rcColumn.getInputField(), null);
            for (int rowIndex = 0; rowIndex < rowBatchBytes.size(); rowIndex++) {
                incrementNumRecords();
                Record outputRecord = record.copy();
                BytesRefWritable rowBytes = rowBatchBytes.get(rowIndex);
                outputRecord.put(rcColumn.getOutputField(), updateColumnValue(rcColumn, rowBytes));
                // pass record to next command in chain:
                if (!getChild().process(outputRecord)) {
                    return false;
                }
            }
        }
    }
    return true;
}
Author: cloudera, Project: cdk, Lines of code: 35, Source file: ReadRCFileBuilder.java
Example 11: writeRCFileTest
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file,
        int columnNum, CompressionCodec codec, int columnCount)
        throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;
    resetRandomGenerators();
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes, columnCount);
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();
    return rowsWritten;
}
Author: kaituo, Project: sedge, Lines of code: 30, Source file: TestHiveColumnarLoader.java
Example 12: getFormatMinSplitSize
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
/**
 * The input split size should never be smaller than
 * RCFile.SYNC_INTERVAL.
 */
@Override
protected long getFormatMinSplitSize() {
    return RCFile.SYNC_INTERVAL;
}
Author: sigmoidanalytics, Project: spork-streaming, Lines of code: 9, Source file: HiveRCInputFormat.java
Example 13: getMetadataForRCFile
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
private SequenceFile.Metadata getMetadataForRCFile() {
    return RCFile.createMetadata(new Text("metaField"), new Text("metaValue"));
}
Author: cloudera, Project: cdk, Lines of code: 4, Source file: ReadRCFileTest.java
Example 14: createRCFileWriter
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
/**
 * Create an RCFile output stream based on the job configuration. Uses the user-supplied
 * compression flag (rather than obtaining it from the job configuration).
 *
 * @param jc
 *          job configuration
 * @param fs
 *          file system to create the file in
 * @param file
 *          path to be created
 * @return output stream over the created rcfile
 */
public static RCFile.Writer createRCFileWriter(JobConf jc, FileSystem fs, Path file,
        boolean isCompressed, Progressable progressable) throws IOException {
    CompressionCodec codec = null;
    if (isCompressed) {
        Class<?> codecClass = FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtil.newInstance(codecClass, jc);
    }
    return new RCFile.Writer(fs, jc, file, progressable, codec);
}
Author: mini666, Project: hive-phoenix-handler, Lines of code: 22, Source file: Utilities.java
Example 15: getColumnNumber
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
/**
 * Returns the number of columns set in the conf for writers.
 *
 * @param conf
 * @return number of columns for RCFile's writer
 */
public static int getColumnNumber(Configuration conf) {
    return conf.getInt(RCFile.COLUMN_NUMBER_CONF_STR, 0);
}
Author: sigmoidanalytics, Project: spork-streaming, Lines of code: 10, Source file: HiveRCOutputFormat.java
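A brief usage sketch (with hypothetical values): the writer side records the column count under RCFile.COLUMN_NUMBER_CONF_STR, typically via Hive's RCFileOutputFormat.setColumnNumber helper, and getColumnNumber above simply reads that key back.
Configuration conf = new Configuration();
RCFileOutputFormat.setColumnNumber(conf, 8);                  // writer side: stores 8 under RCFile.COLUMN_NUMBER_CONF_STR
int columns = conf.getInt(RCFile.COLUMN_NUMBER_CONF_STR, 0);  // same lookup that getColumnNumber(conf) performs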
Example 16: writeTestData
import org.apache.hadoop.hive.ql.io.RCFile; // import the required package/class
@Override
public void writeTestData(File file, int recordCounts, int columnCount,
        String colSeparator) throws IOException {
    // write random test data
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    RCFileOutputFormat.setColumnNumber(conf, columnCount);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, new Path(
            file.getAbsolutePath()));
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnCount);
    for (int c = 0; c < columnCount; c++) {
        bytes.set(c, new BytesRefWritable());
    }
    try {
        for (int r = 0; r < recordCounts; r++) {
            // for each row, write n columns
            for (int c = 0; c < columnCount; c++) {
                byte[] stringbytes = String.valueOf(Math.random()).getBytes();
                bytes.get(c).set(stringbytes, 0, stringbytes.length);
            }
            writer.append(bytes);
        }
    } finally {
        writer.close();
    }
}
Author: sigmoidanalytics, Project: spork-streaming, Lines of code: 42, Source file: TestAllLoader.java
Note: The org.apache.hadoop.hive.ql.io.RCFile class examples in this article are collected from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets were selected from open-source projects contributed by their respective developers, and copyright remains with the original authors. Please follow each project's license when redistributing or using the code; do not republish without permission.