This article collects typical usage examples of the Java class org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable. If you are unsure what BytesRefArrayWritable is for or how to use it, the curated examples below should help.
BytesRefArrayWritable belongs to the org.apache.hadoop.hive.serde2.columnar package. Eighteen code examples of the class are shown below, ordered by popularity.
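Before the project examples, here is a minimal, self-contained sketch written for this article (not taken from any of the projects below) showing how a BytesRefArrayWritable row is typically populated column by column and read back; the class name, column values, and column count are illustrative assumptions.
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public class BytesRefArrayWritableSketch {
    public static void main(String[] args) throws IOException {
        String[] columns = {"1", "alice", "42"};            // illustrative column values
        BytesRefArrayWritable row = new BytesRefArrayWritable(columns.length);
        row.resetValid(columns.length);                     // mark the entries as valid before filling them
        for (int i = 0; i < columns.length; i++) {
            byte[] bytes = columns[i].getBytes(StandardCharsets.UTF_8);
            row.get(i).set(bytes, 0, bytes.length);         // reuse the pre-allocated BytesRefWritable
        }
        // Each entry exposes a backing buffer plus start/length offsets rather than a copy.
        for (int i = 0; i < row.size(); i++) {
            BytesRefWritable col = row.get(i);
            System.out.println("col" + i + " = "
                + new String(col.getData(), col.getStart(), col.getLength(), StandardCharsets.UTF_8));
        }
    }
}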
Example 1: getSampleData
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
private List<Object> getSampleData(RCFile.Reader reader) throws Exception {
    List<Object> sampleData = new ArrayList<Object>();
    LongWritable rowID = new LongWritable(0);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        BytesRefWritable brw = null;
        StringBuilder builder = new StringBuilder();
        builder.append("{");
        // Render every column but the last with a trailing comma (assumes at least one column per row).
        for (int i = 0; i < cols.size() - 1; i++) {
            brw = cols.get(i);
            builder.append("\"col" + i + "\":" + "\"" + Bytes.toString(brw.getData(), brw.getStart(),
                brw.getLength()) + "\",");
        }
        // The last column closes the JSON object.
        brw = cols.get(cols.size() - 1);
        builder.append("\"col" + (cols.size() - 1) + "\":" + "\"" + Bytes.toString(brw.getData(), brw.getStart(),
            brw.getLength()) + "\"}");
        sampleData.add(builder.toString());
    }
    return sampleData;
}
Author: thomas-young-2013, Project: wherehowsX, Lines: 22, Source: RCFileAnalyzer.java
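The method above expects an already-open RCFile.Reader. Below is a hedged sketch of how such a reader could be obtained; the helper method and the file path are hypothetical (they are not part of RCFileAnalyzer) and the helper would have to sit in the same class to call the private getSampleData.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFile;

private List<Object> sampleFromPath(String pathString) throws Exception {
    Configuration conf = new Configuration();
    Path rcFilePath = new Path(pathString);              // e.g. "/tmp/sample.rc" (hypothetical path)
    FileSystem fs = rcFilePath.getFileSystem(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, rcFilePath, conf);
    try {
        return getSampleData(reader);                    // getSampleData from Example 1
    } finally {
        reader.close();
    }
}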
Example 2: writeUsingRecordWriter_Array
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
@SuppressWarnings("unchecked")
public static String writeUsingRecordWriter_Array(List<String> lines, Configuration conf) throws IOException {
    final MonarchRecordWriter mrw = new MonarchRecordWriter(conf);
    final BytesRefArrayWritable bytesWritable = new BytesRefArrayWritable(10);
    // Converters and target types for the four expected CSV columns: long, string, int, string.
    final Function[] toObject = new Function[]{e -> Long.valueOf(e.toString()), e -> e,
        e -> Integer.valueOf(e.toString()), e -> e};
    final DataType[] objectTypes = new DataType[]{
        BasicTypes.LONG, BasicTypes.STRING, BasicTypes.INT, BasicTypes.STRING
    };
    byte[] bytes;
    String[] cols;
    for (final String line : lines) {
        int i = 0;
        cols = line.split(",");
        bytesWritable.resetValid(cols.length);
        for (final String col : cols) {
            bytes = objectTypes[i].serialize(toObject[i].apply(col));
            bytesWritable.get(i++).set(bytes, 0, bytes.length);
        }
        mrw.write(bytesWritable);
    }
    mrw.close(true);
    return mrw.getKeyPrefix();
}
Author: ampool, Project: monarch, Lines: 25, Source: TestHelper.java
Example 3: initialize
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
@SuppressWarnings("deprecation")
@Override
public void initialize(InputSplit split, TaskAttemptContext ctx)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = ctx.getConfiguration();
    splitPath = fileSplit.getPath();
    // Wrap the new-API split in an old-API (mapred) FileSplit for RCFileRecordReader.
    rcFileRecordReader = new RCFileRecordReader<LongWritable, BytesRefArrayWritable>(
        conf, new org.apache.hadoop.mapred.FileSplit(splitPath,
            fileSplit.getStart(), fileSplit.getLength(),
            new org.apache.hadoop.mapred.JobConf(conf)));
    key = rcFileRecordReader.createKey();
    value = rcFileRecordReader.createValue();
}
Author: sigmoidanalytics, Project: spork-streaming, Lines: 19, Source: HiveRCRecordReader.java
Example 4: getNext
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
@Override
public Tuple getNext() throws IOException {
    Tuple tuple = null;
    try {
        if (reader.nextKeyValue()) {
            BytesRefArrayWritable buff = reader.getCurrentValue();
            ColumnarStruct struct = readColumnarStruct(buff);
            tuple = readColumnarTuple(struct, reader.getSplitPath());
        }
    } catch (InterruptedException e) {
        throw new IOException(e.toString(), e);
    }
    return tuple;
}
Author: sigmoidanalytics, Project: spork-streaming, Lines: 20, Source: HiveColumnarLoader.java
Example 5: readRow
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
private ColumnarStruct readRow(File outputFile, Path outputPath, String schema) throws IOException,
        InterruptedException, SerDeException {
    FileSplit fileSplit = new FileSplit(outputPath, 0L, outputFile.length(), (String[]) null);
    Path splitPath = fileSplit.getPath();
    RCFileRecordReader<LongWritable, BytesRefArrayWritable> rcFileRecordReader =
        new RCFileRecordReader<LongWritable, BytesRefArrayWritable>(
            new Configuration(false), new org.apache.hadoop.mapred.FileSplit(splitPath,
                fileSplit.getStart(), fileSplit.getLength(),
                new org.apache.hadoop.mapred.JobConf(conf)));
    LongWritable key = rcFileRecordReader.createKey();
    BytesRefArrayWritable value = rcFileRecordReader.createValue();
    rcFileRecordReader.next(key, value);
    rcFileRecordReader.close();
    ColumnarStruct struct = readColumnarStruct(value, schema);
    return struct;
}
Author: sigmoidanalytics, Project: spork-streaming, Lines: 22, Source: TestHiveColumnarStorage.java
Example 6: readColumnarStruct
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff, String schema) throws SerDeException {
    Pattern pcols = Pattern.compile("[a-zA-Z_0-9]*[ ]");
    List<String> types = HiveRCSchemaUtil.parseSchemaTypes(schema);
    List<String> cols = HiveRCSchemaUtil.parseSchema(pcols, schema);
    List<FieldSchema> fieldSchemaList = new ArrayList<FieldSchema>(cols.size());
    for (int i = 0; i < cols.size(); i++) {
        fieldSchemaList.add(new FieldSchema(cols.get(i), HiveRCSchemaUtil
            .findPigDataType(types.get(i))));
    }
    // Tell the ColumnarSerDe about the column names and types before deserializing.
    Properties props = new Properties();
    props.setProperty(Constants.LIST_COLUMNS,
        HiveRCSchemaUtil.listToString(cols));
    props.setProperty(Constants.LIST_COLUMN_TYPES,
        HiveRCSchemaUtil.listToString(types));
    Configuration hiveConf = new HiveConf(conf, SessionState.class);
    ColumnarSerDe serde = new ColumnarSerDe();
    serde.initialize(hiveConf, props);
    return (ColumnarStruct) serde.deserialize(buff);
}
Author: sigmoidanalytics, Project: spork-streaming, Lines: 27, Source: TestHiveColumnarStorage.java
Example 7: createRCFile
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
private void createRCFile(final String fileName, final int numRecords,
        final int maxColumns) throws IOException {
    // Write the sequence file
    SequenceFile.Metadata metadata = getMetadataForRCFile();
    Configuration conf = new Configuration();
    conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
    Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
    RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null,
        metadata, null);
    for (int row = 0; row < numRecords; row++) {
        BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
        dataWrite.resetValid(maxColumns);
        for (int column = 0; column < maxColumns; column++) {
            Text sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:" + column);
            ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
            sampleText.write(dataOutput);
            dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
        }
        rcFileWriter.append(dataWrite);
    }
    rcFileWriter.close();
}
Author: cloudera, Project: cdk, Lines: 23, Source: ReadRCFileTest.java
Example 8: deserialize
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
@Override
public Object deserialize(Writable blob) throws SerDeException {
    if (!(blob instanceof BytesRefArrayWritable)) {
        throw new SerDeException(this.getClass().toString() + ": expects BytesRefArrayWritable!");
    } else {
        BytesRefArrayWritable cols = (BytesRefArrayWritable) blob;
        this.cachedLazyStruct.init(cols);
        this.lastOperationSerialize = false;
        this.lastOperationDeserialize = true;
        return this.cachedLazyStruct;
    }
}
Author: ampool, Project: monarch, Lines: 13, Source: MonarchSerDe.java
Example 9: readAndAssertOnEmptyCols
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
/**
 * Read using the record reader and assert that the columns not requested have 0 length.
 * <p>
 * @param conf the reader configuration -- must have the region name
 * @param split the input-split containing the records to be read
 * @param predicates the predicates to filter out unwanted results
 * @param readColIds the column ids to retrieve
 * @return total number of records read
 */
private long readAndAssertOnEmptyCols(final Configuration conf, final InputSplit split,
        final String readColIds, final Filter[] predicates) throws IOException {
    MonarchRecordReader mrr = new MonarchRecordReader(conf);
    FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    if (predicates != null) {
        for (int i = 0; i < predicates.length; i++) {
            filterList.addFilter(predicates[i]);
        }
        mrr.pushDownfilters = filterList;
    }
    // The requested column ids are taken from the configuration rather than the readColIds argument.
    List<Integer> readColIdList = ColumnProjectionUtils.getReadColumnIDs(conf);
    long size = 0;
    try {
        mrr.initialize(split, conf);
        Writable key = mrr.createKey();
        Writable value = mrr.createValue();
        while (mrr.next(key, value)) {
            BytesRefArrayWritable braw = (BytesRefArrayWritable) value;
            // Assert that skipped (not read) columns have 0 length.
            for (int i = 0; i < braw.size(); i++) {
                if (!readColIdList.isEmpty() && !readColIdList.contains(i)) {
                    assertEquals(0, braw.get(i).getLength());
                }
            }
            ++size;
        }
        mrr.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return size;
}
Author: ampool, Project: monarch, Lines: 47, Source: MonarchRecordReaderTest.java
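Example 9 reads the requested column ids back out of the Configuration via ColumnProjectionUtils.getReadColumnIDs(conf). The sketch below is a hypothetical helper (not part of MonarchRecordReaderTest) showing how such a projection is typically placed into the configuration beforehand; the column ids are illustrative, and depending on the Hive version the setter may be setReadColumnIDs rather than appendReadColumns.
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

private Configuration confWithProjection() {
    Configuration conf = new Configuration();
    // Request only columns 0 and 2; every other column should then come back with length 0,
    // which is exactly what readAndAssertOnEmptyCols() asserts above.
    ColumnProjectionUtils.appendReadColumns(conf, Arrays.asList(0, 2));
    return conf;
}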
Example 10: load
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
@Override
public void load(LazyFixedWidthBlock block)
{
    if (loaded) {
        return;
    }
    try {
        BytesRefArrayWritable columnBatch = batch.getColumn(fieldId);
        int positionInBatch = batch.getPositionInBatch();
        int batchSize = block.getPositionCount();
        boolean[] isNull = new boolean[batchSize];
        long[] vector = new long[batchSize];
        for (int i = 0; i < batchSize; i++) {
            BytesRefWritable writable = columnBatch.unCheckedGet(i + positionInBatch);
            byte[] bytes = writable.getData();
            int start = writable.getStart();
            int length = writable.getLength();
            if (length == 0 || isNull(bytes, start, length)) {
                isNull[i] = true;
            }
            else {
                vector[i] = parseLong(bytes, start, length);
            }
        }
        block.setNullVector(isNull);
        block.setRawSlice(wrappedLongArray(vector));
        loaded = true;
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
Author: y-lan, Project: presto, Lines: 39, Source: RcTextBlockLoader.java
Example 11: getColumn
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
public BytesRefArrayWritable getColumn(int fieldId)
        throws IOException
{
    checkState(pageId == expectedBatchId);
    if (!columnBatchLoaded[fieldId]) {
        columnBatch[fieldId] = recordReader.getColumn(hiveColumnIndexes[fieldId], columnBatch[fieldId]);
        columnBatchLoaded[fieldId] = true;
    }
    return columnBatch[fieldId];
}
Author: y-lan, Project: presto, Lines: 11, Source: RcFilePageSource.java
Example 12: load
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
@Override
public void load(LazyFixedWidthBlock block)
{
    if (loaded) {
        return;
    }
    try {
        BytesRefArrayWritable columnBatch = batch.getColumn(fieldId);
        int positionInBatch = batch.getPositionInBatch();
        int positionCount = block.getPositionCount();
        boolean[] isNull = new boolean[positionCount];
        boolean[] vector = new boolean[positionCount];
        for (int i = 0; i < positionCount; i++) {
            BytesRefWritable writable = columnBatch.unCheckedGet(i + positionInBatch);
            int length = writable.getLength();
            if (length != 0) {
                byte[] bytes = writable.getData();
                int start = writable.getStart();
                vector[i] = bytes[start] != 0;
            }
            else {
                isNull[i] = true;
            }
        }
        block.setNullVector(isNull);
        block.setRawSlice(wrappedBooleanArray(vector, 0, positionCount));
        loaded = true;
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
Author: y-lan, Project: presto, Lines: 39, Source: RcBinaryBlockLoader.java
Example 13: createRecordReader
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
/**
 * Initialises an instance of HiveRCRecordReader.
 */
@Override
public RecordReader<LongWritable, BytesRefArrayWritable> createRecordReader(
        InputSplit split, TaskAttemptContext ctx) throws IOException,
        InterruptedException {
    HiveRCRecordReader reader = new HiveRCRecordReader();
    return reader;
}
Author: sigmoidanalytics, Project: spork-streaming, Lines: 13, Source: HiveRCInputFormat.java
Example 14: writeRCFileTest
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;
    resetRandomGenerators();
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
    byte[][] columnRandom;
    // Pre-allocate one BytesRefWritable per column and reuse the array for every row.
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes, columnCount);
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();
    return rowsWritten;
}
Author: sigmoidanalytics, Project: spork-streaming, Lines: 29, Source: TestHiveColumnarLoader.java
Example 15: nextRandomRow
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
private static void nextRandomRow(byte[][] row, BytesRefArrayWritable bytes, int columnCount) {
    bytes.resetValid(row.length);
    for (int i = 0; i < row.length; i++) {
        row[i] = new byte[columnCount];
        for (int j = 0; j < columnCount; j++) {
            row[i][j] = getRandomChar(randomCharGenerator);
        }
        bytes.get(i).set(row[i], 0, columnCount);
    }
}
Author: sigmoidanalytics, Project: spork-streaming, Lines: 11, Source: TestHiveColumnarLoader.java
Example 16: writeRCFileTest
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    for (int i = 0; i < rowCount; i++) {
        bytes.resetValid(columnRandom.length);
        for (int j = 0; j < columnRandom.length; j++) {
            columnRandom[j] = "Sample value".getBytes();
            bytes.get(j).set(columnRandom[j], 0, columnRandom[j].length);
        }
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();
    return rowsWritten;
}
Author: sigmoidanalytics, Project: spork-streaming, Lines: 33, Source: TestHiveColumnarStorage.java
Example 17: readRowWise
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
private boolean readRowWise(final RCFile.Reader reader, final Record record)
        throws IOException {
    LongWritable rowID = new LongWritable();
    while (true) {
        boolean next;
        try {
            next = reader.next(rowID);
        } catch (EOFException ex) {
            // We have hit EOF of the stream
            break;
        }
        if (!next) {
            break;
        }
        incrementNumRecords();
        Record outputRecord = record.copy();
        BytesRefArrayWritable rowBatchBytes = new BytesRefArrayWritable();
        rowBatchBytes.resetValid(columns.size());
        reader.getCurrentRow(rowBatchBytes);
        // Read all the configured columns and set them on the output record.
        for (RCFileColumn rcColumn : columns) {
            BytesRefWritable columnBytes = rowBatchBytes.get(rcColumn.getInputField());
            outputRecord.put(rcColumn.getOutputField(), updateColumnValue(rcColumn, columnBytes));
        }
        // pass record to next command in chain:
        if (!getChild().process(outputRecord)) {
            return false;
        }
    }
    return true;
}
Author: cloudera, Project: cdk, Lines: 38, Source: ReadRCFileBuilder.java
Example 18: readColumnWise
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; // import the required package/class
private boolean readColumnWise(RCFile.Reader reader, Record record) throws IOException {
    for (RCFileColumn rcColumn : columns) {
        // Rewind the reader for each column before scanning the file block by block.
        reader.sync(0);
        reader.resetBuffer();
        while (true) {
            boolean next;
            try {
                next = reader.nextBlock();
            } catch (EOFException ex) {
                // We have hit EOF of the stream
                break;
            }
            if (!next) {
                break;
            }
            BytesRefArrayWritable rowBatchBytes = reader.getColumn(rcColumn.getInputField(), null);
            for (int rowIndex = 0; rowIndex < rowBatchBytes.size(); rowIndex++) {
                incrementNumRecords();
                Record outputRecord = record.copy();
                BytesRefWritable rowBytes = rowBatchBytes.get(rowIndex);
                outputRecord.put(rcColumn.getOutputField(), updateColumnValue(rcColumn, rowBytes));
                // pass record to next command in chain:
                if (!getChild().process(outputRecord)) {
                    return false;
                }
            }
        }
    }
    return true;
}
Author: cloudera, Project: cdk, Lines: 35, Source: ReadRCFileBuilder.java
Note: The org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable examples in this article were collected from source-code and documentation platforms such as GitHub and MSDocs, and the snippets were selected from open-source projects contributed by their respective authors. Copyright of the source code remains with the original authors; consult each project's License before distributing or using it. Do not reproduce without permission.