This article compiles typical usage examples of the Java class org.apache.hadoop.hive.serde2.columnar.BytesRefWritable. If you are wondering what BytesRefWritable is, what it is used for, and how to use it, the curated class examples below should help.
BytesRefWritable belongs to the org.apache.hadoop.hive.serde2.columnar package. Seventeen code examples of the class are collected below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
Example 1: getSampleData

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private List<Object> getSampleData(RCFile.Reader reader) throws Exception {
    List<Object> sampleData = new ArrayList<Object>();
    LongWritable rowID = new LongWritable(0);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();

    while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        BytesRefWritable brw = null;
        StringBuilder builder = new StringBuilder();
        builder.append("{");
        for (int i = 0; i < cols.size() - 1; i++) {
            brw = cols.get(i);
            builder.append("\"col" + i + "\":\""
                    + Bytes.toString(brw.getData(), brw.getStart(), brw.getLength()) + "\",");
        }
        brw = cols.get(cols.size() - 1);
        builder.append("\"col" + (cols.size() - 1) + "\":\""
                + Bytes.toString(brw.getData(), brw.getStart(), brw.getLength()) + "\"}");
        sampleData.add(builder.toString());
    }
    return sampleData;
}

Developer: thomas-young-2013 | Project: wherehowsX | Lines: 22 | Source file: RCFileAnalyzer.java
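Calling this helper requires an open RCFile.Reader. A minimal usage sketch from the enclosing class (the file path below is hypothetical and error handling is elided):

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path path = new Path("/tmp/sample.rc");                 // hypothetical path
RCFile.Reader reader = new RCFile.Reader(fs, path, conf);
try {
    // getSampleData is the helper shown above; each element is one row rendered as JSON-like text
    List<Object> rows = getSampleData(reader);
    for (Object row : rows) {
        System.out.println(row);
    }
} finally {
    reader.close();
}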
Example 2: createRCFile

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void createRCFile(final String fileName, final int numRecords,
        final int maxColumns) throws IOException {
    // Write the RC file with custom metadata
    SequenceFile.Metadata metadata = getMetadataForRCFile();

    Configuration conf = new Configuration();
    conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
    Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
    RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null, metadata, null);

    for (int row = 0; row < numRecords; row++) {
        BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
        dataWrite.resetValid(maxColumns);
        for (int column = 0; column < maxColumns; column++) {
            Text sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:" + column);
            ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
            sampleText.write(dataOutput);
            dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
        }
        rcFileWriter.append(dataWrite);
    }
    rcFileWriter.close();
}

Developer: cloudera | Project: cdk | Lines: 23 | Source file: ReadRCFileTest.java
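The test above calls a getMetadataForRCFile() helper that is not included in the snippet. A minimal sketch of what such a helper might return (the key/value pair is purely illustrative, not the project's actual metadata):

private SequenceFile.Metadata getMetadataForRCFile() {
    SequenceFile.Metadata metadata = new SequenceFile.Metadata();
    // hypothetical entry; a real test would add whatever metadata it later verifies
    metadata.set(new Text("testkey"), new Text("testvalue"));
    return metadata;
}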
Example 3: parseBooleanColumn

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void parseBooleanColumn(int column)
{
    // don't include the column number in the message because it causes boxing, which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema;
        // this happens when additional columns are added to the Hive table after a partition has been created
        nulls[column] = true;
    }
    else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseBooleanColumn(column, bytes, start, length);
    }
}

Developer: y-lan | Project: presto | Lines: 30 | Source file: ColumnarTextHiveRecordCursor.java
Example 4: parseLongColumn

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void parseLongColumn(int column)
{
    // don't include the column number in the message because it causes boxing, which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema;
        // this happens when additional columns are added to the Hive table after a partition has been created
        nulls[column] = true;
    }
    else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseLongColumn(column, bytes, start, length);
    }
}

Developer: y-lan | Project: presto | Lines: 30 | Source file: ColumnarTextHiveRecordCursor.java
Example 5: parseDoubleColumn

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void parseDoubleColumn(int column)
{
    // don't include the column number in the message because it causes boxing, which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema;
        // this happens when additional columns are added to the Hive table after a partition has been created
        nulls[column] = true;
    }
    else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseDoubleColumn(column, bytes, start, length);
    }
}

Developer: y-lan | Project: presto | Lines: 30 | Source file: ColumnarTextHiveRecordCursor.java
Example 6: parseStringColumn

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void parseStringColumn(int column)
{
    // don't include the column number in the message because it causes boxing, which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema;
        // this happens when additional columns are added to the Hive table after a partition has been created
        nulls[column] = true;
    }
    else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseStringColumn(column, bytes, start, length);
    }
}

Developer: y-lan | Project: presto | Lines: 30 | Source file: ColumnarTextHiveRecordCursor.java
Example 7: parseObjectColumn

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void parseObjectColumn(int column)
{
    // don't include the column number in the message because it causes boxing, which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema;
        // this happens when additional columns are added to the Hive table after a partition has been created
        nulls[column] = true;
    }
    else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseObjectColumn(column, bytes, start, length);
    }
}

Developer: y-lan | Project: presto | Lines: 30 | Source file: ColumnarTextHiveRecordCursor.java
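Examples 3 through 7 are identical except for the final parseXxxColumn(column, bytes, start, length) call. One way the shared extraction logic could be factored out; this is an illustration only, not Presto's actual code:

private interface ColumnParser {
    void parse(int column, byte[] bytes, int start, int length);
}

private void parseColumn(int column, ColumnParser parser)
{
    checkArgument(!isPartitionColumn[column], "Column is a partition key");
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        nulls[column] = true;
        return;
    }
    BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
    try {
        // getData() may throw IOException; start and length never do
        parser.parse(column, fieldData.getData(), fieldData.getStart(), fieldData.getLength());
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
}

Each type-specific method would then reduce to a one-liner such as parseColumn(column, this::parseBooleanColumn).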
Example 8: load

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

@Override
public void load(LazyFixedWidthBlock block)
{
    if (loaded) {
        return;
    }
    try {
        BytesRefArrayWritable columnBatch = batch.getColumn(fieldId);
        int positionInBatch = batch.getPositionInBatch();

        int batchSize = block.getPositionCount();
        boolean[] isNull = new boolean[batchSize];
        long[] vector = new long[batchSize];

        for (int i = 0; i < batchSize; i++) {
            BytesRefWritable writable = columnBatch.unCheckedGet(i + positionInBatch);
            byte[] bytes = writable.getData();
            int start = writable.getStart();
            int length = writable.getLength();
            if (length == 0 || isNull(bytes, start, length)) {
                isNull[i] = true;
            }
            else {
                vector[i] = parseLong(bytes, start, length);
            }
        }

        block.setNullVector(isNull);
        block.setRawSlice(wrappedLongArray(vector));

        loaded = true;
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
}

Developer: y-lan | Project: presto | Lines: 39 | Source file: RcTextBlockLoader.java
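The loader above relies on isNull(...) and parseLong(...) helpers that are not shown. A sketch of the usual Hive text-format conventions such helpers implement (an assumption, not the project's exact code):

private static boolean isNull(byte[] bytes, int start, int length) {
    // Hive's text format encodes SQL NULL as the two-character sequence "\N"
    return length == 2 && bytes[start] == '\\' && bytes[start + 1] == 'N';
}

private static long parseLong(byte[] bytes, int start, int length) {
    // parse the ASCII digits in place; a production implementation would avoid the String allocation
    return Long.parseLong(new String(bytes, start, length, java.nio.charset.StandardCharsets.US_ASCII));
}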
Example 9: load

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

@Override
public void load(LazyFixedWidthBlock block)
{
    if (loaded) {
        return;
    }
    try {
        BytesRefArrayWritable columnBatch = batch.getColumn(fieldId);
        int positionInBatch = batch.getPositionInBatch();

        int positionCount = block.getPositionCount();
        boolean[] isNull = new boolean[positionCount];
        boolean[] vector = new boolean[positionCount];

        for (int i = 0; i < positionCount; i++) {
            BytesRefWritable writable = columnBatch.unCheckedGet(i + positionInBatch);
            int length = writable.getLength();
            if (length != 0) {
                byte[] bytes = writable.getData();
                int start = writable.getStart();
                vector[i] = bytes[start] != 0;
            }
            else {
                isNull[i] = true;
            }
        }

        block.setNullVector(isNull);
        block.setRawSlice(wrappedBooleanArray(vector, 0, positionCount));

        loaded = true;
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
}

Developer: y-lan | Project: presto | Lines: 39 | Source file: RcBinaryBlockLoader.java
Example 10: writeRCFileTest

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);

    int rowsWritten = 0;

    resetRandomGenerators();

    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }

    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes, columnCount);
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();

    return rowsWritten;
}

Developer: sigmoidanalytics | Project: spork-streaming | Lines: 29 | Source file: TestHiveColumnarLoader.java
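Examples 10 and 15 depend on resetRandomGenerators() and nextRandomRow(...), which are not shown here. A sketch of what nextRandomRow might do, modeled on the inline loop in Example 11 (a hypothetical helper, not the original test code):

private static void nextRandomRow(byte[][] columnRandom, BytesRefArrayWritable bytes, int columnCount) {
    bytes.resetValid(columnRandom.length);
    for (int j = 0; j < columnRandom.length; j++) {
        // fill each column with a random-looking value, much like Example 11 does with a fixed string
        columnRandom[j] = ("col-" + j + "-" + java.util.concurrent.ThreadLocalRandom.current().nextInt(columnCount)).getBytes();
        bytes.get(j).set(columnRandom[j], 0, columnRandom[j].length);
    }
}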
Example 11: writeRCFileTest

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);

    int rowsWritten = 0;

    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }

    for (int i = 0; i < rowCount; i++) {
        bytes.resetValid(columnRandom.length);
        for (int j = 0; j < columnRandom.length; j++) {
            columnRandom[j] = "Sample value".getBytes();
            bytes.get(j).set(columnRandom[j], 0, columnRandom[j].length);
        }
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();

    return rowsWritten;
}

Developer: sigmoidanalytics | Project: spork-streaming | Lines: 33 | Source file: TestHiveColumnarStorage.java
Example 12: readRowWise

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private boolean readRowWise(final RCFile.Reader reader, final Record record)
        throws IOException {
    LongWritable rowID = new LongWritable();

    while (true) {
        boolean next;
        try {
            next = reader.next(rowID);
        } catch (EOFException ex) {
            // We have hit EOF of the stream
            break;
        }

        if (!next) {
            break;
        }

        incrementNumRecords();
        Record outputRecord = record.copy();
        BytesRefArrayWritable rowBatchBytes = new BytesRefArrayWritable();
        rowBatchBytes.resetValid(columns.size());
        reader.getCurrentRow(rowBatchBytes);

        // Read all the configured columns and set them in the output record
        for (RCFileColumn rcColumn : columns) {
            BytesRefWritable columnBytes = rowBatchBytes.get(rcColumn.getInputField());
            outputRecord.put(rcColumn.getOutputField(), updateColumnValue(rcColumn, columnBytes));
        }

        // pass record to next command in chain:
        if (!getChild().process(outputRecord)) {
            return false;
        }
    }
    return true;
}

Developer: cloudera | Project: cdk | Lines: 38 | Source file: ReadRCFileBuilder.java
Example 13: readColumnWise

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private boolean readColumnWise(RCFile.Reader reader, Record record) throws IOException {
    for (RCFileColumn rcColumn : columns) {
        reader.sync(0);
        reader.resetBuffer();

        while (true) {
            boolean next;
            try {
                next = reader.nextBlock();
            } catch (EOFException ex) {
                // We have hit EOF of the stream
                break;
            }

            if (!next) {
                break;
            }

            BytesRefArrayWritable rowBatchBytes = reader.getColumn(rcColumn.getInputField(), null);
            for (int rowIndex = 0; rowIndex < rowBatchBytes.size(); rowIndex++) {
                incrementNumRecords();
                Record outputRecord = record.copy();
                BytesRefWritable rowBytes = rowBatchBytes.get(rowIndex);
                outputRecord.put(rcColumn.getOutputField(), updateColumnValue(rcColumn, rowBytes));

                // pass record to next command in chain:
                if (!getChild().process(outputRecord)) {
                    return false;
                }
            }
        }
    }
    return true;
}

Developer: cloudera | Project: cdk | Lines: 35 | Source file: ReadRCFileBuilder.java
Example 14: updateColumnValue

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private Writable updateColumnValue(RCFileColumn column, BytesRefWritable bytesRef) throws IOException {
    Writable newColumnValue = column.newWritable();
    // Small optimization to bypass the DataInput read if the column writable is BytesRefWritable
    if (newColumnValue.getClass() == BytesRefWritable.class) {
        newColumnValue = bytesRef;
    } else {
        byte[] currentRowBytes = Arrays.copyOfRange(bytesRef.getData(),
                bytesRef.getStart(), bytesRef.getStart() + bytesRef.getLength());
        DataInput dataInput = ByteStreams.newDataInput(currentRowBytes);
        newColumnValue.readFields(dataInput);
    }
    return newColumnValue;
}

Developer: cloudera | Project: cdk | Lines: 15 | Source file: ReadRCFileBuilder.java
Example 15: writeRCFileTest

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private static int writeRCFileTest(FileSystem fs, int rowCount, Path file,
        int columnNum, CompressionCodec codec, int columnCount)
        throws IOException {
    fs.delete(file, true);

    int rowsWritten = 0;

    resetRandomGenerators();

    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }

    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes, columnCount);
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();

    return rowsWritten;
}

Developer: kaituo | Project: sedge | Lines: 30 | Source file: TestHiveColumnarLoader.java
Example 16: initialize

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

@Override
@SuppressWarnings("unchecked")
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String[] cols = tbl.getProperty("columns").split(",");
    String types = tbl.getProperty("columns.types");
    if (types == null) {
        types = Collections.nCopies(cols.length, "string").stream().collect(Collectors.joining(","));
    }
    this.columnList = Arrays.asList(cols);
    this.typeInfoList = TypeInfoUtils.getTypeInfosFromTypeString(types);

    /** initialize storage for fields **/
    int size = columnList.size();
    field = new BytesRefWritable[size];
    for (int i = 0; i < size; i++) {
        field[i] = new BytesRefWritable();
        serializeCache.set(i, field[i]);
    }
    serializedSize = 0;

    /** the columns to skip **/
    List<Integer> notSkipIDs = new ArrayList<>();
    if (conf != null && !ColumnProjectionUtils.isReadAllColumns(conf)) {
        notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(conf);
    } else {
        for (int i = 0; i < typeInfoList.size(); ++i) {
            notSkipIDs.add(i);
        }
    }

    /**
     * Create the object inspector for the row. Use native Java object inspectors for
     * the objects whose deserialization is done by us and not by Hive.
     * Also cache the Monarch object types for all rows (serialize).
     */
    List<ObjectInspector> oiList = new ArrayList<>(columnList.size());
    this.objectTypeList = new ArrayList<>(columnList.size());
    for (final TypeInfo ti : typeInfoList) {
        DataType type = null;
        try {
            type = MonarchPredicateHandler.getMonarchFieldType(ti.getTypeName());
        } catch (Exception e) {
            // ignore: the type is simply not a native Monarch type
        }
        if (type != null) {
            oiList.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(ti));
        } else {
            oiList.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(ti));
        }
        this.objectTypeList.add(type);
    }
    this.rowOI = ObjectInspectorFactory.getColumnarStructObjectInspector(columnList, oiList);

    /** Initialize the lazy structure for on-demand deserialization **/
    this.cachedLazyStruct = new MonarchColumnarStruct(rowOI, notSkipIDs);
}

Developer: ampool | Project: monarch | Lines: 58 | Source file: MonarchSerDe.java
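A brief usage sketch of the SerDe above, showing the two table properties it reads; the column names and types are illustrative only, and the call must be made where SerDeException is handled or declared:

Properties tbl = new Properties();
tbl.setProperty("columns", "id,name,price");              // hypothetical column list
tbl.setProperty("columns.types", "bigint,string,double"); // optional; defaults to all strings when absent
MonarchSerDe serDe = new MonarchSerDe();
serDe.initialize(new Configuration(), tbl);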
Example 17: writeTestData

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

@Override
public void writeTestData(File file, int recordCounts, int columnCount,
        String colSeparator) throws IOException {
    // write random test data
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    RCFileOutputFormat.setColumnNumber(conf, columnCount);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, new Path(file.getAbsolutePath()));

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnCount);
    for (int c = 0; c < columnCount; c++) {
        bytes.set(c, new BytesRefWritable());
    }

    try {
        for (int r = 0; r < recordCounts; r++) {
            // for each row, write n columns
            for (int c = 0; c < columnCount; c++) {
                byte[] stringbytes = String.valueOf(Math.random()).getBytes();
                bytes.get(c).set(stringbytes, 0, stringbytes.length);
            }
            writer.append(bytes);
        }
    } finally {
        writer.close();
    }
}

Developer: sigmoidanalytics | Project: spork-streaming | Lines: 42 | Source file: TestAllLoader.java
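A hypothetical invocation of the helper above, called on the enclosing test-loader instance; the path and sizes are placeholders, and note that colSeparator is accepted for interface compatibility but unused by this RCFile writer:

writeTestData(new File("/tmp/allloader-test.rc"), 1000, 5, ",");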
Note: the org.apache.hadoop.hive.serde2.columnar.BytesRefWritable examples in this article are collected from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are taken from open-source projects contributed by various developers; copyright remains with the original authors, and any distribution or use should follow the corresponding project's License. Do not reproduce without permission.