This article collects typical usage examples of the Java class org.apache.hive.hcatalog.mapreduce.HCatInputFormat. If you are unsure what HCatInputFormat is for, how to use it, or what working code looks like, the curated examples below should help.
The HCatInputFormat class belongs to the org.apache.hive.hcatalog.mapreduce package. Thirteen code examples of the class are shown below, sorted by popularity by default.
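Before the individual examples, here is a minimal sketch of the pattern most of them share: bind a job to a Hive table with HCatInputFormat.setInput(), register HCatInputFormat as the input format, and read the table schema back out of the job configuration. The database name, table name, and the assumption that a Hive metastore is reachable (hive-site.xml on the classpath) are placeholders for illustration, not values taken from any example below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class HCatInputFormatSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "read from HCatalog");

        // Point the job at a Hive database/table (names here are placeholders);
        // an optional partition filter string can be passed as a fourth argument.
        HCatInputFormat.setInput(job, "default", "my_table");
        job.setInputFormatClass(HCatInputFormat.class);

        // The table schema can be recovered from the configuration later,
        // e.g. inside a Mapper's setup() method, as several examples below do.
        HCatSchema schema = HCatInputFormat.getTableSchema(job.getConfiguration());
        System.out.println("Columns: " + schema.getFieldNames());
    }
}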
Example 1: setup
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    Configuration conf = context.getConfiguration();
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);

    IIManager mgr = IIManager.getInstance(config);
    IIInstance ii = mgr.getII(conf.get(BatchConstants.CFG_II_NAME));
    IISegment seg = ii.getSegment(conf.get(BatchConstants.CFG_II_SEGMENT_NAME), SegmentStatusEnum.NEW);
    this.info = new TableRecordInfo(seg);
    this.rec = this.info.createTableRecord();

    outputKey = new LongWritable();
    outputValue = new ImmutableBytesWritable(rec.getBytes());

    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
    fields = schema.getFields();
}
Developer: KylinOLAP, Project: Kylin, Lines of code: 21, Source file: InvertedIndexMapper.java
Example 2: readHCatRecords
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

/**
 * Run a local MapReduce job that reads records from an HCatalog table.
 * @param dbName the database name
 * @param tableName the table name
 * @param filter the partition filter
 * @return the records read by the job
 * @throws Exception if the job fails
 */
public List<HCatRecord> readHCatRecords(String dbName,
        String tableName, String filter) throws Exception {
    HCatReaderMapper.setReadRecordCount(0);
    recsRead.clear();

    // Configuration conf = new Configuration();
    Job job = new Job(conf, "HCatalog reader job");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatReaderMapper.class);
    job.getConfiguration()
        .setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);

    // input/output settings
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    HCatInputFormat.setInput(job, dbName, tableName).setFilter(filter);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    Path path = new Path(fs.getWorkingDirectory(),
        "mapreduce/HCatTableIndexOutput");
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
    FileOutputFormat.setOutputPath(job, path);

    job.waitForCompletion(true);
    LOG.info("Read " + HCatReaderMapper.readRecordCount + " records");
    return recsRead;
}
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 44, Source file: HCatalogTestUtils.java
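A hypothetical call site for readHCatRecords might look like the following; the reference testUtils, the database/table names, and the partition filter are placeholder assumptions for illustration, not values taken from the original project.

import java.util.List;

import org.apache.hive.hcatalog.data.HCatRecord;

// Sketch only: assumes a reference to the surrounding utility class and an existing partitioned table.
public void printPayments(HCatalogTestUtils testUtils) throws Exception {
    List<HCatRecord> records = testUtils.readHCatRecords("default", "payments", "ds='2024-01-01'");
    for (HCatRecord record : records) {
        // getAll() returns the record's column values as a List<Object>
        System.out.println(record.getAll());
    }
}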
Example 3: run
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: hiveload -D" + RDF_MIME_TYPE_PROPERTY + "='application/ld+json' [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + HIVE_DATA_COLUMN_INDEX_PROPERTY + "=3] [-D" + BASE_URI_PROPERTY + "='http://my_base_uri/'] [-D" + HalyardBulkLoad.SPLIT_BITS_PROPERTY + "=8] [-D" + HalyardBulkLoad.DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + HalyardBulkLoad.OVERRIDE_CONTEXT_PROPERTY + "=true] <hive_table_name> <output_path> <hbase_table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(),
        NTriplesUtil.class,
        Rio.class,
        AbstractRDFHandler.class,
        RDFFormat.class,
        RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    Job job = Job.getInstance(getConf(), "HalyardHiveLoad -> " + args[1] + " -> " + args[2]);
    int i = args[0].indexOf('.');
    HCatInputFormat.setInput(job, i > 0 ? args[0].substring(0, i) : null, args[0].substring(i + 1));
    job.setJarByClass(HalyardHiveLoad.class);
    job.setMapperClass(HiveMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true, getConf().getInt(HalyardBulkLoad.SPLIT_BITS_PROPERTY, 3))) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}
Developer: Merck, Project: Halyard, Lines of code: 40, Source file: HalyardHiveLoad.java
Example 4: configureJob
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

@Override
public void configureJob(Job job) {
    try {
        job.getConfiguration().addResource("hive-site.xml");

        HCatInputFormat.setInput(job, dbName, tableName);
        job.setInputFormatClass(HCatInputFormat.class);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Developer: apache, Project: kylin, Lines of code: 12, Source file: HiveMRInput.java
Example 5: setupMapper
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

private void setupMapper(String intermediateTable) throws IOException {
    // FileInputFormat.setInputPaths(job, input);
    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
    HCatInputFormat.setInput(job, dbTableNames[0],
            dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapperClass(FactDistinctColumnsMapper.class);
    job.setCombinerClass(FactDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(Text.class);
}
Developer: KylinOLAP, Project: Kylin, Lines of code: 14, Source file: FactDistinctColumnsJob.java
Example 6: setup
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    Configuration conf = context.getConfiguration();
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
    cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
    List<TblColRef> columns = baseCuboid.getColumns();

    ArrayList<Integer> factDictCols = new ArrayList<Integer>();
    RowKeyDesc rowkey = cubeDesc.getRowkey();
    DictionaryManager dictMgr = DictionaryManager.getInstance(config);
    for (int i = 0; i < columns.size(); i++) {
        TblColRef col = columns.get(i);
        if (rowkey.isUseDictionary(col) == false)
            continue;

        String scanTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0];
        if (cubeDesc.getModel().isFactTable(scanTable)) {
            factDictCols.add(i);
        }
    }
    this.factDictCols = new int[factDictCols.size()];
    for (int i = 0; i < factDictCols.size(); i++)
        this.factDictCols[i] = factDictCols.get(i);

    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
}
Developer: KylinOLAP, Project: Kylin, Lines of code: 36, Source file: FactDistinctColumnsMapper.java
Example 7: setupMapper
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

private void setupMapper(String intermediateTable) throws IOException {
    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
    HCatInputFormat.setInput(job, dbTableNames[0],
            dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapperClass(InvertedIndexMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(ImmutableBytesWritable.class);
    job.setPartitionerClass(InvertedIndexPartitioner.class);
}
Developer: KylinOLAP, Project: Kylin, Lines of code: 14, Source file: InvertedIndexJob.java
Example 8: createHiveTableRDD
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

/**
 * Creates the hive table rdd.
 *
 * @param javaSparkContext the java spark context
 * @param conf the conf
 * @param db the db
 * @param table the table
 * @param partitionFilter the partition filter
 * @return the java pair rdd
 * @throws IOException Signals that an I/O exception has occurred.
 */
public static JavaPairRDD<WritableComparable, HCatRecord> createHiveTableRDD(JavaSparkContext javaSparkContext,
        Configuration conf, String db, String table, String partitionFilter) throws IOException {
    HCatInputFormat.setInput(conf, db, table, partitionFilter);

    JavaPairRDD<WritableComparable, HCatRecord> rdd = javaSparkContext.newAPIHadoopRDD(conf,
        HCatInputFormat.class,    // input format class
        WritableComparable.class, // input key class
        HCatRecord.class);        // input value class
    return rdd;
}
Developer: apache, Project: lens, Lines of code: 23, Source file: HiveTableRDD.java
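For context, a Spark driver might invoke this helper roughly as sketched below. The Spark master, application name, database/table names, and partition filter are assumptions for illustration; the class name HiveTableRDD is taken from the source-file attribution above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class HiveTableRDDDriver {
    public static void main(String[] args) throws Exception {
        // Local Spark context for illustration only.
        SparkConf sparkConf = new SparkConf().setAppName("hive-table-rdd-example").setMaster("local[2]");
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
        Configuration conf = new Configuration();

        // Database, table, and partition filter are placeholders.
        JavaPairRDD<WritableComparable, HCatRecord> rdd =
                HiveTableRDD.createHiveTableRDD(jsc, conf, "default", "clicks", "dt='2024-01-01'");

        System.out.println("Rows read: " + rdd.count());
        jsc.stop();
    }
}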
Example 9: run
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    final String jobName = args[0];
    final String dbName = args[1];
    final String inTableName = args[2];
    final String outPath = args[3];

    System.out.println("jobname: " + jobName);
    System.out.println("dbName: " + dbName);
    System.out.println("inTableName: " + inTableName);
    System.out.println("outPath: " + outPath);

    Job job = Job.getInstance(conf, jobName);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setJarByClass(CMSStatePaymentsTool.class);
    job.setMapperClass(CMSStatePaymentsMapper.class);
    job.setReducerClass(CMSStatePaymentsReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    HCatInputFormat.setInput(job, dbName, inTableName);
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    return (job.waitForCompletion(true) ? 0 : 1);
}
Developer: mmiklavc, Project: hadoop-testing, Lines of code: 29, Source file: CMSStatePaymentsTool.java
Example 10: setup
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());
    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
    columnSize = schema.getFields().size();
}
Developer: KylinOLAP, Project: Kylin, Lines of code: 7, Source file: ColumnCardinalityMapper.java
Example 11: run
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_TABLE);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        // start job
        String jobName = JOB_TITLE + getOptionsAsString();
        System.out.println("Starting: " + jobName);
        Configuration conf = getConf();
        job = Job.getInstance(conf, jobName);

        setJobClasspath(job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);
        job.getConfiguration().set("dfs.block.size", "67108864");

        // Mapper
        String table = getOptionValue(OPTION_TABLE);
        String[] dbTableNames = HadoopUtil.parseHiveTableName(table);
        HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);
        job.setInputFormatClass(HCatInputFormat.class);
        job.setMapperClass(ColumnCardinalityMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(BytesWritable.class);

        // Reducer - only one
        job.setReducerClass(ColumnCardinalityReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        System.out.println("Going to submit HiveColumnCardinalityJob for table '" + table + "'");
        int result = waitForCompletion(job);
        return result;
    } catch (Exception e) {
        printUsage(options);
        throw e;
    }
}
Developer: KylinOLAP, Project: Kylin, Lines of code: 53, Source file: HiveColumnCardinalityJob.java
Example 12: setupMapper
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

private void setupMapper() throws IOException {
    String tableName = job.getConfiguration().get(BatchConstants.TABLE_NAME);
    String[] dbTableNames = HadoopUtil.parseHiveTableName(tableName);

    log.info("setting hcat input format, db name {}, table name {}", dbTableNames[0], dbTableNames[1]);

    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapperClass(IIDistinctColumnsMapper.class);
    job.setCombinerClass(IIDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(Text.class);
}
Developer: KylinOLAP, Project: Kylin, Lines of code: 17, Source file: IIDistinctColumnsJob.java
Example 13: validate
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; // import the required package/class

/**
 * Validate.
 *
 * @return true, if successful
 */
boolean validate() {
    List<HCatFieldSchema> columns;
    try {
        HCatInputFormat.setInput(conf, database == null ? "default" : database, table, partitionFilter);
        HCatSchema tableSchema = HCatInputFormat.getTableSchema(conf);
        columns = tableSchema.getFields();
    } catch (IOException exc) {
        log.error("Error getting table info {}", toString(), exc);
        return false;
    }
    log.info("{} columns {}", table, columns.toString());

    boolean valid = false;
    if (columns != null && !columns.isEmpty()) {
        // Check labeled column
        List<String> columnNames = new ArrayList<String>();
        for (HCatFieldSchema col : columns) {
            columnNames.add(col.getName());
        }

        // Need at least one feature column and one label column
        valid = columnNames.contains(labelColumn) && columnNames.size() > 1;

        if (valid) {
            labelPos = columnNames.indexOf(labelColumn);

            // Check feature columns
            if (featureColumns == null || featureColumns.isEmpty()) {
                // Feature columns are not provided, so all columns except the label column are feature columns
                featurePositions = new int[columnNames.size() - 1];
                int p = 0;
                for (int i = 0; i < columnNames.size(); i++) {
                    if (i == labelPos) {
                        continue;
                    }
                    featurePositions[p++] = i;
                }

                columnNames.remove(labelPos);
                featureColumns = columnNames;
            } else {
                // Feature columns were provided, verify all feature columns are present in the table
                valid = columnNames.containsAll(featureColumns);
                if (valid) {
                    // Get feature positions
                    featurePositions = new int[featureColumns.size()];
                    for (int i = 0; i < featureColumns.size(); i++) {
                        featurePositions[i] = columnNames.indexOf(featureColumns.get(i));
                    }
                }
            }
            numFeatures = featureColumns.size();
        }
    }
    return valid;
}
Developer: apache, Project: lens, Lines of code: 64, Source file: TableTrainingSpec.java
Note: the org.apache.hive.hcatalog.mapreduce.HCatInputFormat examples in this article were collected from source-code and documentation hosting platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their authors; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not republish without permission.