本文整理汇总了Java中org.apache.crunch.Emitter类的典型用法代码示例。如果您正苦于以下问题：Java Emitter类的具体用法？Java Emitter怎么用？Java Emitter使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
Emitter类属于org.apache.crunch包,在下文中一共展示了Emitter类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
/**
 * Emits one (reference name, interval index) pair for each interval of width
 * {@code intervalSize} that the read's aligned positions fall into.
 *
 * <p>Nothing is emitted when the read is flagged as unmapped, when its alignment
 * start is null, or when the alignment end is not greater than the start.
 *
 * @param input wrapper around the SAM record to inspect
 * @param emitter receives one pair per distinct interval index
 */
@Override
public void process(SAMRecordWritable input, Emitter<Pair<String, Integer>> emitter) {
  SAMRecord record = input.get();
  Integer startPosition = record.getAlignmentStart();
  if (record.getReadUnmappedFlag() || startPosition == null) {
    return;
  }
  int start = startPosition;
  // NOTE(review): the end is treated as exclusive, exactly as the previous
  // position loop did (i < getAlignmentEnd()); confirm against the SAMRecord
  // contract, which may define the end as inclusive.
  int end = record.getAlignmentEnd();
  if (end <= start) {
    return;
  }
  // Enumerate interval indices directly instead of stepping through positions,
  // so that no interval between the first and last covered one can be skipped.
  int firstInterval = start / intervalSize;
  int lastInterval = (end - 1) / intervalSize;
  for (int interval = firstInterval; interval <= lastInterval; interval++) {
    // emit contig, interval
    emitter.emit(new Pair<String, Integer>(record.getReferenceName(), interval));
  }
}
开发者ID:arahuja,项目名称:varcrunch,代码行数:27,代码来源:ComputeDepthInInterval.java
示例2: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
/**
 * Matches a log line against {@code pattern} and, on a full match, emits a record
 * built from capture groups 1 through 9. Lines that do not match are written to
 * standard error and produce no output.
 *
 * @param line the raw log line
 * @param emitter receives the built record for matching lines
 */
@Override
public void process(String line, Emitter<GenericData.Record> emitter) {
  Matcher fields = pattern.matcher(line);
  if (!fields.matches()) {
    System.err.println("No match: " + line);
    return;
  }
  // copy each capture group into its named record field
  recBuilder.set("host", asString(fields.group(1)));
  recBuilder.set("rfc931_identity", asString(fields.group(2)));
  recBuilder.set("username", asString(fields.group(3)));
  recBuilder.set("datetime", asString(fields.group(4)));
  recBuilder.set("request", asString(fields.group(5)));
  recBuilder.set("http_status_code", asInt(fields.group(6)));
  recBuilder.set("response_size", asInt(fields.group(7)));
  recBuilder.set("referrer", asString(fields.group(8)));
  recBuilder.set("user_agent", asString(fields.group(9)));
  emitter.emit(recBuilder.build());
}
开发者ID:cloudera,项目名称:cdk,代码行数:21,代码来源:CombinedLogFormatConverter.java
示例3: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
/**
 * For every (user ID, (features, known item IDs)) entry in the group, emits each
 * value returned by {@code TopN.selectTopN} over a {@code RecommendIterator},
 * keyed by the user ID.
 *
 * @param input the partition key paired with the user entries for that partition
 * @param emitter receives (user ID, recommendation) pairs
 * @throws IllegalStateException if the group key does not equal {@code getPartition()}
 */
@Override
public void process(Pair<Integer, Iterable<Pair<Long, Pair<float[], LongSet>>>> input,
                    Emitter<Pair<Long, NumericIDValue>> emitter) {
  Preconditions.checkState(input.first() == getPartition(),
      "Key must match partition: %s != %s", input.first(), getPartition());
  for (Pair<Long, Pair<float[], LongSet>> userEntry : input.second()) {
    long userID = userEntry.first();
    float[] features = userEntry.second().first();
    LongSet alreadyKnown = userEntry.second().second();
    RecommendIterator candidates =
        new RecommendIterator(features, partialY.entrySet().iterator(), alreadyKnown);
    for (NumericIDValue recommendation : TopN.selectTopN(candidates, numRecs)) {
      emitter.emit(Pair.of(userID, recommendation));
    }
  }
}
开发者ID:apsaltis,项目名称:oryx,代码行数:18,代码来源:RecommendReduceFn.java
示例4: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
/**
 * Decodes a delimited line, converts its first two columns to numeric IDs via
 * {@code StringLongMapping.toLong}, and emits a (numeric ID, original string) pair
 * for each column whose numeric ID does not print back to the original string.
 *
 * @param line the delimited input line; must have at least two columns
 * @param emitter receives the numeric-to-string mappings
 */
@Override
public void process(String line, Emitter<Pair<Long, String>> emitter) {
  String[] tokens = DelimitedDataUtils.decode(line);
  String rawUser = tokens[0];
  String rawItem = tokens[1];
  long userKey = StringLongMapping.toLong(rawUser);
  long itemKey = StringLongMapping.toLong(rawItem);
  // a mapping is only emitted when the string is not simply the decimal form of its key
  boolean userIsNumericLiteral = Long.toString(userKey).equals(rawUser);
  if (!userIsNumericLiteral) {
    emitter.emit(Pair.of(userKey, rawUser));
  }
  boolean itemIsNumericLiteral = Long.toString(itemKey).equals(rawItem);
  if (!itemIsNumericLiteral) {
    emitter.emit(Pair.of(itemKey, rawItem));
  }
}
开发者ID:apsaltis,项目名称:oryx,代码行数:17,代码来源:MappingParseFn.java
示例5: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
/**
 * Decodes a delimited line into (user, item[, value]) and emits the user ID paired
 * with a {@code NumericIDValue} holding the item ID and value.
 *
 * <p>The value is 1.0 when there is no third column, NaN when the third column is
 * empty, and otherwise the result of {@code LangUtils.parseFloat} on that column.
 *
 * @param line the delimited input line; must have at least two columns
 * @param emitter receives the (user ID, item/value) pair
 */
@Override
public void process(String line, Emitter<Pair<Long, NumericIDValue>> emitter) {
  String[] tokens = DelimitedDataUtils.decode(line);
  long userID = StringLongMapping.toLong(tokens[0]);
  long itemID = StringLongMapping.toLong(tokens[1]);
  float strength = 1.0f;
  if (tokens.length > 2) {
    String rawValue = tokens[2];
    if (rawValue.isEmpty()) {
      strength = Float.NaN;
    } else {
      strength = LangUtils.parseFloat(rawValue);
    }
  }
  emitter.emit(Pair.of(userID, new NumericIDValue(itemID, strength)));
}
开发者ID:apsaltis,项目名称:oryx,代码行数:18,代码来源:DelimitedInputParseFn.java
示例6: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
/**
 * Keeps up to {@code sampleSize} values of the group in a map sorted by their
 * Double key, replacing the entry with the smallest key whenever a later value has
 * a larger key, then emits every retained entry under the group key.
 *
 * @param input the group key paired with its (Double, value) entries
 * @param emitter receives (group key, (Double, value)) for each retained entry
 */
@Override
public void process(Pair<K, Iterable<Pair<Double, T>>> input,
                    Emitter<Pair<K, Pair<Double, T>>> emitter) {
  SortedMap<Double, T> kept = Maps.newTreeMap();
  for (Pair<Double, T> candidate : input.second()) {
    if (kept.size() >= sampleSize) {
      Double smallest = kept.firstKey();
      // written as a negated '>' so a NaN key is rejected exactly as before
      if (!(candidate.first() > smallest)) {
        continue;
      }
      kept.remove(smallest);
    }
    // values are detached via the PType before being stored
    kept.put(candidate.first(), ptype.getDetachedValue(candidate.second()));
  }
  K groupKey = input.first();
  for (Map.Entry<Double, T> entry : kept.entrySet()) {
    emitter.emit(Pair.of(groupKey, Pair.of(entry.getKey(), entry.getValue())));
  }
}
开发者ID:apsaltis,项目名称:oryx,代码行数:17,代码来源:ReservoirSampling.java
示例7: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(Record record,
Emitter<Pair<Integer, Pair<Long, InternalStats>>> emitter) {
for (int idx = 0; idx < record.getSpec().size(); idx++) {
if (!ignoredColumns.contains(idx)) {
InternalStats ss = stats.get(idx);
if (ss == null) {
ss = new InternalStats();
stats.put(idx, ss);
}
if (categoricalColumns.contains(idx)) {
ss.addCategorical(record.getAsString(idx));
} else {
ss.addNumeric(record.getAsDouble(idx));
}
}
}
count++;
}
开发者ID:apsaltis,项目名称:oryx,代码行数:20,代码来源:Summarizer.java
示例8: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(
Pair<Tuple3<String, Long, String>,
Iterable<Pair<Long,
Pair<Tuple3<String, Long, String>, SpecificRecord>>>> input,
Emitter<Pair<Tuple3<String, Long, String>, SpecificRecord>> emitter) {
for (Pair<Long,
Pair<Tuple3<String, Long, String>, SpecificRecord>> pair : input.second()) {
emitter.emit(pair.second());
}
}
开发者ID:cloudera,项目名称:quince,代码行数:12,代码来源:VariantsLoader.java
示例9: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(
VariantContextWritable input, Emitter<Pair<Variant, Collection<Genotype>>> emitter) {
VariantContext bvc = input.get();
List<org.bdgenomics.adam.models.VariantContext> avcList =
JavaConversions.seqAsJavaList(vcc.convert(bvc));
for (org.bdgenomics.adam.models.VariantContext avc : avcList) {
Variant variant = avc.variant().variant();
Collection<Genotype> genotypes = JavaConversions.asJavaCollection(avc.genotypes());
emitter.emit(Pair.of(variant, genotypes));
}
}
开发者ID:cloudera,项目名称:quince,代码行数:13,代码来源:VCFToADAMVariantFn.java
示例10: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process (SAMRecordWritable input, Emitter <Pair<Long, Pair<Integer, SAMRecordWritable>>> emitter) {
// emit each position this read overlaps
SAMRecord record = input.get();
Integer startPosition = record.getAlignmentStart();
Long lastTask = null;
if (!record.getReadUnmappedFlag() && startPosition != null) {
for (int i = startPosition; i < startPosition + record.getReadBases().length; ++i) {
Long nextTask = positionToTaskMapping.get(new Pair<String, Integer>(record.getReferenceName(), i));
// If we haven't mapped this position to a task, do so evenly
if (nextTask == null) {
nextTask = record.getReferenceName().hashCode() + (long) (i / intervalSize);
}
if (nextTask != lastTask) {
lastTask = nextTask;
emitter.emit(
// emit contig, interval and record
new Pair<Long, Pair<Integer, SAMRecordWritable>>(
lastTask,
new Pair(input.get().getAlignmentStart(), input)
));
}
// Skip length of interval or to the last base
i = Math.min(record.getAlignmentEnd(), i + intervalSize);
}
}
}
开发者ID:arahuja,项目名称:varcrunch,代码行数:32,代码来源:CollectNearbyReadsDoFn.java
示例11: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(
Pair<String, Iterable<StandardEvent>> keyAndEvents,
Emitter<Session> emitter) {
final Iterator<StandardEvent> events = keyAndEvents.second().iterator();
if (!events.hasNext()) {
return;
}
// Initialize the values needed to create a session for this group
final StandardEvent firstEvent = events.next();
long startTime = firstEvent.getTimestamp();
long endTime = firstEvent.getTimestamp();
int numEvents = 1;
// Inspect each event and keep track of start time, end time, and count
while (events.hasNext()) {
final StandardEvent event = events.next();
startTime = Math.min(startTime, event.getTimestamp());
endTime = Math.max(endTime, event.getTimestamp());
numEvents += 1;
}
// Create a session. Use the first event for fields that do not change
emitter.emit(Session.newBuilder() // same on all events:
.setUserId(firstEvent.getUserId()) // the user id (grouped by)
.setSessionId(firstEvent.getSessionId()) // session id (grouped by)
.setIp(firstEvent.getIp()) // the source IP address
.setStartTimestamp(startTime)
.setDuration(endTime - startTime)
.setSessionEventCount(numEvents)
.build());
}
开发者ID:cloudera,项目名称:cdk-examples,代码行数:34,代码来源:CreateSessions.java
示例12: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(Pair<Long, Pair<float[], LongSet>> input,
Emitter<Pair<Integer, Pair<Long, Pair<float[], LongSet>>>> emitter) {
for (int i = 0; i < numReducers; i++) {
emitter.emit(Pair.of(i, input));
}
}
开发者ID:apsaltis,项目名称:oryx,代码行数:8,代码来源:DistributeRecommendWorkFn.java
示例13: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(Pair<Long, Iterable<NumericIDValue>> input, Emitter<String> emitter) {
StringLongMapping mapping = idMapping.getIDMapping();
Iterable<NumericIDValue> recs = TopN.selectTopN(input.second().iterator(), numRecs);
String userID = mapping.toString(input.first());
for (NumericIDValue rec : recs) {
emitter.emit(DelimitedDataUtils.encode(userID,
mapping.toString(rec.getID()),
Float.toString(rec.getValue())));
}
}
开发者ID:apsaltis,项目名称:oryx,代码行数:12,代码来源:CollectRecommendFn.java
示例14: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(Pair<Long, float[]> input, Emitter<String> emitter) {
if (input.first() % convergenceSamplingModulus == 0) {
String userID = input.first().toString();
float[] xu = input.second();
for (LongObjectMap.MapEntry<float[]> entry : yState.getY().entrySet()) {
long itemID = entry.getKey();
if (itemID % convergenceSamplingModulus == 0) {
float estimate = (float) SimpleVectorMath.dot(xu, entry.getValue());
emitter.emit(DelimitedDataUtils.encode(userID, itemID, estimate));
}
}
}
}
开发者ID:apsaltis,项目名称:oryx,代码行数:15,代码来源:ConvergenceSampleFn.java
示例15: cleanup
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void cleanup(Emitter<Pair<Long, NumericIDValue>> emitter) {
if (previousUserPrefs != null) {
Preconditions.checkNotNull(previousUserID);
output(previousUserID, previousUserPrefs, null, null, emitter);
}
super.cleanup(emitter);
}
开发者ID:apsaltis,项目名称:oryx,代码行数:9,代码来源:MergeNewOldValuesFn.java
示例16: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(Pair<Long, Iterable<NumericIDValue>> input, Emitter<Pair<Long, LongFloatMap>> emitter) {
LongFloatMap map = new LongFloatMap();
for (NumericIDValue value : input.second()) {
map.put(value.getID(), value.getValue());
}
if (!map.isEmpty()) {
emitter.emit(Pair.of(input.first(), map));
}
}
开发者ID:apsaltis,项目名称:oryx,代码行数:11,代码来源:ToVectorReduceFn.java
示例17: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(String line, Emitter<Pair<Long, String>> emitter) {
String[] columns = DelimitedDataUtils.decode(line);
long numericID = Long.parseLong(columns[0]);
String id = columns[1];
emitter.emit(Pair.of(numericID, id));
}
开发者ID:apsaltis,项目名称:oryx,代码行数:8,代码来源:ExistingMappingsMapFn.java
示例18: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(Pair<Integer, Iterable<LongSet>> input, Emitter<Long> emitter) {
Preconditions.checkState(input.first() == getPartition(),
"Key must match partition: %s != %s", input.first(), getPartition());
for (LongSet set : input.second()) {
LongPrimitiveIterator it = set.iterator();
while (it.hasNext()) {
emitter.emit(it.nextLong());
}
}
}
开发者ID:apsaltis,项目名称:oryx,代码行数:12,代码来源:PopularReduceFn.java
示例19: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(Pair<Long, Iterable<NumericIDValue>> input, Emitter<String> emitter) {
StringLongMapping mapping = idMapping.getIDMapping();
Iterable<NumericIDValue> mostSimilar = TopN.selectTopN(input.second().iterator(), numSimilar);
String item1ID = mapping.toString(input.first());
for (NumericIDValue similar : mostSimilar) {
emitter.emit(DelimitedDataUtils.encode(item1ID,
mapping.toString(similar.getID()),
Float.toString(similar.getValue())));
}
}
开发者ID:apsaltis,项目名称:oryx,代码行数:12,代码来源:SimilarReduceFn.java
示例20: process
import org.apache.crunch.Emitter; //导入依赖的package包/类
@Override
public void process(Pair<Integer, Iterable<MatrixRow>> input, Emitter<Pair<Long, NumericIDValue>> emitter) {
Preconditions.checkState(input.first() == getPartition(),
"Key must match partition: %s != %s", input.first(), getPartition());
for (MatrixRow value : input.second()) {
long itemID = value.getRowId();
float[] itemFeatures = value.getValues();
Iterable<NumericIDValue> mostSimilar = TopN.selectTopN(
new MostSimilarItemIterator(partialY.entrySet().iterator(), itemID, itemFeatures), numSimilar);
for (NumericIDValue similar : mostSimilar) {
emitter.emit(Pair.of(itemID, similar));
}
}
}
开发者ID:apsaltis,项目名称:oryx,代码行数:15,代码来源:DistributeSimilarWorkReduceFn.java
注：本文中的org.apache.crunch.Emitter类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论