本文整理汇总了Java中com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner类的典型用法代码示例。如果您正苦于以下问题:Java DirectPipelineRunner类的具体用法?Java DirectPipelineRunner怎么用?Java DirectPipelineRunner使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
DirectPipelineRunner类属于com.google.cloud.dataflow.sdk.runners包,在下文中一共展示了DirectPipelineRunner类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: setupPipeline
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; //导入依赖的package包/类
/**
 * Builds a test {@link Pipeline}, selecting a runner from the requested execution mode.
 *
 * <p>Cloud execution takes precedence and uses the blocking Dataflow runner; otherwise a
 * Hadoop-style input or output path selects the Spark runner, and everything else runs
 * locally on the DirectPipelineRunner. GATK coders are registered before returning.
 *
 * @param inputPath       source path; a Hadoop URL here triggers the Spark runner
 * @param outputPath      destination path; a Hadoop URL here also triggers the Spark runner
 * @param enableGcs       if true, attach the GCP test API key for GCS access
 * @param enableCloudExec if true, run on the Dataflow service (staging + project configured)
 * @return a configured Pipeline with GATK coders registered
 */
private Pipeline setupPipeline(final String inputPath, final String outputPath, boolean enableGcs, boolean enableCloudExec) {
    final GATKGCSOptions options = PipelineOptionsFactory.as(GATKGCSOptions.class);
    if (enableCloudExec) {
        // Full service execution: needs a staging bucket and a billing project.
        options.setStagingLocation(getGCPTestStaging());
        options.setProject(getGCPTestProject());
        options.setRunner(BlockingDataflowPipelineRunner.class);
    } else {
        // Local decision: Spark for Hadoop-backed paths, direct runner otherwise.
        final boolean onHadoop = BucketUtils.isHadoopUrl(inputPath) || BucketUtils.isHadoopUrl(outputPath);
        if (onHadoop) {
            options.setRunner(SparkPipelineRunner.class);
        } else {
            options.setRunner(DirectPipelineRunner.class);
        }
    }
    if (enableGcs) {
        options.setApiKey(getGCPTestApiKey());
    }
    final Pipeline pipeline = Pipeline.create(options);
    DataflowUtils.registerGATKCoders(pipeline);
    return pipeline;
}
开发者ID:broadinstitute,项目名称:gatk-dataflow,代码行数:19,代码来源:SmallBamWriterTest.java
示例2: setupRunner
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; //导入依赖的package包/类
/**
* Do some runner setup: check that the DirectPipelineRunner is not used in conjunction with
* streaming, and if streaming is specified, use the DataflowPipelineRunner. Return the streaming
* flag value.
*/
/**
 * Do some runner setup: check that the DirectPipelineRunner is not used in conjunction with
 * streaming, and if streaming is specified, use the DataflowPipelineRunner. Return the streaming
 * flag value.
 */
public void setupRunner() {
    // Nothing to adjust for batch pipelines.
    if (!options.isStreaming()) {
        return;
    }
    // The in-memory direct runner cannot consume unbounded sources.
    if (options.getRunner() == DirectPipelineRunner.class) {
        throw new IllegalArgumentException(
            "Processing of unbounded input sources is not supported with the DirectPipelineRunner.");
    }
    // In order to cancel the pipelines automatically,
    // {@literal DataflowPipelineRunner} is forced to be used.
    options.setRunner(DataflowPipelineRunner.class);
}
开发者ID:sinmetal,项目名称:iron-hippo,代码行数:17,代码来源:DataflowExampleUtils.java
示例3: joinArtistCreditsWithRecordings
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; //导入依赖的package包/类
@org.junit.Test
public void joinArtistCreditsWithRecordings() {
    // Load both fixture tables, keyed on the shared artist_credit column.
    DirectPipeline pipeline = DirectPipeline.createForTest();
    PCollection<String> creditJson = pipeline.apply("artistCredits", Create.of(artistCreditLinesOfJson)).setCoder(StringUtf8Coder.of());
    PCollection<KV<Long, MusicBrainzDataObject>> creditsByKey = MusicBrainzTransforms.loadTableFromText(creditJson, "artist_credit_name", "artist_credit");
    PCollection<String> recordingJson = pipeline.apply("recordings", Create.of(recordingLinesOfJson)).setCoder(StringUtf8Coder.of());
    PCollection<KV<Long, MusicBrainzDataObject>> recordingsByKey = MusicBrainzTransforms.loadTableFromText(recordingJson, "recording", "artist_credit");

    // Inner-join and project out just the recording ids.
    PCollection<MusicBrainzDataObject> joined = MusicBrainzTransforms.innerJoin("artist credits with recordings", creditsByKey, recordingsByKey);
    PCollection<Long> joinedIds = joined.apply(MapElements.via((MusicBrainzDataObject mbo) -> (Long) mbo.getColumnValue("recording_id")).
        withOutputType(new TypeDescriptor<Long>() {
        }));

    // Two known recordings must survive the join.
    Long bieberRecording = 17069165L;
    Long bieberRecording2 = 15508507L;
    DataflowAssert.that(joinedIds).satisfies((longs) -> {
        List<Long> collected = new ArrayList<Long>();
        for (Long id : longs) {
            collected.add(id);
        }
        assert (collected.contains(bieberRecording));
        assert (collected.contains(bieberRecording2));
        return null;
    });

    // NOTE(review): step name "recrodings" is a pre-existing typo, kept verbatim
    // because step names are part of observable pipeline behavior.
    PCollection<Long> joinedCount = joined.apply("count joined recrodings", Count.globally());
    PCollection<Long> creditCount = creditsByKey.apply("count artist credits", Count.globally());
    DirectPipelineRunner.EvaluationResults results = pipeline.run();
    long joinedRecordingsCount = results.getPCollection(joinedCount).get(0);
    assert (448 == joinedRecordingsCount);
}
开发者ID:GoogleCloudPlatform,项目名称:bigquery-etl-dataflow-sample,代码行数:38,代码来源:MusicBrainzTransformsTest.java
示例4: testNest
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; //导入依赖的package包/类
@org.junit.Test
public void testNest() {
    DirectPipeline pipeline = DirectPipeline.createForTest();

    // Raw JSON fixtures for the three MusicBrainz tables under test.
    PCollection<String> artistJson = pipeline.apply("artist", Create.of(artistLinesOfJson)).setCoder(StringUtf8Coder.of());
    PCollection<String> creditNameJson = pipeline.apply("artist_credit_name", Create.of(artistCreditLinesOfJson));
    PCollection<String> recordingJson = pipeline.apply("recording", Create.of(recordingLinesOfJson)).setCoder(StringUtf8Coder.of());

    // Parse each table into keyed MusicBrainzDataObject collections.
    PCollection<KV<Long, MusicBrainzDataObject>> artistsById = MusicBrainzTransforms.loadTableFromText(artistJson, "artist", "id");
    PCollection<KV<Long, MusicBrainzDataObject>> recordingsByCredit =
        MusicBrainzTransforms.loadTableFromText(recordingJson, "recording", "artist_credit");
    PCollection<KV<Long, MusicBrainzDataObject>> creditNamesByCredit =
        MusicBrainzTransforms.loadTableFromText(creditNameJson, "artist_credit_name", "artist_credit");

    // Join recordings to their credits, re-key by artist, then nest under each artist.
    PCollection<MusicBrainzDataObject> creditedRecordings = MusicBrainzTransforms.innerJoin("credited recordings", creditNamesByCredit, recordingsByCredit);
    PCollection<KV<Long, MusicBrainzDataObject>> recordingsKeyedByArtist =
        MusicBrainzTransforms.by("artist_credit_name_artist", creditedRecordings);
    PCollection<MusicBrainzDataObject> nestedArtists = MusicBrainzTransforms.nest(artistsById, recordingsKeyedByArtist, "recordings");

    DirectPipelineRunner.EvaluationResults results = pipeline.run();
    List<MusicBrainzDataObject> resultObjects = results.getPCollection(nestedArtists);

    // Expect exactly one artist, carrying all 448 nested recordings.
    assert (resultObjects.size() == 1);
    assert (((List<MusicBrainzDataObject>) resultObjects.get(0).getColumnValue("artist_recordings")).size() == 448);
}
开发者ID:GoogleCloudPlatform,项目名称:bigquery-etl-dataflow-sample,代码行数:29,代码来源:MusicBrainzTransformsTest.java
示例5: runner
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; //导入依赖的package包/类
/**
 * Maps a runner name to its {@link PipelineRunner} class.
 *
 * <p>A null name or the default name selects the (non-blocking) Dataflow runner;
 * the blocking and direct names select their respective runners; any other
 * value falls back to the local DirectPipelineRunner.
 *
 * @param name runner name from configuration; may be null
 * @return the runner class to hand to pipeline options
 */
private static Class<? extends PipelineRunner<?>> runner(String name) {
    if (name == null || DEFAULT_RUNNER.equals(name)) {
        return DataflowPipelineRunner.class;
    }
    if (BLOCKING_RUNNER.equals(name)) {
        return BlockingDataflowPipelineRunner.class;
    }
    if (DIRECT_RUNNER.equals(name)) {
        return DirectPipelineRunner.class;
    }
    // Unrecognized names fall back to local execution.
    return DirectPipelineRunner.class;
}
开发者ID:googlegenomics,项目名称:dockerflow,代码行数:13,代码来源:DataflowFactory.java
示例6: init
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; //导入依赖的package包/类
// Per-test setup: fresh runners plus an empty scratch directory named after
// the running test method (target/test-data/<methodName>/).
@Before public void init() throws IOException {
    sparkRunner = SparkPipelineRunner.create();
    directRunner = DirectPipelineRunner.createForTest();
    testDataDirName = Joiner.on(File.separator).join("target", "test-data", name.getMethodName())
        + File.separator;
    // Wipe any leftovers from a previous run, then recreate the directory.
    final File scratchDir = new File(testDataDirName);
    FileUtils.deleteDirectory(scratchDir);
    scratchDir.mkdirs();
}
开发者ID:shakamunyi,项目名称:spark-dataflow,代码行数:9,代码来源:TransformTranslatorTest.java
注:本文中的com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论