本文整理汇总了Java中org.apache.spark.sql.Column类的典型用法代码示例。如果您正苦于以下问题:Java Column类的具体用法?Java Column怎么用?Java Column使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Column类属于org.apache.spark.sql包,在下文中一共展示了Column类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: computeJoins
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Executes every join in the join tree and projects the requested columns.
 *
 * @param sqlContext the SQLContext used to run the joins
 * @return the joined rows, restricted to the node's projection, optionally
 *         filtered and de-duplicated
 */
public Dataset<Row> computeJoins(SQLContext sqlContext){
    // evaluate the whole join tree starting from the root node
    Dataset<Row> joined = node.computeJoinWithChildren(sqlContext);
    // build the projection from the column names requested on the node
    Column[] projection = new Column[node.projection.size()];
    for (int pos = 0; pos < projection.length; pos++) {
        projection[pos] = new Column(node.projection.get(pos));
    }
    // apply the optional filter before projecting
    if (filter != null) {
        joined = joined.filter(filter);
    }
    joined = joined.select(projection);
    // drop duplicate rows when a DISTINCT result was requested
    if (selectDistinct) {
        joined = joined.distinct();
    }
    return joined;
}
开发者ID:tf-dbis-uni-freiburg,项目名称:PRoST,代码行数:19,代码来源:JoinTree.java
示例2: normalize
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Min-max scales every column of the data frame into the range [min, max],
 * skipping the columns named in {@code skipColumns}.
 *
 * @param dataFrame   the dataframe to scale
 * @param min         the lower bound of the target range
 * @param max         the upper bound of the target range
 * @param skipColumns columns to leave untouched
 * @return the dataframe with each remaining column normalized independently
 */
public static DataRowsFacade normalize(DataRowsFacade dataFrame, double min, double max, List<String> skipColumns) {
    List<String> targets = DataFrames.toList(dataFrame.get().columns());
    targets.removeAll(skipColumns);
    String[] names = DataFrames.toArray(targets);
    // row 0 holds the per-column minima, row 1 the per-column maxima
    List<Row> stats = minMaxColumns(dataFrame, names);
    Row minima = stats.get(0);
    Row maxima = stats.get(1);
    for (int idx = 0; idx < names.length; idx++) {
        double lo = ((Number) minima.get(idx)).doubleValue();
        double hi = ((Number) maxima.get(idx)).doubleValue();
        double range = hi - lo;
        if (range == 0)
            range = 1; // constant column: avoid division by zero
        // (x - lo) / range * (max - min) + min, expressed on the Column
        Column scaled = dataFrame.get().col(names[idx])
                .minus(lo).divide(range).multiply(max - min).plus(min);
        dataFrame = dataRows(dataFrame.get().withColumn(names[idx], scaled));
    }
    return dataFrame;
}
开发者ID:deeplearning4j,项目名称:DataVec,代码行数:30,代码来源:Normalization.java
示例3: writeParquetFile
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Writes this dataset to Parquet files at {@code location}, partitioned by the
 * given column ids, and returns a single-row dataset with the write count.
 *
 * @param baseColumnMap column ids of the base table (currently unused, see note)
 * @param partitionBy   column ids to partition the output by
 * @param location      target directory for the Parquet files
 * @param compression   compression codec passed to the writer
 * @param context       operation context; counts written records
 * @return a one-row dataset holding the number of records written
 * @throws RuntimeException wrapping any failure during the write
 */
public DataSet<ExecRow> writeParquetFile(int[] baseColumnMap, int[] partitionBy, String location, String compression,
                                         OperationContext context) {
    try {
        // Wrap each row so writes are counted in the OperationContext, then convert to Spark Rows.
        Dataset<Row> insertDF = SpliceSpark.getSession().createDataFrame(
                rdd.map(new SparkSpliceFunctionWrapper<>(new CountWriteFunction(context))).map(new LocatedRowToRowFunction()),
                context.getOperation().getExecRowDefinition().schema());
        // NOTE(review): a raw-typed Column list was previously built from baseColumnMap but
        // never applied to the write; that dead code was removed. Confirm no projection was intended.
        List<String> partitionByCols = new ArrayList<>();
        for (int columnId : partitionBy) {
            partitionByCols.add(ValueRow.getNamedColumn(columnId));
        }
        insertDF.write().option(SPARK_COMPRESSION_OPTION, compression)
                .partitionBy(partitionByCols.toArray(new String[0]))
                .mode(SaveMode.Append).parquet(location);
        // Report the number of rows written as a single-row result set.
        ValueRow valueRow = new ValueRow(1);
        valueRow.setColumn(1, new SQLLongint(context.getRecordsWritten()));
        return new SparkDataSet<>(SpliceSpark.getContext().parallelize(Collections.singletonList(valueRow), 1));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
开发者ID:splicemachine,项目名称:spliceengine,代码行数:26,代码来源:SparkDataSet.java
示例4: writeORCFile
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Writes this dataset to ORC files at {@code location}, partitioned by the
 * given column ids, and returns a single-row dataset with the write count.
 *
 * @param baseColumnMap column ids of the base table (currently unused, see note)
 * @param partitionBy   column ids to partition the output by
 * @param location      target directory for the ORC files
 * @param compression   compression codec passed to the writer
 * @param context       operation context; counts written records
 * @return a one-row dataset holding the number of records written
 * @throws RuntimeException wrapping any failure during the write
 */
public DataSet<ExecRow> writeORCFile(int[] baseColumnMap, int[] partitionBy, String location, String compression,
                                     OperationContext context) {
    try {
        // Wrap each row so writes are counted in the OperationContext, then convert to Spark Rows.
        Dataset<Row> insertDF = SpliceSpark.getSession().createDataFrame(
                rdd.map(new SparkSpliceFunctionWrapper<>(new CountWriteFunction(context))).map(new LocatedRowToRowFunction()),
                context.getOperation().getExecRowDefinition().schema());
        // NOTE(review): a raw-typed Column list was previously built from baseColumnMap but
        // never applied to the write; that dead code was removed. Confirm no projection was intended.
        String[] partitionByCols = new String[partitionBy.length];
        for (int i = 0; i < partitionBy.length; i++) {
            partitionByCols[i] = ValueRow.getNamedColumn(partitionBy[i]);
        }
        insertDF.write().option(SPARK_COMPRESSION_OPTION, compression)
                .partitionBy(partitionByCols)
                .mode(SaveMode.Append).orc(location);
        // Report the number of rows written as a single-row result set.
        ValueRow valueRow = new ValueRow(1);
        valueRow.setColumn(1, new SQLLongint(context.getRecordsWritten()));
        return new SparkDataSet<>(SpliceSpark.getContext().parallelize(Collections.singletonList(valueRow), 1));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
开发者ID:splicemachine,项目名称:spliceengine,代码行数:26,代码来源:SparkDataSet.java
示例5: writeTextFile
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Writes this dataset as CSV text files at {@code location} and returns a
 * single-row dataset with the write count.
 *
 * NOTE(review): {@code characterDelimiter}, {@code columnDelimiter} and
 * {@code baseColumnMap} are accepted but never applied to the write — the CSV
 * is emitted with Spark's defaults. Confirm whether the delimiters should be
 * passed as writer options.
 *
 * @param op                 the operation requesting the export (unused here)
 * @param location           target directory for the CSV files
 * @param characterDelimiter quote character (currently unused, see note)
 * @param columnDelimiter    field separator (currently unused, see note)
 * @param baseColumnMap      base-table column ids (currently unused, see note)
 * @param context            operation context; counts written records
 * @return a one-row dataset holding the number of records written
 * @throws RuntimeException wrapping any failure during the write
 */
public DataSet<ExecRow> writeTextFile(SpliceOperation op, String location, String characterDelimiter, String columnDelimiter,
                                      int[] baseColumnMap,
                                      OperationContext context) {
    try {
        // Wrap each row so writes are counted in the OperationContext, then convert to Spark Rows.
        Dataset<Row> insertDF = SpliceSpark.getSession().createDataFrame(
                rdd.map(new SparkSpliceFunctionWrapper<>(new CountWriteFunction(context))).map(new LocatedRowToRowFunction()),
                context.getOperation().getExecRowDefinition().schema());
        // spark-2.2.0: commons-lang3-3.3.2 does not support 'XXX' timezone, specify 'ZZ' instead
        insertDF.write().option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZZ")
                .mode(SaveMode.Append).csv(location);
        // Report the number of rows written as a single-row result set.
        ValueRow valueRow = new ValueRow(1);
        valueRow.setColumn(1, new SQLLongint(context.getRecordsWritten()));
        return new SparkDataSet<>(SpliceSpark.getContext().parallelize(Collections.singletonList(valueRow), 1));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
开发者ID:splicemachine,项目名称:spliceengine,代码行数:24,代码来源:SparkDataSet.java
示例6: toColumn
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Converts a list of column names into Spark {@link Column} objects.
 *
 * @param columns the column names to convert
 * @return one Column per input name, in the same order
 */
public static List<Column> toColumn(List<String> columns) {
    List<Column> converted = new ArrayList<>(columns.size());
    for (String name : columns) {
        converted.add(col(name));
    }
    return converted;
}
开发者ID:deeplearning4j,项目名称:DataVec,代码行数:13,代码来源:DataFrames.java
示例7: toColumns
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Converts an array of column names into Spark {@link Column} objects.
 *
 * @param columns the column names to convert
 * @return one Column per input name, in the same order
 */
public static Column[] toColumns(String... columns) {
    Column[] converted = new Column[columns.length];
    int pos = 0;
    for (String name : columns) {
        converted[pos++] = col(name);
    }
    return converted;
}
开发者ID:deeplearning4j,项目名称:DataVec,代码行数:13,代码来源:DataFrames.java
示例8: toColumnArray
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Converts a list of column names into an array of Spark {@link Column}s.
 *
 * @param columnList the column names to convert
 * @return one Column per input name, in the same order
 */
public static Column[] toColumnArray(List<String> columnList) {
    Column[] converted = new Column[columnList.size()];
    int pos = 0;
    for (String name : columnList) {
        converted[pos++] = new Column(name);
    }
    return converted;
}
开发者ID:cloudera-labs,项目名称:envelope,代码行数:8,代码来源:RowUtils.java
示例9: windows
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Spark implementation of the SQL window function operator.
 *
 * Converts the Derby window specification into a Spark window via the
 * SparkWindow helper. Most of the specification maps directly onto Spark,
 * except that Derby column ids are 1-based while Spark field indexes are
 * 0-based, and some functions need special handling — see SparkWindow.
 *
 * @param windowContext the Derby window definition (functions, partitions, orderings, frames)
 * @param context       operation context used for row conversion and scope management
 * @param pushScope     whether to push a Spark scope for progress reporting
 * @param scopeDetail   description used when a scope is pushed
 * @return the dataset with one result column replaced per window function
 */
public DataSet<V> windows(WindowContext windowContext, OperationContext context, boolean pushScope, String scopeDetail) {
    pushScopeIfNeeded(context, pushScope, scopeDetail);
    try {
        // Convert Splice rows into a Spark Dataset<Row> so window functions can run on it.
        Dataset<Row> dataset = toSparkRow(this,context);
        for(WindowAggregator aggregator : windowContext.getWindowFunctions()) {
            // we need to remove to convert resultColumnId from a 1 position index to a 0position index
            DataType resultDataType = dataset.schema().fields()[aggregator.getResultColumnId()-1].dataType();
            // We define the window specification and we get a back a spark.
            // Simply provide all the information and spark window will build it for you
            Column col = SparkWindow.partitionBy(aggregator.getPartitions())
                .function(aggregator.getType())
                .inputs(aggregator.getInputColumnIds())
                .orderBy(aggregator.getOrderings())
                .frameBoundary(aggregator.getFrameDefinition())
                .specificArgs(aggregator.getFunctionSpecificArgs())
                .resultColumn(aggregator.getResultColumnId())
                .resultDataType(resultDataType)
                .toColumn();
            // Now we replace the result column by the spark specification.
            // the result column is already define by derby. We need to replace it
            // NOTE(review): withColumn uses getResultColumnId()-1 (0-based) while
            // resultColumn(...) above receives the unadjusted id — confirm SparkWindow
            // expects the 1-based id there.
            dataset = dataset.withColumn(ValueRow.getNamedColumn(aggregator.getResultColumnId()-1),col);
        }
        //Convert back to Splice Row
        return toSpliceLocatedRow(dataset, context);
    } catch (Exception se){
        throw new RuntimeException(se);
    }finally {
        if (pushScope) context.popScope();
    }
}
开发者ID:splicemachine,项目名称:spliceengine,代码行数:46,代码来源:SparkDataSet.java
示例10: writeAvroFile
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Writes this dataset to Avro files at {@code location}, partitioned by the
 * given column ids, and returns a single-row dataset with the write count.
 *
 * @param baseColumnMap column ids of the base table (currently unused, see note)
 * @param partitionBy   column ids to partition the output by
 * @param location      target directory for the Avro files
 * @param compression   compression codec passed to the writer
 * @param context       operation context; counts written records
 * @return a one-row dataset holding the number of records written
 * @throws RuntimeException wrapping any failure during the write
 */
public DataSet<ExecRow> writeAvroFile(int[] baseColumnMap, int[] partitionBy, String location, String compression,
                                      OperationContext context) {
    try {
        // Avro needs a schema with date types rewritten to a supported representation.
        StructType schema = AvroUtils.supportAvroDateType(context.getOperation().getExecRowDefinition().schema(), "a");
        // Wrap each row so writes are counted in the OperationContext, then convert to Avro-compatible Rows.
        Dataset<Row> insertDF = SpliceSpark.getSession().createDataFrame(
                rdd.map(new SparkSpliceFunctionWrapper<>(new CountWriteFunction(context))).map(new LocatedRowToRowAvroFunction()),
                schema);
        // NOTE(review): a raw-typed Column list was previously built from baseColumnMap but
        // never applied to the write; that dead code was removed. Confirm no projection was intended.
        List<String> partitionByCols = new ArrayList<>();
        for (int columnId : partitionBy) {
            partitionByCols.add(ValueRow.getNamedColumn(columnId));
        }
        insertDF.write().option(SPARK_COMPRESSION_OPTION, compression)
                .partitionBy(partitionByCols.toArray(new String[0]))
                .mode(SaveMode.Append).format("com.databricks.spark.avro").save(location);
        // Report the number of rows written as a single-row result set.
        ValueRow valueRow = new ValueRow(1);
        valueRow.setColumn(1, new SQLLongint(context.getRecordsWritten()));
        return new SparkDataSet<>(SpliceSpark.getContext().parallelize(Collections.singletonList(valueRow), 1));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
开发者ID:splicemachine,项目名称:spliceengine,代码行数:29,代码来源:SparkDataSet.java
示例11: convertSortColumns
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Converts Derby sort specifications into Spark sort Columns, translating the
 * 1-based Derby column ids to 0-based named columns and preserving the
 * ascending/descending and null-ordering flags.
 *
 * @param sortColumns the Derby column orderings
 * @return the equivalent Spark sort columns as a Scala buffer
 */
public static scala.collection.mutable.Buffer<Column> convertSortColumns(ColumnOrdering[] sortColumns){
    return Arrays
        .stream(sortColumns)
        .map(ordering -> {
            // translate the 1-based Derby id to the 0-based Spark column name
            String name = ValueRow.getNamedColumn(ordering.getColumnId() - 1);
            if (ordering.getIsAscending()) {
                return ordering.getIsNullsOrderedLow() ? asc_nulls_first(name) : asc_nulls_last(name);
            }
            return ordering.getIsNullsOrderedLow() ? desc_nulls_last(name) : desc_nulls_first(name);
        })
        .collect(Collectors.collectingAndThen(Collectors.toList(), JavaConversions::asScalaBuffer));
}
开发者ID:splicemachine,项目名称:spliceengine,代码行数:18,代码来源:SparkUtils.java
示例12: convertPartitions
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Converts Derby partition specifications to Spark dataset columns, using only
 * the column id (the ordering flags are ignored for partitioning).
 *
 * @param sortColumns the Derby column orderings
 * @return the partition columns as a Scala buffer
 */
public static scala.collection.mutable.Buffer<Column> convertPartitions(ColumnOrdering[] sortColumns){
    return Arrays
        .stream(sortColumns)
        // translate the 1-based Derby id to the 0-based Spark column name
        .map(ordering -> ValueRow.getNamedColumn(ordering.getColumnId() - 1))
        .map(name -> col(name))
        .collect(Collectors.collectingAndThen(Collectors.toList(), JavaConversions::asScalaBuffer));
}
开发者ID:splicemachine,项目名称:spliceengine,代码行数:14,代码来源:SparkUtils.java
示例13: transform
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Flattens a struct column: every field of the struct is promoted to a
 * top-level column of the returned data frame.
 *
 * @param dataset the input dataset containing the struct column
 * @return the dataset with one additional column per struct field
 */
@Override
public Dataset<Row> transform(Dataset<?> dataset){
    StructType schema = dataset.schema();
    StructType structSchema = getStructSchema(schema);
    Column structColumn = dataset.apply(DatasetUtil.escapeColumnName(getStructCol()));
    Dataset<Row> exploded = dataset.toDF();
    // promote each nested field to a top-level column of the same (escaped) name
    for(StructField field : structSchema.fields()){
        String escaped = DatasetUtil.escapeColumnName(field.name());
        exploded = exploded.withColumn(escaped, structColumn.getField(escaped));
    }
    return exploded;
}
开发者ID:jeremyore,项目名称:spark-pmml-import,代码行数:22,代码来源:ColumnExploder.java
示例14: getSongbyTrackID
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Looks up songs matching the given track id and returns them as JSON strings.
 *
 * NOTE(review): the LIMIT 100 is applied before the trackID filter, so a
 * matching song outside the first 100 rows is never returned — confirm this
 * is intended.
 *
 * @param trackID the track id to match
 * @return the matching rows serialized to JSON
 */
public List<String> getSongbyTrackID(String trackID) {
    Column trackMatches = new Column("trackID").equalTo(trackID);
    Dataset<Row> matches = InitSpark.spark.sql("SELECT * FROM song limit 100").filter(trackMatches);
    return matches.toJSON().collectAsList();
}
开发者ID:alikemalocalan,项目名称:Spark-API,代码行数:6,代码来源:Recommendation.java
示例15: execute
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Proof-of-concept: loads a Cassandra table into Spark, aggregates view counts,
 * then re-runs the aggregation day by day over a fixed time window.
 * Results are printed to stdout; nothing is returned.
 */
@SuppressWarnings("deprecation")
private void execute() {
    // Spin up a local Spark context and session against the local master.
    SparkConf conf = new SparkConf();
    conf.setAppName("cassandra-spark-poc");
    conf.setMaster("local[*]");
    SparkContext sparkContext = new SparkContext(conf);
    System.out.println(sparkContext);
    SparkSession sparkSession = SparkSession.builder().appName("cassandra-spark-poc").master("local[*]")
        .getOrCreate();
    SQLContext sqlContext = new SQLContext(sparkSession);
    // Cassandra source configuration: keyspace and table to read.
    Map<String, String> options = new HashMap<String, String>();
    options.put("keyspace", "wootag");
    options.put("table", "video_view");
    // Load the table as a DataFrame and cache it — it is scanned repeatedly below.
    Dataset<Row> dataset = sqlContext.read().format("org.apache.spark.sql.cassandra").options(options).load()
        .cache();
    dataset.registerTempTable("temptable");
    // Aggregate view counts per (video_id, view_duration) over the whole table.
    String query = "select video_id, view_duration_in_second, count(*) from temptable group by 1, 2";
    List<Row> collectAsList = sqlContext.sql(query).collectAsList();
    for (Row row : collectAsList) {
        System.out.println(row.get(0) + "," + row.get(1) + "," + row.get(2));
    }
    // sqlContext.sql(query).show(1000);
    // Re-run the aggregation one day at a time over a hard-coded epoch-millis window.
    // NOTE(review): the start/end timestamps are magic constants — confirm the intended range.
    long startTime = 1485907200000L;
    long endTime = 1487226374000L;
    for (long i = startTime; i <= endTime; i = i + TimeUnit.DAYS.toMillis(1)) {
        dataset.filter(new Column("event_start_timestamp").geq(i))
            .filter(new Column("event_start_timestamp").leq(i + TimeUnit.DAYS.toMillis(1)))
            .groupBy(new Column("view_duration_in_second"), new Column("video_id")).count()
            .orderBy("view_duration_in_second").show(1000);
        sleepDelay();
    }
}
开发者ID:alokawi,项目名称:spark-cassandra-poc,代码行数:48,代码来源:SparkCassandraUtils.java
示例16: testDataFrameConversions
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Round-trips a Schema and records through the DataFrame conversion utilities
 * and smoke-tests the column mean/std helpers.
 */
@Test
public void testDataFrameConversions() {
    // Build a 5-row dataset with 6 double columns named "0".."5", every value 1.0.
    List<List<Writable>> data = new ArrayList<>();
    Schema.Builder builder = new Schema.Builder();
    int numColumns = 6;
    for (int i = 0; i < numColumns; i++)
        builder.addColumnDouble(String.valueOf(i));
    for (int i = 0; i < 5; i++) {
        List<Writable> record = new ArrayList<>(numColumns);
        data.add(record);
        for (int j = 0; j < numColumns; j++) {
            record.add(new DoubleWritable(1.0));
        }
    }
    Schema schema = builder.build();
    JavaRDD<List<Writable>> rdd = sc.parallelize(data);
    // Round-trip checks: schema <-> StructType and records <-> DataFrame must be lossless.
    assertEquals(schema, DataFrames.fromStructType(DataFrames.fromSchema(schema)));
    assertEquals(rdd.collect(), DataFrames.toRecords(DataFrames.toDataFrame(schema, rdd)).getSecond().collect());
    DataRowsFacade dataFrame = DataFrames.toDataFrame(schema, rdd);
    dataFrame.get().show();
    // Smoke-test mean/std on column "0"; output is only shown, not asserted.
    Column mean = DataFrames.mean(dataFrame, "0");
    Column std = DataFrames.std(dataFrame, "0");
    dataFrame.get().withColumn("0", dataFrame.get().col("0").minus(mean)).show();
    dataFrame.get().withColumn("0", dataFrame.get().col("0").divide(std)).show();
    /* DataFrame desc = dataFrame.describe(dataFrame.columns());
    dataFrame.show();
    System.out.println(dataFrame.agg(avg("0"), dataFrame.col("0")));
    dataFrame.withColumn("0",dataFrame.col("0").minus(avg(dataFrame.col("0"))));
    dataFrame.show();
    for(String column : dataFrame.columns()) {
    System.out.println(DataFrames.mean(desc,column));
    System.out.println(DataFrames.min(desc,column));
    System.out.println(DataFrames.max(desc,column));
    System.out.println(DataFrames.std(desc,column));
    }*/
}
开发者ID:deeplearning4j,项目名称:DataVec,代码行数:45,代码来源:DataFramesTests.java
示例17: partition
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Spark vertical partitioner: reads an RDF N-Triples dump, drops '@prefix'
 * declarations, repartitions the triples by predicate (one partition per
 * distinct predicate) and writes the result as Parquet.
 *
 * @param inputPath  directory containing the input triple files
 * @param outputPath target directory for the Parquet output
 */
public static void partition(String inputPath, String outputPath) {
    long lStartTime = System.nanoTime();
    SparkConf sparkConf = new SparkConf().setAppName("JavaSparkSQL").setMaster("local");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    try {
        SQLContext sqlContext = new SQLContext(ctx);
        System.out.println("=== Data source: RDD ===");
        // Parse each line into subject/predicate/object; the regex splits on spaces
        // that are outside double quotes so quoted literals stay intact.
        @SuppressWarnings("serial")
        JavaRDD<RDFgraph> RDF = ctx.textFile(inputPath + "/*").map(new Function<String, RDFgraph>() {
            @Override
            public RDFgraph call(String line) {
                String[] parts = line.split(" (?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");
                RDFgraph entry = new RDFgraph();
                if (parts.length > 2) {
                    entry.setSubject(parts[0]);
                    entry.setPredicate(parts[1]);
                    entry.setObject(parts[2]);
                }
                return entry;
            }
        });
        DataFrame rawGraph = sqlContext.createDataFrame(RDF, RDFgraph.class);
        rawGraph.registerTempTable("rawGraph");
        // One output partition per distinct predicate; '@prefix' rows are not triples.
        int numPredicates = sqlContext
            .sql("SELECT predicate FROM rawGraph WHERE subject != '@prefix' GROUP BY predicate").collect().length;
        DataFrame pureGraph = sqlContext
            .sql("SELECT subject, predicate, object FROM rawGraph WHERE subject != '@prefix'");
        DataFrame partitionedGraph = pureGraph.repartition(numPredicates, new Column("predicate"));
        partitionedGraph.write().parquet(outputPath);
        long lEndTime = System.nanoTime();
        long difference = lEndTime - lStartTime;
        System.out.println("Partitioning complete.\nElapsed milliseconds: " + difference / 1000000);
    } finally {
        // Fixed resource leak: the JavaSparkContext was never stopped.
        ctx.stop();
    }
}
开发者ID:martinpz,项目名称:TriAL-QL-Engine,代码行数:51,代码来源:VerticalPartitionerSpark.java
示例18: testQueryFieldsFromIgnite
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Verifies that field queries against an Ignite-backed RDD return correct rows
 * both via SQL bind parameters and via an equivalent DataFrame filter expression.
 *
 * @throws Exception If failed.
 */
public void testQueryFieldsFromIgnite() throws Exception {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "test");
    try {
        JavaIgniteContext<String, Entity> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());
        JavaIgniteRDD<String, Entity> cache = ic.fromCache(ENTITY_CACHE_NAME);
        // Populate the cache with 1001 generated entities (ids 0..1000).
        cache.savePairs(sc.parallelize(F.range(0, 1001), 2).mapToPair(INT_TO_ENTITY_F));
        // Query using SQL bind parameters.
        Dataset<Row> df =
            cache.sql("select id, name, salary from Entity where name = ? and salary = ?", "name50", 5000);
        df.printSchema();
        Row[] res = (Row[])df.collect();
        assertEquals("Invalid result length", 1, res.length);
        assertEquals("Invalid result", 50, res[0].get(0));
        assertEquals("Invalid result", "name50", res[0].get(1));
        assertEquals("Invalid result", 5000, res[0].get(2));
        // Same predicate expressed as a DataFrame filter expression.
        Column exp = new Column("NAME").equalTo("name50").and(new Column("SALARY").equalTo(5000));
        Dataset<Row> df0 = cache.sql("select id, name, salary from Entity").where(exp);
        // Fixed copy-paste bug: this previously printed df's schema instead of df0's.
        df0.printSchema();
        Row[] res0 = (Row[])df0.collect();
        assertEquals("Invalid result length", 1, res0.length);
        assertEquals("Invalid result", 50, res0[0].get(0));
        assertEquals("Invalid result", "name50", res0[0].get(1));
        assertEquals("Invalid result", 5000, res0[0].get(2));
        assertEquals("Invalid count", 500, cache.sql("select id from Entity where id > 500").count());
    }
    finally {
        sc.stop();
    }
}
开发者ID:apache,项目名称:ignite,代码行数:45,代码来源:JavaStandaloneIgniteRDDSelfTest.java
示例19: testQueryFieldsFromIgnite
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Verifies that field queries against an embedded Ignite-backed RDD return
 * correct rows both via SQL bind parameters and via an equivalent DataFrame
 * filter expression.
 *
 * @throws Exception If failed.
 */
public void testQueryFieldsFromIgnite() throws Exception {
    JavaSparkContext sc = createContext();
    JavaIgniteContext<String, Entity> ic = null;
    try {
        ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);
        JavaIgniteRDD<String, Entity> cache = ic.fromCache(PARTITIONED_CACHE_NAME);
        // Populate the cache with 1001 generated entities (ids 0..1000).
        cache.savePairs(sc.parallelize(F.range(0, 1001), GRID_CNT).mapToPair(INT_TO_ENTITY_F), true);
        // Query using SQL bind parameters.
        Dataset<Row> df =
            cache.sql("select id, name, salary from Entity where name = ? and salary = ?", "name50", 5000);
        df.printSchema();
        Row[] res = (Row[])df.collect();
        assertEquals("Invalid result length", 1, res.length);
        assertEquals("Invalid result", 50, res[0].get(0));
        assertEquals("Invalid result", "name50", res[0].get(1));
        assertEquals("Invalid result", 5000, res[0].get(2));
        // Same predicate expressed as a DataFrame filter expression.
        Column exp = new Column("NAME").equalTo("name50").and(new Column("SALARY").equalTo(5000));
        Dataset<Row> df0 = cache.sql("select id, name, salary from Entity").where(exp);
        // Fixed copy-paste bug: this previously printed df's schema instead of df0's.
        df0.printSchema();
        Row[] res0 = (Row[])df0.collect();
        assertEquals("Invalid result length", 1, res0.length);
        assertEquals("Invalid result", 50, res0[0].get(0));
        assertEquals("Invalid result", "name50", res0[0].get(1));
        assertEquals("Invalid result", 5000, res0[0].get(2));
        assertEquals("Invalid count", 500, cache.sql("select id from Entity where id > 500").count());
    }
    finally {
        if (ic != null)
            ic.close(true);
        sc.stop();
    }
}
开发者ID:apache,项目名称:ignite,代码行数:50,代码来源:JavaEmbeddedIgniteRDDSelfTest.java
示例20: testFoobar
import org.apache.spark.sql.Column; //导入依赖的package包/类
/**
 * Writes ten single-int rows to a local ORC file and reads them back through a
 * column filter.
 *
 * NOTE(review): the filter below is col1 > 1 AND col1 < 1, which can never
 * match, so show() always prints an empty result — confirm whether OR (or
 * different bounds) was intended.
 * NOTE(review): the ORC path is a hard-coded developer-machine path; this test
 * only runs in that environment.
 */
@Test
public void testFoobar() {
    // Fixed raw type: was `new ArrayList()` without a type argument.
    List<Row> foo = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        ValueRow row = new ValueRow(1);
        row.setColumn(1, new SQLInteger(i));
        foo.add(row);
    }
    StructType schema = DataTypes.createStructType(new StructField[]{DataTypes.createStructField("col1", DataTypes.IntegerType, true)});
    SpliceSpark.getSessionUnsafe().createDataFrame(foo, schema).write().format("orc").mode(SaveMode.Append)
        .orc("/Users/jleach/Documents/workspace/spliceengine/hbase_sql/target/external/orc_it");
    // Fixed literal suffix: `1l` -> `1L` (lowercase l is easily misread as the digit 1).
    Column filter = (new Column("col1")).gt(1L).and(new Column("col1").lt(1L));
    SpliceSpark.getSessionUnsafe().read().schema(schema)
        .orc("/Users/jleach/Documents/workspace/spliceengine/hbase_sql/target/external/orc_it")
        .filter(filter).show();
}
开发者ID:splicemachine,项目名称:spliceengine,代码行数:46,代码来源:SparkDataSetTest.java
注:本文中的org.apache.spark.sql.Column类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论