public class FrameRDDConverterUtils extends Object
Modifier and Type | Class and Description |
---|---|
static class |
FrameRDDConverterUtils.LongFrameToLongWritableFrameFunction |
static class |
FrameRDDConverterUtils.LongWritableFrameToLongFrameFunction |
static class |
FrameRDDConverterUtils.LongWritableTextToLongTextFunction |
static class |
FrameRDDConverterUtils.LongWritableToSerFunction |
Constructor and Description |
---|
FrameRDDConverterUtils() |
Modifier and Type | Method and Description |
---|---|
static org.apache.spark.api.java.JavaRDD<String> |
binaryBlockToCsv(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
DataCharacteristics mcIn,
FileFormatPropertiesCSV props,
boolean strict) |
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
binaryBlockToDataFrame(org.apache.spark.sql.SparkSession sparkSession,
org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
DataCharacteristics mc,
Types.ValueType[] schema) |
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
binaryBlockToDataFrame(org.apache.spark.sql.SQLContext sqlContext,
org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
DataCharacteristics mc,
Types.ValueType[] schema)
Deprecated.
|
static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> |
binaryBlockToMatrixBlock(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input,
DataCharacteristics mcIn,
DataCharacteristics mcOut) |
static org.apache.spark.api.java.JavaRDD<String> |
binaryBlockToTextCell(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input,
DataCharacteristics mcIn) |
static int |
convertDFSchemaToFrameSchema(org.apache.spark.sql.types.StructType dfschema,
String[] colnames,
Types.ValueType[] fschema,
boolean containsID)
NOTE: regarding the support of vector columns, we make the following
schema restriction: single vector column, which allows inference of
the vector length without data access and covers the common case.
|
static org.apache.spark.sql.types.StructType |
convertFrameSchemaToDFSchema(Types.ValueType[] fschema,
boolean containsID)
Converts a SystemDS Frame schema into a Spark DataFrame schema.
|
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input,
DataCharacteristics mc,
Types.ValueType[] schema,
boolean hasHeader,
String delim,
boolean fill,
double fillValue,
Set<String> naStrings) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaRDD<String> input,
DataCharacteristics mcOut,
Types.ValueType[] schema,
boolean hasHeader,
String delim,
boolean fill,
double fillValue,
Set<String> naStrings) |
static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> |
csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaRDD<String> dataRdd,
String delim,
Types.ValueType[] schema) |
static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> |
csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
String fnameIn,
String delim,
Types.ValueType[] schema) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
DataCharacteristics mc,
boolean containsID) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
DataCharacteristics mc,
boolean containsID,
Pair<String[],Types.ValueType[]> out) |
static org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,FrameBlock> |
matrixBlockToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input,
DataCharacteristics mcIn) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
matrixBlockToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input,
DataCharacteristics dcIn) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
textCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> in,
DataCharacteristics mcOut,
Types.ValueType[] schema) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
textCellToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaPairRDD<Long,org.apache.hadoop.io.Text> input,
DataCharacteristics mc,
Types.ValueType[] schema) |
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input, DataCharacteristics mc, Types.ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue, Set<String> naStrings)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaRDD<String> input, DataCharacteristics mcOut, Types.ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue, Set<String> naStrings)
public static org.apache.spark.api.java.JavaRDD<String> binaryBlockToCsv(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in, DataCharacteristics mcIn, FileFormatPropertiesCSV props, boolean strict)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> textCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> in, DataCharacteristics mcOut, Types.ValueType[] schema)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> textCellToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<Long,org.apache.hadoop.io.Text> input, DataCharacteristics mc, Types.ValueType[] schema)
public static org.apache.spark.api.java.JavaRDD<String> binaryBlockToTextCell(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input, DataCharacteristics mcIn)
public static org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,FrameBlock> matrixBlockToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input, DataCharacteristics mcIn)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> matrixBlockToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input, DataCharacteristics dcIn)
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryBlockToMatrixBlock(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input, DataCharacteristics mcIn, DataCharacteristics mcOut)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, DataCharacteristics mc, boolean containsID)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, DataCharacteristics mc, boolean containsID, Pair<String[],Types.ValueType[]> out)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> binaryBlockToDataFrame(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in, DataCharacteristics mc, Types.ValueType[] schema)
@Deprecated public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> binaryBlockToDataFrame(org.apache.spark.sql.SQLContext sqlContext, org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in, DataCharacteristics mc, Types.ValueType[] schema)
public static org.apache.spark.sql.types.StructType convertFrameSchemaToDFSchema(Types.ValueType[] fschema, boolean containsID)
fschema
- frame schema
containsID
- true if contains ID column

public static int convertDFSchemaToFrameSchema(org.apache.spark.sql.types.StructType dfschema, String[] colnames, Types.ValueType[] fschema, boolean containsID)
dfschema
- schema as StructType
colnames
- column names
fschema
- array of SystemDS ValueTypes
containsID
- if true, contains ID column

public static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc, String fnameIn, String delim, Types.ValueType[] schema)
public static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaRDD<String> dataRdd, String delim, Types.ValueType[] schema)
Copyright © 2020 The Apache Software Foundation. All rights reserved.