public class FrameRDDConverterUtils extends Object
Modifier and Type | Class and Description |
---|---|
static class |
FrameRDDConverterUtils.LongFrameToLongWritableFrameFunction |
static class |
FrameRDDConverterUtils.LongWritableFrameToLongFrameFunction |
static class |
FrameRDDConverterUtils.LongWritableTextToLongTextFunction |
static class |
FrameRDDConverterUtils.LongWritableToSerFunction |
Constructor and Description |
---|
FrameRDDConverterUtils() |
Modifier and Type | Method and Description |
---|---|
static org.apache.spark.api.java.JavaRDD<String> |
binaryBlockToCsv(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
MatrixCharacteristics mcIn,
org.apache.sysml.runtime.io.FileFormatPropertiesCSV props,
boolean strict) |
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
binaryBlockToDataFrame(org.apache.spark.sql.SparkSession sparkSession,
org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
MatrixCharacteristics mc,
org.apache.sysml.parser.Expression.ValueType[] schema) |
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
binaryBlockToDataFrame(org.apache.spark.sql.SQLContext sqlContext,
org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
MatrixCharacteristics mc,
org.apache.sysml.parser.Expression.ValueType[] schema)
Deprecated.
|
static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> |
binaryBlockToMatrixBlock(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input,
MatrixCharacteristics mcIn,
MatrixCharacteristics mcOut) |
static org.apache.spark.api.java.JavaRDD<String> |
binaryBlockToTextCell(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input,
MatrixCharacteristics mcIn) |
static int |
convertDFSchemaToFrameSchema(org.apache.spark.sql.types.StructType dfschema,
String[] colnames,
org.apache.sysml.parser.Expression.ValueType[] fschema,
boolean containsID)
NOTE: regarding the support of vector columns, we make the following
schema restriction: single vector column, which allows inference of
the vector length without data access and covers the common case.
|
static org.apache.spark.sql.types.StructType |
convertFrameSchemaToDFSchema(org.apache.sysml.parser.Expression.ValueType[] fschema,
boolean containsID)
Converts a Frame schema into the corresponding DataFrame schema
|
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input,
MatrixCharacteristics mc,
org.apache.sysml.parser.Expression.ValueType[] schema,
boolean hasHeader,
String delim,
boolean fill,
double fillValue) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaRDD<String> input,
MatrixCharacteristics mcOut,
org.apache.sysml.parser.Expression.ValueType[] schema,
boolean hasHeader,
String delim,
boolean fill,
double fillValue) |
static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> |
csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaRDD<String> dataRdd,
String delim,
org.apache.sysml.parser.Expression.ValueType[] schema) |
static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> |
csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
String fnameIn,
String delim,
org.apache.sysml.parser.Expression.ValueType[] schema) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
MatrixCharacteristics mc,
boolean containsID) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
MatrixCharacteristics mc,
boolean containsID,
Pair<String[],org.apache.sysml.parser.Expression.ValueType[]> out) |
static org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,FrameBlock> |
matrixBlockToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input,
MatrixCharacteristics mcIn) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
matrixBlockToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input,
MatrixCharacteristics mcIn) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
textCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> in,
MatrixCharacteristics mcOut,
org.apache.sysml.parser.Expression.ValueType[] schema) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
textCellToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.api.java.JavaPairRDD<Long,org.apache.hadoop.io.Text> input,
MatrixCharacteristics mc,
org.apache.sysml.parser.Expression.ValueType[] schema) |
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input, MatrixCharacteristics mc, org.apache.sysml.parser.Expression.ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaRDD<String> input, MatrixCharacteristics mcOut, org.apache.sysml.parser.Expression.ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue)
public static org.apache.spark.api.java.JavaRDD<String> binaryBlockToCsv(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in, MatrixCharacteristics mcIn, org.apache.sysml.runtime.io.FileFormatPropertiesCSV props, boolean strict)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> textCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> in, MatrixCharacteristics mcOut, org.apache.sysml.parser.Expression.ValueType[] schema)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> textCellToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<Long,org.apache.hadoop.io.Text> input, MatrixCharacteristics mc, org.apache.sysml.parser.Expression.ValueType[] schema)
public static org.apache.spark.api.java.JavaRDD<String> binaryBlockToTextCell(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input, MatrixCharacteristics mcIn)
public static org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,FrameBlock> matrixBlockToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input, MatrixCharacteristics mcIn)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> matrixBlockToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input, MatrixCharacteristics mcIn)
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryBlockToMatrixBlock(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, MatrixCharacteristics mc, boolean containsID)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, MatrixCharacteristics mc, boolean containsID, Pair<String[],org.apache.sysml.parser.Expression.ValueType[]> out)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> binaryBlockToDataFrame(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in, MatrixCharacteristics mc, org.apache.sysml.parser.Expression.ValueType[] schema)
@Deprecated public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> binaryBlockToDataFrame(org.apache.spark.sql.SQLContext sqlContext, org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in, MatrixCharacteristics mc, org.apache.sysml.parser.Expression.ValueType[] schema)
public static org.apache.spark.sql.types.StructType convertFrameSchemaToDFSchema(org.apache.sysml.parser.Expression.ValueType[] fschema, boolean containsID)
Parameters:
fschema - frame schema
containsID - true if contains ID column

public static int convertDFSchemaToFrameSchema(org.apache.spark.sql.types.StructType dfschema, String[] colnames, org.apache.sysml.parser.Expression.ValueType[] fschema, boolean containsID)
Parameters:
dfschema - schema as StructType
colnames - column names
fschema - array of SystemML ValueTypes
containsID - if true, contains ID column

public static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc, String fnameIn, String delim, org.apache.sysml.parser.Expression.ValueType[] schema)
Copyright © 2018 The Apache Software Foundation. All rights reserved.