public class FrameRDDConverterUtils extends Object
| Modifier and Type | Class and Description | 
|---|---|
static class  | 
FrameRDDConverterUtils.LongFrameToLongWritableFrameFunction  | 
static class  | 
FrameRDDConverterUtils.LongWritableFrameToLongFrameFunction  | 
static class  | 
FrameRDDConverterUtils.LongWritableTextToLongTextFunction  | 
static class  | 
FrameRDDConverterUtils.LongWritableToSerFunction  | 
| Constructor and Description | 
|---|
FrameRDDConverterUtils()  | 
| Modifier and Type | Method and Description | 
|---|---|
static org.apache.spark.api.java.JavaRDD<String> | 
binaryBlockToCsv(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
                DataCharacteristics mcIn,
                FileFormatPropertiesCSV props,
                boolean strict)  | 
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | 
binaryBlockToDataFrame(org.apache.spark.sql.SparkSession sparkSession,
                      org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
                      DataCharacteristics mc,
                      Types.ValueType[] schema)  | 
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | 
binaryBlockToDataFrame(org.apache.spark.sql.SQLContext sqlContext,
                      org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
                      DataCharacteristics mc,
                      Types.ValueType[] schema)
Deprecated.  
 | 
static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> | 
binaryBlockToMatrixBlock(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input,
                        DataCharacteristics mcIn,
                        DataCharacteristics mcOut)  | 
static org.apache.spark.api.java.JavaRDD<String> | 
binaryBlockToTextCell(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input,
                     DataCharacteristics mcIn)  | 
static int | 
convertDFSchemaToFrameSchema(org.apache.spark.sql.types.StructType dfschema,
                            String[] colnames,
                            Types.ValueType[] fschema,
                            boolean containsID)
NOTE: regarding the support of vector columns, we make the following 
 schema restriction: single vector column, which allows inference of
 the vector length without data access and covers the common case. 
 | 
static org.apache.spark.sql.types.StructType | 
convertFrameSchemaToDFSchema(Types.ValueType[] fschema,
                            boolean containsID)
Converts a Frame schema into a DataFrame schema.
 | 
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | 
csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input,
                DataCharacteristics mc,
                Types.ValueType[] schema,
                boolean hasHeader,
                String delim,
                boolean fill,
                double fillValue,
                Set<String> naStrings)  | 
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | 
csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                org.apache.spark.api.java.JavaRDD<String> input,
                DataCharacteristics mcOut,
                Types.ValueType[] schema,
                boolean hasHeader,
                String delim,
                boolean fill,
                double fillValue,
                Set<String> naStrings)  | 
static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> | 
csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
           org.apache.spark.api.java.JavaRDD<String> dataRdd,
           String delim,
           Types.ValueType[] schema)  | 
static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> | 
csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
           String fnameIn,
           String delim,
           Types.ValueType[] schema)  | 
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | 
dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                      org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
                      DataCharacteristics mc,
                      boolean containsID)  | 
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | 
dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                      org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
                      DataCharacteristics mc,
                      boolean containsID,
                      Pair<String[],Types.ValueType[]> out)  | 
static org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,FrameBlock> | 
matrixBlockToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                        org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input,
                        DataCharacteristics mcIn)  | 
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | 
matrixBlockToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc,
                                 org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input,
                                 DataCharacteristics dcIn)  | 
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | 
textCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
                     org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> in,
                     DataCharacteristics mcOut,
                     Types.ValueType[] schema)  | 
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> | 
textCellToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc,
                              org.apache.spark.api.java.JavaPairRDD<Long,org.apache.hadoop.io.Text> input,
                              DataCharacteristics mc,
                              Types.ValueType[] schema)  | 
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> input, DataCharacteristics mc, Types.ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue, Set<String> naStrings)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> csvToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaRDD<String> input, DataCharacteristics mcOut, Types.ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue, Set<String> naStrings)
public static org.apache.spark.api.java.JavaRDD<String> binaryBlockToCsv(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in, DataCharacteristics mcIn, FileFormatPropertiesCSV props, boolean strict)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> textCellToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.Text> in, DataCharacteristics mcOut, Types.ValueType[] schema)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> textCellToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<Long,org.apache.hadoop.io.Text> input, DataCharacteristics mc, Types.ValueType[] schema)
public static org.apache.spark.api.java.JavaRDD<String> binaryBlockToTextCell(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input, DataCharacteristics mcIn)
public static org.apache.spark.api.java.JavaPairRDD<org.apache.hadoop.io.LongWritable,FrameBlock> matrixBlockToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input, DataCharacteristics mcIn)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> matrixBlockToBinaryBlockLongIndex(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> input, DataCharacteristics dcIn)
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> binaryBlockToMatrixBlock(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> input, DataCharacteristics mcIn, DataCharacteristics mcOut)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, DataCharacteristics mc, boolean containsID)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> dataFrameToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, DataCharacteristics mc, boolean containsID, Pair<String[],Types.ValueType[]> out)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> binaryBlockToDataFrame(org.apache.spark.sql.SparkSession sparkSession,
                                                                                            org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in,
                                                                                            DataCharacteristics mc,
                                                                                            Types.ValueType[] schema)
@Deprecated public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> binaryBlockToDataFrame(org.apache.spark.sql.SQLContext sqlContext, org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> in, DataCharacteristics mc, Types.ValueType[] schema)
public static org.apache.spark.sql.types.StructType convertFrameSchemaToDFSchema(Types.ValueType[] fschema, boolean containsID)
fschema - frame schema
containsID - true if contains ID column

public static int convertDFSchemaToFrameSchema(org.apache.spark.sql.types.StructType dfschema,
                                               String[] colnames,
                                               Types.ValueType[] fschema,
                                               boolean containsID)
dfschema - schema as StructType
colnames - column names
fschema - array of SystemDS ValueTypes
containsID - if true, contains ID column

public static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
                                                                                      String fnameIn,
                                                                                      String delim,
                                                                                      Types.ValueType[] schema)
public static org.apache.spark.api.java.JavaRDD<org.apache.spark.sql.Row> csvToRowRDD(org.apache.spark.api.java.JavaSparkContext sc,
                                                                                      org.apache.spark.api.java.JavaRDD<String> dataRdd,
                                                                                      String delim,
                                                                                      Types.ValueType[] schema)
Copyright © 2021 The Apache Software Foundation. All rights reserved.