public class RDDConverterUtilsExt extends Object
Modifier and Type | Class and Description |
---|---|
static class |
RDDConverterUtilsExt.AddRowID |
static class |
RDDConverterUtilsExt.RDDConverterTypes |
Constructor and Description |
---|
RDDConverterUtilsExt() |
Modifier and Type | Method and Description |
---|---|
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
addIDToDataFrame(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
org.apache.spark.sql.SparkSession sparkSession,
String nameOfCol)
Add element indices as a new column to the DataFrame.
|
static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> |
coordinateMatrixToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc,
org.apache.spark.mllib.linalg.distributed.CoordinateMatrix input,
DataCharacteristics mcIn,
boolean outputEmptyBlocks) |
static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> |
coordinateMatrixToBinaryBlock(org.apache.spark.SparkContext sc,
org.apache.spark.mllib.linalg.distributed.CoordinateMatrix input,
DataCharacteristics mcIn,
boolean outputEmptyBlocks) |
static void |
copyRowBlocks(MatrixBlock mb,
int rowIndex,
MatrixBlock ret,
int numRowsPerBlock,
int rlen,
int clen) |
static void |
copyRowBlocks(MatrixBlock mb,
int rowIndex,
MatrixBlock ret,
long numRowsPerBlock,
long rlen,
long clen) |
static void |
copyRowBlocks(MatrixBlock mb,
long rowIndex,
MatrixBlock ret,
int numRowsPerBlock,
int rlen,
int clen) |
static void |
copyRowBlocks(MatrixBlock mb,
long rowIndex,
MatrixBlock ret,
long numRowsPerBlock,
long rlen,
long clen) |
static void |
postProcessAfterCopying(MatrixBlock ret) |
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
projectColumns(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df,
ArrayList<String> columns) |
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
stringDataFrameToVectorDataFrame(org.apache.spark.sql.SparkSession sparkSession,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> inputDF)
Convert a dataframe of comma-separated string rows to a dataframe of
ml.linalg.Vector rows.
|
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> coordinateMatrixToBinaryBlock(org.apache.spark.api.java.JavaSparkContext sc, org.apache.spark.mllib.linalg.distributed.CoordinateMatrix input, DataCharacteristics mcIn, boolean outputEmptyBlocks)
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> coordinateMatrixToBinaryBlock(org.apache.spark.SparkContext sc, org.apache.spark.mllib.linalg.distributed.CoordinateMatrix input, DataCharacteristics mcIn, boolean outputEmptyBlocks)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> projectColumns(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, ArrayList<String> columns)
public static void copyRowBlocks(MatrixBlock mb, int rowIndex, MatrixBlock ret, int numRowsPerBlock, int rlen, int clen)
public static void copyRowBlocks(MatrixBlock mb, long rowIndex, MatrixBlock ret, int numRowsPerBlock, int rlen, int clen)
public static void copyRowBlocks(MatrixBlock mb, int rowIndex, MatrixBlock ret, long numRowsPerBlock, long rlen, long clen)
public static void copyRowBlocks(MatrixBlock mb, long rowIndex, MatrixBlock ret, long numRowsPerBlock, long rlen, long clen)
public static void postProcessAfterCopying(MatrixBlock ret)
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> addIDToDataFrame(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df, org.apache.spark.sql.SparkSession sparkSession, String nameOfCol)
df - input data frame
sparkSession - the Spark Session
nameOfCol - name of index column
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> stringDataFrameToVectorDataFrame(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> inputDF)
Example input rows:
((1.2, 4.3, 3.4))
(1.2, 3.4, 2.2)
[[1.2, 34.3, 1.2, 1.25]]
[1.2, 3.4]
sparkSession - Spark Session
inputDF - dataframe of comma-separated row strings to convert to a dataframe of ml.linalg.Vector rows
Copyright © 2020 The Apache Software Foundation. All rights reserved.