public class SparkExecutionContext extends ExecutionContext
Modifier and Type | Class and Description |
---|---|
static class |
SparkExecutionContext.SparkClusterConfig
Captures relevant spark cluster configuration properties, e.g., memory budgets and
degree of parallelism.
|
Modifier and Type | Field and Description |
---|---|
static boolean |
FAIR_SCHEDULER_MODE |
Modifier and Type | Method and Description |
---|---|
void |
addLineage(String varParent,
String varChild,
boolean broadcast) |
void |
addLineageBroadcast(String varParent,
String varChild)
Adds a child broadcast object to the lineage of a parent rdd.
|
void |
addLineageRDD(String varParent,
String varChild)
Adds a child rdd object to the lineage of a parent rdd.
|
org.apache.spark.broadcast.Broadcast<CacheBlock> |
broadcastVariable(CacheableData<CacheBlock> cd) |
void |
cacheMatrixObject(String var) |
static void |
cleanupBroadcastVariable(org.apache.spark.broadcast.Broadcast<?> bvar)
This call destroys a broadcast variable at all executors and the driver.
|
void |
cleanupCacheableData(CacheableData<?> mo) |
static void |
cleanupRDDVariable(org.apache.spark.api.java.JavaPairRDD<?,?> rvar)
This call removes an rdd variable from executor memory and disk if required.
|
void |
cleanupThreadLocalSchedulerPool(int pool) |
void |
close() |
static org.apache.spark.SparkConf |
createSystemDSSparkConf()
Sets up a SystemDS-preferred Spark configuration based on the implicit
default configuration (as passed via configurations from outside).
|
org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> |
getBinaryMatrixBlockRDDHandleForVariable(String varname)
Spark instructions should call this for all matrix inputs except broadcast
variables.
|
org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> |
getBinaryMatrixBlockRDDHandleForVariable(String varname,
int numParts,
boolean inclEmpty) |
org.apache.spark.api.java.JavaPairRDD<TensorIndexes,TensorBlock> |
getBinaryTensorBlockRDDHandleForVariable(String varname)
Spark instructions should call this for all tensor inputs except broadcast
variables.
|
org.apache.spark.api.java.JavaPairRDD<TensorIndexes,TensorBlock> |
getBinaryTensorBlockRDDHandleForVariable(String varname,
int numParts,
boolean inclEmpty) |
PartitionedBroadcast<FrameBlock> |
getBroadcastForFrameVariable(String varname) |
PartitionedBroadcast<MatrixBlock> |
getBroadcastForMatrixObject(MatrixObject mo) |
PartitionedBroadcast<TensorBlock> |
getBroadcastForTensorObject(TensorObject to) |
PartitionedBroadcast<TensorBlock> |
getBroadcastForTensorVariable(String varname) |
PartitionedBroadcast<MatrixBlock> |
getBroadcastForVariable(String varname) |
static double |
getBroadcastMemoryBudget()
Obtains the available memory budget for broadcast variables in bytes.
|
static double |
getDataMemoryBudget(boolean min,
boolean refresh)
Obtain the available memory budget for data storage in bytes.
|
static int |
getDefaultParallelism(boolean refresh)
Obtain the default degree of parallelism (cores in the cluster).
|
org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
getFrameBinaryBlockRDDHandleForVariable(String varname)
Spark instructions should call this for all frame inputs except broadcast
variables.
|
static int |
getNumExecutors()
Obtain the number of executors in the cluster (excluding the driver).
|
org.apache.spark.api.java.JavaPairRDD<?,?> |
getRDDHandleForFrameObject(FrameObject fo,
Types.FileFormat fmt)
FIXME: currently this implementation assumes matrix representations but uses a frame signature
in order to support the old transform implementation.
|
org.apache.spark.api.java.JavaPairRDD<?,?> |
getRDDHandleForMatrixObject(MatrixObject mo,
Types.FileFormat fmt) |
org.apache.spark.api.java.JavaPairRDD<?,?> |
getRDDHandleForMatrixObject(MatrixObject mo,
Types.FileFormat fmt,
int numParts,
boolean inclEmpty) |
org.apache.spark.api.java.JavaPairRDD<?,?> |
getRDDHandleForTensorObject(TensorObject to,
Types.FileFormat fmt,
int numParts,
boolean inclEmpty) |
org.apache.spark.api.java.JavaPairRDD<?,?> |
getRDDHandleForVariable(String varname,
Types.FileFormat fmt,
int numParts,
boolean inclEmpty) |
static SparkExecutionContext.SparkClusterConfig |
getSparkClusterConfig()
Obtains the lazily analyzed spark cluster configuration.
|
org.apache.spark.api.java.JavaSparkContext |
getSparkContext()
Returns the used singleton spark context.
|
static org.apache.spark.api.java.JavaSparkContext |
getSparkContextStatic() |
static boolean |
isLazySparkContextCreation() |
static boolean |
isLocalMaster() |
boolean |
isRDDCached(int rddID) |
static boolean |
isSparkContextCreated()
Indicates if the spark context has been created or has
been passed in from outside.
|
void |
repartitionAndCacheMatrixObject(String var) |
static void |
resetSparkContextStatic() |
void |
setRDDHandleForVariable(String varname,
org.apache.spark.api.java.JavaPairRDD<?,?> rdd)
Keep the output rdd of spark rdd operations as meta data of matrix/frame
objects in the symbol table.
|
int |
setThreadLocalSchedulerPool() |
static FrameBlock |
toFrameBlock(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> rdd,
Types.ValueType[] schema,
int rlen,
int clen) |
static FrameBlock |
toFrameBlock(RDDObject rdd,
Types.ValueType[] schema,
int rlen,
int clen) |
static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> |
toFrameJavaPairRDD(org.apache.spark.api.java.JavaSparkContext sc,
FrameBlock src) |
static MatrixBlock |
toMatrixBlock(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> rdd,
int rlen,
int clen,
int blen,
long nnz)
Utility method for creating a single matrix block out of a binary block RDD.
|
static MatrixBlock |
toMatrixBlock(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixCell> rdd,
int rlen,
int clen,
long nnz)
Utility method for creating a single matrix block out of a binary cell RDD.
|
static MatrixBlock |
toMatrixBlock(RDDObject rdd,
int rlen,
int clen,
int blen,
long nnz)
This method is a generic abstraction for calls from the buffer pool.
|
static MatrixBlock |
toMatrixBlock(RDDObject rdd,
int rlen,
int clen,
long nnz) |
static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> |
toMatrixJavaPairRDD(org.apache.spark.api.java.JavaSparkContext sc,
MatrixBlock src,
int blen) |
static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> |
toMatrixJavaPairRDD(org.apache.spark.api.java.JavaSparkContext sc,
MatrixBlock src,
int blen,
int numParts,
boolean inclEmpty) |
static PartitionedBlock<MatrixBlock> |
toPartitionedMatrixBlock(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> rdd,
int rlen,
int clen,
int blen,
long nnz) |
static TensorBlock |
toTensorBlock(org.apache.spark.api.java.JavaPairRDD<TensorIndexes,TensorBlock> rdd,
DataCharacteristics dc) |
static org.apache.spark.api.java.JavaPairRDD<TensorIndexes,TensorBlock> |
toTensorJavaPairRDD(org.apache.spark.api.java.JavaSparkContext sc,
TensorBlock src,
int blen) |
static org.apache.spark.api.java.JavaPairRDD<TensorIndexes,TensorBlock> |
toTensorJavaPairRDD(org.apache.spark.api.java.JavaSparkContext sc,
TensorBlock src,
int blen,
int numParts,
boolean inclEmpty) |
static void |
writeFrameRDDtoHDFS(RDDObject rdd,
String path,
Types.FileFormat fmt) |
static long |
writeMatrixRDDtoHDFS(RDDObject rdd,
String path,
Types.FileFormat fmt) |
allocateGPUMatrixObject, cleanupDataObject, containsVariable, containsVariable, createCacheableData, createFrameObject, createMatrixObject, getCacheableData, getCacheableData, getDataCharacteristics, getDenseMatrixOutputForGPUInstruction, getFrameInput, getFrameObject, getFrameObject, getGPUContext, getGPUContexts, getLineage, getLineageItem, getListObject, getListObject, getMatrixInput, getMatrixInputForGPUInstruction, getMatrixInputs, getMatrixInputs, getMatrixObject, getMatrixObject, getMetaData, getNumGPUContexts, getOrCreateLineageItem, getProgram, getScalarInput, getScalarInput, getScalarInputs, getSparseMatrixOutputForGPUInstruction, getTensorInput, getTensorObject, getTID, getVariable, getVariable, getVariables, getVarList, getVarListPartitioned, isAutoCreateVars, isFrameObject, isMatrixObject, pinVariables, releaseCacheableData, releaseFrameInput, releaseMatrixInput, releaseMatrixInput, releaseMatrixInputForGPUInstruction, releaseMatrixInputs, releaseMatrixInputs, releaseMatrixOutputForGPUInstruction, releaseTensorInput, releaseTensorInput, removeVariable, setAutoCreateVars, setFrameOutput, setGPUContexts, setLineage, setMatrixOutput, setMatrixOutput, setMetaData, setMetaData, setProgram, setScalarOutput, setTensorOutput, setTID, setVariable, setVariables, toString, traceLineage, unpinVariables
public static final boolean FAIR_SCHEDULER_MODE
public org.apache.spark.api.java.JavaSparkContext getSparkContext()
public static org.apache.spark.api.java.JavaSparkContext getSparkContextStatic()
public static boolean isSparkContextCreated()
public static void resetSparkContextStatic()
public void close()
public static boolean isLazySparkContextCreation()
public static org.apache.spark.SparkConf createSystemDSSparkConf()
public static boolean isLocalMaster()
public org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> getBinaryMatrixBlockRDDHandleForVariable(String varname)
varname - variable name
public org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> getBinaryMatrixBlockRDDHandleForVariable(String varname, int numParts, boolean inclEmpty)
public org.apache.spark.api.java.JavaPairRDD<TensorIndexes,TensorBlock> getBinaryTensorBlockRDDHandleForVariable(String varname)
varname - variable name
public org.apache.spark.api.java.JavaPairRDD<TensorIndexes,TensorBlock> getBinaryTensorBlockRDDHandleForVariable(String varname, int numParts, boolean inclEmpty)
public org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> getFrameBinaryBlockRDDHandleForVariable(String varname)
varname - variable name
public org.apache.spark.api.java.JavaPairRDD<?,?> getRDDHandleForVariable(String varname, Types.FileFormat fmt, int numParts, boolean inclEmpty)
public org.apache.spark.api.java.JavaPairRDD<?,?> getRDDHandleForMatrixObject(MatrixObject mo, Types.FileFormat fmt)
public org.apache.spark.api.java.JavaPairRDD<?,?> getRDDHandleForMatrixObject(MatrixObject mo, Types.FileFormat fmt, int numParts, boolean inclEmpty)
public org.apache.spark.api.java.JavaPairRDD<?,?> getRDDHandleForTensorObject(TensorObject to, Types.FileFormat fmt, int numParts, boolean inclEmpty)
public org.apache.spark.api.java.JavaPairRDD<?,?> getRDDHandleForFrameObject(FrameObject fo, Types.FileFormat fmt)
fo - frame object
fmt - file format type
public org.apache.spark.broadcast.Broadcast<CacheBlock> broadcastVariable(CacheableData<CacheBlock> cd)
public PartitionedBroadcast<MatrixBlock> getBroadcastForMatrixObject(MatrixObject mo)
public PartitionedBroadcast<TensorBlock> getBroadcastForTensorObject(TensorObject to)
public PartitionedBroadcast<MatrixBlock> getBroadcastForVariable(String varname)
public PartitionedBroadcast<TensorBlock> getBroadcastForTensorVariable(String varname)
public PartitionedBroadcast<FrameBlock> getBroadcastForFrameVariable(String varname)
public void setRDDHandleForVariable(String varname, org.apache.spark.api.java.JavaPairRDD<?,?> rdd)
varname - variable name
rdd - JavaPairRDD handle for variable
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> toMatrixJavaPairRDD(org.apache.spark.api.java.JavaSparkContext sc, MatrixBlock src, int blen)
public static org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> toMatrixJavaPairRDD(org.apache.spark.api.java.JavaSparkContext sc, MatrixBlock src, int blen, int numParts, boolean inclEmpty)
public static org.apache.spark.api.java.JavaPairRDD<TensorIndexes,TensorBlock> toTensorJavaPairRDD(org.apache.spark.api.java.JavaSparkContext sc, TensorBlock src, int blen)
public static org.apache.spark.api.java.JavaPairRDD<TensorIndexes,TensorBlock> toTensorJavaPairRDD(org.apache.spark.api.java.JavaSparkContext sc, TensorBlock src, int blen, int numParts, boolean inclEmpty)
public static org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> toFrameJavaPairRDD(org.apache.spark.api.java.JavaSparkContext sc, FrameBlock src)
public static MatrixBlock toMatrixBlock(RDDObject rdd, int rlen, int clen, int blen, long nnz)
rdd - rdd object
rlen - number of rows
clen - number of columns
blen - block length
nnz - number of non-zeros
public static MatrixBlock toMatrixBlock(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> rdd, int rlen, int clen, int blen, long nnz)
rdd - JavaPairRDD for matrix block
rlen - number of rows
clen - number of columns
blen - block length
nnz - number of non-zeros
public static MatrixBlock toMatrixBlock(RDDObject rdd, int rlen, int clen, long nnz)
public static MatrixBlock toMatrixBlock(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixCell> rdd, int rlen, int clen, long nnz)
rdd - JavaPairRDD for matrix block
rlen - number of rows
clen - number of columns
nnz - number of non-zeros
public static TensorBlock toTensorBlock(org.apache.spark.api.java.JavaPairRDD<TensorIndexes,TensorBlock> rdd, DataCharacteristics dc)
public static PartitionedBlock<MatrixBlock> toPartitionedMatrixBlock(org.apache.spark.api.java.JavaPairRDD<MatrixIndexes,MatrixBlock> rdd, int rlen, int clen, int blen, long nnz)
public static FrameBlock toFrameBlock(RDDObject rdd, Types.ValueType[] schema, int rlen, int clen)
public static FrameBlock toFrameBlock(org.apache.spark.api.java.JavaPairRDD<Long,FrameBlock> rdd, Types.ValueType[] schema, int rlen, int clen)
public static long writeMatrixRDDtoHDFS(RDDObject rdd, String path, Types.FileFormat fmt)
public static void writeFrameRDDtoHDFS(RDDObject rdd, String path, Types.FileFormat fmt)
public void addLineageRDD(String varParent, String varChild)
varParent - parent variable
varChild - child variable
public void addLineageBroadcast(String varParent, String varChild)
varParent - parent variable
varChild - child variable
public void cleanupCacheableData(CacheableData<?> mo)
Overrides: cleanupCacheableData in class ExecutionContext
public static void cleanupBroadcastVariable(org.apache.spark.broadcast.Broadcast<?> bvar)
bvar - broadcast variable
public static void cleanupRDDVariable(org.apache.spark.api.java.JavaPairRDD<?,?> rvar)
rvar - rdd variable to remove
public void repartitionAndCacheMatrixObject(String var)
public void cacheMatrixObject(String var)
public int setThreadLocalSchedulerPool()
public void cleanupThreadLocalSchedulerPool(int pool)
public boolean isRDDCached(int rddID)
public static SparkExecutionContext.SparkClusterConfig getSparkClusterConfig()
public static double getBroadcastMemoryBudget()
public static double getDataMemoryBudget(boolean min, boolean refresh)
min - flag for minimum data budget
refresh - flag for refresh with spark context
public static int getNumExecutors()
public static int getDefaultParallelism(boolean refresh)
refresh - flag for refresh with spark context
Copyright © 2020 The Apache Software Foundation. All rights reserved.