static void |
LibMatrixCUDA.abs(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs an "abs" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.acos(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs an "acos" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.asin(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs an "asin" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.atan(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs an "atan" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.axpy(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
MatrixObject in2,
String outputName,
double constant) |
Performs daxpy operation
|
static void |
LibMatrixCuDNN.batchNormalizationBackward(GPUContext gCtx,
String instName,
MatrixObject image,
MatrixObject dout,
MatrixObject scale,
MatrixObject dX,
MatrixObject dScale,
MatrixObject dBias,
double epsilon,
MatrixObject resultSaveMean,
MatrixObject resultSaveInvVariance) |
This method computes the backpropagation errors for image, scale and bias of batch normalization layer
|
static void |
LibMatrixCuDNN.batchNormalizationForwardInference(GPUContext gCtx,
String instName,
MatrixObject image,
MatrixObject scale,
MatrixObject bias,
MatrixObject runningMean,
MatrixObject runningVar,
MatrixObject ret,
double epsilon) |
Performs the forward BatchNormalization layer computation for inference
|
static void |
LibMatrixCuDNN.batchNormalizationForwardTraining(GPUContext gCtx,
String instName,
MatrixObject image,
MatrixObject scale,
MatrixObject bias,
MatrixObject runningMean,
MatrixObject runningVar,
MatrixObject ret,
MatrixObject retRunningMean,
MatrixObject retRunningVar,
double epsilon,
double exponentialAverageFactor,
MatrixObject resultSaveMean,
MatrixObject resultSaveInvVariance) |
Performs the forward BatchNormalization layer computation for training
|
static void |
LibMatrixCUDA.biasAdd(GPUContext gCtx,
String instName,
MatrixObject input,
MatrixObject bias,
MatrixObject outputBlock) |
Performs the operation corresponding to the DML script:
ones = matrix(1, rows=1, cols=Hout*Wout)
output = input + matrix(bias %*% ones, rows=1, cols=F*Hout*Wout)
This operation often follows conv2d, hence we have introduced the bias_add(input, bias) built-in function
|
static void |
LibMatrixCUDA.biasMultiply(GPUContext gCtx,
String instName,
MatrixObject input,
MatrixObject bias,
MatrixObject outputBlock) |
Performs the operation corresponding to the DML script:
ones = matrix(1, rows=1, cols=Hout*Wout)
output = input * matrix(bias %*% ones, rows=1, cols=F*Hout*Wout)
This operation often follows conv2d, hence we have introduced the bias_multiply(input, bias) built-in function
|
static void |
LibMatrixCUDA.cbind(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
MatrixObject in2,
String outputName) |
|
static void |
LibMatrixCUDA.ceil(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "ceil" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.channelSums(GPUContext gCtx,
String instName,
MatrixObject input,
MatrixObject outputBlock,
long C,
long HW) |
Performs the channel_sums operation: out = rowSums(matrix(colSums(A), rows=C, cols=HW))
|
static int |
LibMatrixCUDA.computeNNZ(GPUContext gCtx,
jcuda.Pointer densePtr,
int length) |
Utility to compute number of non-zeroes on the GPU
|
static void |
LibMatrixCuDNN.conv2d(GPUContext gCtx,
String instName,
MatrixObject image,
MatrixObject filter,
MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget) |
Performs a 2D convolution
|
static void |
LibMatrixCuDNN.conv2dBackwardData(GPUContext gCtx,
String instName,
MatrixObject filter,
MatrixObject dout,
MatrixObject output,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget) |
This method computes the backpropagation errors for the previous layer of the convolution operation
|
static void |
LibMatrixCuDNN.conv2dBackwardFilter(GPUContext gCtx,
String instName,
MatrixObject image,
MatrixObject dout,
MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget) |
This method computes the backpropagation errors for the filter of the convolution operation
|
static void |
LibMatrixCuDNN.conv2dBiasAdd(GPUContext gCtx,
String instName,
MatrixObject image,
MatrixObject bias,
MatrixObject filter,
MatrixObject output,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget) |
Does a 2D convolution followed by a bias_add
|
static void |
LibMatrixCUDA.cos(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "cos" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.cosh(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "cosh" operation on a matrix on the GPU
|
static LibMatrixCuDNNConvolutionAlgorithm |
LibMatrixCuDNNConvolutionAlgorithm.cudnnGetConvolutionBackwardDataAlgorithm(GPUContext gCtx,
String instName,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
long workspaceLimit) |
Factory method to get the algorithm wrapper for convolution backward data
|
static LibMatrixCuDNNConvolutionAlgorithm |
LibMatrixCuDNNConvolutionAlgorithm.cudnnGetConvolutionBackwardFilterAlgorithm(GPUContext gCtx,
String instName,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
long workspaceLimit) |
Factory method to get the algorithm wrapper for convolution backward filter
|
static LibMatrixCuDNNConvolutionAlgorithm |
LibMatrixCuDNNConvolutionAlgorithm.cudnnGetConvolutionForwardAlgorithm(GPUContext gCtx,
String instName,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
long workspaceLimit) |
Factory method to get the algorithm wrapper for convolution forward
|
static LibMatrixCuDNNPoolingDescriptors |
LibMatrixCuDNNPoolingDescriptors.cudnnPoolingBackwardDescriptors(GPUContext gCtx,
String instName,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
LibMatrixDNN.PoolingType poolingType) |
Get descriptors for maxpooling backward operation
|
static LibMatrixCuDNNPoolingDescriptors |
LibMatrixCuDNNPoolingDescriptors.cudnnPoolingDescriptors(GPUContext gCtx,
String instName,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
LibMatrixDNN.PoolingType poolingType) |
Get descriptors for maxpooling operation
|
static void |
LibMatrixCUDA.cumulativeScan(ExecutionContext ec,
GPUContext gCtx,
String instName,
String kernelFunction,
MatrixObject in,
String outputName) |
Cumulative scan
|
static void |
LibMatrixCUDA.cumulativeSumProduct(ExecutionContext ec,
GPUContext gCtx,
String instName,
String kernelFunction,
MatrixObject in,
String outputName) |
Cumulative sum-product kernel cascade invocation
|
static void |
LibMatrixCUDA.denseTranspose(ExecutionContext ec,
GPUContext gCtx,
String instName,
jcuda.Pointer A,
jcuda.Pointer C,
long numRowsA,
long numColsA) |
Computes C = t(A)
|
void |
CudaSupportFunctions.deviceToHost(GPUContext gCtx,
jcuda.Pointer src,
double[] dest,
String instName,
boolean isEviction) |
|
void |
DoublePrecisionCudaSupportFunctions.deviceToHost(GPUContext gCtx,
jcuda.Pointer src,
double[] dest,
String instName,
boolean isEviction) |
|
void |
SinglePrecisionCudaSupportFunctions.deviceToHost(GPUContext gCtx,
jcuda.Pointer src,
double[] dest,
String instName,
boolean isEviction) |
|
static jcuda.Pointer |
LibMatrixCUDA.double2float(GPUContext gCtx,
jcuda.Pointer A,
jcuda.Pointer ret,
int numElems) |
|
static void |
LibMatrixCUDA.exp(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs an "exp" operation on a matrix on the GPU
|
static jcuda.Pointer |
LibMatrixCUDA.float2double(GPUContext gCtx,
jcuda.Pointer A,
jcuda.Pointer ret,
int numElems) |
|
static void |
LibMatrixCUDA.floor(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "floor" operation on a matrix on the GPU
|
static JCudaKernels |
LibMatrixCUDA.getCudaKernels(GPUContext gCtx) |
|
static jcuda.Pointer |
LibMatrixCUDA.getDensePointer(GPUContext gCtx,
MatrixObject input,
String instName) |
Convenience method to get jcudaDenseMatrixPtr.
|
static jcuda.Pointer |
LibMatrixCuDNN.getDensePointerForCuDNN(GPUContext gCtx,
MatrixObject image,
String instName,
int numRows,
int numCols) |
Convenience method to get jcudaDenseMatrixPtr.
|
static long |
LibMatrixCUDA.getNnz(GPUContext gCtx,
String instName,
MatrixObject mo,
boolean recomputeDenseNNZ) |
Note: if the matrix is in dense format, it explicitly re-computes the number of nonzeros.
|
void |
CudaSupportFunctions.hostToDevice(GPUContext gCtx,
double[] src,
jcuda.Pointer dest,
String instName) |
|
void |
DoublePrecisionCudaSupportFunctions.hostToDevice(GPUContext gCtx,
double[] src,
jcuda.Pointer dest,
String instName) |
|
void |
SinglePrecisionCudaSupportFunctions.hostToDevice(GPUContext gCtx,
double[] src,
jcuda.Pointer dest,
String instName) |
|
static boolean |
LibMatrixCUDA.isInSparseFormat(GPUContext gCtx,
MatrixObject mo) |
|
static void |
LibMatrixCUDA.log(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "log" operation on a matrix on the GPU
|
static void |
LibMatrixCuDNN.lstm(ExecutionContext ec,
GPUContext gCtx,
String instName,
jcuda.Pointer X,
jcuda.Pointer wPointer,
jcuda.Pointer out0,
jcuda.Pointer c0,
boolean return_sequences,
String outputName,
String cyName,
int N,
int M,
int D,
int T) |
Computes the forward pass for an LSTM layer with M neurons.
|
static void |
LibMatrixCuDNN.lstmBackward(ExecutionContext ec,
GPUContext gCtx,
String instName,
jcuda.Pointer x,
jcuda.Pointer hx,
jcuda.Pointer cx,
jcuda.Pointer wPointer,
String doutName,
String dcyName,
String dxName,
String dwName,
String dbName,
String dhxName,
String dcxName,
boolean return_sequences,
int N,
int M,
int D,
int T) |
|
static MatrixObject |
LibMatrixCuMatMult.matmult(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject left,
MatrixObject right,
String outputName,
boolean isLeftTransposed,
boolean isRightTransposed) |
Matrix multiply on GPU. Examines sparsity and shapes and routes the call to the
appropriate method from cuBLAS or cuSparse: C = op(A) x op(B).
The user is expected to call
ec.releaseMatrixOutputForGPUInstruction(outputName);
|
static void |
LibMatrixCUDA.matmultTSMM(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject left,
String outputName,
boolean isLeftTransposed) |
Performs tsmm, A %*% A' or A' %*% A, on GPU by exploiting cublasDsyrk(...)
|
static void |
LibMatrixCUDA.matrixMatrixArithmetic(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
MatrixObject in2,
String outputName,
boolean isLeftTransposed,
boolean isRightTransposed,
BinaryOperator op) |
Performs elementwise arithmetic operation specified by op of two input matrices in1 and in2
|
static void |
LibMatrixCUDA.matrixMatrixRelational(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
MatrixObject in2,
String outputName,
BinaryOperator op) |
Performs elementwise relational operation specified by op on two input matrices in1 and in2
|
static void |
LibMatrixCUDA.matrixScalarArithmetic(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in,
String outputName,
boolean isInputTransposed,
ScalarOperator op) |
Entry point to perform elementwise matrix-scalar arithmetic operation specified by op
|
static void |
LibMatrixCUDA.matrixScalarOp(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in,
String outputName,
boolean isInputTransposed,
ScalarOperator op) |
Utility to do matrix-scalar operation kernel
|
static void |
LibMatrixCUDA.matrixScalarRelational(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in,
String outputName,
ScalarOperator op) |
Entry point to perform elementwise matrix-scalar relational operation specified by op
|
static void |
LibMatrixCuDNN.pooling(GPUContext gCtx,
String instName,
MatrixObject image,
MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
LibMatrixDNN.PoolingType poolingType,
double intermediateMemoryBudget) |
Performs maxpooling on GPU by exploiting cudnnPoolingForward(...)
|
static void |
LibMatrixCuDNN.poolingBackward(GPUContext gCtx,
String instName,
MatrixObject image,
MatrixObject dout,
MatrixObject maxpoolOutput,
MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
LibMatrixDNN.PoolingType poolingType,
double intermediateMemoryBudget) |
Performs maxpoolingBackward on GPU by exploiting cudnnPoolingBackward(...)
This method computes the backpropagation errors for the previous layer of the maxpooling operation
|
static void |
LibMatrixCUDA.rbind(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
MatrixObject in2,
String outputName) |
|
static void |
LibMatrixCuDNN.relu(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in,
String outputName) |
Performs the relu operation on the GPU.
|
static void |
LibMatrixCUDA.reluBackward(GPUContext gCtx,
String instName,
MatrixObject input,
MatrixObject dout,
MatrixObject outputBlock) |
This method computes the backpropagation errors for previous layer of relu operation
|
static void |
LibMatrixCUDA.round(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "round" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.sigmoid(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "sigmoid" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.sign(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "sign" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.sin(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "sin" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.sinh(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "sinh" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.sliceOperations(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
IndexRange ixrange,
String outputName) |
Method to perform rightIndex operation for a given lower and upper bounds in row and column dimensions.
|
static void |
LibMatrixCuDNN.softmax(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "softmax" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.solve(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
MatrixObject in2,
String outputName) |
Implements the "solve" function for SystemDS: Ax = B (A is of size m*n, B is of size m*1, x is of size n*1)
|
static void |
LibMatrixCUDA.sqrt(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "sqrt" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.tan(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "tan" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.tanh(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String outputName) |
Performs a "tanh" operation on a matrix on the GPU
|
static void |
LibMatrixCUDA.transpose(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in,
String outputName) |
Transposes the input matrix using cublasDgeam
|
static void |
LibMatrixCUDA.unaryAggregate(ExecutionContext ec,
GPUContext gCtx,
String instName,
MatrixObject in1,
String output,
AggregateUnaryOperator op) |
Entry point to perform Unary aggregate operations on the GPU.
|