public class LibMatrixCuDNN extends LibMatrixCUDA
| Modifier and Type | Field and Description |
|---|---|
protected static int |
CONVOLUTION_PREFERENCE |
cudaSupportFunctions, CUDNN_DATA_TYPE, customKernelSuffix, maxNumElementsOfCuDNNTensor, sizeOfDataType| Constructor and Description |
|---|
LibMatrixCuDNN() |
| Modifier and Type | Method and Description |
|---|---|
static void |
batchNormalizationBackward(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dX,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dScale,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dBias,
double epsilon,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveMean,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveInvVariance)
This method computes the backpropagation errors for image, scale and bias of batch normalization layer
|
static void |
batchNormalizationForwardInference(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningMean,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningVar,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject ret,
double epsilon)
Performs the forward BatchNormalization layer computation for inference
|
static void |
batchNormalizationForwardTraining(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningMean,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningVar,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject ret,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject retRunningMean,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject retRunningVar,
double epsilon,
double exponentialAverageFactor,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveMean,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveInvVariance)
Performs the forward BatchNormalization layer computation for training
|
protected static void |
checkStatus(int status)
Convenience method for checking the status of CuDNN kernel.
|
static void |
conv2d(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget)
Performs a 2D convolution
|
static void |
conv2dBackwardData(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject output,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget)
This method computes the backpropagation errors for previous layer of convolution operation
|
static void |
conv2dBackwardFilter(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget)
This method computes the backpropagation errors for filter of convolution operation
|
static void |
conv2dBiasAdd(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject output,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget)
Does a 2D convolution followed by a bias_add
|
protected static jcuda.jcudnn.cudnnHandle |
getCudnnHandle(GPUContext gCtx) |
protected static jcuda.Pointer |
getDensePointerForCuDNN(GPUContext gCtx,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
String instName)
Convenience method to get jcudaDenseMatrixPtr.
|
static jcuda.Pointer |
getDensePointerForCuDNN(GPUContext gCtx,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
String instName,
int numRows,
int numCols)
Convenience method to get jcudaDenseMatrixPtr.
|
static void |
lstm(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
jcuda.Pointer X,
jcuda.Pointer wPointer,
jcuda.Pointer out0,
jcuda.Pointer c0,
boolean return_sequences,
String outputName,
String cyName,
int N,
int M,
int D,
int T)
Computes the forward pass for an LSTM layer with M neurons.
|
static void |
lstmBackward(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
jcuda.Pointer x,
jcuda.Pointer hx,
jcuda.Pointer cx,
jcuda.Pointer wPointer,
String doutName,
String dcyName,
String dxName,
String dwName,
String dbName,
String dhxName,
String dcxName,
boolean return_sequences,
int N,
int M,
int D,
int T) |
static void |
pooling(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
LibMatrixDNN.PoolingType poolingType,
double intermediateMemoryBudget)
Performs maxpooling on GPU by exploiting cudnnPoolingForward(...)
|
static void |
poolingBackward(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject maxpoolOutput,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
LibMatrixDNN.PoolingType poolingType,
double intermediateMemoryBudget)
Performs maxpoolingBackward on GPU by exploiting cudnnPoolingBackward(...)
This method computes the backpropagation errors for previous layer of maxpooling operation
|
static void |
relu(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject in,
String outputName)
Performs the relu operation on the GPU.
|
static void |
softmax(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject in1,
String outputName)
Performs a "softmax" operation on a matrix on the GPU
|
abs, acos, asin, atan, axpy, biasAdd, biasMultiply, cbind, ceil, channelSums, computeNNZ, cos, cosh, dataTypePointerTo, denseTranspose, deviceCopy, double2float, exp, float2double, floor, getCublasHandle, getCudaKernels, getCusparseHandle, getDenseMatrixOutputForGPUInstruction, getDensePointer, getNnz, getSparsePointer, isInSparseFormat, log, matmultTSMM, matrixMatrixArithmetic, matrixMatrixRelational, matrixScalarArithmetic, matrixScalarOp, matrixScalarRelational, one, rbind, reluBackward, resetFloatingPointPrecision, round, sigmoid, sign, sin, sinh, sliceDenseDense, sliceOperations, sliceSparseDense, solve, sqrt, tan, tanh, toInt, transpose, unaryAggregate, zeroprotected static jcuda.jcudnn.cudnnHandle getCudnnHandle(GPUContext gCtx)
public static void conv2dBiasAdd(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias, org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter, org.apache.sysml.runtime.controlprogram.caching.MatrixObject output, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget)
gCtx - a valid GPUContextinstName - the invoking instruction's name for record Statistics.image - input image matrix objectbias - bias matrix objectfilter - filter matrix objectoutput - output matrix objectN - number of input imagesC - number of channelsH - height of each imageW - width of each imageK - number of output "channels"R - height of filterS - width of filterpad_h - padding heightpad_w - padding widthstride_h - stride heightstride_w - stride widthP - output heightQ - output widthintermediateMemoryBudget - intermediate memory budgetpublic static void conv2d(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter, org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget)
gCtx - a valid GPUContextinstName - the invoking instruction's name for record Statistics.image - input matrix objectfilter - filter matrix objectoutputBlock - output matrix objectN - number of input imagesC - number of channelsH - height of each imageW - width of each imageK - number of output "channels"R - height of filterS - width of filterpad_h - padding heightpad_w - padding widthstride_h - stride heightstride_w - stride widthP - output heightQ - output widthintermediateMemoryBudget - intermediate memory budgetpublic static void softmax(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject in1,
String outputName)
ec - execution contextgCtx - a valid GPUContextinstName - the invoking instruction's name for record Statistics.in1 - input matrixoutputName - output matrix namepublic static void conv2dBackwardFilter(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout, org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget)
gCtx - a valid GPUContextinstName - the invoking instruction's name for record Statistics.image - input imagedout - errors from next layeroutputBlock - output errorsN - number of imagesC - number of channelsH - heightW - widthK - number of filtersR - filter heightS - filter widthpad_h - pad heightpad_w - pad widthstride_h - stride heightstride_w - stride widthP - output activation heightQ - output activation widthintermediateMemoryBudget - intermediate memory budgetpublic static void conv2dBackwardData(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout, org.apache.sysml.runtime.controlprogram.caching.MatrixObject output, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget)
gCtx - a valid GPUContextinstName - the invoking instruction's name for record Statistics.filter - filter used in conv2ddout - errors from next layeroutput - output errorsN - number of imagesC - number of channelsH - heightW - widthK - number of filtersR - filter heightS - filter widthpad_h - pad heightpad_w - pad widthstride_h - stride heightstride_w - stride widthP - output activation heightQ - output activation widthintermediateMemoryBudget - intermediate memory budgetpublic static void pooling(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, LibMatrixDNN.PoolingType poolingType, double intermediateMemoryBudget)
gCtx - a valid GPUContextinstName - the invoking instruction's name for record Statistics.image - image as matrix objectoutputBlock - output matrixN - batch sizeC - number of channelsH - height of imageW - width of imageK - number of filtersR - height of filterS - width of filterpad_h - vertical paddingpad_w - horizontal paddingstride_h - vertical stridestride_w - horizontal strideP - (H - R + 1 + 2*pad_h)/stride_hQ - (W - S + 1 + 2*pad_w)/stride_wpoolingType - type of poolingintermediateMemoryBudget - intermediate memory budgetpublic static void poolingBackward(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout, org.apache.sysml.runtime.controlprogram.caching.MatrixObject maxpoolOutput, org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, LibMatrixDNN.PoolingType poolingType, double intermediateMemoryBudget)
gCtx - a valid GPUContextinstName - the invoking instruction's name for record Statistics.image - image as matrix objectdout - delta matrix, output of previous layermaxpoolOutput - (optional and can be null) output of maxpool forward functionoutputBlock - output matrixN - batch sizeC - number of channelsH - height of imageW - width of imageK - number of filtersR - height of filterS - width of filterpad_h - vertical paddingpad_w - horizontal paddingstride_h - vertical stridestride_w - horizontal strideP - (H - R + 1 + 2*pad_h)/stride_hQ - (W - S + 1 + 2*pad_w)/stride_wpoolingType - type of poolingintermediateMemoryBudget - intermediate memory budgetpublic static void relu(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject in,
String outputName)
ec - currently active ExecutionContextgCtx - a valid GPUContextinstName - the invoking instruction's name for record Statistics.in - input matrixoutputName - name of the output matrixpublic static void lstm(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
jcuda.Pointer X,
jcuda.Pointer wPointer,
jcuda.Pointer out0,
jcuda.Pointer c0,
boolean return_sequences,
String outputName,
String cyName,
int N,
int M,
int D,
int T)
throws DMLRuntimeException
ec - execution contextgCtx - gpu contextinstName - name of the instructionX - input matrix pointerwPointer - weight matrix pointerout0 - Outputs from previous timestepc0 - Initial cell statereturn_sequences - Whether to return `out` at all timesteps, or just for the final timestep.outputName - name of the out variable. If `return_sequences` is True, outputs for all timesteps.cyName - name of the output cell state. Cell state for final timestep.N - minibatch sizeM - hidden sizeD - number of featuresT - sequence lengthDMLRuntimeException - if errorpublic static void lstmBackward(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
jcuda.Pointer x,
jcuda.Pointer hx,
jcuda.Pointer cx,
jcuda.Pointer wPointer,
String doutName,
String dcyName,
String dxName,
String dwName,
String dbName,
String dhxName,
String dcxName,
boolean return_sequences,
int N,
int M,
int D,
int T)
throws DMLRuntimeException
DMLRuntimeExceptionpublic static void batchNormalizationForwardTraining(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale, org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias, org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningMean, org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningVar, org.apache.sysml.runtime.controlprogram.caching.MatrixObject ret, org.apache.sysml.runtime.controlprogram.caching.MatrixObject retRunningMean, org.apache.sysml.runtime.controlprogram.caching.MatrixObject retRunningVar, double epsilon, double exponentialAverageFactor, org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveMean, org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveInvVariance) throws DMLRuntimeException
gCtx - a valid GPUContextinstName - name of the instructionimage - input imagescale - scale (as per CuDNN) and gamma as per original paper: shape [1, C, 1, 1]bias - bias (as per CuDNN) and beta as per original paper: shape [1, C, 1, 1]runningMean - running mean accumulated during training phase: shape [1, C, 1, 1]runningVar - running variance accumulated during training phase: shape [1, C, 1, 1]ret - (output) normalized inputretRunningMean - (output) running mean accumulated during training phase: shape [1, C, 1, 1]retRunningVar - (output) running variance accumulated during training phase: shape [1, C, 1, 1]epsilon - epsilon value used in the batch normalization formulaexponentialAverageFactor - factor used in the moving average computationresultSaveMean - (output) running mean accumulated during training phase: shape [1, C, 1, 1]resultSaveInvVariance - (output) running variance accumulated during training phase: shape [1, C, 1, 1]DMLRuntimeException - if error occurspublic static void batchNormalizationForwardInference(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale, org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias, org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningMean, org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningVar, org.apache.sysml.runtime.controlprogram.caching.MatrixObject ret, double epsilon) throws DMLRuntimeException
gCtx - a valid GPUContextinstName - name of the instructionimage - input imagescale - scale (as per CuDNN) and gamma as per original paper: shape [1, C, 1, 1]bias - bias (as per CuDNN) and beta as per original paper: shape [1, C, 1, 1]runningMean - running mean accumulated during training phase: shape [1, C, 1, 1]runningVar - running variance accumulated during training phase: shape [1, C, 1, 1]ret - normalized inputepsilon - epsilon value used in the batch normalization formulaDMLRuntimeException - if error occurspublic static void batchNormalizationBackward(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout, org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dX, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dScale, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dBias, double epsilon, org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveMean, org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveInvVariance) throws DMLRuntimeException
gCtx - a valid GPUContextinstName - name of the instructionimage - input imagedout - input errors of shape C, H, Wscale - scale (as per CuDNN) and gamma as per original paper: shape [1, C, 1, 1]dX - (output) backpropagation errors for previous layerdScale - backpropagation error for scaledBias - backpropagation error for biasepsilon - epsilon value used in the batch normalization formularesultSaveMean - (input) running mean accumulated during training phase: shape [1, C, 1, 1]resultSaveInvVariance - (input) running variance accumulated during training phase: shape [1, C, 1, 1]DMLRuntimeException - if error occursprotected static jcuda.Pointer getDensePointerForCuDNN(GPUContext gCtx, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, String instName)
gCtx - a valid GPUContextimage - input matrix objectinstName - name of the instructionpublic static jcuda.Pointer getDensePointerForCuDNN(GPUContext gCtx, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, String instName, int numRows, int numCols) throws DMLRuntimeException
gCtx - a valid GPUContextimage - input matrix objectinstName - name of the instructionnumRows - expected number of rowsnumCols - expected number of columnsDMLRuntimeException - if error occurs while sparse to dense conversionprotected static void checkStatus(int status)
status - status returned by CuDNNCopyright © 2018 The Apache Software Foundation. All rights reserved.