public class LibMatrixCuDNN extends LibMatrixCUDA
Modifier and Type | Field and Description |
---|---|
protected static int |
CONVOLUTION_PREFERENCE |
cudaSupportFunctions, CUDNN_DATA_TYPE, customKernelSuffix, maxNumElementsOfCuDNNTensor, sizeOfDataType
Constructor and Description |
---|
LibMatrixCuDNN() |
Modifier and Type | Method and Description |
---|---|
static void |
batchNormalizationBackward(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dX,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dScale,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dBias,
double epsilon,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveMean,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveInvVariance)
This method computes the backpropagation errors for image, scale and bias of batch normalization layer
|
static void |
batchNormalizationForwardInference(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningMean,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningVar,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject ret,
double epsilon)
Performs the forward BatchNormalization layer computation for inference
|
static void |
batchNormalizationForwardTraining(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningMean,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningVar,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject ret,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject retRunningMean,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject retRunningVar,
double epsilon,
double exponentialAverageFactor,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveMean,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveInvVariance)
Performs the forward BatchNormalization layer computation for training
|
protected static void |
checkStatus(int status)
Convenience method for checking the status of CuDNN kernel.
|
static void |
conv2d(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget)
Performs a 2D convolution
|
static void |
conv2dBackwardData(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject output,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget)
This method computes the backpropagation errors for the previous layer of the convolution operation
|
static void |
conv2dBackwardFilter(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget)
This method computes the backpropagation errors for the filter of the convolution operation
|
static void |
conv2dBiasAdd(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject output,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
double intermediateMemoryBudget)
Does a 2D convolution followed by a bias_add
|
protected static jcuda.jcudnn.cudnnHandle |
getCudnnHandle(GPUContext gCtx) |
protected static jcuda.Pointer |
getDensePointerForCuDNN(GPUContext gCtx,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
String instName)
Convenience method to get jcudaDenseMatrixPtr.
|
static jcuda.Pointer |
getDensePointerForCuDNN(GPUContext gCtx,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
String instName,
int numRows,
int numCols)
Convenience method to get jcudaDenseMatrixPtr.
|
static void |
lstm(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
jcuda.Pointer X,
jcuda.Pointer wPointer,
jcuda.Pointer out0,
jcuda.Pointer c0,
boolean return_sequences,
String outputName,
String cyName,
int N,
int M,
int D,
int T)
Computes the forward pass for an LSTM layer with M neurons.
|
static void |
lstmBackward(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
jcuda.Pointer x,
jcuda.Pointer hx,
jcuda.Pointer cx,
jcuda.Pointer wPointer,
String doutName,
String dcyName,
String dxName,
String dwName,
String dbName,
String dhxName,
String dcxName,
boolean return_sequences,
int N,
int M,
int D,
int T) |
static void |
pooling(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
LibMatrixDNN.PoolingType poolingType,
double intermediateMemoryBudget)
Performs maxpooling on GPU by exploiting cudnnPoolingForward(...)
|
static void |
poolingBackward(GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject image,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject maxpoolOutput,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock,
int N,
int C,
int H,
int W,
int K,
int R,
int S,
int pad_h,
int pad_w,
int stride_h,
int stride_w,
int P,
int Q,
LibMatrixDNN.PoolingType poolingType,
double intermediateMemoryBudget)
Performs maxpoolingBackward on GPU by exploiting cudnnPoolingBackward(...)
This method computes the backpropagation errors for the previous layer of the maxpooling operation
|
static void |
relu(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject in,
String outputName)
Performs the relu operation on the GPU.
|
static void |
softmax(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec,
GPUContext gCtx,
String instName,
org.apache.sysml.runtime.controlprogram.caching.MatrixObject in1,
String outputName)
Performs a "softmax" operation on a matrix on the GPU
|
abs, acos, asin, atan, axpy, biasAdd, biasMultiply, cbind, ceil, channelSums, computeNNZ, cos, cosh, dataTypePointerTo, denseTranspose, deviceCopy, double2float, exp, float2double, floor, getCublasHandle, getCudaKernels, getCusparseHandle, getDenseMatrixOutputForGPUInstruction, getDensePointer, getNnz, getSparsePointer, isInSparseFormat, log, matmultTSMM, matrixMatrixArithmetic, matrixMatrixRelational, matrixScalarArithmetic, matrixScalarOp, matrixScalarRelational, one, rbind, reluBackward, resetFloatingPointPrecision, round, sigmoid, sign, sin, sinh, sliceDenseDense, sliceOperations, sliceSparseDense, solve, sqrt, tan, tanh, toInt, transpose, unaryAggregate, zero
protected static jcuda.jcudnn.cudnnHandle getCudnnHandle(GPUContext gCtx)
public static void conv2dBiasAdd(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias, org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter, org.apache.sysml.runtime.controlprogram.caching.MatrixObject output, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget)
gCtx
- a valid GPUContext
instName
- the invoking instruction's name for record Statistics.
image
- input image matrix object
bias
- bias matrix object
filter
- filter matrix object
output
- output matrix object
N
- number of input images
C
- number of channels
H
- height of each image
W
- width of each image
K
- number of output "channels"
R
- height of filter
S
- width of filter
pad_h
- padding height
pad_w
- padding width
stride_h
- stride height
stride_w
- stride width
P
- output height
Q
- output width
intermediateMemoryBudget
- intermediate memory budget
public static void conv2d(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter, org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget)
gCtx
- a valid GPUContext
instName
- the invoking instruction's name for record Statistics.
image
- input matrix object
filter
- filter matrix object
outputBlock
- output matrix object
N
- number of input images
C
- number of channels
H
- height of each image
W
- width of each image
K
- number of output "channels"
R
- height of filter
S
- width of filter
pad_h
- padding height
pad_w
- padding width
stride_h
- stride height
stride_w
- stride width
P
- output height
Q
- output width
intermediateMemoryBudget
- intermediate memory budget
public static void softmax(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec, GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject in1, String outputName)
ec
- execution context
gCtx
- a valid GPUContext
instName
- the invoking instruction's name for record Statistics.
in1
- input matrix
outputName
- output matrix name
public static void conv2dBackwardFilter(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout, org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget)
gCtx
- a valid GPUContext
instName
- the invoking instruction's name for record Statistics.
image
- input image
dout
- errors from next layer
outputBlock
- output errors
N
- number of images
C
- number of channels
H
- height
W
- width
K
- number of filters
R
- filter height
S
- filter width
pad_h
- pad height
pad_w
- pad width
stride_h
- stride height
stride_w
- stride width
P
- output activation height
Q
- output activation width
intermediateMemoryBudget
- intermediate memory budget
public static void conv2dBackwardData(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject filter, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout, org.apache.sysml.runtime.controlprogram.caching.MatrixObject output, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget)
gCtx
- a valid GPUContext
instName
- the invoking instruction's name for record Statistics.
filter
- filter used in conv2d
dout
- errors from next layer
output
- output errors
N
- number of images
C
- number of channels
H
- height
W
- width
K
- number of filters
R
- filter height
S
- filter width
pad_h
- pad height
pad_w
- pad width
stride_h
- stride height
stride_w
- stride width
P
- output activation height
Q
- output activation width
intermediateMemoryBudget
- intermediate memory budget
public static void pooling(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, LibMatrixDNN.PoolingType poolingType, double intermediateMemoryBudget)
gCtx
- a valid GPUContext
instName
- the invoking instruction's name for record Statistics.
image
- image as matrix object
outputBlock
- output matrix
N
- batch size
C
- number of channels
H
- height of image
W
- width of image
K
- number of filters
R
- height of filter
S
- width of filter
pad_h
- vertical padding
pad_w
- horizontal padding
stride_h
- vertical stride
stride_w
- horizontal stride
P
- (H + 2*pad_h - R)/stride_h + 1
Q
- (W + 2*pad_w - S)/stride_w + 1
poolingType
- type of pooling
intermediateMemoryBudget
- intermediate memory budget
public static void poolingBackward(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout, org.apache.sysml.runtime.controlprogram.caching.MatrixObject maxpoolOutput, org.apache.sysml.runtime.controlprogram.caching.MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, LibMatrixDNN.PoolingType poolingType, double intermediateMemoryBudget)
gCtx
- a valid GPUContext
instName
- the invoking instruction's name for record Statistics.
image
- image as matrix object
dout
- delta matrix, output of previous layer
maxpoolOutput
- (optional and can be null) output of maxpool forward function
outputBlock
- output matrix
N
- batch size
C
- number of channels
H
- height of image
W
- width of image
K
- number of filters
R
- height of filter
S
- width of filter
pad_h
- vertical padding
pad_w
- horizontal padding
stride_h
- vertical stride
stride_w
- horizontal stride
P
- (H + 2*pad_h - R)/stride_h + 1
Q
- (W + 2*pad_w - S)/stride_w + 1
poolingType
- type of pooling
intermediateMemoryBudget
- intermediate memory budget
public static void relu(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec, GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject in, String outputName)
ec
- currently active ExecutionContext
gCtx
- a valid GPUContext
instName
- the invoking instruction's name for record Statistics.
in
- input matrix
outputName
- name of the output matrix
public static void lstm(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec, GPUContext gCtx, String instName, jcuda.Pointer X, jcuda.Pointer wPointer, jcuda.Pointer out0, jcuda.Pointer c0, boolean return_sequences, String outputName, String cyName, int N, int M, int D, int T) throws DMLRuntimeException
ec
- execution context
gCtx
- gpu context
instName
- name of the instruction
X
- input matrix pointer
wPointer
- weight matrix pointer
out0
- Outputs from previous timestep
c0
- Initial cell state
return_sequences
- Whether to return `out` at all timesteps, or just for the final timestep.
outputName
- name of the out variable. If `return_sequences` is True, outputs for all timesteps.
cyName
- name of the output cell state. Cell state for final timestep.
N
- minibatch size
M
- hidden size
D
- number of features
T
- sequence length
DMLRuntimeException
- if error
public static void lstmBackward(org.apache.sysml.runtime.controlprogram.context.ExecutionContext ec, GPUContext gCtx, String instName, jcuda.Pointer x, jcuda.Pointer hx, jcuda.Pointer cx, jcuda.Pointer wPointer, String doutName, String dcyName, String dxName, String dwName, String dbName, String dhxName, String dcxName, boolean return_sequences, int N, int M, int D, int T) throws DMLRuntimeException
DMLRuntimeException
public static void batchNormalizationForwardTraining(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale, org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias, org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningMean, org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningVar, org.apache.sysml.runtime.controlprogram.caching.MatrixObject ret, org.apache.sysml.runtime.controlprogram.caching.MatrixObject retRunningMean, org.apache.sysml.runtime.controlprogram.caching.MatrixObject retRunningVar, double epsilon, double exponentialAverageFactor, org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveMean, org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveInvVariance) throws DMLRuntimeException
gCtx
- a valid GPUContext
instName
- name of the instruction
image
- input image
scale
- scale (as per CuDNN) and gamma as per original paper: shape [1, C, 1, 1]
bias
- bias (as per CuDNN) and beta as per original paper: shape [1, C, 1, 1]
runningMean
- running mean accumulated during training phase: shape [1, C, 1, 1]
runningVar
- running variance accumulated during training phase: shape [1, C, 1, 1]
ret
- (output) normalized input
retRunningMean
- (output) running mean accumulated during training phase: shape [1, C, 1, 1]
retRunningVar
- (output) running variance accumulated during training phase: shape [1, C, 1, 1]
epsilon
- epsilon value used in the batch normalization formula
exponentialAverageFactor
- factor used in the moving average computation
resultSaveMean
- (output) running mean accumulated during training phase: shape [1, C, 1, 1]
resultSaveInvVariance
- (output) running variance accumulated during training phase: shape [1, C, 1, 1]
DMLRuntimeException
- if error occurs
public static void batchNormalizationForwardInference(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale, org.apache.sysml.runtime.controlprogram.caching.MatrixObject bias, org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningMean, org.apache.sysml.runtime.controlprogram.caching.MatrixObject runningVar, org.apache.sysml.runtime.controlprogram.caching.MatrixObject ret, double epsilon) throws DMLRuntimeException
gCtx
- a valid GPUContext
instName
- name of the instruction
image
- input image
scale
- scale (as per CuDNN) and gamma as per original paper: shape [1, C, 1, 1]
bias
- bias (as per CuDNN) and beta as per original paper: shape [1, C, 1, 1]
runningMean
- running mean accumulated during training phase: shape [1, C, 1, 1]
runningVar
- running variance accumulated during training phase: shape [1, C, 1, 1]
ret
- normalized input
epsilon
- epsilon value used in the batch normalization formula
DMLRuntimeException
- if error occurs
public static void batchNormalizationBackward(GPUContext gCtx, String instName, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dout, org.apache.sysml.runtime.controlprogram.caching.MatrixObject scale, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dX, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dScale, org.apache.sysml.runtime.controlprogram.caching.MatrixObject dBias, double epsilon, org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveMean, org.apache.sysml.runtime.controlprogram.caching.MatrixObject resultSaveInvVariance) throws DMLRuntimeException
gCtx
- a valid GPUContext
instName
- name of the instruction
image
- input image
dout
- input errors of shape C, H, W
scale
- scale (as per CuDNN) and gamma as per original paper: shape [1, C, 1, 1]
dX
- (output) backpropagation errors for previous layer
dScale
- backpropagation error for scale
dBias
- backpropagation error for bias
epsilon
- epsilon value used in the batch normalization formula
resultSaveMean
- (input) running mean accumulated during training phase: shape [1, C, 1, 1]
resultSaveInvVariance
- (input) running variance accumulated during training phase: shape [1, C, 1, 1]
DMLRuntimeException
- if error occurs
protected static jcuda.Pointer getDensePointerForCuDNN(GPUContext gCtx, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, String instName)
gCtx
- a valid GPUContext
image
- input matrix object
instName
- name of the instruction
public static jcuda.Pointer getDensePointerForCuDNN(GPUContext gCtx, org.apache.sysml.runtime.controlprogram.caching.MatrixObject image, String instName, int numRows, int numCols) throws DMLRuntimeException
gCtx
- a valid GPUContext
image
- input matrix object
instName
- name of the instruction
numRows
- expected number of rows
numCols
- expected number of columns
DMLRuntimeException
- if an error occurs during sparse-to-dense conversion
protected static void checkStatus(int status)
status
- status returned by CuDNN
Copyright © 2018 The Apache Software Foundation. All rights reserved.