using System.Collections.Generic;

// DeconvolutionLayer member fields: cuDNN handles, streams, descriptors and workspace bookkeeping.
const int CUDNN_STREAMS_PER_GROUP = 3;

long[] m_rghCudnn = null;
long[] m_rghStream = null;

List<long> m_rghBottomDesc = new List<long>();
List<long> m_rghTopDesc = new List<long>();
long m_hFilterDesc = 0;
List<long> m_rghConvDesc = new List<long>();
int m_nBottomOffset = 0;
int m_nBiasOffset = 0;

ulong[] m_rglWorkspaceFwdSizes = null;
ulong[] m_rglWorkspaceBwdFilterSizes = null;
ulong[] m_rglWorkspaceBwdDataSizes = null;
ulong[] m_rglWorkspaceFwdOffsets = null;
ulong[] m_rglWorkspaceBwdFilterOffsets = null;
ulong[] m_rglWorkspaceBwdDataOffsets = null;
bool m_bUseTensorCores = false;
 
// dispose(): releases all GPU and host resources used by the layer.
for (int i = 0; i < m_rghBottomDesc.Count; i++)
{
    m_cuda.FreeTensorDesc(m_rghBottomDesc[i]);
    m_cuda.FreeTensorDesc(m_rghTopDesc[i]);
    m_cuda.FreeConvolutionDesc(m_rghConvDesc[i]);
}

m_rghBottomDesc.Clear();
m_rghTopDesc.Clear();
m_rghConvDesc.Clear();

if (m_hBiasDesc != 0)
    m_cuda.FreeTensorDesc(m_hBiasDesc);

if (m_hFilterDesc != 0)
    m_cuda.FreeFilterDesc(m_hFilterDesc);

for (int g = 0; g < (m_nGroup * CUDNN_STREAMS_PER_GROUP); g++)
{
    if (m_rghStream != null && m_rghStream[g] != 0)
        m_cuda.FreeStream(m_rghStream[g]);

    if (m_rghCudnn != null && m_rghCudnn[g] != 0)
        m_cuda.FreeCuDNN(m_rghCudnn[g]);
}
 
// LayerSetUp(): sets up the layer for both Engine.CAFFE and Engine.CUDNN modes.
base.LayerSetUp(colBottom, colTop);

for (int i = 0; i < colBottom.Count; i++)
{
    if (colBottom[i].HalfSize)
        m_log.FAIL("Half sizes are only supported with the CUDNN engine!");
}

m_rghStream = new long[m_nGroup * CUDNN_STREAMS_PER_GROUP];
m_rghCudnn = new long[m_nGroup * CUDNN_STREAMS_PER_GROUP];

m_rglWorkspaceFwdSizes = new ulong[colBottom.Count];
m_rglWorkspaceBwdFilterSizes = new ulong[colBottom.Count];
m_rglWorkspaceBwdDataSizes = new ulong[colBottom.Count];
m_rglWorkspaceFwdOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP];
m_rglWorkspaceBwdFilterOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP];
m_rglWorkspaceBwdDataOffsets = new ulong[m_nGroup * CUDNN_STREAMS_PER_GROUP];

for (int i = 0; i < colBottom.Count; i++)
{
    m_rglWorkspaceFwdSizes[i] = 0;
    m_rglWorkspaceBwdFilterSizes[i] = 0;
    m_rglWorkspaceBwdDataSizes[i] = 0;
}

for (int g = 0; g < m_nGroup * CUDNN_STREAMS_PER_GROUP; g++)
{
    m_rghStream[g] = m_cuda.CreateStream();
    m_rghCudnn[g] = m_cuda.CreateCuDNN(m_rghStream[g]);
    m_rglWorkspaceFwdOffsets[g] = 0;
    m_rglWorkspaceBwdFilterOffsets[g] = 0;
    m_rglWorkspaceBwdDataOffsets[g] = 0;
}

if (typeof(T) == typeof(double))
{
    m_log.WriteLine("WARNING: Tensor cores are only supported with the 'float' base type.  Tensor core use will be disabled for the 'double' base type.");
    m_bUseTensorCores = false;
}

m_hFilterDesc = m_cuda.CreateFilterDesc();

for (int i = 0; i < colBottom.Count; i++)
{
    m_rghBottomDesc.Add(m_cuda.CreateTensorDesc());
    m_rghTopDesc.Add(m_cuda.CreateTensorDesc());
    m_rghConvDesc.Add(m_cuda.CreateConvolutionDesc());
}

m_hBiasDesc = m_cuda.CreateTensorDesc();
 
// Reshape(): reshapes the bottom (input) and top (output) blobs and sizes the cuDNN workspaces.
base.Reshape(colBottom, colTop);

for (int i = 0; i < colBottom.Count; i++)
{
    if (colBottom[i].HalfSize)
        m_log.FAIL("Half sizes are only supported with the CUDNN engine!");
}

m_log.CHECK_EQ(2, m_nNumSpatialAxes, "cuDNN Deconvolution input must have 2 spatial axes (e.g., height and width).  Use 'engine: CAFFE' for general ND deconvolution.");

for (int i = 0; i < colBottom.Count; i++)
{
    m_cuda.SetConvolutionDesc(m_rghConvDesc[i], szPad.Height, szPad.Width, szStride.Height, szStride.Width, szDilation.Height, szDilation.Width, m_bUseTensorCores, m_bUseHalfSize);

    ulong lWsSizeFwd = 0;
    ulong lWsSizeBwdFilter = 0;
    ulong lWsSizeBwdData = 0;

    m_cuda.GetConvolutionInfo(m_rghCudnn[0], m_rghTopDesc[i], m_hFilterDesc, m_rghConvDesc[i], m_rghBottomDesc[i], lWorkspaceLimitBytes, m_bUseTensorCores, out algoFwd, out lWsSizeFwd, out algoBwdFilter, out lWsSizeBwdFilter, out algoBwdData, out lWsSizeBwdData, algoFwdPreferred);
    m_rgfwdAlgo[i] = algoFwd;
    m_rglWorkspaceFwdSizes[i] = lWsSizeFwd;
    m_rgbwdFilterAlgo[i] = algoBwdFilter;
    m_rglWorkspaceBwdFilterSizes[i] = lWsSizeBwdFilter;
    m_rgbwdDataAlgo[i] = algoBwdData;
    m_rglWorkspaceBwdDataSizes[i] = lWsSizeBwdData;
}

ulong lTotalWsFwd = 0;
ulong lTotalWsBwdFilter = 0;
ulong lTotalWsBwdData = 0;

for (int i = 0; i < colBottom.Count; i++)
{
    lTotalWsFwd = Math.Max(lTotalWsFwd, m_rglWorkspaceFwdSizes[i]);
    lTotalWsBwdFilter = Math.Max(lTotalWsBwdFilter, m_rglWorkspaceBwdFilterSizes[i]);
    lTotalWsBwdData = Math.Max(lTotalWsBwdData, m_rglWorkspaceBwdDataSizes[i]);
}

ulong lMaxWorkspace = Math.Max(lTotalWsFwd, Math.Max(lTotalWsBwdFilter, lTotalWsBwdData));
ulong lTotalMaxWorkspace = (ulong)lMaxWorkspace * (ulong)m_nGroup * CUDNN_STREAMS_PER_GROUP;

for (int g = 0; g < (m_nGroup * CUDNN_STREAMS_PER_GROUP); g++)
{
    m_rglWorkspaceFwdOffsets[g] = (ulong)g * lTotalWsFwd;
    m_rglWorkspaceBwdFilterOffsets[g] = (ulong)g * lTotalWsBwdFilter;
    m_rglWorkspaceBwdDataOffsets[g] = (ulong)g * lTotalWsBwdData;
}
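
The workspace sizing above allocates one block large enough for the most demanding pass and hands every cuDNN handle its own slice of it. A standalone arithmetic sketch with made-up sizes (illustration only; the byte figures and handle count are assumptions, not values from the source):

    using System;

    // Assumed per-bottom maxima: forward 8 MB, backward-filter 6 MB, backward-data 10 MB,
    // with nGroup = 2 and CUDNN_STREAMS_PER_GROUP = 3 (i.e. 6 cuDNN handles).
    ulong lTotalWsFwd = 8, lTotalWsBwdFilter = 6, lTotalWsBwdData = 10;   // in MB
    int nHandles = 2 * 3;

    ulong lMaxWorkspace = Math.Max(lTotalWsFwd, Math.Max(lTotalWsBwdFilter, lTotalWsBwdData)); // 10 MB
    ulong lTotalMaxWorkspace = lMaxWorkspace * (ulong)nHandles;                                // 60 MB, allocated once

    // Handle g = 4 then uses forward scratch at offset 4 * 8 = 32 MB and backward-data scratch
    // at offset 4 * 10 = 40 MB; every slice stays inside the shared 60 MB block.
    Console.WriteLine("allocate {0} MB once and hand out per-handle slices", lTotalMaxWorkspace);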
 
// compute_output_shape(): computes the spatial output shape of the deconvolution.
T[] rgPadData = m_blobPad.update_cpu_data();

// (per-spatial-axis loop over i elided)
int nStride = val_at(rgStrideData, i);
int nKernel = val_at(rgKernelShape, i);
int nPad = val_at(rgPadData, i);
int nDilation = val_at(rgDilationData, i);

int nKernelExtent = nDilation * (nKernel - 1) + 1;
int nOutputDim = nStride * (nInputDim - 1) + nKernelExtent - 2 * nPad;
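
A quick numeric check of the two formulas above, with made-up 1-D values (illustration only):

    using System;

    // Assumed values: input dim 4, stride 2, kernel 3, pad 1, dilation 1.
    int nInputDim = 4, nStride = 2, nKernel = 3, nPad = 1, nDilation = 1;

    int nKernelExtent = nDilation * (nKernel - 1) + 1;                      // 3
    int nOutputDim = nStride * (nInputDim - 1) + nKernelExtent - 2 * nPad;  // 2*3 + 3 - 2*1 = 7

    // The deconvolution roughly doubles the spatial size (4 -> 7), mirroring the convolution
    // case where the same stride/kernel/pad settings map 7 -> 4.
    Console.WriteLine("deconvolution output dim = {0}", nOutputDim);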
 
// forward_cuda(): runs the Forward computation with Engine.CAFFE.
for (int i = 0; i < colBottom.Count; i++)
{
    if (colBottom[i].HalfSize)
        m_log.FAIL("The CAFFE engine does not support half sizes!");

    long hBottomData = colBottom[i].gpu_data;
    long hTopData = colTop[i].mutable_gpu_data;

    for (int n = 0; n < m_nNum; n++)
    {
        ...
    }
}
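
The body of the inner per-item loop is elided above. A plausible sketch, built from the gemm helpers documented further down this page, is shown below; treat it as an assumption about the missing lines rather than the verified source (hWeight stands for the weight handle, m_colBlobs[0].gpu_data):

    // Sketch only: the forward pass of a deconvolution runs the col2im (backward_gemm) path,
    // the mirror image of a convolution's forward pass, and then adds the bias if present.
    long hWeight = m_colBlobs[0].gpu_data;

    for (int n = 0; n < m_nNum; n++)
    {
        // The 'output'-side argument is the bottom data and the 'input'-side argument is the
        // top data, because deconvolution reverses the direction of the underlying convolution.
        backward_gemm(hBottomData, n * m_nBottomDim, hWeight, hTopData, n * m_nTopDim);

        if (m_bBiasTerm)
            forward_bias(hTopData, n * m_nTopDim, m_colBlobs[1].gpu_data);
    }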
 
// backward_cuda(): runs the Backward computation with Engine.CAFFE.
long hWeightDiff = m_colBlobs[0].mutable_gpu_diff;

for (int i = 0; i < colTop.Count; i++)
{
    if (colTop[i].HalfSize || colBottom[i].HalfSize)
        m_log.FAIL("The CAFFE engine does not support half sizes!");

    long hTopDiff = colTop[i].gpu_diff;
    long hBottomData = colBottom[i].gpu_data;
    long hBottomDiff = colBottom[i].mutable_gpu_diff;

    // Bias gradient.
    long hBiasDiff = m_colBlobs[1].mutable_gpu_diff;

    for (int n = 0; n < m_nNum; n++)
    {
        ...
    }

    // Weight gradient and, when requested, the gradient w.r.t. the bottom data.
    for (int n = 0; n < m_nNum; n++)
    {
        ...
        if (rgbPropagateDown[i])
        {
            ...
        }
    }
}
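
The loop bodies above are also elided in this listing. Using the gemm helpers documented further down the page, a plausible reconstruction is sketched below; it is an assumption about the missing lines, not the verified source (hWeight again stands for m_colBlobs[0].gpu_data):

    // Sketch only: in a deconvolution the gemm helpers swap roles relative to a convolution.
    long hWeight = m_colBlobs[0].gpu_data;

    // Bias gradient, accumulated from the top diff.
    for (int n = 0; n < m_nNum; n++)
        backward_bias(hBiasDiff, hTopDiff, n * m_nTopDim);

    // Weight gradient and, when requested, the gradient w.r.t. the bottom data.
    for (int n = 0; n < m_nNum; n++)
    {
        weight_gemm(hTopDiff, n * m_nTopDim, hBottomData, n * m_nBottomDim, hWeightDiff);

        if (rgbPropagateDown[i])
            forward_gemm(hTopDiff, n * m_nTopDim, hWeight, hBottomDiff, n * m_nBottomDim);
    }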
 
// forward_cudnn(): runs the Forward computation with Engine.CUDNN.  The forward pass of a
// deconvolution is computed with the cuDNN backward-data convolution kernel.
for (int i = 0; i < colBottom.Count; i++)
{
    long hBottomData = colBottom[i].gpu_data;
    long hTopData = colTop[i].mutable_gpu_data;

    // (per-group loop over g elided)
    m_cuda.ConvolutionBackwardData(m_rghCudnn[g],
                                   ...
                                   hBottomData, m_nBottomOffset * g,
                                   ...
                                   wsArgs.WorkspaceData, (int)m_rglWorkspaceBwdDataOffsets[g], m_rglWorkspaceBwdDataSizes[i],
                                   ...
                                   hTopData, m_nTopOffset * g);
    m_cuda.SynchronizeStream(m_rghStream[g]);

    // (bias branch elided)
    m_cuda.AddTensor(m_rghCudnn[g],
                     ...
                     hBiasData, m_nBiasOffset * g,
                     ...
                     hTopData, m_nTopOffset * g);
    m_cuda.SynchronizeStream(m_rghStream[g]);

    m_cuda.SynchronizeThread();
}
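
The m_nBottomOffset, m_nTopOffset and m_nBiasOffset strides multiplied by g above slice each blob into per-group sections. The lines that compute those offsets are not part of this listing, so the following is only an illustrative guess at the arithmetic, with made-up shapes:

    // Assumed example: nGroup = 2, bottom = 64 channels of 16x16, top = 32 channels of 32x32
    // (per item), one bias value per output channel.  If the offsets are per-group strides:
    int nGroup = 2;
    int nBottomOffsetGuess = (64 * 16 * 16) / nGroup;   // 8192 bottom values per group
    int nTopOffsetGuess = (32 * 32 * 32) / nGroup;      // 16384 top values per group
    int nBiasOffsetGuess = 32 / nGroup;                 // 16 bias values per group
    // Group g = 1 would then start at bottom element 8192, top element 16384 and bias element 16.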
 
// backward_cudnn(): runs the Backward computation with Engine.CUDNN.
long hWeightDiff = 0;

for (int i = 0; i < colTop.Count; i++)
{
    long hTopDiff = colTop[i].gpu_diff;

    // (per-group loop over g elided)
    // Bias gradient.
    ...
    hTopDiff, m_nTopOffset * g,
    ...
    hBiasDiff, m_nBiasOffset * g);

    // Filter (weight) gradient, using the selected backward-filter algorithm and workspace.
    long hBottomData = colBottom[i].gpu_data;
    ...
    hTopDiff, m_nTopOffset * g,
    ...
    hBottomData, m_nBottomOffset * g,
    ...
    m_rgbwdFilterAlgo[i],
    ...
    m_rglWorkspaceBwdFilterSizes[i],
    ...

    // Gradient w.r.t. the bottom data, computed only when requested; since the layer is a
    // deconvolution, this path uses the forward algorithm and forward workspace.
    if (rgbPropagateDown[i])
    {
        long hBottomDiff = colBottom[i].mutable_gpu_diff;
        ...
        hTopDiff, m_nTopOffset * g,
        ...
        m_rglWorkspaceFwdSizes[i],
        ...
        hBottomDiff, m_nBottomOffset * g);
    }

    m_cuda.SynchronizeThread();
}
 
The Log class provides general output in text form.
 
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
 
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
 
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
 
The BlobCollection contains a list of Blobs.
 
int Count
Returns the number of items in the collection.
 
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
 
The WorkspaceArgs are passed to both the Layer::OnSetWorkspace and Layer::OnGetWorkspace events.
 
long WorkspaceData
Get/set the handle to workspace data in GPU memory.
 
The BaseConvolutionLayer is an abstract base class that factors out BLAS code common to ConvolutionLa...
 
int m_nBottomDim
The bottom dimension.
 
int m_nNumOutput
The number of outputs.
 
int m_nTopDim
The top dimension.
 
List< int > m_rgOutputShape
The spatial dimensions of the output.
 
void backward_bias(long hBias, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
 
int m_nChannelAxis
The channel axis.
 
int m_nChannels
The number of channels in each item.
 
void forward_gemm(long hInput, int nInputOffset, long hWeights, long hOutput, int nOutputOffset, bool bSkipIm2Col=false)
Helper function that abstracts away the column buffer and gemm arguments.
 
override WorkspaceArgs getWorkspace()
Returns the WorkspaceArgs containing the workspace used by this Layer.
 
int m_nNumSpatialAxes
The number of spatial axes.
 
ulong getWorkspaceLimitInBytes(bool bUseTensorCores=false)
Returns the workspace limit in bytes based on the cudnn_workspace_limit setting.
 
void weight_gemm(long hInput, int nInputOffset, long hOutput, int nOutputOffset, long hWeights)
Helper function that abstracts away the column buffer and gemm arguments.
 
Blob< T > m_blobStride
The spatial dimensions of the stride.
 
Blob< T > m_blobDilation
The spatial dimensions of the dilation.
 
Blob< T > m_blobKernelShape
The spatial dimensions of the filter kernel.
 
int m_nWeightOffset
The weight offset used.
 
Blob< T > m_blobPad
The spatial dimensions of the padding.
 
int m_nNum
The number of items in the batch.
 
void forward_bias(long hOutput, int nOutputOffset, long hBias)
Helper function that abstracts away the column buffer and gemm arguments.
 
override bool setWorkspace(ulong lSizeInBytes)
If not already set, allocates the workspace needed in GPU memory.
 
void backward_gemm(long hOutput, int nOutputOffset, long hWeights, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
 
int input_shape(int i)
Returns the spatial dimensions of the input.
 
bool m_bBiasTerm
Whether or not to use bias.
 
The DeconvolutionLayer convolves the input with a bank of learned filters, and (optionally) adds bias...
 
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Run the Backward computation.
 
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Run the Forward computation.
 
DeconvolutionLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The DeconvolutionLayer constructor.
 
void backward_cuda(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Run the Backward computation using Engine.CAFFE.
 
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer for use with both Engine.CAFFE and Engine.CUDNN modes.
 
void backward_cudnn(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Run the Backward computation using Engine.CUDNN.
 
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
 
override bool reverse_dimensions()
Returns true, for we want deconvolution, not convolution.
 
override void compute_output_shape()
Computes the output shape used by the BaseConvolutionLayer.
 
override void dispose()
Releases all GPU and host resources used by the Layer.
 
void forward_cudnn(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Run the Forward computation with Engine.CUDNN.
 
void forward_cuda(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Run the Forward computation with Engine.CAFFE.
 
Log m_log
Specifies the Log for output.
 
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
 
int val_at(T[] rg, int nIdx)
Returns the integer value at a given index in a generic array.
 
T m_tZero
Specifies a generic type equal to 0.0.
 
T m_tOne
Specifies a generic type equal to 1.0.
 
bool m_bUseHalfSize
Specifies that the half size of the top (if any) should be converted to the base size.
 
Size size_at(Blob< T > b)
Returns the Size of a given two element Blob, such as one that stores Blob size information.
 
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
 
bool m_bReshapeOnForwardNeeded
Specifies whether or not a reshape is needed on the forward pass.
 
LayerParameter.LayerType m_type
Specifies the Layer type.
 
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
 
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
 
bool cudnn_enable_tensor_cores
Specifies to enable the CUDA tensor cores when performing the convolution which is faster but not sup...
 
bool useCudnn(int nNumSpatialAxes=2)
Queries whether or not to use NVIDIA's cuDnn.
 
Specifies the base parameter for all layers.
 
ConvolutionParameter convolution_param
Returns the parameter set when initialized with LayerType.CONVOLUTION
 
LayerType
Specifies the layer type.
 
The MyCaffe.basecode namespace contains all generic types used throughout MyCaffe.
 
The MyCaffe.common namespace contains common MyCaffe classes.
 
CONV_BWD_FILTER_ALGO
Specifies the cuDnn convolution backward filter algorithm to use.
 
CONV_FWD_ALGO
Specifies the cuDnn convolution forward algorithm to use.
 
CONV_BWD_DATA_ALGO
Specifies the cuDnn convolution backward data algorithm to use.
 
The MyCaffe.fillers namespace contains all fillers including the Filler class.
 
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
 
The MyCaffe.param namespace contains parameters used to create models.
 
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...