2using System.Collections.Generic;
 
  104        int m_nNumKernelsIm2col;
 
  105        int m_nNumKernelsCol2im;
 
  106        int m_nConvOutChannels;
 
  107        int m_nConvInChannels;
 
  108        int m_nConvOutSpatialDim;
 
  116        long m_hWorkspaceData = 0;
 
  117        ulong m_lWorkspaceSize = 0;
 
  118        bool m_bWorkspaceOwner = 
false;
 
  145            m_blobColBuffer = 
new Blob<T>(cuda, log);
 
  148            m_blobBiasMultiplier = 
new Blob<T>(cuda, log);
 
  162            m_blobBiasMultiplier.
Dispose();
 
  164            if (m_bWorkspaceOwner && m_hWorkspaceData != 0)
 
  166                m_cuda.DisableGhostMemory();
 
  167                m_cuda.FreeMemory(m_hWorkspaceData);
 
  168                m_cuda.ResetGhostMemory();
 
  169                m_hWorkspaceData = 0;
 
  170                m_bWorkspaceOwner = 
false;
 
  192            ulong lWorkspaceLimitBytes = ulong.MaxValue;
 
  196            if (lWorkspaceLimitBytes != ulong.MaxValue)
 
  197                lWorkspaceLimitBytes *= 16;
 
  201                lWorkspaceLimitBytes = ulong.MaxValue;
 
  208                lWorkspaceLimitBytes = 0; 
 
  210            return lWorkspaceLimitBytes;
 
  221                col.
Add(m_blobColBuffer);
 
  222                col.
Add(m_blobBiasMultiplier);
 
  237            m_bWorkspaceOwner = 
true;
 
  238            return new common.
WorkspaceArgs(m_hWorkspaceData, m_lWorkspaceSize);
 
  248            if (!m_bWorkspaceOwner && base.setWorkspace(lSizeInBytes))
 
  251            m_bWorkspaceOwner = 
true;
 
  253            if (lSizeInBytes < m_lWorkspaceSize)
 
  256            m_lWorkspaceSize = lSizeInBytes;
 
  257            m_cuda.DisableGhostMemory();
 
  259            if (m_hWorkspaceData != 0)
 
  260                m_cuda.FreeMemory(m_hWorkspaceData);
 
  262            if (m_lWorkspaceSize > 0)
 
  263                m_hWorkspaceData = 
m_cuda.AllocMemory((
long)m_lWorkspaceSize);
 
  265            m_cuda.ResetGhostMemory();
 
  277            base.ReInitializeParameters(target);
 
  311            int nNumAxes = colBottom[0].num_axes;
 
  318            List<int> rgSpaitalDimBlobShape = 
new List<int>() { Math.Max(
m_nNumSpatialAxes, 1) };
 
  328                rgKernelShape[0] = (T)Convert.ChangeType(p.
kernel_h.Value, typeof(T));
 
  329                rgKernelShape[1] = (T)Convert.ChangeType(p.
kernel_w.Value, typeof(T));
 
  334                m_log.
CHECK(nNumKernelDims == 1 || nNumKernelDims == 
m_nNumSpatialAxes, 
"Kernel size must be specified once, or once per spatial dimension (kernel_size specified " + nNumKernelDims.ToString() + 
" times; " + 
m_nNumSpatialAxes.ToString() + 
" spatial dims);");
 
  338                    int nIdx = (nNumKernelDims == 1) ? 0 : i;
 
  339                    rgKernelShape[i] = (T)Convert.ChangeType(p.
kernel_size[nIdx], typeof(T));
 
  345                m_log.
CHECK_GT((
int)Convert.ChangeType(rgKernelShape[i], typeof(
int)), 0, 
"Filter dimension must be non-zero.");
 
  358                m_log.
CHECK_EQ(0, p.
stride.Count, 
"Either stride_size or stride_h/w should be specified; not both.");
 
  359                rgStrideData[0] = (T)Convert.ChangeType(p.
stride_h.Value, typeof(T));
 
  360                rgStrideData[1] = (T)Convert.ChangeType(p.
stride_w.Value, typeof(T));
 
  364                int nNumStrideDims = p.
stride.Count;
 
  365                m_log.
CHECK(nNumStrideDims == 0 || nNumStrideDims == 1 || nNumStrideDims == 
m_nNumSpatialAxes, 
"Stride size must be specified once, or once per spatial dimension (stride specified " + nNumStrideDims.ToString() + 
" times; " + 
m_nNumSpatialAxes.ToString() + 
" spatial dims);");
 
  366                int nDefaultStride = 1;
 
  370                    if (nNumStrideDims == 0)
 
  372                        rgStrideData[i] = (T)Convert.ChangeType(nDefaultStride, typeof(T));
 
  376                        int nIdx = (nNumStrideDims == 1) ? 0 : i;
 
  377                        rgStrideData[i] = (T)Convert.ChangeType(p.
stride[nIdx], typeof(T));
 
  379                    m_log.
CHECK_GT((
int)Convert.ChangeType(rgStrideData[i], typeof(
int)), 0, 
"Stride dimension must be non-zero.");
 
  387            m_blobPad.Reshape(rgSpaitalDimBlobShape);
 
  388            T[] rgPadData = 
m_blobPad.mutable_cpu_data;
 
  393                m_log.
CHECK_EQ(0, p.
pad.Count, 
"Either pad_size or pad_h/w should be specified; not both.");
 
  394                rgPadData[0] = (T)Convert.ChangeType(p.
pad_h.Value, typeof(T));
 
  395                rgPadData[1] = (T)Convert.ChangeType(p.
pad_w.Value, typeof(T));
 
  399                int nNumPadDims = p.
pad.Count;
 
  400                m_log.
CHECK(nNumPadDims == 0 || nNumPadDims == 1 || nNumPadDims == 
m_nNumSpatialAxes, 
"Pad size must be specified once, or once per spatial dimension (pad specified " + nNumPadDims.ToString() + 
" times; " + 
m_nNumSpatialAxes.ToString() + 
" spatial dims);");
 
  405                    if (nNumPadDims == 0)
 
  407                        rgPadData[i] = (T)Convert.ChangeType(nDefaultPad, typeof(T));
 
  411                        int nIdx = (nNumPadDims == 1) ? 0 : i;
 
  412                        rgPadData[i] = (T)Convert.ChangeType(p.
pad[nIdx], typeof(T));
 
  423            int nNumDilationDims = p.
dilation.Count;
 
  425            m_log.
CHECK(nNumDilationDims == 0 || nNumDilationDims == 1 || nNumDilationDims == 
m_nNumSpatialAxes, 
"Dilation size must be specified once, or once per spatial dimension (dilation specified " + nNumDilationDims.ToString() + 
" times; " + 
m_nNumSpatialAxes.ToString() + 
" spatial dims);");
 
  426            int nDefaultDilation = 1;
 
  430                if (nNumDilationDims == 0)
 
  432                    rgDilationData[i] = (T)Convert.ChangeType(nDefaultDilation, typeof(T));
 
  436                    int nIdx = (nNumDilationDims == 1) ? 0 : i;
 
  437                    rgDilationData[i] = (T)Convert.ChangeType(p.
dilation[nIdx], typeof(T));
 
  450                if (!(
val_at(rgKernelShape, i) == 1 && 
 
  451                      val_at(rgStrideData, i) == 1 && 
 
  452                      val_at(rgPadData, i) == 0))
 
  483            List<int> rgWeightShape = 
new List<int>();
 
  484            rgWeightShape.Add(m_nConvOutChannels);
 
  485            rgWeightShape.Add(m_nConvInChannels / 
m_nGroup);
 
  489                rgWeightShape.Add(
val_at(rgKernelShape, i));
 
  494            List<int> rgBiasShape = 
new List<int>() { 
m_nNumOutput };
 
  541                    wtFiller.
Fill(blobWts1);
 
  572                        biasFiller.
Fill(blobBias1);
 
  609            for (
int i = 1; i < colBottom.
Count; i++)
 
  611                m_log.
CHECK(
Utility.Compare<
int>(colBottom[0].shape(), colBottom[i].shape()), 
"Shape mismatch - bottom[0]: '" + colBottom[0].shape_string + 
"' vs. bottom[" + i.ToString() + 
"]: '" + colBottom[i].shape_string + 
"'");
 
  618            List<int> rgTopShape = 
new List<int>();
 
  622                rgTopShape.Add(colBottom[0].shape(i));
 
  632            for (
int i = 0; i < colTop.
Count; i++)
 
  638                m_nConvOutSpatialDim = colBottom[0].count(nFirstSpatialAxis);
 
  640                m_nConvOutSpatialDim = colTop[0].count(nFirstSpatialAxis);
 
  642            m_nColOffset = m_nKernelDim * m_nConvOutSpatialDim;
 
  643            m_nOutputOffset = m_nConvOutChannels * m_nConvOutSpatialDim / 
m_nGroup;
 
  655                        rgConvInputShapeData[i] = (T)Convert.ChangeType(colTop[0].shape(
m_nChannelAxis + i), typeof(T));
 
  657                        rgConvInputShapeData[i] = (T)Convert.ChangeType(colBottom[0].shape(
m_nChannelAxis + i), typeof(T));
 
  681            m_nNumKernelsIm2col = m_nConvInChannels * m_nConvOutSpatialDim;
 
  693                    m_blobBiasMultiplier.
Reshape(rgBiasMultShape);
 
  694                    m_blobBiasMultiplier.
SetData(1.0);
 
  735        protected void forward_gemm(
long hInput, 
int nInputOffset, 
long hWeights, 
long hOutput, 
int nOutputOffset, 
bool bSkipIm2Col = 
false)
 
  737            long hColBuff = hInput;
 
  738            int nColBuffOffset = nInputOffset;
 
  745                hColBuff = m_blobColBuffer.
gpu_data;
 
  750            m_cuda.gemm(
false, 
false, m_nConvOutChannels / 
m_nGroup, m_nConvOutSpatialDim, m_nKernelDim, 
m_tOne, hWeights, hColBuff, 
m_tZero, hOutput, 0, nColBuffOffset, nOutputOffset, 
m_nGroup, 
m_nWeightOffset, m_nColOffset, m_nOutputOffset); 
 
  762        protected void forward_bias(
long hOutput, 
int nOutputOffset, 
long hBias)
 
  764            m_cuda.gemm(
false, 
false, 
m_nNumOutput, 
m_nOutSpatialDim, 1, 
m_tOne, hBias, m_blobBiasMultiplier.
gpu_data, 
m_tOne, hOutput, 0, 0, nOutputOffset);
 
  778        protected void backward_gemm(
long hOutput, 
int nOutputOffset, 
long hWeights, 
long hInput, 
int nInputOffset)
 
  781            int nColBuffOffset = 0;
 
  786                nColBuffOffset = nInputOffset;
 
  794            m_cuda.gemm(
true, 
false, m_nKernelDim, m_nConvOutSpatialDim, m_nConvOutChannels / 
m_nGroup, 
m_tOne, hWeights, hOutput, 
m_tZero, hColBuff, 0, nOutputOffset, nColBuffOffset, 
m_nGroup, 
m_nWeightOffset, m_nOutputOffset, m_nColOffset);
 
  797                conv_col2im(hColBuff, nColBuffOffset, hInput, nInputOffset);
 
  811        protected void weight_gemm(
long hInput, 
int nInputOffset, 
long hOutput, 
int nOutputOffset, 
long hWeights)
 
  813            long hColBuff = hInput;
 
  814            int nColBuffOffset = nInputOffset;
 
  819                hColBuff = m_blobColBuffer.
gpu_data;
 
  828            m_cuda.gemm(
false, 
true, m_nConvOutChannels / 
m_nGroup, m_nKernelDim, m_nConvOutSpatialDim, 
m_tOne, hOutput, hColBuff, 
m_tOne, hWeights, nOutputOffset, nColBuffOffset, 0, 
m_nGroup, m_nOutputOffset, m_nColOffset, 
m_nWeightOffset);
 
  842            m_cuda.gemv(
false, 
m_nNumOutput, 
m_nOutSpatialDim, 
m_tOne, hInput, m_blobBiasMultiplier.
gpu_data, 
m_tOne, hBias, nInputOffset, 0, 0);
 
  867        private void conv_im2col(
long hData, 
int nDataOffset, 
long hColBuff, 
int nColBuffOffset)
 
  880                              val_at(rgConvInputShape, 1),
 
  881                              val_at(rgConvInputShape, 2),
 
  911        private void conv_col2im(
long hColBuff, 
int nColBuffOffset, 
long hData, 
int nDataOffset)
 
  924                              val_at(rgConvInputShape, 1),
 
  925                              val_at(rgConvInputShape, 2),
 
  939                m_cuda.col2im_nd(hColBuff, 
 
The Log class provides general output in text form.
 
void CHECK(bool b, string str)
Test a flag for true.
 
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
 
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
 
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
 
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
 
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
 
The Utility class provides general utility functions.
 
The BlobCollection contains a list of Blobs.
 
void Add(Blob< T > b)
Add a new Blob to the collection.
 
int Count
Returns the number of items in the collection.
 
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
 
The Blob is the main holder of data that moves through the Layers of the Net.
 
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
 
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
 
string shape_string
Returns a string describing the Blob's shape.
 
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
 
BLOB_TYPE type
Returns the BLOB_TYPE of the Blob.
 
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
 
long gpu_shape
Returns the shape GPU handle used by the CudaDnn connection. The shape data contains the shape inform...
 
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
 
string Name
Get/set the name of the Blob.
 
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
 
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
 
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
 
The WorkspaceArgs are passed to both the Layer::OnSetWorkspace and Layer::OnGetWorkspace events.
 
WorkspaceArgs(long hData, ulong lSize)
The WorkspaceArgs constructor.
 
Abstract Filler class used to fill blobs with values.
 
void Fill(Blob< T > b)
Fill the blob with values based on the actual filler used.
 
static Filler< T > Create(CudaDnn< T > cuda, Log log, FillerParameter p)
Create a new Filler instance.
 
The BaseConvolutionLayer is an abstract base class that factors out BLAS code common to ConvolutionLa...
 
int m_nBottomDim
The bottom dimension.
 
bool m_bIs1x1
Whether or not the kernel is 1x1.
 
int m_nNumOutput
The number of outputs.
 
int m_nTopDim
The top dimension.
 
List< int > m_rgOutputShape
The spatial dimensions of the output.
 
override void dispose()
Releases all GPU and host resources used by the Layer.
 
List< int > m_rgColBufferShape
The spatial dimensions of the col_buffer.
 
void backward_bias(long hBias, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
 
int m_nOutSpatialDim
The output spatial dimension.
 
int m_nChannelAxis
The channel axis.
 
override bool ReInitializeParameters(WEIGHT_TARGET target)
Re-initialize the parameters of the layer.
 
int m_nChannels
The number of channels in each item.
 
void forward_gemm(long hInput, int nInputOffset, long hWeights, long hOutput, int nOutputOffset, bool bSkipIm2Col=false)
Helper function that abstracts away the column buffer and gemm arguments.
 
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
 
override WorkspaceArgs getWorkspace()
Returns the WorkspaceArgs containing the workspace used by this Layer.
 
abstract bool reverse_dimensions()
reverse_dimensions should return true iff we are implementing deconv, so that conv helpers know which...
 
int m_nNumSpatialAxes
The number of spatial axes.
 
ulong getWorkspaceLimitInBytes(bool bUseTensorCores=false)
Returns the workspace limit in bytes based on the cudnn_workspace_limit setting.
 
override int MinBottomBlobs
Returns the minimum number of required bottom Blobs: input
 
override int MinTopBlobs
Returns the minimum number of required top (output) Blobs: output
 
void weight_gemm(long hInput, int nInputOffset, long hOutput, int nOutputOffset, long hWeights)
Helper function that abstracts away the column buffer and gemm arguments.
 
override bool EqualNumBottomTopBlobs
Returns that there are an equal number of top and bottom Blobs.
 
Blob< T > m_blobStride
The spatial dimensions of the stride.
 
Blob< T > m_blobDilation
The spatial dimensions of the dilation.
 
Blob< T > m_blobKernelShape
The spatial dimensions of the filter kernel.
 
List< int > m_rgBottomShape
The bottom shape.
 
BaseConvolutionLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The BaseConvolutionLayer constructor.
 
int m_nWeightOffset
The weight offset used.
 
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
 
Blob< T > m_blobPad
The spatial dimensions of the padding.
 
int m_nNum
The number of items in the batch.
 
abstract void compute_output_shape()
Compute height_out and width_out from other parameters.
 
Blob< T > m_blobConvInputShape
The spatial dimensions of the convolution input.
 
void forward_bias(long hOutput, int nOutputOffset, long hBias)
Helper function that abstracts away the column buffer and gemm arguments.
 
override bool setWorkspace(ulong lSizeInBytes)
If not already set, allocates the workspace needed in GPU memory.
 
void backward_gemm(long hOutput, int nOutputOffset, long hWeights, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
 
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
 
bool m_bForceNDim2col
Whether or not to force n-dim 2 column.
 
int input_shape(int i)
Returns the spatial dimensions of the input.
 
bool m_bBiasTerm
Whether or not to use bias.
 
An interface for the units of computation which can be composed into a Net.
 
Log m_log
Specifies the Log for output.
 
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
 
int val_at(T[] rg, int nIdx)
Returns the integer value at a given index in a generic array.
 
bool shareLayerBlob(Blob< T > b, List< int > rgMinShape)
Attempts to share a Layer Blob if another parameter Blob with the same name and acceptable size is fo...
 
T m_tZero
Specifies a generic type equal to 0.0.
 
T m_tOne
Specifies a generic type equal to 1.0.
 
bool shareParameter(Blob< T > b, List< int > rgMinShape, bool bAllowEndsWithComparison=false)
Attempts to share a parameter Blob if another parameter Blob with the same name and acceptable size i...
 
bool m_bUseHalfSize
Specifies that the half size of the top (if any) should be converted to the base size.
 
virtual bool reshapeNeeded(BlobCollection< T > colBottom, BlobCollection< T > colTop, bool bReset=true)
Tests the shapes of both the bottom and top blobs and if they are the same as the previous sizing,...
 
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
 
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
 
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
 
Specifies the parameters for the ConvolutionLayer. The default weight filler is set to the XavierFill...
 
FillerParameter weight_filler
The filler for the weight. The default is set to use the 'xavier' filler.
 
uint group
The group size for group convolution.
 
bool useCudnn(int nNumSpatialAxes=2)
Queries whether or not to use NVIDIA's cuDnn.
 
bool force_nd_im2col
Whether to force use of the general ND convolution, even if a specific implementation for blobs of th...
 
int axis
The axis to interpret as 'channels' when performing convolution. Preceding dimensions are treated as ...
 
FillerParameter bias_filler
The filler for the bias. The default is set to use the 'constant = 0.1' filler.
 
bool bias_term
Whether to have bias terms or not.
 
int cudnn_workspace_limit
Specifies the workspace limit used by cuDnn. A value of 0 directs cuDNN to use the fastest algorithm ...
 
uint num_output
The number of outputs for the layer.
 
bool cudnn_workspace_allow_on_groups
When true, allows workspace usage on groups > 1 (default = false).
 
uint? stride_h
The stride height (2D only)
 
List< uint > kernel_size
Kernel size is given as a single value for equal dimensions in all spatial dimensions,...
 
List< uint > dilation
Factor used to dilate the kernel, (implicitly) zero-filling the resulting holes. (Kernel dilation is ...
 
uint? stride_w
The stride width (2D only)
 
uint? pad_h
The padding height (2D only)
 
uint? kernel_h
The kernel height (2D only)
 
List< uint > stride
Stride is given as a single value for equal dimensions in all spatial dimensions, or once per spatial...
 
uint? kernel_w
The kernel width (2D only)
 
uint? pad_w
The padding width (2D only)
 
List< uint > pad
Pad is given as a single value for equal dimensions in all spatial dimensions, or once per spatial di...
 
Specifies the base parameter for all layers.
 
ConvolutionParameter convolution_param
Returns the parameter set when initialized with LayerType.CONVOLUTION
 
string name
Specifies the name of this LayerParameter.
 
bool use_halfsize
Specifies whether or not to use half sized memory or not.
 
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
 
The MyCaffe.common namespace contains common MyCaffe classes.
 
BLOB_TYPE
Defines the type of data held by a given Blob.
 
WEIGHT_TARGET
Defines the type of weight to target in re-initializations.
 
The MyCaffe.fillers namespace contains all fillers including the Filler class.
 
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
 
The MyCaffe.param namespace contains parameters used to create models.
 
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...