2using System.Collections.Generic;
104 int m_nNumKernelsIm2col;
105 int m_nNumKernelsCol2im;
106 int m_nConvOutChannels;
107 int m_nConvInChannels;
108 int m_nConvOutSpatialDim;
116 long m_hWorkspaceData = 0;
117 ulong m_lWorkspaceSize = 0;
118 bool m_bWorkspaceOwner =
false;
145 m_blobColBuffer =
new Blob<T>(cuda, log);
148 m_blobBiasMultiplier =
new Blob<T>(cuda, log);
162 m_blobBiasMultiplier.
Dispose();
164 if (m_bWorkspaceOwner && m_hWorkspaceData != 0)
166 m_cuda.DisableGhostMemory();
167 m_cuda.FreeMemory(m_hWorkspaceData);
168 m_cuda.ResetGhostMemory();
169 m_hWorkspaceData = 0;
170 m_bWorkspaceOwner =
false;
192 ulong lWorkspaceLimitBytes = ulong.MaxValue;
196 if (lWorkspaceLimitBytes != ulong.MaxValue)
197 lWorkspaceLimitBytes *= 16;
201 lWorkspaceLimitBytes = ulong.MaxValue;
208 lWorkspaceLimitBytes = 0;
210 return lWorkspaceLimitBytes;
221 col.
Add(m_blobColBuffer);
222 col.
Add(m_blobBiasMultiplier);
237 m_bWorkspaceOwner =
true;
238 return new common.
WorkspaceArgs(m_hWorkspaceData, m_lWorkspaceSize);
248 if (!m_bWorkspaceOwner && base.setWorkspace(lSizeInBytes))
251 m_bWorkspaceOwner =
true;
253 if (lSizeInBytes < m_lWorkspaceSize)
256 m_lWorkspaceSize = lSizeInBytes;
257 m_cuda.DisableGhostMemory();
259 if (m_hWorkspaceData != 0)
260 m_cuda.FreeMemory(m_hWorkspaceData);
262 if (m_lWorkspaceSize > 0)
263 m_hWorkspaceData =
m_cuda.AllocMemory((
long)m_lWorkspaceSize);
265 m_cuda.ResetGhostMemory();
277 base.ReInitializeParameters(target);
311 int nNumAxes = colBottom[0].num_axes;
318 List<int> rgSpaitalDimBlobShape =
new List<int>() { Math.Max(
m_nNumSpatialAxes, 1) };
328 rgKernelShape[0] = (T)Convert.ChangeType(p.
kernel_h.Value, typeof(T));
329 rgKernelShape[1] = (T)Convert.ChangeType(p.
kernel_w.Value, typeof(T));
334 m_log.
CHECK(nNumKernelDims == 1 || nNumKernelDims ==
m_nNumSpatialAxes,
"Kernel size must be specified once, or once per spatial dimension (kernel_size specified " + nNumKernelDims.ToString() +
" times; " +
m_nNumSpatialAxes.ToString() +
" spatial dims);");
338 int nIdx = (nNumKernelDims == 1) ? 0 : i;
339 rgKernelShape[i] = (T)Convert.ChangeType(p.
kernel_size[nIdx], typeof(T));
345 m_log.
CHECK_GT((
int)Convert.ChangeType(rgKernelShape[i], typeof(
int)), 0,
"Filter dimension must be non-zero.");
358 m_log.
CHECK_EQ(0, p.
stride.Count,
"Either stride_size or stride_h/w should be specified; not both.");
359 rgStrideData[0] = (T)Convert.ChangeType(p.
stride_h.Value, typeof(T));
360 rgStrideData[1] = (T)Convert.ChangeType(p.
stride_w.Value, typeof(T));
364 int nNumStrideDims = p.
stride.Count;
365 m_log.
CHECK(nNumStrideDims == 0 || nNumStrideDims == 1 || nNumStrideDims ==
m_nNumSpatialAxes,
"Stride size must be specified once, or once per spatial dimension (stride specified " + nNumStrideDims.ToString() +
" times; " +
m_nNumSpatialAxes.ToString() +
" spatial dims);");
366 int nDefaultStride = 1;
370 if (nNumStrideDims == 0)
372 rgStrideData[i] = (T)Convert.ChangeType(nDefaultStride, typeof(T));
376 int nIdx = (nNumStrideDims == 1) ? 0 : i;
377 rgStrideData[i] = (T)Convert.ChangeType(p.
stride[nIdx], typeof(T));
379 m_log.
CHECK_GT((
int)Convert.ChangeType(rgStrideData[i], typeof(
int)), 0,
"Stride dimension must be non-zero.");
387 m_blobPad.Reshape(rgSpaitalDimBlobShape);
388 T[] rgPadData =
m_blobPad.mutable_cpu_data;
393 m_log.
CHECK_EQ(0, p.
pad.Count,
"Either pad_size or pad_h/w should be specified; not both.");
394 rgPadData[0] = (T)Convert.ChangeType(p.
pad_h.Value, typeof(T));
395 rgPadData[1] = (T)Convert.ChangeType(p.
pad_w.Value, typeof(T));
399 int nNumPadDims = p.
pad.Count;
400 m_log.
CHECK(nNumPadDims == 0 || nNumPadDims == 1 || nNumPadDims ==
m_nNumSpatialAxes,
"Pad size must be specified once, or once per spatial dimension (pad specified " + nNumPadDims.ToString() +
" times; " +
m_nNumSpatialAxes.ToString() +
" spatial dims);");
405 if (nNumPadDims == 0)
407 rgPadData[i] = (T)Convert.ChangeType(nDefaultPad, typeof(T));
411 int nIdx = (nNumPadDims == 1) ? 0 : i;
412 rgPadData[i] = (T)Convert.ChangeType(p.
pad[nIdx], typeof(T));
423 int nNumDilationDims = p.
dilation.Count;
425 m_log.
CHECK(nNumDilationDims == 0 || nNumDilationDims == 1 || nNumDilationDims ==
m_nNumSpatialAxes,
"Dilation size must be specified once, or once per spatial dimension (dilation specified " + nNumDilationDims.ToString() +
" times; " +
m_nNumSpatialAxes.ToString() +
" spatial dims);");
426 int nDefaultDilation = 1;
430 if (nNumDilationDims == 0)
432 rgDilationData[i] = (T)Convert.ChangeType(nDefaultDilation, typeof(T));
436 int nIdx = (nNumDilationDims == 1) ? 0 : i;
437 rgDilationData[i] = (T)Convert.ChangeType(p.
dilation[nIdx], typeof(T));
450 if (!(
val_at(rgKernelShape, i) == 1 &&
451 val_at(rgStrideData, i) == 1 &&
452 val_at(rgPadData, i) == 0))
483 List<int> rgWeightShape =
new List<int>();
484 rgWeightShape.Add(m_nConvOutChannels);
485 rgWeightShape.Add(m_nConvInChannels /
m_nGroup);
489 rgWeightShape.Add(
val_at(rgKernelShape, i));
494 List<int> rgBiasShape =
new List<int>() {
m_nNumOutput };
541 wtFiller.
Fill(blobWts1);
572 biasFiller.
Fill(blobBias1);
609 for (
int i = 1; i < colBottom.
Count; i++)
611 m_log.
CHECK(
Utility.Compare<
int>(colBottom[0].shape(), colBottom[i].shape()),
"Shape mismatch - bottom[0]: '" + colBottom[0].shape_string +
"' vs. bottom[" + i.ToString() +
"]: '" + colBottom[i].shape_string +
"'");
618 List<int> rgTopShape =
new List<int>();
622 rgTopShape.Add(colBottom[0].shape(i));
632 for (
int i = 0; i < colTop.
Count; i++)
638 m_nConvOutSpatialDim = colBottom[0].count(nFirstSpatialAxis);
640 m_nConvOutSpatialDim = colTop[0].count(nFirstSpatialAxis);
642 m_nColOffset = m_nKernelDim * m_nConvOutSpatialDim;
643 m_nOutputOffset = m_nConvOutChannels * m_nConvOutSpatialDim /
m_nGroup;
655 rgConvInputShapeData[i] = (T)Convert.ChangeType(colTop[0].shape(
m_nChannelAxis + i), typeof(T));
657 rgConvInputShapeData[i] = (T)Convert.ChangeType(colBottom[0].shape(
m_nChannelAxis + i), typeof(T));
681 m_nNumKernelsIm2col = m_nConvInChannels * m_nConvOutSpatialDim;
693 m_blobBiasMultiplier.
Reshape(rgBiasMultShape);
694 m_blobBiasMultiplier.
SetData(1.0);
735 protected void forward_gemm(
long hInput,
int nInputOffset,
long hWeights,
long hOutput,
int nOutputOffset,
bool bSkipIm2Col =
false)
737 long hColBuff = hInput;
738 int nColBuffOffset = nInputOffset;
745 hColBuff = m_blobColBuffer.
gpu_data;
750 m_cuda.gemm(
false,
false, m_nConvOutChannels /
m_nGroup, m_nConvOutSpatialDim, m_nKernelDim,
m_tOne, hWeights, hColBuff,
m_tZero, hOutput, 0, nColBuffOffset, nOutputOffset,
m_nGroup,
m_nWeightOffset, m_nColOffset, m_nOutputOffset);
762 protected void forward_bias(
long hOutput,
int nOutputOffset,
long hBias)
764 m_cuda.gemm(
false,
false,
m_nNumOutput,
m_nOutSpatialDim, 1,
m_tOne, hBias, m_blobBiasMultiplier.
gpu_data,
m_tOne, hOutput, 0, 0, nOutputOffset);
778 protected void backward_gemm(
long hOutput,
int nOutputOffset,
long hWeights,
long hInput,
int nInputOffset)
781 int nColBuffOffset = 0;
786 nColBuffOffset = nInputOffset;
794 m_cuda.gemm(
true,
false, m_nKernelDim, m_nConvOutSpatialDim, m_nConvOutChannels /
m_nGroup,
m_tOne, hWeights, hOutput,
m_tZero, hColBuff, 0, nOutputOffset, nColBuffOffset,
m_nGroup,
m_nWeightOffset, m_nOutputOffset, m_nColOffset);
797 conv_col2im(hColBuff, nColBuffOffset, hInput, nInputOffset);
811 protected void weight_gemm(
long hInput,
int nInputOffset,
long hOutput,
int nOutputOffset,
long hWeights)
813 long hColBuff = hInput;
814 int nColBuffOffset = nInputOffset;
819 hColBuff = m_blobColBuffer.
gpu_data;
828 m_cuda.gemm(
false,
true, m_nConvOutChannels /
m_nGroup, m_nKernelDim, m_nConvOutSpatialDim,
m_tOne, hOutput, hColBuff,
m_tOne, hWeights, nOutputOffset, nColBuffOffset, 0,
m_nGroup, m_nOutputOffset, m_nColOffset,
m_nWeightOffset);
842 m_cuda.gemv(
false,
m_nNumOutput,
m_nOutSpatialDim,
m_tOne, hInput, m_blobBiasMultiplier.
gpu_data,
m_tOne, hBias, nInputOffset, 0, 0);
867 private void conv_im2col(
long hData,
int nDataOffset,
long hColBuff,
int nColBuffOffset)
880 val_at(rgConvInputShape, 1),
881 val_at(rgConvInputShape, 2),
911 private void conv_col2im(
long hColBuff,
int nColBuffOffset,
long hData,
int nDataOffset)
924 val_at(rgConvInputShape, 1),
925 val_at(rgConvInputShape, 2),
939 m_cuda.col2im_nd(hColBuff,
The Log class provides general output in text form.
void CHECK(bool b, string str)
Test a flag for true.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
The Utility class provides general utility functions.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The Blob is the main holder of data that moves through the Layers of the Net.
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
string shape_string
Returns a string describing the Blob's shape.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
BLOB_TYPE type
Returns the BLOB_TYPE of the Blob.
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
long gpu_shape
Returns the shape GPU handle used by the CudaDnn connection. The shape data contains the shape inform...
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
The WorkspaceArgs are passed to both the Layer::OnSetWorkspace and Layer::OnGetWorkspace events.
WorkspaceArgs(long hData, ulong lSize)
The WorkspaceArgs constructor.
Abstract Filler class used to fill blobs with values.
void Fill(Blob< T > b)
Fill the blob with values based on the actual filler used.
static Filler< T > Create(CudaDnn< T > cuda, Log log, FillerParameter p)
Create a new Filler instance.
The BaseConvolutionLayer is an abstract base class that factors out BLAS code common to ConvolutionLa...
int m_nBottomDim
The bottom dimension.
bool m_bIs1x1
Whether or not the kernel is 1x1.
int m_nNumOutput
The number of outputs.
int m_nTopDim
The top dimension.
List< int > m_rgOutputShape
The spatial dimensions of the output.
override void dispose()
Releases all GPU and host resources used by the Layer.
List< int > m_rgColBufferShape
The spatial dimensions of the col_buffer.
void backward_bias(long hBias, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
int m_nOutSpatialDim
The output spatial dimension.
int m_nChannelAxis
The channel axis.
override bool ReInitializeParameters(WEIGHT_TARGET target)
Re-initialize the parameters of the layer.
int m_nChannels
The number of channels in each item.
void forward_gemm(long hInput, int nInputOffset, long hWeights, long hOutput, int nOutputOffset, bool bSkipIm2Col=false)
Helper function that abstracts away the column buffer and gemm arguments.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override WorkspaceArgs getWorkspace()
Returns the WorkspaceArgs containing the workspace used by this Layer.
abstract bool reverse_dimensions()
reverse_dimensions should return true iff we are implementing deconv, so that conv helpers know which...
int m_nNumSpatialAxes
The number of spatial axes.
ulong getWorkspaceLimitInBytes(bool bUseTensorCores=false)
Returns the workspace limit in bytes based on the cudnn_workspace_limit setting.
override int MinBottomBlobs
Returns the minimum number of required bottom Blobs: input
override int MinTopBlobs
Returns the minimum number of required top (output) Blobs: output
void weight_gemm(long hInput, int nInputOffset, long hOutput, int nOutputOffset, long hWeights)
Helper function that abstracts away the column buffer and gemm arguments.
override bool EqualNumBottomTopBlobs
Returns that there are an equal number of top and bottom Blobs.
Blob< T > m_blobStride
The spatial dimensions of the stride.
Blob< T > m_blobDilation
The spatial dimensions of the dilation.
Blob< T > m_blobKernelShape
The spatial dimensions of the filter kernel.
List< int > m_rgBottomShape
The bottom shape.
BaseConvolutionLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The BaseConvolutionLayer constructor.
int m_nWeightOffset
The weight offset used.
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
Blob< T > m_blobPad
The spatial dimensions of the padding.
int m_nNum
The number of items in the batch.
abstract void compute_output_shape()
Compute height_out and width_out from other parameters.
Blob< T > m_blobConvInputShape
The spatial dimensions of the convolution input.
void forward_bias(long hOutput, int nOutputOffset, long hBias)
Helper function that abstracts away the column buffer and gemm arguments.
override bool setWorkspace(ulong lSizeInBytes)
If not already set, allocates the workspace needed in GPU memory.
void backward_gemm(long hOutput, int nOutputOffset, long hWeights, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
bool m_bForceNDim2col
Whether or not to force n-dim 2 column.
int input_shape(int i)
Returns the spatial dimensions of the input.
bool m_bBiasTerm
Whether or not to use bias.
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
int val_at(T[] rg, int nIdx)
Returns the integer value at a given index in a generic array.
bool shareLayerBlob(Blob< T > b, List< int > rgMinShape)
Attempts to share a Layer Blob if another parameter Blob with the same name and acceptable size is fo...
T m_tZero
Specifies a generic type equal to 0.0.
T m_tOne
Specifies a generic type equal to 1.0.
bool shareParameter(Blob< T > b, List< int > rgMinShape, bool bAllowEndsWithComparison=false)
Attempts to share a parameter Blob if another parameter Blob with the same name and acceptable size i...
bool m_bUseHalfSize
Specifies that the half size of the top (if any) should be converted to the base size.
virtual bool reshapeNeeded(BlobCollection< T > colBottom, BlobCollection< T > colTop, bool bReset=true)
Tests the shapes of both the bottom and top blobs and if they are the same as the previous sizing,...
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
Specifies the parameters for the ConvolutionLayer. The default weight filler is set to the XavierFill...
FillerParameter weight_filler
The filler for the weight. The default is set to use the 'xavier' filler.
uint group
The group size for group convolution.
bool useCudnn(int nNumSpatialAxes=2)
Queries whether or not to use NVIDIA's cuDnn.
bool force_nd_im2col
Whether to force use of the general ND convolution, even if a specific implementation for blobs of th...
int axis
The axis to interpret as 'channels' when performing convolution. Preceding dimensions are treated as ...
FillerParameter bias_filler
The filler for the bias. The default is set to use the 'constant = 0.1' filler.
bool bias_term
Whether to have bias terms or not.
int cudnn_workspace_limit
Specifies the workspace limit used by cuDnn. A value of 0 directs cuDNN to use the fastest algorithm ...
uint num_output
The number of outputs for the layer.
bool cudnn_workspace_allow_on_groups
When true, allows workspace usage on groups > 1 (default = false).
uint? stride_h
The stride height (2D only)
List< uint > kernel_size
Kernel size is given as a single value for equal dimensions in all spatial dimensions,...
List< uint > dilation
Factor used to dilate the kernel, (implicitly) zero-filling the resulting holes. (Kernel dilation is ...
uint? stride_w
The stride width (2D only)
uint? pad_h
The padding height (2D only)
uint? kernel_h
The kernel height (2D only)
List< uint > stride
Stride is given as a single value for equal dimensions in all spatial dimensions, or once per spatial...
uint? kernel_w
The kernel width (2D only)
uint? pad_w
The padding width (2D only)
List< uint > pad
Pad is given as a single value for equal dimensions in all spatial dimensions, or once per spatial di...
Specifies the base parameter for all layers.
ConvolutionParameter convolution_param
Returns the parameter set when initialized with LayerType.CONVOLUTION
string name
Specifies the name of this LayerParameter.
bool use_halfsize
Specifies whether or not to use half sized memory or not.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
BLOB_TYPE
Defines the type of data held by a given Blob.
WEIGHT_TARGET
Defines the type of weight to target in re-initializations.
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...