// LRNLayer<T> implementation excerpts (selected source lines).
using System.Collections.Generic;

// Member field: cuDNN tensor descriptor handle for the bottom (input) blob.
long m_hBottomDesc = 0;
// LRNLayer constructor: allocate the internal blobs used by the Engine.CAFFE path.
m_blobScale = new Blob<T>(cuda, log);
m_blobSquareInput = new Blob<T>(cuda, log);
m_blobSquareOutput = new Blob<T>(cuda, log);
m_blobPoolOutput = new Blob<T>(cuda, log);
m_blobPowerOutput = new Blob<T>(cuda, log);
m_blobProductInput = new Blob<T>(cuda, log);
// dispose(): release the internal sub-layers, then free the cuDNN descriptors,
// the cuDNN handle and the temporary GPU workspaces.
if (m_splitLayer != null)

if (m_squareLayer != null)
    m_squareLayer = null;

if (m_poolLayer != null)

if (m_powerLayer != null)

if (m_productLayer != null)

if (m_hNormDesc != 0)
    m_cuda.FreeLRNDesc(m_hNormDesc);

if (m_hBottomDesc != 0)
    m_cuda.FreeTensorDesc(m_hBottomDesc);

m_cuda.FreeTensorDesc(m_hTopDesc);
m_cuda.FreeCuDNN(m_hCuDnn);

if (m_hTempData1 != 0)
    m_cuda.FreeMemory(m_hTempData1);

if (m_hTempData2 != 0)
    m_cuda.FreeMemory(m_hTempData2);
// setup_internal_blobs(col): register the internal blobs with the collection provided.
col.Add(m_blobScale);
col.Add(m_blobSquareInput);
col.Add(m_blobSquareOutput);
col.Add(m_blobPoolOutput);
col.Add(m_blobPowerOutput);
col.Add(m_blobProductInput);
// LayerSetUp: validate the window size, then build the Engine.CAFFE sub-layer pipeline
// (split -> square -> pool -> power -> product).
m_log.CHECK_EQ(m_nSize % 2, 1, "LRN only supports odd values for local_size.");
m_nPrePad = (m_nSize - 1) / 2;

m_colSplitTopVec.Add(m_blobProductInput);
m_colSplitTopVec.Add(m_blobSquareInput);
m_splitLayer.Setup(colBottom, m_colSplitTopVec);

m_colSquareBottomVec.Add(m_blobSquareInput);
m_colSquareTopVec.Add(m_blobSquareOutput);
m_squareLayer.Setup(m_colSquareBottomVec, m_colSquareTopVec);

m_colPoolTopVec.Add(m_blobPoolOutput);
m_poolLayer.Setup(m_colSquareTopVec, m_colPoolTopVec);

m_colPowerTopVec.Add(m_blobPowerOutput);
m_powerLayer.Setup(m_colPoolTopVec, m_colPowerTopVec);

m_colProductBottomVec.Add(m_blobProductInput);
m_colProductBottomVec.Add(m_blobPowerOutput);
m_productLayer.Setup(m_colProductBottomVec, colTop);
// LayerSetUp (Engine.CUDNN): create the cuDNN handle, LRN descriptor and tensor descriptors.
m_hCuDnn = m_cuda.CreateCuDNN();
m_hNormDesc = m_cuda.CreateLRNDesc();
m_hBottomDesc = m_cuda.CreateTensorDesc();
m_hTopDesc = m_cuda.CreateTensorDesc();
// Reshape: the input must be 4D (num, channels, height, width); size the top and scale
// blobs to match, then reshape the Engine.CAFFE sub-layers.
m_log.CHECK_EQ(4, colBottom[0].num_axes, "Input must have 4 axes, corresponding to (num, channels, height, width)");
m_nNum = colBottom[0].num;
m_nChannels = colBottom[0].channels;
m_nHeight = colBottom[0].height;
m_nWidth = colBottom[0].width;

colTop[0].Reshape(m_nNum, m_nChannels, m_nHeight, m_nWidth);
m_blobScale.Reshape(m_nNum, m_nChannels, m_nHeight, m_nWidth);

m_splitLayer.Reshape(colBottom, m_colSplitTopVec);
m_squareLayer.Reshape(m_colSquareBottomVec, m_colSquareTopVec);
m_poolLayer.Reshape(m_colSquareTopVec, m_colPoolTopVec);
m_powerLayer.Reshape(m_colPoolTopVec, m_colPowerTopVec);
m_productLayer.Reshape(m_colProductBottomVec, colTop);
// Reshape (Engine.CUDNN): update the descriptors and (re)allocate the temporary
// GPU workspaces when the input grows.
m_cuda.SetTensorDesc(m_hBottomDesc, m_nNum, m_nChannels, m_nHeight, m_nWidth);
m_cuda.SetTensorDesc(m_hTopDesc, m_nNum, m_nChannels, m_nHeight, m_nWidth);
m_cuda.SetLRNDesc(m_hNormDesc, (uint)m_nSize, m_dfAlpha, m_dfBeta, m_dfK);

int nTotalSize = m_nNum * m_nChannels * m_nHeight * m_nWidth;

if (nTotalSize > m_nTempDataSize)
{
    if (m_hTempData1 != 0)
        m_cuda.FreeMemory(m_hTempData1);

    if (m_hTempData2 != 0)
        m_cuda.FreeMemory(m_hTempData2);

    m_hTempData1 = m_cuda.AllocMemory(nTotalSize);
    m_hTempData2 = m_cuda.AllocMemory(nTotalSize);
    m_nTempDataSize = nTotalSize;
}
// forward_cuda (Engine.CAFFE): dispatch on the normalization region,
// cross-channel vs. within-channel, failing on an unknown region.
CrossChannelForward(colBottom, colTop);
WithinChannelForward(colBottom, colTop);
m_log.FAIL("Unknown normalization region.");

// backward_cuda (Engine.CAFFE): the same dispatch for the gradient.
CrossChannelBackward(colTop, rgbPropagateDown, colBottom);
WithinChannelBackward(colTop, rgbPropagateDown, colBottom);
m_log.FAIL("Unknown normalization region.");
// forward_cudnn: depending on the normalization region, either the cuDNN divisive
// normalization or the cuDNN cross-channel LRN forward pass is run.
long hBottomData = colBottom[0].gpu_data;
long hTopData = colTop[0].mutable_gpu_data;

m_cuda.DivisiveNormalizationForward(m_hCuDnn, m_hNormDesc,
    m_tOne, m_hBottomDesc, hBottomData, m_hTempData1, m_hTempData2,
    m_tZero, m_hTopDesc, hTopData);

m_cuda.LRNCrossChannelForward(m_hCuDnn, m_hNormDesc,
    m_tOne, m_hBottomDesc, hBottomData,
    m_tZero, m_hTopDesc, hTopData);

// backward_cudnn: the matching cuDNN backward pass.
long hTopDiff = colTop[0].gpu_diff;
long hTopData = colTop[0].gpu_data;
long hBottomData = colBottom[0].gpu_data;
long hBottomDiff = colBottom[0].mutable_gpu_diff;

m_cuda.DivisiveNormalizationBackward(m_hCuDnn, m_hNormDesc,
    m_tOne, m_hBottomDesc, hBottomData, hTopDiff, m_hTempData1, m_hTempData2,
    m_tZero, m_hBottomDesc, hBottomDiff);

m_cuda.LRNCrossChannelBackward(m_hCuDnn, m_hNormDesc,
    m_tOne, m_hTopDesc, hTopData, m_hTopDesc, hTopDiff, m_hBottomDesc, hBottomData,
    m_tZero, m_hBottomDesc, hBottomDiff);
// CrossChannelForward (Engine.CAFFE): lrn_fillscale computes the per-element scale term,
// lrn_computeoutput then applies the -beta power to produce the output.
long hBottomData = colBottom[0].gpu_data;
long hTopData = colTop[0].mutable_gpu_data;

int nThreads = m_nNum * m_nHeight * m_nWidth;
m_cuda.lrn_fillscale(nThreads, hBottomData, m_nNum, m_nChannels, m_nHeight, m_nWidth, m_nSize,
    convert(m_dfAlpha / m_nSize), convert(m_dfK), hScaleData);

nThreads = colBottom[0].count();
m_cuda.lrn_computeoutput(nThreads, hBottomData, hScaleData, convert(-m_dfBeta), hTopData);
// WithinChannelForward (Engine.CAFFE): run the split -> square -> pool -> power -> product chain.
m_splitLayer.Forward(colBottom, m_colSplitTopVec);
m_squareLayer.Forward(m_colSquareBottomVec, m_colSquareTopVec);
m_poolLayer.Forward(m_colSquareTopVec, m_colPoolTopVec);
m_powerLayer.Forward(m_colPoolTopVec, m_colPowerTopVec);
m_productLayer.Forward(m_colProductBottomVec, colTop);
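Read end to end, the chain squares the input, pools the squares over the local window, raises the pooled value to a power, and multiplies the result element-wise with the original input. As a sketch (assuming average pooling and the PowerLayer configuration applied in the LayerSetUp code not shown in this excerpt; in the standard Caffe within-channel construction power = -beta and scale is derived from alpha):

\[
y \;=\; x \,\odot\, \bigl(\mathrm{shift} + \mathrm{scale}\cdot\mathrm{avgpool}(x^{2})\bigr)^{\mathrm{power}}
\]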
// CrossChannelBackward (Engine.CAFFE): lrn_computediff computes the bottom gradient
// from the saved scale term, the top data and the top gradient.
int nThreads = m_nNum * m_nHeight * m_nWidth;
long hBottomData = colBottom[0].gpu_data;
long hTopData = colTop[0].gpu_data;
long hScaleData = m_blobScale.gpu_data;
long hTopDiff = colTop[0].gpu_diff;
long hBottomDiff = colBottom[0].mutable_gpu_diff;

m_cuda.lrn_computediff(nThreads, hBottomData, hTopData, hScaleData, hTopDiff,
    m_nNum, m_nChannels, m_nHeight, m_nWidth, m_nSize,
    convert(-m_dfBeta), convert(2.0 * m_dfAlpha * m_dfBeta / m_nSize), hBottomDiff);
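The coefficients passed to lrn_computediff above (-beta and 2 * alpha * beta / local_size) correspond to the standard cross-channel LRN gradient. As a sketch, writing the forward-pass scale term as s_i = k + (alpha/n) * sum of x_j^2 over the window N(i) of n = local_size channels, so that y_i = x_i * s_i^(-beta):

\[
\frac{\partial E}{\partial x_i} \;=\; s_i^{-\beta}\,\frac{\partial E}{\partial y_i} \;-\; \frac{2\alpha\beta}{n}\, x_i \sum_{j \in N(i)} \frac{y_j}{s_j}\,\frac{\partial E}{\partial y_j}
\]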
// WithinChannelBackward (Engine.CAFFE): back-propagate through the sub-layer chain in reverse order.
if (rgbPropagateDown[0])
{
    List<bool> rgbProductPropagateDown = Utility.Create<bool>(2, true);

    m_productLayer.Backward(colTop, rgbProductPropagateDown, m_colProductBottomVec);
    m_powerLayer.Backward(m_colPowerTopVec, rgbPropagateDown, m_colPoolTopVec);
    m_poolLayer.Backward(m_colPoolTopVec, rgbPropagateDown, m_colSquareTopVec);
    m_squareLayer.Backward(m_colSquareTopVec, rgbPropagateDown, m_colSquareBottomVec);
    m_splitLayer.Backward(m_colSplitTopVec, rgbPropagateDown, colBottom);
}
The Log class provides general output in text form.
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
void CHECK_EQ(double df1, double df2, string str)
Tests whether one number is equal to another.
The Utility class provides general utility functions.
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The Blob is the main holder of data that moves through the Layers of the Net.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use ...
string Name
Get/set the name of the Blob.
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
The EltwiseLayer computes elementwise operations, such as product and sum, along multiple input blobs...
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
The "Local Response Normalization" LRNLayer is used to normalize the input in a local region across o...
override int ExactNumBottomBlobs
Returns the exact number of required bottom (input) Blobs: input
void forward_cudnn(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation using the Engine.CUDNN mode.
void forward_cuda(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation using the Engine.CAFFE mode.
void backward_cuda(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the inputs using the Engine.CAFFE mode.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' provided.
override void dispose()
Releases all GPU and host resources used by the Layer.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation using either the Engine.CUDNN or Engine.CAFFE mode depending on the engine parame...
LRNLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The LRNLayer constructor.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the inputs using either the Engine.CUDNN or Engine....
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: lrn
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer for both Engine.CUDNN and Engine.CAFFE modes.
void backward_cudnn(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the inputs using the Engine.CUDNN mode.
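A rough usage sketch follows (not taken from this page): it assumes a CudaDnn<double> connection 'cuda' and a Log 'log' already exist, and that LayerParameter and BlobCollection<T> can be constructed directly as shown; only member calls documented above are used.

// Hypothetical sketch; 'cuda' (CudaDnn<double>) and 'log' (Log) are assumed to exist already.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.LRN);   // assumed constructor form
p.lrn_param.local_size = 5;        // must be odd (see CHECK_EQ in LayerSetUp)
p.lrn_param.alpha = 0.0001;
p.lrn_param.beta = 0.75;
p.lrn_param.k = 1.0;

LRNLayer<double> lrn = new LRNLayer<double>(cuda, log, p);

BlobCollection<double> colBottom = new BlobCollection<double>();       // assumed parameterless constructor
BlobCollection<double> colTop = new BlobCollection<double>();
colBottom.Add(new Blob<double>(cuda, log));
colTop.Add(new Blob<double>(cuda, log));
colBottom[0].Reshape(1, 3, 28, 28);            // 4 axes: (num, channels, height, width)

lrn.Setup(colBottom, colTop);                  // LayerSetUp + Reshape
double dfLoss = lrn.Forward(colBottom, colTop);

Setup wires up either the cuDNN descriptors or the Engine.CAFFE sub-layer pipeline depending on useCudnn(), and Forward then dispatches accordingly.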
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
T m_tZero
Specifies a generic type equal to 0.0.
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
T m_tOne
Specifies a generic type equal to 1.0.
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
bool m_bUseHalfSize
Specifies that the half size of the top (if any) should be converted to the base size.
void Dispose()
Releases all GPU and host resources used by the Layer.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
LayerParameter.LayerType m_type
Specifies the Layer type.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the top (output) Blob to have the same shape as the bottom (input) Blob.
The PoolingLayer pools the input image by taking the max, average, etc. within regions....
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
The PowerLayer computes the power of the input. This layer is initialized with the MyCaffe....
The SplitLayer creates a 'split' path in the network by copying the bottom blob into multiple top blo...
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
Specifies the parameters for the EltwiseLayer.
EltwiseOp
Defines the operation to perform.
EltwiseOp operation
Specifies the element-wise operation.
List< uint > kernel_size
Kernel size is given as a single value for equal dimensions in all spatial dimensions,...
List< uint > pad
Pad is given as a single value for equal dimensions in all spatial dimensions, or once per spatial di...
Specifies the parameter for the LRNLayer.
bool useCudnn()
Queries whether or not to use NVIDIA's cuDnn.
NormRegion
Defines the normalization region.
double beta
Specifies the beta value used as the power parameter in the normalization formula....
NormRegion norm_region
Specifies the region over which to normalize.
uint local_size
Specifies the local size of the normalization window width.
double alpha
Specifies the alpha value used for variance scaling in the normalization formula. NOTE: cuDNN uses a ...
double k
Specifies the k value used by the normalization parameter. NOTE: cuDNN uses a default of k = 2....
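Together, alpha, beta, k and local_size define the LRN normalization. A sketch of the standard cross-channel form, consistent with the alpha / local_size scaling and the -beta exponent passed to the kernels in the source excerpts above (n = local_size, N(i) the window of n channels centered on channel i):

\[
y_i \;=\; x_i \left( k \;+\; \frac{\alpha}{n} \sum_{j \in N(i)} x_j^{2} \right)^{-\beta}
\]

When norm_region selects within-channel normalization, the local region is an n x n spatial window inside the same channel rather than a window across channels.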
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
LRNParameter lrn_param
Returns the parameter set when initialized with LayerType.LRN
PoolingParameter pooling_param
Returns the parameter set when initialized with LayerType.POOLING
bool use_halfsize
Specifies whether or not to use half-sized memory.
EltwiseParameter eltwise_param
Returns the parameter set when initialized with LayerType.ELTWISE
PowerParameter power_param
Returns the parameter set when initialized with LayerType.POWER
LayerType
Specifies the layer type.
Specifies the parameters for the PoolingLayer.
PoolingMethod
Defines the pooling method.
PoolingMethod pool
Specifies the pooling method.
double power
Specifies the power value in the formula y = (shift + scale * x)^power.
double scale
Specifies the scale value in the formula y = (shift + scale * x)^power.
double shift
Specifies the shift value in the formula y = (shift + scale * x)^power.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...