using System.Collections.Generic;

List<double> m_rgdfCoeffs = new List<double>();
Blob<T> m_blobSingleSecondary = null;
bool m_bStableProdGrad;

m_blobIdx = new Blob<T>(cuda, log);

if (m_blobSingleSecondary != null)
    m_blobSingleSecondary.Dispose();

m_log.CHECK(nCoeffSize == 0 || (!m_bCoeffBlob && nCoeffSize == colBottom.Count) || (m_bCoeffBlob && nCoeffSize == colBottom.Count - 1), "Eltwise Layer takes one coefficient per bottom blob.");
int nCoeffBlobCount = (m_bCoeffBlob) ? 1 : 0;
m_log.CHECK_EQ(colBottom.Count, 2, "Only two inputs allowed when 'allow_single_batch_input' = true.");

if (colBottom[1].count() == 1)
    m_log.CHECK_EQ(colBottom.Count, 2, "Only two inputs allowed when colBottom[1].count() == 1.");

for (int i = 1; i < colBottom.Count; i++)
    if (m_bCoeffBlob && i == colBottom.Count - 1)
        m_log.CHECK_EQ(i, colBottom[i].shape(0), "Dimensions of coeff blob axis 0 must equal the number of bottom blobs (not including the coeff blob itself).");
        for (int input_axis = 0, coeff_axis = 1; coeff_axis < colBottom[i].num_axes; input_axis++, coeff_axis++)
            m_log.CHECK_EQ(colBottom[0].shape(input_axis), colBottom[i].shape(coeff_axis), "Each axis i >= 1 of the coeff blob must match the (i-1)th axis of the input.");
if (colBottom.Count == 2 && colBottom[1].count() == 1)
    if (m_blobSingleSecondary == null)

double dfVal = Utility.ConvertVal<T>(colBottom[i].GetData(0));
m_blobSingleSecondary.SetData(dfVal);
m_log.CHECK(Utility.Compare<int>(colBottom[i].shape(), colBottom[0].shape(), false), "The bottoms should all be of the same shape.");

if (m_blobSingleSecondary == null)

m_log.CHECK_EQ(colBottom[i].num, 1, "The batch for the second input must be 1.");
m_log.CHECK_EQ(colBottom[i].count(1), colBottom[0].count(1), "All shapes other than the first shape must match!");

m_blobIdx.Reshape(colBottom[0].shape());
Blob<T> blob = (m_blobSingleSecondary != null) ? m_blobSingleSecondary : colBottom[1];

int nCount = colTop[0].count();
long hTopData = colTop[0].mutable_gpu_data;
m_cuda.mul(nCount, colBottom[0].gpu_data, blob.gpu_data, hTopData);
for (int i = 2; i < colBottom.Count; i++)
    m_cuda.mul(nCount, hTopData, colBottom[i].gpu_data, hTopData);

m_cuda.div(nCount, colBottom[0].gpu_data, blob.gpu_data, hTopData);
for (int i = 2; i < colBottom.Count; i++)
    m_cuda.div(nCount, hTopData, colBottom[i].gpu_data, hTopData);
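The PROD and DIV paths fold each additional bottom into a running elementwise product (or quotient). A minimal CPU sketch of the same math (illustrative only; the layer itself runs these steps through the CudaDnn kernels above):

// Hypothetical CPU reference for the PROD forward path, not part of the MyCaffe API.
static double[] EltwiseProd(IList<double[]> rgBottoms)
{
    double[] rgTop = (double[])rgBottoms[0].Clone();
    for (int i = 1; i < rgBottoms.Count; i++)       // fold each remaining input in
        for (int j = 0; j < rgTop.Length; j++)
            rgTop[j] *= rgBottoms[i][j];            // use /= here for the DIV path
    return rgTop;
}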
int nNum = colTop[0].num;
int nDim = nCount / nNum;
hCoeffData = colBottom[colBottom.Count - 1].gpu_data;

for (int i = 0; i < colBottom.Count - nCoeffCount; i++)
{
    long hBottomData = (i == 0 || colBottom.Count > 3) ? colBottom[i].gpu_data : blob.gpu_data;
    m_cuda.coeff_sum_fwd(nCount, nDim, i * nNum, m_rgdfCoeffs[i], hCoeffData, hBottomData, hTopData);
}
m_cuda.set(nCount, hTopData, 0);

for (int i = 0; i < colBottom.Count; i++)
{
    long hBottomData = (i == 0 || colBottom.Count > 2) ? colBottom[i].gpu_data : blob.gpu_data;
    m_cuda.axpy(nCount, m_rgdfCoeffs[i], hBottomData, hTopData);
}
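Either way, the SUM forward computes top = coeff[0]*bottom[0] + coeff[1]*bottom[1] + ..., accumulated with axpy into a zeroed top. A CPU sketch of that accumulation (illustrative only, not the MyCaffe API):

static double[] EltwiseSum(IList<double[]> rgBottoms, IList<double> rgCoeffs)
{
    double[] rgTop = new double[rgBottoms[0].Length];  // zeroed, like m_cuda.set above
    for (int i = 0; i < rgBottoms.Count; i++)
        for (int j = 0; j < rgTop.Length; j++)
            rgTop[j] += rgCoeffs[i] * rgBottoms[i][j]; // the axpy step
    return rgTop;
}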
int nNum = colTop[0].num;
int nDim = nCount / nNum;
hCoeffData = colBottom[colBottom.Count - 1].gpu_data;

for (int i = 0; i < colBottom.Count - nCoeffCount; i++)
{
    long hBottomData = (i == 0 || colBottom.Count > 3) ? colBottom[i].gpu_data : blob.gpu_data;
    m_cuda.coeff_sub_fwd(nCount, nDim, i * nNum, m_rgdfCoeffs[i], hCoeffData, hBottomData, hTopData);
}
m_cuda.scale(nCount, m_rgdfCoeffs[0], colBottom[0].gpu_data, hTopData);

for (int i = 1; i < colBottom.Count; i++)
{
    long hBottomData = (i == 0 || colBottom.Count > 2) ? colBottom[i].gpu_data : blob.gpu_data;
    m_cuda.axpy(nCount, -1 * m_rgdfCoeffs[i], hBottomData, hTopData);
}
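The SUB path differs from SUM only in sign: top = coeff[0]*bottom[0] - coeff[1]*bottom[1] - ... - coeff[n-1]*bottom[n-1], which is why the loop above starts at i = 1 and calls axpy with -1 * m_rgdfCoeffs[i].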
m_cuda.max_fwd(nCount, colBottom[0].gpu_data, colBottom[1].gpu_data, 0, hTopData, hMask);
for (int i = 2; i < colBottom.Count; i++)
    m_cuda.max_fwd(nCount, hTopData, colBottom[i].gpu_data, i - 1, hTopData, hMask);

m_cuda.min_fwd(nCount, colBottom[0].gpu_data, colBottom[1].gpu_data, 0, hTopData, hMask);
for (int i = 2; i < colBottom.Count; i++)
    m_cuda.min_fwd(nCount, hTopData, colBottom[i].gpu_data, i - 1, hTopData, hMask);

m_log.FAIL("Unknown elementwise operation.");
int nCount = colTop[0].count();
long hTopData = colTop[0].gpu_data;
long hTopDiff = colTop[0].gpu_diff;

int nNum = colTop[0].num;
int nDim = nCount / nNum;
hCoeffData = colBottom[colBottom.Count - 1].gpu_data;

for (int i = 0; i < colBottom.Count; i++)
    if (rgbPropagateDown[i])
        long hBottomData = colBottom[i].gpu_data;
        long hBottomDiff = colBottom[i].mutable_gpu_diff;

        if (i == 1 && m_blobSingleSecondary != null)
            hBottomData = m_blobSingleSecondary.gpu_data;
if (m_bStableProdGrad)
    bool bInitialized = false;
    for (int j = 0; j < colBottom.Count; j++)
        m_cuda.copy(nCount, colBottom[j].gpu_data, hBottomDiff);
        m_cuda.mul(nCount, colBottom[j].gpu_data, hBottomDiff, hBottomDiff);

m_cuda.div(nCount, hTopData, hBottomData, hBottomDiff);
m_cuda.mul(nCount, hBottomDiff, hTopDiff, hBottomDiff);

m_cuda.mul(nCount, hTopData, hBottomData, hBottomDiff);
m_cuda.mul(nCount, hBottomDiff, hTopDiff, hBottomDiff);
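Both PROD gradient strategies compute dBottom[i] = dTop * prod(bottom[j] for j != i). The fast path divides the cached top by bottom[i], which is cheap but ill-conditioned when bottom[i] holds zeros or tiny values; the stable path re-multiplies the other inputs instead. A CPU sketch of the stable variant (illustrative only, not the MyCaffe API):

static double[] ProdGradStable(IList<double[]> rgBottoms, double[] rgTopDiff, int nSkip)
{
    int nCount = rgTopDiff.Length;
    double[] rgDiff = new double[nCount];
    bool bInitialized = false;
    for (int j = 0; j < rgBottoms.Count; j++)
    {
        if (j == nSkip)
            continue;                                // product over all inputs but nSkip
        for (int k = 0; k < nCount; k++)
            rgDiff[k] = bInitialized ? rgDiff[k] * rgBottoms[j][k] : rgBottoms[j][k];
        bInitialized = true;                         // first pass copies, later passes multiply
    }
    for (int k = 0; k < nCount; k++)
        rgDiff[k] *= rgTopDiff[k];                   // chain rule: multiply by dTop
    return rgDiff;
}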
m_cuda.coeff_sum_bwd(nCount, nDim, i * nNum, m_rgdfCoeffs[i], hCoeffData, hTopDiff, hBottomDiff);

if (m_rgdfCoeffs[i] == 1.0)
    m_cuda.copy(nCount, hTopDiff, hBottomDiff);
else
    m_cuda.scale(nCount, m_rgdfCoeffs[i], hTopDiff, hBottomDiff);

m_cuda.coeff_sub_bwd(nCount, nDim, i * nNum, m_rgdfCoeffs[i], hCoeffData, hTopDiff, hBottomDiff);

double dfScale = (i == 0) ? 1 : -1;
m_cuda.scale(nCount, dfScale * m_rgdfCoeffs[i], hTopDiff, hBottomDiff);
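For SUM the chain rule gives dBottom[i] = coeff[i] * dTop, so the backward pass is a plain copy when the coefficient is 1 and a scale otherwise; for SUB the same holds with the sign flipped (dfScale = -1) for every input after the first.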
m_cuda.max_bwd(nCount, hTopDiff, i, hMask, hBottomDiff);

m_cuda.min_bwd(nCount, hTopDiff, i, hMask, hBottomDiff);

m_log.FAIL("Unknown elementwise operation.");

m_cuda.channel_sum(nCount, 1, nNum, colTop[0].channels * colTop[0].count(2), m_blobSingleSecondary.gpu_diff, colBottom[1].mutable_gpu_diff);
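When the second input held a single batch item that was broadcast across the batch on the forward pass, its gradient is the sum of the per-item gradients, which channel_sum computes on the GPU. A CPU sketch of that reduction (illustrative only):

static double[] ReduceBatchGrad(double[] rgDiff, int nNum, int nDim)
{
    double[] rgOut = new double[nDim];        // gradient for the single broadcast item
    for (int n = 0; n < nNum; n++)
        for (int d = 0; d < nDim; d++)
            rgOut[d] += rgDiff[n * nDim + d]; // sum over the batch axis
    return rgOut;
}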
The Log class provides general output in text form.
void CHECK(bool b, string str)
Test a flag for true.
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
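A minimal usage sketch of these Log methods (hypothetical condition and values, using only the signatures listed above):

m_log.CHECK(colBottom.Count >= 2, "At least two bottom blobs are required.");
m_log.CHECK_EQ(colBottom[0].count(), colBottom[1].count(), "The bottom counts must match.");
if (bUnknownOp)  // hypothetical flag
    m_log.FAIL("Unknown elementwise operation.");  // throws with the descriptive text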
The Utility class provides general utility functions.
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
The Blob is the main holder of data that moves through the Layers of the Net.
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
int count()
Returns the total number of items in the Blob.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
The EltwiseLayer computes elementwise operations, such as product and sum, along multiple input blobs...
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
EltwiseLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The EltwiseLayer constructor.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
The Forward computation.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: output (result of eltwise operation in input...
override int MinBottomBlobs
Returns the minimum required number of bottom (input) Blobs: input1, input2
override void dispose()
Releases all GPU and host resources used by the Layer.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the input.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' provided.
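Putting the listed members together, a minimal construction sketch (assumed setup: cuda, log, colBottom and colTop are presumed to already exist, and the LayerParameter constructor taking a LayerType is an assumption):

LayerParameter p = new LayerParameter(LayerParameter.LayerType.ELTWISE);
p.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;  // assumed enum value
p.eltwise_param.coeff = new List<double>() { 1.0, -1.0 };    // top = bottom0 - bottom1
EltwiseLayer<float> layer = new EltwiseLayer<float>(cuda, log, p);
// LayerSetUp, Reshape, forward and backward are then driven by the Net
// (or by the base Layer's public entry points) with the bottom/top collections.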
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
LayerParameter.LayerType m_type
Specifies the Layer type.
LayerParameter layer_param
Returns the LayerParameter for this Layer.
Specifies the parameters for the EltwiseLayer.
EltwiseOp
Defines the operation to perform.
bool allow_single_batch_input
Specifies whether to allow single batch input for the second input (default = false).
List< double > coeff
Specifies the blob-wise coefficient for SUM operation.
bool stable_prod_grad
Specifies whether or not to use an asymptotically slower (for > 2 inputs) but stabler method of compu...
EltwiseOp operation
Specifies the element-wise operation.
bool coeff_blob
If true and the EltwiseOp is SUM, the last bottom blob is a singleton coefficient for the first N-1 b...
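A hedged configuration sketch of these fields (field names from the docs above; the EltwiseOp value is an assumption):

EltwiseParameter ep = p.eltwise_param;
ep.operation = EltwiseParameter.EltwiseOp.PROD;  // assumed enum value
ep.stable_prod_grad = true;   // slower for > 2 inputs, but a stabler gradient
ep.coeff_blob = false;        // no trailing per-sample coefficient blob
ep.allow_single_batch_input = false;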
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
EltwiseParameter eltwise_param
Returns the parameter set when initialized with LayerType.ELTWISE
LayerType
Specifies the layer type.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...