using System.Collections.Generic;
bool m_bUseGlobalStats;
double m_dfMovingAverageFraction;
Blob<T> m_blobBatchSumMultiplier;
Blob<T> m_blobSpaitalSumMultiplier;
bool m_bScaleBias = false;
long m_hFwdBottomDesc = 0;
long m_hFwdTopDesc = 0;
long m_hBwdBottomDesc = 0;
long m_hBwdTopDesc = 0;
long m_hFwdScaleBiasMeanVarDesc = 0;
long m_hBwdScaleBiasMeanVarDesc = 0;
Blob<T> m_blobPrivateTop = null;
Blob<T> m_blobPrivateBottom = null;
const double CUDNN_BN_MIN_EPSILON = 1e-5;
m_blobMean = new common.Blob<T>(cuda, log);
m_blobVariance = new common.Blob<T>(cuda, log);
m_blobTemp = new common.Blob<T>(cuda, log);
m_blobXNorm = new common.Blob<T>(cuda, log);
m_blobBatchSumMultiplier = new common.Blob<T>(cuda, log);
m_blobNumByChans = new common.Blob<T>(cuda, log);
m_blobSpaitalSumMultiplier = new common.Blob<T>(cuda, log);

m_blobPrivateTop = new Blob<T>(cuda, log);
m_blobPrivateBottom = new Blob<T>(cuda, log);
m_blobScaleOnes = new Blob<T>(cuda, log);
m_blobBiasZeros = new Blob<T>(cuda, log);
m_blobMean.Dispose();
m_blobBatchSumMultiplier.Dispose();
m_blobSpaitalSumMultiplier.Dispose();

if (m_blobPrivateTop != null)
{
    m_blobPrivateTop.Dispose();
    m_blobPrivateTop = null;
}

if (m_blobPrivateBottom != null)
{
    m_blobPrivateBottom.Dispose();
    m_blobPrivateBottom = null;
}

if (m_blobScaleOnes != null)
{
    m_blobScaleOnes.Dispose();
    m_blobScaleOnes = null;
}

if (m_blobBiasZeros != null)
{
    m_blobBiasZeros.Dispose();
    m_blobBiasZeros = null;
}

if (m_hBwdBottomDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hBwdBottomDesc);
    m_hBwdBottomDesc = 0;
}

if (m_hBwdScaleBiasMeanVarDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hBwdScaleBiasMeanVarDesc);
    m_hBwdScaleBiasMeanVarDesc = 0;
}

if (m_hBwdTopDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hBwdTopDesc);
    m_hBwdTopDesc = 0;
}

if (m_hFwdBottomDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hFwdBottomDesc);
    m_hFwdBottomDesc = 0;
}

if (m_hFwdScaleBiasMeanVarDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hFwdScaleBiasMeanVarDesc);
    m_hFwdScaleBiasMeanVarDesc = 0;
}

if (m_hFwdTopDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hFwdTopDesc);
    m_hFwdTopDesc = 0;
}

m_cuda.FreeCuDNN(m_hCuDnn);
col.Add(m_blobVariance);
col.Add(m_blobPrivateBottom);
col.Add(m_blobPrivateTop);
col.Add(m_blobScaleOnes);
col.Add(m_blobBiasZeros);
col.Add(m_blobXNorm);
col.Add(m_blobBatchSumMultiplier);
col.Add(m_blobNumByChans);
col.Add(m_blobSpaitalSumMultiplier);
base.ReInitializeParameters(target);

for (int i = 0; i < 3; i++)
    // ...

if (colBottom[0].num_axes == 1)
    m_nChannels = 1;
else
    m_nChannels = colBottom[0].shape(1);

if (m_bScaleBias && !bUseCuDnn)
    m_bScaleBias = false;

List<int> rgSize = new List<int>();
rgSize.Add(m_nChannels);

for (int i = 0; i < 3; i++)
    // ...

for (int i = 3; i < 5; i++)
    // ...
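The two initialization loops above are consistent with the learnable-blob layout of the BVLC Caffe layer this code tracks; the sketch below states that layout as an assumption, since the loop bodies are elided in this listing:

// Assumed m_colBlobs layout (mirrors BVLC Caffe's BatchNormLayer; not confirmed by this excerpt):
// m_colBlobs[0] - running mean          (zeroed by the i = 0..2 loop)
// m_colBlobs[1] - running variance      (zeroed by the i = 0..2 loop)
// m_colBlobs[2] - moving-average factor (zeroed by the i = 0..2 loop)
// m_colBlobs[3] - per-channel scale     (filled by scale_filler in the i = 3..4 loop)
// m_colBlobs[4] - per-channel bias      (filled by bias_filler in the i = 3..4 loop)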
int nChannels = colBottom[0].channels;
List<int> rgShape = new List<int>() { 1, nChannels, 1, 1 };
m_blobScaleOnes.Reshape(rgShape);
m_blobBiasZeros.Reshape(rgShape);

m_hCuDnn = m_cuda.CreateCuDNN();
m_hFwdBottomDesc = m_cuda.CreateTensorDesc();
m_hFwdTopDesc = m_cuda.CreateTensorDesc();
m_hFwdScaleBiasMeanVarDesc = m_cuda.CreateTensorDesc();
m_hBwdBottomDesc = m_cuda.CreateTensorDesc();
m_hBwdTopDesc = m_cuda.CreateTensorDesc();
m_hBwdScaleBiasMeanVarDesc = m_cuda.CreateTensorDesc();

// cuDNN requires eps >= CUDNN_BN_MIN_EPSILON, so clamp from below.
m_dfEps = Math.Max(m_dfEps, CUDNN_BN_MIN_EPSILON);

m_blobMean.Reshape(rgShape);
m_blobVariance.Reshape(rgShape);

if (colBottom[0] == colTop[0])
    // ...
m_nChannels = colBottom[0].channels;

if (colBottom[0].num_axes >= 1)
    m_log.CHECK_EQ(colBottom[0].shape(1), m_nChannels, "The colBottom[0].shape(1) should equal the channel count '" + m_nChannels.ToString() + "'.");

List<int> rgSize = new List<int>();
rgSize.Add(m_nChannels);

m_blobMean.Reshape(rgSize);
m_blobVariance.Reshape(rgSize);

rgSize[0] = colBottom[0].shape(0);
m_blobBatchSumMultiplier.Reshape(rgSize);
int nSpatialDim = colBottom[0].count() / (m_nChannels * colBottom[0].shape(0));
if (m_blobSpaitalSumMultiplier.num_axes == 0 ||
    m_blobSpaitalSumMultiplier.shape(0) != nSpatialDim)
{
    rgSize[0] = nSpatialDim;
    m_blobSpaitalSumMultiplier.Reshape(rgSize);
    m_blobSpaitalSumMultiplier.SetData(1);
}

int nNumByChans = m_nChannels * colBottom[0].shape(0);
if (m_blobNumByChans.num_axes == 0 ||
    m_blobNumByChans.shape(0) != nNumByChans)
{
    rgSize[0] = nNumByChans;
    m_blobNumByChans.Reshape(rgSize);
    m_blobBatchSumMultiplier.SetData(1);
}
int N = colBottom[0].num;
int C = colBottom[0].channels;
int H = colBottom[0].height;
int W = colBottom[0].width;

m_cuda.SetTensorDesc(m_hFwdBottomDesc, N, C, H, W);
m_cuda.SetTensorDesc(m_hFwdTopDesc, N, C, H, W);
m_cuda.SetTensorDesc(m_hBwdBottomDesc, N, C, H, W);
m_cuda.SetTensorDesc(m_hBwdTopDesc, N, C, H, W);

m_blobMean.Reshape(1, C, 1, 1);
m_blobVariance.Reshape(1, C, 1, 1);
m_blobScaleOnes.Reshape(1, C, 1, 1);
m_blobBiasZeros.Reshape(1, C, 1, 1);

m_cuda.DeriveBatchNormDesc(m_hFwdScaleBiasMeanVarDesc, m_hFwdBottomDesc, m_hBwdScaleBiasMeanVarDesc, m_hBwdBottomDesc, m_mode);

if (colTop[0] == colBottom[0])
    // ...
long hBottomData = colBottom[0].gpu_data;
long hTopData = colTop[0].mutable_gpu_data;
int nNum = colBottom[0].shape(0);
int nSpatialDim = colBottom[0].count() / (m_nChannels * colBottom[0].shape(0));

if (colBottom[0] != colTop[0])
    m_cuda.copy(colBottom[0].count(), hBottomData, hTopData);

if (m_bUseGlobalStats)
{
    // Use the stored mean/variance estimates, normalized by the accumulated scale factor.
    // ...
    if (dfScaleFactor != 0)
        dfScaleFactor = 1.0 / dfScaleFactor;

    int nCount = m_blobVariance.count();
    m_cuda.scale(nCount, dfScaleFactor, m_colBlobs[0].gpu_data, m_blobMean.mutable_gpu_data);
    // ...
}
else
{
    // Compute the per-channel mean of the mini-batch: sum over the spatial
    // dimension first, then over the batch.
    m_cuda.gemv(false, m_nChannels * nNum, nSpatialDim, 1.0 / (nNum * nSpatialDim), hBottomData, m_blobSpaitalSumMultiplier.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
    m_cuda.gemv(true, nNum, m_nChannels, 1.0, m_blobNumByChans.gpu_data, m_blobBatchSumMultiplier.gpu_data, 0.0, m_blobMean.mutable_gpu_data);
}

// Subtract the mean: broadcast it back across the batch and spatial dimensions.
m_cuda.gemm(false, false, nNum, m_nChannels, 1, 1.0, m_blobBatchSumMultiplier.gpu_data, m_blobMean.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
m_cuda.gemm(false, false, m_nChannels * nNum, nSpatialDim, 1, -1.0, m_blobNumByChans.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 1.0, hTopData);

if (!m_bUseGlobalStats)
{
    // Accumulate the moving averages of the mean and (bias-corrected) variance.
    // ...
    dfVal *= m_dfMovingAverageFraction;
    // ...
    m_cuda.axpby(m_blobMean.count(), 1.0, m_blobMean.gpu_data, m_dfMovingAverageFraction, m_colBlobs[0].mutable_gpu_data);

    int nM = colBottom[0].count() / m_nChannels;
    double dfBiasCorrectionFactor = (nM > 1) ? ((double)nM / (double)(nM - 1)) : 1.0;
    m_cuda.axpby(m_blobVariance.count(), dfBiasCorrectionFactor, m_blobVariance.gpu_data, m_dfMovingAverageFraction, m_colBlobs[1].mutable_gpu_data);
}
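For reference, the two gemv calls together reduce the nNum x nChannels x nSpatialDim input to a per-channel mean. Below is a minimal CPU sketch of the equivalent computation (a hypothetical helper, not part of the layer):

// CPU equivalent of the two-stage gemv reduction above (hypothetical helper;
// rgData is laid out as [nNum, nChannels, nSpatialDim]).
static float[] ChannelMean(float[] rgData, int nNum, int nChannels, int nSpatialDim)
{
    float[] rgMean = new float[nChannels];

    for (int n = 0; n < nNum; n++)                 // second gemv: sum over the batch
    {
        for (int c = 0; c < nChannels; c++)
        {
            for (int s = 0; s < nSpatialDim; s++)  // first gemv: sum over spatial positions
                rgMean[c] += rgData[(n * nChannels + c) * nSpatialDim + s];
        }
    }

    for (int c = 0; c < nChannels; c++)
        rgMean[c] /= (nNum * nSpatialDim);         // the 1/(nNum*nSpatialDim) scaling in the first gemv

    return rgMean;
}

The two gemm calls then perform the reverse mapping, broadcasting the per-channel mean back over the batch and spatial dimensions so it can be subtracted element-wise.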
if (colBottom[0] != colTop[0])
    hTopDiff = colTop[0].gpu_diff;

long hBottomDiff = colBottom[0].mutable_gpu_diff;

if (m_bUseGlobalStats)
{
    // ...
}

long hTopData = m_blobXNorm.gpu_data;
int nNum = colBottom[0].shape()[0];
int nSpatialDim = colBottom[0].count() / (m_nChannels * colBottom[0].shape(0));

// sum(dE/dY .* Y) per channel: reduce over spatial positions, then over the batch.
m_cuda.mul(m_blobTemp.count(), hTopData, hTopDiff, hBottomDiff);
m_cuda.gemv(false, m_nChannels * nNum, nSpatialDim, 1.0, hBottomDiff, m_blobSpaitalSumMultiplier.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
m_cuda.gemv(true, nNum, m_nChannels, 1.0, m_blobNumByChans.gpu_data, m_blobBatchSumMultiplier.gpu_data, 0.0, m_blobMean.mutable_gpu_data);

// Broadcast sum(dE/dY .* Y) back across the batch and spatial dimensions...
m_cuda.gemm(false, false, nNum, m_nChannels, 1, 1.0, m_blobBatchSumMultiplier.gpu_data, m_blobMean.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
m_cuda.gemm(false, false, m_nChannels * nNum, nSpatialDim, 1, 1.0, m_blobNumByChans.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 0.0, hBottomDiff);

// ...and multiply by Y, giving sum(dE/dY .* Y) .* Y.
m_cuda.mul(m_blobTemp.count(), hTopData, hBottomDiff, hBottomDiff);

// sum(dE/dY) per channel, then broadcast and accumulate into hBottomDiff.
// ...
m_cuda.gemv(true, nNum, m_nChannels, 1.0, m_blobNumByChans.gpu_data, m_blobBatchSumMultiplier.gpu_data, 0.0, m_blobMean.mutable_gpu_data);

m_cuda.gemm(false, false, nNum, m_nChannels, 1, 1.0, m_blobBatchSumMultiplier.gpu_data, m_blobMean.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
m_cuda.gemm(false, false, nNum * m_nChannels, nSpatialDim, 1, 1.0, m_blobNumByChans.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 1.0, hBottomDiff);

// Final combination: bottom_diff = top_diff - (accumulated corrections) / (nNum * nSpatialDim).
m_cuda.axpby(m_blobTemp.count(), 1.0, hTopDiff, -1.0 / (double)(nNum * nSpatialDim), hBottomDiff);
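The mul/gemv/gemm/axpby sequence above implements the standard batch-norm gradient used by the BVLC Caffe native implementation this code tracks: given Y = (X - mean(X)) / sqrt(var(X) + eps), the input gradient is

dE/dX = (dE/dY - mean(dE/dY) - mean(dE/dY .* Y) .* Y) ./ sqrt(var(X) + eps)

where the means are taken over both the batch and spatial dimensions.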
long hBottomData = colBottom[0].gpu_data;
long hTopData = colTop[0].mutable_gpu_data;

if (colTop[0] == colBottom[0])
    // ...

double dfEps = m_dfEps;

if (!m_bUseGlobalStats)
{
    long hSaveMean = m_blobMean.mutable_gpu_data;
    // ...
    double dfFactor = 1.0;

    if (m_nIteration > 0)
        dfFactor = 1 - m_dfMovingAverageFraction;

    // Training-mode cuDNN forward (the call's leading arguments are elided in this listing):
    // ...
        m_hFwdBottomDesc, hBottomData,
        m_hFwdTopDesc, hTopData,
        m_hFwdScaleBiasMeanVarDesc, hScaleData, hBiasData,
        dfFactor, hGlobalMean, hGlobalVar, dfEps, hSaveMean, hSaveVar, true);
}
else
{
    // Inference-mode cuDNN forward using the stored global statistics:
    // ...
        m_hFwdBottomDesc, hBottomData,
        m_hFwdTopDesc, hTopData,
        m_hFwdScaleBiasMeanVarDesc, hScaleData, hBiasData,
        1.0, hGlobalMean, hGlobalVar, dfEps, 0, 0, false);
}

if (colTop[0] == colBottom[0])
{
    // Preserve the original bottom data for backward, then publish the privately-computed top.
    m_blobPrivateBottom.CopyFrom(colBottom[0]);
    colTop[0].CopyFrom(m_blobPrivateTop);
}
long hTopDiff = colTop[0].gpu_diff;
long hBottomData = colBottom[0].gpu_data;
long hBottomDiff = colBottom[0].mutable_gpu_diff;
double dfEps = m_dfEps;
long hMean = (m_bUseGlobalStats) ? 0 : m_blobMean.gpu_data;
long hVariance = (m_bUseGlobalStats) ? 0 : m_blobVariance.gpu_data;

if (colTop[0] == colBottom[0])
{
    // For in-place computation, use the private copies of the top diff and bottom data
    // saved during the forward pass.
    m_blobPrivateTop.CopyFrom(colTop[0], true);
    hTopDiff = m_blobPrivateTop.gpu_diff;
    hBottomData = m_blobPrivateBottom.gpu_data;
}

// cuDNN batch norm backward (the call's leading arguments are elided in this listing):
// ...
    m_hBwdBottomDesc, hBottomData,
    m_hBwdBottomDesc, hTopDiff,
    m_hBwdBottomDesc, hBottomDiff,
    m_hBwdScaleBiasMeanVarDesc, hScaleData, hScaleDiff, hBiasDiff,
    dfEps, hMean, hVariance);
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
The Blob is the main holder of data that moves through the Layers of the Net.
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
Blob(CudaDnn< T > cuda, Log log, bool bIncludeDiff=true, bool bUseHalfSize=false)
The Blob constructor.
int num_axes
Returns the number of axes in the Blob.
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
int count()
Returns the total number of items in the Blob.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Abstract Filler class used to fill blobs with values.
void Fill(Blob< T > b)
Fill the blob with values based on the actual filler used.
static Filler< T > Create(CudaDnn< T > cuda, Log log, FillerParameter p)
Create a new Filler instance.
The BatchNormLayer normalizes the input to have 0-mean and/or unit (1) variance across the batch....
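In formula form, the layer computes the standard batch-norm transform y = (x - mean(x)) / sqrt(var(x) + eps), where the mean and variance are taken per channel over the batch and spatial dimensions; when scale_bias is set, a learned per-channel scale and bias are applied to y afterward.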
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override int ExactNumBottomBlobs
Returns the exact number of bottom (input) Blobs required: input
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Perform the backward computation.
override int ExactNumTopBlobs
Returns the exact number of top (output) Blobs required: batchnorm
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
override void dispose()
Releases all GPU and host resources used by the Layer.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Perform the forward computation.
void forward_cuda(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Perform the forward computation using the native Cuda version.
BatchNormLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
Constructor.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override bool ReInitializeParameters(WEIGHT_TARGET target)
Re-initialize the parameters of the layer.
void backward_cudnn(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Perform the backward computation using cuDNN.
void backward_cuda(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Perform the backward computation using the native Cuda version.
void forward_cudnn(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Perform the forward computation using cuDNN.
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
T m_tZero
Specifies a generic type equal to 0.0.
T m_tOne
Specifies a generic type equal to 1.0.
bool m_bUseHalfSize
Specifies that the half size of the top (if any) should be converted to the base size.
double convertD(T df)
Converts a generic to a double value.
virtual bool reshapeNeeded(BlobCollection< T > colBottom, BlobCollection< T > colTop, bool bReset=true)
Tests the shapes of both the bottom and top blobs and if they are the same as the previous sizing,...
Phase m_phase
Specifies the Phase under which the Layer is run.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
LayerParameter.LayerType m_type
Specifies the Layer type.
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
bool m_bNetReshapeRequest
Specifies whether the reshape is requested from a Net.Reshape call or not.
FillerParameter bias_filler
Specifies the bias filler used to fill the bias value. If null, a constant(0) filler is used.
bool scale_bias
Specifies to use the scale and bias terms, otherwise the scale = 1 and bias = 0 are used to form an i...
double eps
Specifies a small value to add to the variance estimate so that we don't divide by zero.
double moving_average_fraction
Specifies how much the moving average decays each iteration. Smaller values make the moving average d...
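Concretely, the axpby calls in forward_cuda above update each running statistic as S_new = S_batch + moving_average_fraction * S_old, so a fraction closer to 1 retains more history each iteration.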
FillerParameter scale_filler
Specifies the scale filler used to fill the scale value. If null, a constant(1) filler is used.
bool useCudnn()
Queries whether or not to use NVIDIA's cuDNN.
bool? use_global_stats
If false, normalization is performed over the current mini-batch and global statistics are accumulate...
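Tying the BatchNormParameter members above together, a minimal configuration sketch (the LayerParameter constructor form and the float type argument are assumptions; only members documented here are used):

// Hypothetical setup sketch; 'cuda' and 'log' are assumed to exist already.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.BATCHNORM);
p.name = "bn1";
p.batch_norm_param.eps = 1e-5;                       // keeps the variance denominator non-zero
p.batch_norm_param.moving_average_fraction = 0.999;  // slow decay of the running statistics
p.batch_norm_param.use_global_stats = null;          // left unset: behavior follows the running Phase (assumption)
p.batch_norm_param.scale_bias = true;                // learn per-channel scale/bias (cuDNN path)
BatchNormLayer<float> layer = new BatchNormLayer<float>(cuda, log, p);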
Specifies the filler parameters used to create each Filler.
Specifies the base parameter for all layers.
List< ParamSpec > parameters
Specifies the ParamSpec parameters of the LayerParameter.
string name
Specifies the name of this LayerParameter.
bool use_halfsize
Specifies whether or not to use half-sized memory.
BatchNormParameter batch_norm_param
Returns the parameter set when initialized with LayerType.BATCHNORM
LayerType
Specifies the layer type.
Specifies training parameters (multipliers on global learning constants, and the name of other settin...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Phase
Defines the Phase under which to run a Net.
The MyCaffe.common namespace contains common MyCaffe classes.
BLOB_TYPE
Defines the type of data held by a given Blob.
BATCHNORM_MODE
Specifies the cuDNN batch norm mode to use.
WEIGHT_TARGET
Defines the type of weight to target in re-initializations.
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...