using System.Collections.Generic;
bool m_bUseGlobalStats;
double m_dfMovingAverageFraction;
Blob<T> m_blobBatchSumMultiplier;
Blob<T> m_blobSpaitalSumMultiplier;
bool m_bScaleBias = false;
long m_hFwdBottomDesc = 0;
long m_hFwdTopDesc = 0;
long m_hBwdBottomDesc = 0;
long m_hBwdTopDesc = 0;
long m_hFwdScaleBiasMeanVarDesc = 0;
long m_hBwdScaleBiasMeanVarDesc = 0;
Blob<T> m_blobPrivateTop = null;
Blob<T> m_blobPrivateBottom = null;
const double CUDNN_BN_MIN_EPSILON = 1e-5;
m_blobMean = new common.Blob<T>(cuda, log);
m_blobVariance = new common.Blob<T>(cuda, log);
m_blobTemp = new common.Blob<T>(cuda, log);
m_blobXNorm = new common.Blob<T>(cuda, log);
m_blobBatchSumMultiplier = new common.Blob<T>(cuda, log);
m_blobNumByChans = new common.Blob<T>(cuda, log);
m_blobSpaitalSumMultiplier = new common.Blob<T>(cuda, log);

m_blobPrivateTop = new Blob<T>(cuda, log);
m_blobPrivateBottom = new Blob<T>(cuda, log);
m_blobScaleOnes = new Blob<T>(cuda, log);
m_blobBiasZeros = new Blob<T>(cuda, log);
m_blobMean.Dispose();
m_blobBatchSumMultiplier.Dispose();
m_blobSpaitalSumMultiplier.Dispose();

if (m_blobPrivateTop != null)
{
    m_blobPrivateTop.Dispose();
    m_blobPrivateTop = null;
}

if (m_blobPrivateBottom != null)
{
    m_blobPrivateBottom.Dispose();
    m_blobPrivateBottom = null;
}

if (m_blobScaleOnes != null)
{
    m_blobScaleOnes.Dispose();
    m_blobScaleOnes = null;
}

if (m_blobBiasZeros != null)
{
    m_blobBiasZeros.Dispose();
    m_blobBiasZeros = null;
}

if (m_hBwdBottomDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hBwdBottomDesc);
    m_hBwdBottomDesc = 0;
}

if (m_hBwdScaleBiasMeanVarDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hBwdScaleBiasMeanVarDesc);
    m_hBwdScaleBiasMeanVarDesc = 0;
}

if (m_hBwdTopDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hBwdTopDesc);
    m_hBwdTopDesc = 0;
}

if (m_hFwdBottomDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hFwdBottomDesc);
    m_hFwdBottomDesc = 0;
}

if (m_hFwdScaleBiasMeanVarDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hFwdScaleBiasMeanVarDesc);
    m_hFwdScaleBiasMeanVarDesc = 0;
}

if (m_hFwdTopDesc != 0)
{
    m_cuda.FreeTensorDesc(m_hFwdTopDesc);
    m_hFwdTopDesc = 0;
}

m_cuda.FreeCuDNN(m_hCuDnn);
col.Add(m_blobVariance);
col.Add(m_blobPrivateBottom);
col.Add(m_blobPrivateTop);
col.Add(m_blobScaleOnes);
col.Add(m_blobBiasZeros);
col.Add(m_blobXNorm);
col.Add(m_blobBatchSumMultiplier);
col.Add(m_blobNumByChans);
col.Add(m_blobSpaitalSumMultiplier);
base.ReInitializeParameters(target);

for (int i = 0; i < 3; i++)
    // ...

if (colBottom[0].num_axes == 1)
    m_nChannels = 1;
else
    m_nChannels = colBottom[0].shape(1);

if (m_bScaleBias && !bUseCuDnn)
    m_bScaleBias = false;

List<int> rgSize = new List<int>();
rgSize.Add(m_nChannels);

for (int i = 0; i < 3; i++)
    // ...

for (int i = 3; i < 5; i++)
    // ...
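The two initialization loops above are consistent with the learnable-blob layout of the BVLC Caffe layer this code tracks; the sketch below states that layout as an assumption, since the loop bodies are elided in this listing:

// Assumed m_colBlobs layout (mirrors BVLC Caffe's BatchNormLayer; not confirmed by this excerpt):
// m_colBlobs[0] - running mean          (zeroed by the i = 0..2 loop)
// m_colBlobs[1] - running variance      (zeroed by the i = 0..2 loop)
// m_colBlobs[2] - moving-average factor (zeroed by the i = 0..2 loop)
// m_colBlobs[3] - per-channel scale     (filled by scale_filler in the i = 3..4 loop)
// m_colBlobs[4] - per-channel bias      (filled by bias_filler in the i = 3..4 loop)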
int nChannels = colBottom[0].channels;
List<int> rgShape = new List<int>() { 1, nChannels, 1, 1 };
m_blobScaleOnes.Reshape(rgShape);
m_blobBiasZeros.Reshape(rgShape);

m_hCuDnn = m_cuda.CreateCuDNN();
m_hFwdBottomDesc = m_cuda.CreateTensorDesc();
m_hFwdTopDesc = m_cuda.CreateTensorDesc();
m_hFwdScaleBiasMeanVarDesc = m_cuda.CreateTensorDesc();
m_hBwdBottomDesc = m_cuda.CreateTensorDesc();
m_hBwdTopDesc = m_cuda.CreateTensorDesc();
m_hBwdScaleBiasMeanVarDesc = m_cuda.CreateTensorDesc();

// cuDNN requires eps >= CUDNN_BN_MIN_EPSILON, so clamp from below.
m_dfEps = Math.Max(m_dfEps, CUDNN_BN_MIN_EPSILON);

m_blobMean.Reshape(rgShape);
m_blobVariance.Reshape(rgShape);

if (colBottom[0] == colTop[0])
    // ...
m_nChannels = colBottom[0].channels;

if (colBottom[0].num_axes >= 1)
    m_log.CHECK_EQ(colBottom[0].shape(1), m_nChannels, "The colBottom[0].shape(1) should equal the channel count '" + m_nChannels.ToString() + "'.");

List<int> rgSize = new List<int>();
rgSize.Add(m_nChannels);

m_blobMean.Reshape(rgSize);
m_blobVariance.Reshape(rgSize);

rgSize[0] = colBottom[0].shape(0);
m_blobBatchSumMultiplier.Reshape(rgSize);
int nSpatialDim = colBottom[0].count() / (m_nChannels * colBottom[0].shape(0));
if (m_blobSpaitalSumMultiplier.num_axes == 0 ||
    m_blobSpaitalSumMultiplier.shape(0) != nSpatialDim)
{
    rgSize[0] = nSpatialDim;
    m_blobSpaitalSumMultiplier.Reshape(rgSize);
    m_blobSpaitalSumMultiplier.SetData(1);
}

int nNumByChans = m_nChannels * colBottom[0].shape(0);
if (m_blobNumByChans.num_axes == 0 ||
    m_blobNumByChans.shape(0) != nNumByChans)
{
    rgSize[0] = nNumByChans;
    m_blobNumByChans.Reshape(rgSize);
    m_blobBatchSumMultiplier.SetData(1);
}
int N = colBottom[0].num;
int C = colBottom[0].channels;
int H = colBottom[0].height;
int W = colBottom[0].width;

m_cuda.SetTensorDesc(m_hFwdBottomDesc, N, C, H, W);
m_cuda.SetTensorDesc(m_hFwdTopDesc, N, C, H, W);
m_cuda.SetTensorDesc(m_hBwdBottomDesc, N, C, H, W);
m_cuda.SetTensorDesc(m_hBwdTopDesc, N, C, H, W);

m_blobMean.Reshape(1, C, 1, 1);
m_blobVariance.Reshape(1, C, 1, 1);
m_blobScaleOnes.Reshape(1, C, 1, 1);
m_blobBiasZeros.Reshape(1, C, 1, 1);

m_cuda.DeriveBatchNormDesc(m_hFwdScaleBiasMeanVarDesc, m_hFwdBottomDesc, m_hBwdScaleBiasMeanVarDesc, m_hBwdBottomDesc, m_mode);

if (colTop[0] == colBottom[0])
    // ...
long hBottomData = colBottom[0].gpu_data;
long hTopData = colTop[0].mutable_gpu_data;
int nNum = colBottom[0].shape(0);
int nSpatialDim = colBottom[0].count() / (m_nChannels * colBottom[0].shape(0));

if (colBottom[0] != colTop[0])
    m_cuda.copy(colBottom[0].count(), hBottomData, hTopData);

if (m_bUseGlobalStats)
{
    // Use the stored mean/variance estimates, normalized by the accumulated scale factor.
    // ...
    if (dfScaleFactor != 0)
        dfScaleFactor = 1.0 / dfScaleFactor;

    int nCount = m_blobVariance.count();
    m_cuda.scale(nCount, dfScaleFactor, m_colBlobs[0].gpu_data, m_blobMean.mutable_gpu_data);
    // ...
}
else
{
    // Compute the per-channel mean of the mini-batch: sum over the spatial
    // dimension first, then over the batch.
    m_cuda.gemv(false, m_nChannels * nNum, nSpatialDim, 1.0 / (nNum * nSpatialDim), hBottomData, m_blobSpaitalSumMultiplier.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
    m_cuda.gemv(true, nNum, m_nChannels, 1.0, m_blobNumByChans.gpu_data, m_blobBatchSumMultiplier.gpu_data, 0.0, m_blobMean.mutable_gpu_data);
}

// Subtract the mean: broadcast it back across the batch and spatial dimensions.
m_cuda.gemm(false, false, nNum, m_nChannels, 1, 1.0, m_blobBatchSumMultiplier.gpu_data, m_blobMean.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
m_cuda.gemm(false, false, m_nChannels * nNum, nSpatialDim, 1, -1.0, m_blobNumByChans.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 1.0, hTopData);

if (!m_bUseGlobalStats)
{
    // Accumulate the moving averages of the mean and (bias-corrected) variance.
    // ...
    dfVal *= m_dfMovingAverageFraction;
    // ...
    m_cuda.axpby(m_blobMean.count(), 1.0, m_blobMean.gpu_data, m_dfMovingAverageFraction, m_colBlobs[0].mutable_gpu_data);

    int nM = colBottom[0].count() / m_nChannels;
    double dfBiasCorrectionFactor = (nM > 1) ? ((double)nM / (double)(nM - 1)) : 1.0;
    m_cuda.axpby(m_blobVariance.count(), dfBiasCorrectionFactor, m_blobVariance.gpu_data, m_dfMovingAverageFraction, m_colBlobs[1].mutable_gpu_data);
}
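For reference, the two gemv calls together reduce the nNum x nChannels x nSpatialDim input to a per-channel mean. Below is a minimal CPU sketch of the equivalent computation (a hypothetical helper, not part of the layer):

// CPU equivalent of the two-stage gemv reduction above (hypothetical helper;
// rgData is laid out as [nNum, nChannels, nSpatialDim]).
static float[] ChannelMean(float[] rgData, int nNum, int nChannels, int nSpatialDim)
{
    float[] rgMean = new float[nChannels];

    for (int n = 0; n < nNum; n++)                 // second gemv: sum over the batch
    {
        for (int c = 0; c < nChannels; c++)
        {
            for (int s = 0; s < nSpatialDim; s++)  // first gemv: sum over spatial positions
                rgMean[c] += rgData[(n * nChannels + c) * nSpatialDim + s];
        }
    }

    for (int c = 0; c < nChannels; c++)
        rgMean[c] /= (nNum * nSpatialDim);         // the 1/(nNum*nSpatialDim) scaling in the first gemv

    return rgMean;
}

The two gemm calls then perform the reverse mapping, broadcasting the per-channel mean back over the batch and spatial dimensions so it can be subtracted element-wise.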
if (colBottom[0] != colTop[0])
    hTopDiff = colTop[0].gpu_diff;

long hBottomDiff = colBottom[0].mutable_gpu_diff;

if (m_bUseGlobalStats)
{
    // ...
}

long hTopData = m_blobXNorm.gpu_data;
int nNum = colBottom[0].shape()[0];
int nSpatialDim = colBottom[0].count() / (m_nChannels * colBottom[0].shape(0));

// sum(dE/dY .* Y) per channel: reduce over spatial positions, then over the batch.
m_cuda.mul(m_blobTemp.count(), hTopData, hTopDiff, hBottomDiff);
m_cuda.gemv(false, m_nChannels * nNum, nSpatialDim, 1.0, hBottomDiff, m_blobSpaitalSumMultiplier.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
m_cuda.gemv(true, nNum, m_nChannels, 1.0, m_blobNumByChans.gpu_data, m_blobBatchSumMultiplier.gpu_data, 0.0, m_blobMean.mutable_gpu_data);

// Broadcast sum(dE/dY .* Y) back across the batch and spatial dimensions...
m_cuda.gemm(false, false, nNum, m_nChannels, 1, 1.0, m_blobBatchSumMultiplier.gpu_data, m_blobMean.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
m_cuda.gemm(false, false, m_nChannels * nNum, nSpatialDim, 1, 1.0, m_blobNumByChans.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 0.0, hBottomDiff);

// ...and multiply by Y, giving sum(dE/dY .* Y) .* Y.
m_cuda.mul(m_blobTemp.count(), hTopData, hBottomDiff, hBottomDiff);

// sum(dE/dY) per channel, then broadcast and accumulate into hBottomDiff.
// ...
m_cuda.gemv(true, nNum, m_nChannels, 1.0, m_blobNumByChans.gpu_data, m_blobBatchSumMultiplier.gpu_data, 0.0, m_blobMean.mutable_gpu_data);

m_cuda.gemm(false, false, nNum, m_nChannels, 1, 1.0, m_blobBatchSumMultiplier.gpu_data, m_blobMean.gpu_data, 0.0, m_blobNumByChans.mutable_gpu_data);
m_cuda.gemm(false, false, nNum * m_nChannels, nSpatialDim, 1, 1.0, m_blobNumByChans.gpu_data, m_blobSpaitalSumMultiplier.gpu_data, 1.0, hBottomDiff);

// Final combination: bottom_diff = top_diff - (accumulated corrections) / (nNum * nSpatialDim).
m_cuda.axpby(m_blobTemp.count(), 1.0, hTopDiff, -1.0 / (double)(nNum * nSpatialDim), hBottomDiff);
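The mul/gemv/gemm/axpby sequence above implements the standard batch-norm gradient used by the BVLC Caffe native implementation this code tracks: given Y = (X - mean(X)) / sqrt(var(X) + eps), the input gradient is

dE/dX = (dE/dY - mean(dE/dY) - mean(dE/dY .* Y) .* Y) ./ sqrt(var(X) + eps)

where the means are taken over both the batch and spatial dimensions.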
long hBottomData = colBottom[0].gpu_data;
long hTopData = colTop[0].mutable_gpu_data;

if (colTop[0] == colBottom[0])
    // ...

double dfEps = m_dfEps;

if (!m_bUseGlobalStats)
{
    long hSaveMean = m_blobMean.mutable_gpu_data;
    // ...
    double dfFactor = 1.0;

    if (m_nIteration > 0)
        dfFactor = 1 - m_dfMovingAverageFraction;

    // Training-mode cuDNN forward (the call's leading arguments are elided in this listing):
    // ...
        m_hFwdBottomDesc, hBottomData,
        m_hFwdTopDesc, hTopData,
        m_hFwdScaleBiasMeanVarDesc, hScaleData, hBiasData,
        dfFactor, hGlobalMean, hGlobalVar, dfEps, hSaveMean, hSaveVar, true);
}
else
{
    // Inference-mode cuDNN forward using the stored global statistics:
    // ...
        m_hFwdBottomDesc, hBottomData,
        m_hFwdTopDesc, hTopData,
        m_hFwdScaleBiasMeanVarDesc, hScaleData, hBiasData,
        1.0, hGlobalMean, hGlobalVar, dfEps, 0, 0, false);
}

if (colTop[0] == colBottom[0])
{
    // Preserve the original bottom data for backward, then publish the privately-computed top.
    m_blobPrivateBottom.CopyFrom(colBottom[0]);
    colTop[0].CopyFrom(m_blobPrivateTop);
}
long hTopDiff = colTop[0].gpu_diff;
long hBottomData = colBottom[0].gpu_data;
long hBottomDiff = colBottom[0].mutable_gpu_diff;
double dfEps = m_dfEps;
long hMean = (m_bUseGlobalStats) ? 0 : m_blobMean.gpu_data;
long hVariance = (m_bUseGlobalStats) ? 0 : m_blobVariance.gpu_data;

if (colTop[0] == colBottom[0])
{
    // For in-place computation, use the private copies of the top diff and bottom data
    // saved during the forward pass.
    m_blobPrivateTop.CopyFrom(colTop[0], true);
    hTopDiff = m_blobPrivateTop.gpu_diff;
    hBottomData = m_blobPrivateBottom.gpu_data;
}

// cuDNN batch norm backward (the call's leading arguments are elided in this listing):
// ...
    m_hBwdBottomDesc, hBottomData,
    m_hBwdBottomDesc, hTopDiff,
    m_hBwdBottomDesc, hBottomDiff,
    m_hBwdScaleBiasMeanVarDesc, hScaleData, hScaleDiff, hBiasDiff,
    dfEps, hMean, hVariance);
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
The Blob is the main holder of data that moves through the Layers of the Net.
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
Blob(CudaDnn< T > cuda, Log log, bool bIncludeDiff=true, bool bUseHalfSize=false)
The Blob constructor.
int num_axes
Returns the number of axes in the Blob.
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
int count()
Returns the total number of items in the Blob.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Abstract Filler class used to fill blobs with values.
void Fill(Blob< T > b)
Fill the blob with values based on the actual filler used.
static Filler< T > Create(CudaDnn< T > cuda, Log log, FillerParameter p)
Create a new Filler instance.
The BatchNormLayer normalizes the input to have 0-mean and/or unit (1) variance across the batch....
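In formula form, the layer computes the standard batch-norm transform y = (x - mean(x)) / sqrt(var(x) + eps), where the mean and variance are taken per channel over the batch and spatial dimensions; when scale_bias is set, a learned per-channel scale and bias are applied to y afterward.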
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override int ExactNumBottomBlobs
Returns the exact number of bottom (input) Blobs required: input
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Perform the backward computation.
override int ExactNumTopBlobs
Returns the exact number of top (output) Blobs required: batchnorm
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
override void dispose()
Releases all GPU and host resources used by the Layer.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Perform the forward computation.
void forward_cuda(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Perform the forward computation using the native Cuda version.
BatchNormLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
Constructor.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override bool ReInitializeParameters(WEIGHT_TARGET target)
Re-initialize the parameters of the layer.
void backward_cudnn(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Perform the backward computation using cuDNN.
void backward_cuda(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Perform the backward computation using the native Cuda version.
void forward_cudnn(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Perform the forward computation using cuDNN.
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
T m_tZero
Specifies a generic type equal to 0.0.
T m_tOne
Specifies a generic type equal to 1.0.
bool m_bUseHalfSize
Specifies that the half size of the top (if any) should be converted to the base size.
double convertD(T df)
Converts a generic to a double value.
virtual bool reshapeNeeded(BlobCollection< T > colBottom, BlobCollection< T > colTop, bool bReset=true)
Tests the shapes of both the bottom and top blobs and if they are the same as the previous sizing,...
Phase m_phase
Specifies the Phase under which the Layer is run.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
LayerParameter.LayerType m_type
Specifies the Layer type.
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
bool m_bNetReshapeRequest
Specifies whether the reshape is requested from a Net.Reshape call or not.
FillerParameter bias_filler
Specifies the bias filler used to fill the bias value. If null, a constant(0) filler is used.
bool scale_bias
Specifies to use the scale and bias terms, otherwise the scale = 1 and bias = 0 are used to form an i...
double eps
Specifies a small value to add to the variance estimate so that we don't divide by zero.
double moving_average_fraction
Specifies how much the moving average decays each iteration. Smaller values make the moving average d...
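Concretely, the axpby calls in forward_cuda above update each running statistic as S_new = S_batch + moving_average_fraction * S_old, so a fraction closer to 1 retains more history each iteration.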
FillerParameter scale_filler
Specifies the scale filler used to fill the scale value. If null, a constant(1) filler is used.
bool useCudnn()
Queries whether or not to use NVIDIA's cuDNN.
bool? use_global_stats
If false, normalization is performed over the current mini-batch and global statistics are accumulate...
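Tying the BatchNormParameter members above together, a minimal configuration sketch (the LayerParameter constructor form and the float type argument are assumptions; only members documented here are used):

// Hypothetical setup sketch; 'cuda' and 'log' are assumed to exist already.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.BATCHNORM);
p.name = "bn1";
p.batch_norm_param.eps = 1e-5;                       // keeps the variance denominator non-zero
p.batch_norm_param.moving_average_fraction = 0.999;  // slow decay of the running statistics
p.batch_norm_param.use_global_stats = null;          // left unset: behavior follows the running Phase (assumption)
p.batch_norm_param.scale_bias = true;                // learn per-channel scale/bias (cuDNN path)
BatchNormLayer<float> layer = new BatchNormLayer<float>(cuda, log, p);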
Specifies the filler parameters used to create each Filler.
Specifies the base parameter for all layers.
List< ParamSpec > parameters
Specifies the ParamSpec parameters of the LayerParameter.
string name
Specifies the name of this LayerParameter.
bool use_halfsize
Specifies whether or not to use half-sized memory.
BatchNormParameter batch_norm_param
Returns the parameter set when initialized with LayerType.BATCHNORM
LayerType
Specifies the layer type.
Specifies training parameters (multipliers on global learning constants, and the name of other settin...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Phase
Defines the Phase under which to run a Net.
The MyCaffe.common namespace contains common MyCaffe classes.
BLOB_TYPE
Defines the type of data held by a given Blob.
BATCHNORM_MODE
Specifies the cuDNN batch norm mode to use.
WEIGHT_TARGET
Defines the type of weight to target in re-initializations.
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...