mycaffe/html/_softmax_layer_8cs_source.html

using System;

using System.Collections.Generic;

using System.Diagnostics;

using System.Linq;

using System.Text;

using MyCaffe.basecode;

using MyCaffe.common;

using MyCaffe.param;


namespace MyCaffe.layers

{

    /// <summary>
    /// The SoftmaxLayer computes the softmax function along a configurable axis.
    /// Depending on the SoftmaxParameter, the work is performed either by NVIDIA cuDNN
    /// (Engine.CUDNN) or by the native CUDA kernels exposed through CudaDnn (Engine.CAFFE).
    /// </summary>
    /// <typeparam name="T">Specifies the element type shared with Layer, Blob and CudaDnn.</typeparam>
    public class SoftmaxLayer<T> : Layer<T>
    {
        // Product of all dimensions that precede the softmax axis.
        int m_nOuterNum;
        // Product of all dimensions that follow the softmax axis.
        int m_nInnerNum;
        // Canonical (non-negative) index of the axis the softmax is computed over.
        int m_nSoftmaxAxis;
        // Scratch blob used by the Engine.CAFFE path only (per-channel max / sum / dot values).
        Blob<T> m_blobScale;
        // Algorithm actually in effect: 'algorithm', or 'algorithm_train' when set and training.
        SOFTMAX_ALGORITHM m_algorithm = SOFTMAX_ALGORITHM.DEFAULT;
        // cuDNN softmax mode, re-evaluated on every Reshape.
        SOFTMAX_MODE m_mode = SOFTMAX_MODE.CHANNEL;
        // Shape of the bottom blob with the softmax axis collapsed to 1 (Engine.CAFFE path only).
        List<int> m_rgScaleDims = null;

        // cuDNN handles; a value of 0 means 'not created'.
        long m_hCudnn = 0;
        long m_hBottomDesc = 0;
        long m_hTopDesc = 0;

        /// <summary>
        /// The SoftmaxLayer constructor.
        /// </summary>
        /// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
        /// <param name="log">Specifies the Log for output.</param>
        /// <param name="p">Specifies the LayerParameter describing the Layer (uses softmax_param).</param>
        public SoftmaxLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
            : base(cuda, log, p)
        {
            m_type = LayerParameter.LayerType.SOFTMAX;

            m_blobScale = new Blob<T>(cuda, log);
            m_blobScale.Name = m_param.name + " scale";

            setup_internal_blobs(m_colInternalBlobs);
        }

        /// <summary>
        /// Releases all GPU and host resources used by the Layer.
        /// </summary>
        protected override void dispose()
        {
            // Each handle is zeroed after release so that a repeated call does not free it twice.
            if (m_hCudnn != 0)
            {
                m_cuda.FreeCuDNN(m_hCudnn);
                m_hCudnn = 0;
            }

            if (m_hBottomDesc != 0)
            {
                m_cuda.FreeTensorDesc(m_hBottomDesc);
                m_hBottomDesc = 0;
            }

            if (m_hTopDesc != 0)
            {
                m_cuda.FreeTensorDesc(m_hTopDesc);
                m_hTopDesc = 0;
            }

            m_blobScale.Dispose();
            base.dispose();
        }

        /// <summary>
        /// Adds the internal blobs used by this layer to the collection provided.
        /// </summary>
        /// <param name="col">Specifies the collection to fill; left untouched when it already has items.</param>
        protected override void setup_internal_blobs(BlobCollection<T> col)
        {
            if (col.Count > 0)
                return;

            // The scale blob is only used by the Engine.CAFFE path.
            if (!m_param.softmax_param.useCudnn())
            {
                col.Add(m_blobScale);
            }
        }

        /// <summary>
        /// Returns the minimum number of bottom (input) Blobs: input.
        /// </summary>
        public override int MinBottomBlobs
        {
            get { return 1; }
        }

        /// <summary>
        /// Returns the maximum number of bottom (input) Blobs: input, target (ignored).
        /// </summary>
        public override int MaxBottomBlobs
        {
            get { return 2; }
        }

        /// <summary>
        /// Returns the exact number of required top (output) Blobs: softmax.
        /// </summary>
        public override int ExactNumTopBlobs
        {
            get { return 1; }
        }

        /// <summary>
        /// Setup the layer to run in either Engine.CAFFE or Engine.CUDNN mode.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            m_algorithm = m_param.softmax_param.algorithm;

            // Only warn when the unsupported ACCURATE algorithm was actually requested.
            if (m_param.softmax_param.engine == EngineParameter.Engine.CAFFE &&
                m_algorithm == SOFTMAX_ALGORITHM.ACCURATE)
            {
                m_log.WriteLine("WARNING: SoftmaxLayer: Caffe mode does not support the ACCURATE algorithm, the default FAST algorithm will be used instead.");
            }

            if (!m_param.softmax_param.useCudnn())
                return;

            // Initialize cuDNN
            m_hCudnn = m_cuda.CreateCuDNN();
            m_hBottomDesc = m_cuda.CreateTensorDesc();
            m_hTopDesc = m_cuda.CreateTensorDesc();
        }

        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            m_nSoftmaxAxis = colBottom[0].CanonicalAxisIndex(m_param.softmax_param.axis);

            // The training phase may override the algorithm configured for running/testing.
            if (m_phase == Phase.TRAIN && m_param.softmax_param.algorithm_train.HasValue)
                m_algorithm = m_param.softmax_param.algorithm_train.Value;

            colTop[0].ReshapeLike(colBottom[0]);

            m_nOuterNum = colBottom[0].count(0, m_nSoftmaxAxis);
            m_nInnerNum = colBottom[0].count(m_nSoftmaxAxis + 1);

            if (!m_param.softmax_param.useCudnn())
            {
                shareLayerBlob(m_blobScale, colBottom[0].shape());
                // NOTE(review): the scale blob is first sized like the bottom, presumably so that
                // the LOG backward pass can temporarily expand it to full size - confirm.
                m_blobScale.ReshapeLike(colBottom[0]);

                m_rgScaleDims = Utility.Clone<int>(colBottom[0].shape());
                m_rgScaleDims[m_nSoftmaxAxis] = 1;

                m_blobScale.Reshape(m_rgScaleDims);

                return;
            }

            // Describe the data to cuDNN as (outer, softmax-axis, inner, 1).
            int nN = m_nOuterNum;
            int nK = colBottom[0].shape(m_nSoftmaxAxis);
            int nH = m_nInnerNum;
            int nW = 1;

            // Select the mode on every reshape so that an INSTANCE mode chosen for an earlier
            // shape is not carried over to a later shape that has an inner dimension.
            m_mode = (nH == 1 && nW == 1) ? SOFTMAX_MODE.INSTANCE : SOFTMAX_MODE.CHANNEL;

            m_cuda.SetTensorDesc(m_hBottomDesc, nN, nK, nH, nW);
            m_cuda.SetTensorDesc(m_hTopDesc, nN, nK, nH, nW);
        }

        /// <summary>
        /// Computes the forward calculation using either the Engine.CAFFE or Engine.CUDNN mode.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        protected override void forward(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            if (!m_param.softmax_param.useCudnn())
                forward_cuda(colBottom, colTop);
            else
                forward_cudnn(colBottom, colTop);
        }

        /// <summary>
        /// Computes the error gradient w.r.t. the inputs using either the Engine.CAFFE or Engine.CUDNN mode.
        /// </summary>
        /// <param name="colTop">Specifies the collection of top (output) Blobs whose diff holds the incoming gradient.</param>
        /// <param name="rgbPropagateDown">Specifies, per bottom, whether to propagate the gradient (not consulted by this layer).</param>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs whose diff receives the gradient.</param>
        protected override void backward(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            if (!m_param.softmax_param.useCudnn())
                backward_cuda(colTop, rgbPropagateDown, colBottom);
            else
                backward_cudnn(colTop, rgbPropagateDown, colBottom);
        }

        /// <summary>
        /// Computes the forward calculation using the Engine.CAFFE mode.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        protected void forward_cuda(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hBottomData = colBottom[0].gpu_data;
            long hTopData = colTop[0].mutable_gpu_data;
            long hScaleData = m_blobScale.mutable_gpu_data;
            long hScaleDiff = m_blobScale.mutable_gpu_diff;
            int nCount = colBottom[0].count();
            int nChannels = colTop[0].shape(m_nSoftmaxAxis);
            // Number of values produced by each per-channel reduction.
            int nScaleCount = m_nOuterNum * m_nInnerNum;

            m_cuda.copy(nCount, hBottomData, hTopData);

            // We need to subtract the max to avoid numerical issues, compute the exp
            // and then normalize.
            // c = x.max(dim=axis)
            m_cuda.channel_max(nScaleCount, m_nOuterNum, nChannels, m_nInnerNum, hTopData, hScaleData);

            // Subtract c
            // xm = x - c (along each channel)
            m_cuda.channel_sub(nCount, m_nOuterNum, nChannels, m_nInnerNum, hScaleData, hTopData);

            // exponentiate
            // exp_x = exp(xm)
            m_cuda.exp(nCount, hTopData, hTopData);

            // Sum across each channel after exp
            // exp_sum = exp_x.sum(dim=axis)
            m_cuda.channel_sum(nScaleCount, m_nOuterNum, nChannels, m_nInnerNum, hTopData, hScaleDiff);

            // Use the resolved algorithm so that an 'algorithm_train' override is honored here
            // in the same way as it is in the cuDNN path.
            if (m_algorithm == SOFTMAX_ALGORITHM.LOG)
            {
                // exp_log = exp_sum.log()
                m_cuda.log(nScaleCount, hScaleDiff, hScaleDiff);
                // log_z = c + exp_log
                m_cuda.add(nScaleCount, hScaleData, hScaleDiff, hScaleData);
                // sm = x - log_z
                m_cuda.copy(nCount, hBottomData, hTopData);
                m_cuda.channel_sub(nCount, m_nOuterNum, nChannels, m_nInnerNum, hScaleData, hTopData);
            }
            else
            {
                // divide
                m_cuda.channel_div(nCount, m_nOuterNum, nChannels, m_nInnerNum, hScaleDiff, hTopData);
            }
        }

        /// <summary>
        /// Computes the error gradient w.r.t. the inputs using the Engine.CAFFE mode.
        /// </summary>
        /// <param name="colTop">Specifies the collection of top (output) Blobs whose diff holds the incoming gradient.</param>
        /// <param name="rgbPropagateDown">Specifies, per bottom, whether to propagate the gradient (not consulted by this layer).</param>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs whose diff receives the gradient.</param>
        protected void backward_cuda(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            long hTopDiff = colTop[0].gpu_diff;
            long hTopData = colTop[0].gpu_data;
            long hBottomDiff = colBottom[0].mutable_gpu_diff;
            long hScaleData = m_blobScale.mutable_gpu_data;
            int nCount = colTop[0].count();
            int nChannels = colTop[0].shape(m_nSoftmaxAxis);

            // Must select the same branch as forward_cuda, so the resolved algorithm is used.
            if (m_algorithm == SOFTMAX_ALGORITHM.LOG)
            {
                // sumgy = sum channel diff
                m_blobScale.Reshape(m_rgScaleDims);
                m_cuda.channel_sum(nCount, m_nOuterNum, nChannels, m_nInnerNum, hTopDiff, m_blobScale.mutable_gpu_diff);

                // expy = exp(y)
                m_cuda.exp(nCount, hTopData, hBottomDiff);

                // Fill the expy values across each channel.
                m_blobScale.ReshapeLike(colBottom[0]);
                m_cuda.channel_fillfrom(nCount, m_nOuterNum, 1, nChannels, m_blobScale.gpu_diff, m_blobScale.mutable_gpu_data, DIR.FWD);

                // expy * sumgy
                m_cuda.mul(nCount, hBottomDiff, m_blobScale.gpu_data, hBottomDiff);
                // Restore the collapsed shape expected by the forward pass.
                m_blobScale.Reshape(m_rgScaleDims);

                // grad = gy - (expy * sumgy)
                m_cuda.sub(nCount, hTopDiff, hBottomDiff, hBottomDiff);
            }
            else
            {
                m_cuda.copy(nCount, hTopDiff, hBottomDiff);

                // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff.
                m_cuda.channel_dot(m_nOuterNum * m_nInnerNum, m_nOuterNum, nChannels, m_nInnerNum, hTopDiff, hTopData, hScaleData);
                m_cuda.channel_sub(nCount, m_nOuterNum, nChannels, m_nInnerNum, hScaleData, hBottomDiff);

                // elementwise multiplication
                m_cuda.mul(nCount, hBottomDiff, hTopData, hBottomDiff);
            }
        }

        /// <summary>
        /// Computes the forward calculation using the Engine.CUDNN mode.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        protected void forward_cudnn(BlobCollection<T> colBottom, BlobCollection<T> colTop)
        {
            long hBottomData = colBottom[0].gpu_data;
            long hTopData = colTop[0].mutable_gpu_data;

            m_cuda.SoftmaxForward(m_hCudnn, m_algorithm, m_mode, m_tOne, m_hBottomDesc, hBottomData, m_tZero, m_hTopDesc, hTopData);
        }

        /// <summary>
        /// Computes the error gradient w.r.t. the inputs using the Engine.CUDNN mode.
        /// </summary>
        /// <param name="colTop">Specifies the collection of top (output) Blobs whose data and diff are read.</param>
        /// <param name="rgbPropagateDown">Specifies, per bottom, whether to propagate the gradient (not consulted by this layer).</param>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs whose diff receives the gradient.</param>
        protected void backward_cudnn(BlobCollection<T> colTop, List<bool> rgbPropagateDown, BlobCollection<T> colBottom)
        {
            long hTopData = colTop[0].gpu_data;
            long hTopDiff = colTop[0].gpu_diff;
            long hBottomDiff = colBottom[0].mutable_gpu_diff;

            m_cuda.SoftmaxBackward(m_hCudnn, m_algorithm, m_mode, m_tOne, m_hTopDesc, hTopData, m_hTopDesc, hTopDiff, m_tZero, m_hBottomDesc, hBottomDiff);
        }
    }

}

MyCaffe.basecode.Log
The Log class provides general output in text form.
Definition: Log.cs:13

MyCaffe.basecode.Log.WriteLine
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80

MyCaffe.basecode.Utility
The Utility class provides general utility functions.
Definition: Utility.cs:35

MyCaffe.common.BlobCollection
The BlobCollection contains a list of Blobs.
Definition: BlobCollection.cs:16

MyCaffe.common.BlobCollection.Add
void Add(Blob< T > b)
Add a new Blob to the collection.
Definition: BlobCollection.cs:92

MyCaffe.common.BlobCollection.Count
int Count
Returns the number of items in the collection.
Definition: BlobCollection.cs:30

MyCaffe.common.BlobCollection.ReshapeLike
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
Definition: BlobCollection.cs:214

MyCaffe.common.Blob
The Blob is the main holder of data that moves through the Layers of the Net.
Definition: Blob.cs:25

MyCaffe.common.Blob.mutable_gpu_diff
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1555

MyCaffe.common.Blob.mutable_gpu_data
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1487

MyCaffe.common.Blob.Reshape
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use the Reshape overload that takes a shape list instead.
Definition: Blob.cs:442

MyCaffe.common.Blob.ReshapeLike
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
Definition: Blob.cs:648

MyCaffe.common.Blob.Name
string Name
Get/set the name of the Blob.
Definition: Blob.cs:2184

MyCaffe.common.Blob.gpu_diff
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1541

MyCaffe.common.Blob.Dispose
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
Definition: Blob.cs:402

MyCaffe.common.Blob.gpu_data
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Definition: Blob.cs:1479

MyCaffe.common.CudaDnn
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Definition: CudaDnn.cs:969

MyCaffe.layers.Layer
An interface for the units of computation which can be composed into a Net.
Definition: Layer.cs:31

MyCaffe.layers.Layer.m_log
Log m_log
Specifies the Log for output.
Definition: Layer.cs:43

MyCaffe.layers.Layer.m_param
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Definition: Layer.cs:47

MyCaffe.layers.Layer.shareLayerBlob
bool shareLayerBlob(Blob< T > b, List< int > rgMinShape)
Attempts to share a Layer Blob if another parameter Blob with the same name and acceptable size is fo...
Definition: Layer.cs:1170

MyCaffe.layers.Layer.m_tZero
T m_tZero
Specifies a generic type equal to 0.0.
Definition: Layer.cs:76

MyCaffe.layers.Layer.m_tOne
T m_tOne
Specifies a generic type equal to 1.0.
Definition: Layer.cs:72

MyCaffe.layers.Layer.m_colInternalBlobs
BlobCollection< T > m_colInternalBlobs
Specifies internal blobs used by the layer.
Definition: Layer.cs:59

MyCaffe.layers.Layer.m_phase
Phase m_phase
Specifies the Phase under which the Layer is run.
Definition: Layer.cs:51

MyCaffe.layers.Layer.m_cuda
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
Definition: Layer.cs:39

MyCaffe.layers.Layer.m_type
LayerParameter.LayerType m_type
Specifies the Layer type.
Definition: Layer.cs:35

MyCaffe.layers.SoftmaxLayer
The SoftmaxLayer computes the softmax function. This layer is initialized with the MyCaffe....
Definition: SoftmaxLayer.cs:24

MyCaffe.layers.SoftmaxLayer.Reshape
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
Definition: SoftmaxLayer.cs:147

MyCaffe.layers.SoftmaxLayer.MinBottomBlobs
override int MinBottomBlobs
Returns the minimum number of bottom blobs (input) Blobs: input.
Definition: SoftmaxLayer.cs:99

MyCaffe.layers.SoftmaxLayer.LayerSetUp
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer to run in either Engine.CAFFE or Engine.CUDNN mode.
Definition: SoftmaxLayer.cs:124

MyCaffe.layers.SoftmaxLayer.forward_cuda
void forward_cuda(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Computes the forward calculation using the Engine.CAFFE mode.
Definition: SoftmaxLayer.cs:230

MyCaffe.layers.SoftmaxLayer.dispose
override void dispose()
Releases all GPU and host resources used by the Layer.
Definition: SoftmaxLayer.cs:59

MyCaffe.layers.SoftmaxLayer.backward
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t the inputs using either the Engine.CAFFE or Engine....
Definition: SoftmaxLayer.cs:213

MyCaffe.layers.SoftmaxLayer.SoftmaxLayer
SoftmaxLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The SoftmaxLayer constructor.
Definition: SoftmaxLayer.cs:48

MyCaffe.layers.SoftmaxLayer.ExactNumTopBlobs
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: softmax
Definition: SoftmaxLayer.cs:115

MyCaffe.layers.SoftmaxLayer.forward
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Computes the forward calculation using either the Engine.CAFFE or Engine.CUDNN mode.
Definition: SoftmaxLayer.cs:193

MyCaffe.layers.SoftmaxLayer.setup_internal_blobs
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' provided.
Definition: SoftmaxLayer.cs:84

MyCaffe.layers.SoftmaxLayer.backward_cudnn
void backward_cudnn(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the inputs using the Engine.CUDNN mode.
Definition: SoftmaxLayer.cs:359

MyCaffe.layers.SoftmaxLayer.forward_cudnn
void forward_cudnn(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Computes the forward calculation using the Engine.CUDNN mode.
Definition: SoftmaxLayer.cs:339

MyCaffe.layers.SoftmaxLayer.backward_cuda
void backward_cuda(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the inputs using the Engine.CAFFE mode.
Definition: SoftmaxLayer.cs:288

MyCaffe.layers.SoftmaxLayer.MaxBottomBlobs
override int MaxBottomBlobs
Returns the maximum number of bottom blobs (input) Blobs: input, target (ignored)
Definition: SoftmaxLayer.cs:107

MyCaffe.param.EngineParameter
Specifies whether to use the NVIDIA cuDnn version or Caffe version of a given forward/backward operat...
Definition: EngineParameter.cs:17

MyCaffe.param.EngineParameter.engine
Engine engine
Specifies the Engine in use.
Definition: EngineParameter.cs:49

MyCaffe.param.EngineParameter.Engine
Engine
Defines the type of engine to use.
Definition: EngineParameter.cs:24

MyCaffe.param.LayerParameter
Specifies the base parameter for all layers.
Definition: LayerParameter.cs:24

MyCaffe.param.LayerParameter.name
string name
Specifies the name of this LayerParameter.
Definition: LayerParameter.cs:1865

MyCaffe.param.LayerParameter.softmax_param
SoftmaxParameter softmax_param
Returns the parameter set when initialized with LayerType.SOFTMAX
Definition: LayerParameter.cs:2794

MyCaffe.param.LayerParameter.LayerType
LayerType
Specifies the layer type.
Definition: LayerParameter.cs:110

MyCaffe.param.SoftmaxParameter.algorithm
SOFTMAX_ALGORITHM algorithm
Specifies the softmax algorithm to use during the running and testing.
Definition: SoftmaxParameter.cs:71

MyCaffe.param.SoftmaxParameter.useCudnn
bool useCudnn()
Queries whether or not to use NVIDIA's cuDnn. Softmax uses cuDNN as the default.
Definition: SoftmaxParameter.cs:50

MyCaffe.param.SoftmaxParameter.algorithm_train
SOFTMAX_ALGORITHM? algorithm_train
Optionally, specifies the softmax algorithm to use during the training phase, when null,...
Definition: SoftmaxParameter.cs:62

MyCaffe.param.SoftmaxParameter.axis
int axis
The axis along which to perform the softmax – may be negative to index from the end (e....
Definition: SoftmaxParameter.cs:83

MyCaffe.basecode
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12

MyCaffe.basecode.Phase
Phase
Defines the Phase under which to run a Net.
Definition: Interfaces.cs:61

MyCaffe.common
The MyCaffe.common namespace contains common MyCaffe classes.
Definition: BatchInput.cs:8

MyCaffe.common.DIR
DIR
Defines the direction of data flow.
Definition: CudaDnn.cs:22

MyCaffe.common.SOFTMAX_MODE
SOFTMAX_MODE
Specifies the SOFTMAX mode to use.
Definition: CudaDnn.cs:724

MyCaffe.common.SOFTMAX_ALGORITHM
SOFTMAX_ALGORITHM
Specifies the SOFTMAX algorithm to use.
Definition: CudaDnn.cs:701

MyCaffe.layers
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
Definition: LayerFactory.cs:15

MyCaffe.param
The MyCaffe.param namespace contains parameters used to create models.
Definition: AttentionParameter.cs:9

MyCaffe
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12

System
Definition: Component.cs:11