2using System.Collections.Generic;
49 const int CUDNN_STREAMS_PER_GROUP = 3;
51 long[] m_rghCudnn =
null;
52 long[] m_rghStream =
null;
59 List<long> m_rghBottomDesc =
new List<long>();
60 List<long> m_rghTopDesc =
new List<long>();
62 long m_hFilterDesc = 0;
63 List<long> m_rghConvDesc =
new List<long>();
64 int m_nBottomOffset = 0;
66 int m_nBiasOffset = 0;
68 ulong[] m_rglWorkspaceFwdSizes =
null;
69 ulong[] m_rglWorkspaceBwdFilterSizes =
null;
70 ulong[] m_rglWorkspaceBwdDataSizes =
null;
71 ulong[] m_rglWorkspaceFwdOffsets =
null;
72 ulong[] m_rglWorkspaceBwdFilterOffsets =
null;
73 ulong[] m_rglWorkspaceBwdDataOffsets =
null;
74 bool m_bUseTensorCores =
false;
127 for (
int i = 0; i < m_rghBottomDesc.Count; i++)
129 m_cuda.FreeTensorDesc(m_rghBottomDesc[i]);
130 m_cuda.FreeTensorDesc(m_rghTopDesc[i]);
131 m_cuda.FreeConvolutionDesc(m_rghConvDesc[i]);
134 m_rghBottomDesc.Clear();
135 m_rghTopDesc.Clear();
136 m_rghConvDesc.Clear();
138 if (m_hBiasDesc != 0)
140 m_cuda.FreeTensorDesc(m_hBiasDesc);
144 if (m_hFilterDesc != 0)
146 m_cuda.FreeFilterDesc(m_hFilterDesc);
150 for (
int g = 0; g < (
m_nGroup * CUDNN_STREAMS_PER_GROUP); g++)
152 if (m_rghStream !=
null && m_rghStream[g] != 0)
153 m_cuda.FreeStream(m_rghStream[g]);
155 if (m_rghCudnn !=
null && m_rghCudnn[g] != 0)
156 m_cuda.FreeCuDNN(m_rghCudnn[g]);
172 base.LayerSetUp(colBottom, colTop);
178 m_rghStream =
new long[
m_nGroup * CUDNN_STREAMS_PER_GROUP];
179 m_rghCudnn =
new long[
m_nGroup * CUDNN_STREAMS_PER_GROUP];
187 m_rglWorkspaceFwdSizes =
new ulong[colBottom.
Count];
188 m_rglWorkspaceBwdFilterSizes =
new ulong[colBottom.
Count];
189 m_rglWorkspaceBwdDataSizes =
new ulong[colBottom.
Count];
190 m_rglWorkspaceFwdOffsets =
new ulong[
m_nGroup * CUDNN_STREAMS_PER_GROUP];
191 m_rglWorkspaceBwdFilterOffsets =
new ulong[
m_nGroup * CUDNN_STREAMS_PER_GROUP];
192 m_rglWorkspaceBwdDataOffsets =
new ulong[
m_nGroup * CUDNN_STREAMS_PER_GROUP];
194 for (
int i = 0; i < colBottom.
Count; i++)
202 m_rglWorkspaceFwdSizes[i] = 0;
203 m_rglWorkspaceBwdFilterSizes[i] = 0;
204 m_rglWorkspaceBwdDataSizes[i] = 0;
207 for (
int g = 0; g <
m_nGroup * CUDNN_STREAMS_PER_GROUP; g++)
209 m_rghStream[g] =
m_cuda.CreateStream(
false, g);
210 m_rghCudnn[g] =
m_cuda.CreateCuDNN(m_rghStream[g]);
211 m_rglWorkspaceFwdOffsets[g] = 0;
212 m_rglWorkspaceBwdFilterOffsets[g] = 0;
213 m_rglWorkspaceBwdDataOffsets[g] = 0;
217 if (typeof(T) == typeof(
double))
219 m_log.
WriteLine(
"WARNING: Tensor cores are only supported with the 'float' base type. Tensor core use will be disabled for the 'double' base type.");
220 m_bUseTensorCores =
false;
228 m_hFilterDesc =
m_cuda.CreateFilterDesc();
232 for (
int i = 0; i < colBottom.
Count; i++)
234 m_rghBottomDesc.Add(
m_cuda.CreateTensorDesc());
235 m_rghTopDesc.Add(
m_cuda.CreateTensorDesc());
236 m_rghConvDesc.Add(
m_cuda.CreateConvolutionDesc());
241 m_hBiasDesc =
m_cuda.CreateTensorDesc();
276 base.Reshape(colBottom, colTop);
285 m_log.
CHECK_EQ(2,
m_nNumSpatialAxes,
"cuDNN Convolution input must have 2 spatial axes (e.g., height and width). Use 'engine: CAFFE' for general ND convolution.");
301 for (
int i = 0; i < colBottom.
Count; i++)
305 m_cuda.SetConvolutionDesc(m_rghConvDesc[i], szPad.Height, szPad.Width, szStride.Height, szStride.Width, szDilation.Height, szDilation.Width, m_bUseTensorCores,
m_bUseHalfSize);
311 ulong lWsSizeFwd = 0;
312 ulong lWsSizeBwdFilter = 0;
313 ulong lWsSizeBwdData = 0;
315 m_cuda.GetConvolutionInfo(m_rghCudnn[0], m_rghBottomDesc[i], m_hFilterDesc, m_rghConvDesc[i], m_rghTopDesc[i], lWorkspaceLimitBytes, m_bUseTensorCores, out algoFwd, out lWsSizeFwd, out algoBwdFilter, out lWsSizeBwdFilter, out algoBwdData, out lWsSizeBwdData);
316 m_rgfwdAlgo[i] = algoFwd;
317 m_rglWorkspaceFwdSizes[i] = lWsSizeFwd;
318 m_rgbwdFilterAlgo[i] = algoBwdFilter;
319 m_rglWorkspaceBwdFilterSizes[i] = lWsSizeBwdFilter;
320 m_rgbwdDataAlgo[i] = algoBwdData;
321 m_rglWorkspaceBwdDataSizes[i] = lWsSizeBwdData;
325 ulong lTotalWsFwd = 0;
326 ulong lTotalWsBwdFilter = 0;
327 ulong lTotalWsBwdData = 0;
329 for (
int i = 0; i < colBottom.
Count; i++)
331 lTotalWsFwd = Math.Max(lTotalWsFwd, m_rglWorkspaceFwdSizes[i]);
332 lTotalWsBwdFilter = Math.Max(lTotalWsBwdFilter, m_rglWorkspaceBwdFilterSizes[i]);
333 lTotalWsBwdData = Math.Max(lTotalWsBwdData, m_rglWorkspaceBwdDataSizes[i]);
337 ulong lMaxWorkspace = Math.Max(lTotalWsFwd, Math.Max(lTotalWsBwdFilter, lTotalWsBwdData));
340 ulong lTotalMaxWorkspace = (ulong)lMaxWorkspace * (ulong)
m_nGroup * (ulong)CUDNN_STREAMS_PER_GROUP;
341 lTotalMaxWorkspace *= (ulong)CUDNN_STREAMS_PER_GROUP;
350 for (
int g = 0; g < (
m_nGroup * CUDNN_STREAMS_PER_GROUP); g++)
352 m_rglWorkspaceFwdOffsets[g] = (ulong)g * lTotalWsFwd;
353 m_rglWorkspaceBwdFilterOffsets[g] = (ulong)g * lTotalWsBwdFilter;
354 m_rglWorkspaceBwdDataOffsets[g] = (ulong)g * lTotalWsBwdData;
385 int nKernel =
val_at(rgKernelShapeData, i);
386 int nStride =
val_at(rgStrideData, i);
387 int nPad =
val_at(rgPadData, i);
388 int nDilation =
val_at(rgDilationData, i);
392 int nKernelExtent = nDilation * (nKernel - 1) + 1;
393 int nOutputDim = (nInputDim + 2 * nPad - nKernelExtent) / nStride + 1;
438 for (
int i = 0; i < colBottom.
Count; i++)
440 long hBottomData = colBottom[i].gpu_data;
441 long hTopData = colTop[i].mutable_gpu_data;
443 for (
int n = 0; n <
m_nNum; n++)
462 long hWeightDiff =
m_colBlobs[0].mutable_gpu_diff;
464 for (
int i = 0; i < colTop.
Count; i++)
466 long hTopDiff = colTop[i].gpu_diff;
471 long hBiasDiff =
m_colBlobs[1].mutable_gpu_diff;
473 for (
int n = 0; n <
m_nNum; n++)
481 long hBottomData = colBottom[i].gpu_data;
482 long hBottomDiff = colBottom[i].mutable_gpu_diff;
484 for (
int n = 0; n <
m_nNum; n++)
491 if (rgbPropagateDown[i])
508 for (
int i = 0; i < colBottom.
Count; i++)
510 long hBottomData = colBottom[i].gpu_data;
511 long hTopData = colTop[i].mutable_gpu_data;
517 m_cuda.ConvolutionForward(m_rghCudnn[g],
520 hBottomData, m_nBottomOffset * g,
525 wsArgs.
WorkspaceData, (
int)m_rglWorkspaceFwdOffsets[g], m_rglWorkspaceFwdSizes[i],
528 hTopData, m_nTopOffset * g,
535 m_cuda.SynchronizeStream(m_rghStream[g]);
545 m_cuda.AddTensor(m_rghCudnn[g],
548 hBiasData, m_nBiasOffset * g,
551 hTopData, m_nTopOffset * g);
557 m_cuda.SynchronizeStream(m_rghStream[g]);
576 long hBiasDiff =
m_colBlobs[1].mutable_gpu_diff;
578 for (
int i = 0; i < colTop.
Count; i++)
580 long hTopDiff = colTop[i].mutable_gpu_diff;
586 m_tOne, m_rghTopDesc[i], hTopDiff, m_nTopOffset * g,
587 m_tOne, m_hBiasDesc, hBiasDiff, m_nBiasOffset * g,
601 long hWeightDiff =
m_colBlobs[0].mutable_gpu_diff;
603 for (
int i = 0; i < colTop.
Count; i++)
605 long hTopDiff = colTop[i].mutable_gpu_diff;
606 long hBottomData = colBottom[i].gpu_data;
613 m_rghBottomDesc[i], hBottomData, m_nBottomOffset * g,
614 m_rghTopDesc[i], hTopDiff, m_nTopOffset * g,
616 m_rgbwdFilterAlgo[i],
618 m_rglWorkspaceBwdFilterSizes[i],
634 for (
int i=0; i<colTop.
Count; i++)
636 if (rgbPropagateDown[i])
638 long hTopDiff = colTop[i].mutable_gpu_diff;
639 long hBottomDiff = colBottom[i].mutable_gpu_diff;
647 m_rghTopDesc[i], hTopDiff, m_nTopOffset * g,
651 m_rglWorkspaceBwdDataSizes[i],
653 m_rghBottomDesc[i], hBottomDiff, m_nBottomOffset * g,
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
The BlobCollection contains a list of Blobs.
int Count
Returns the number of items in the collection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
The WorkspaceArgs are passed to both the Layer::OnSetWorkspace and Layer::OnGetWorkspace events.
long WorkspaceData
Get/set the handle to workspace data in GPU memory.
The BaseConvolutionLayer is an abstract base class that factors out BLAS code common to ConvolutionLayer and DeconvolutionLayer.
int m_nBottomDim
The bottom dimension.
int m_nNumOutput
The number of outputs.
int m_nTopDim
The top dimension.
List< int > m_rgOutputShape
The spatial dimensions of the output.
void backward_bias(long hBias, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
int m_nOutSpatialDim
The output spatial dimension.
int m_nChannelAxis
The channel axis.
int m_nChannels
The number of channels in each item.
void forward_gemm(long hInput, int nInputOffset, long hWeights, long hOutput, int nOutputOffset, bool bSkipIm2Col=false)
Helper function that abstracts away the column buffer and gemm arguments.
override WorkspaceArgs getWorkspace()
Returns the WorkspaceArgs containing the workspace used by this Layer.
int m_nNumSpatialAxes
The number of spatial axes.
ulong getWorkspaceLimitInBytes(bool bUseTensorCores=false)
Returns the workspace limit in bytes based on the cudnn_workspace_limit setting.
void weight_gemm(long hInput, int nInputOffset, long hOutput, int nOutputOffset, long hWeights)
Helper function that abstracts away the column buffer and gemm arguments.
Blob< T > m_blobStride
The spatial dimensions of the stride.
Blob< T > m_blobDilation
The spatial dimensions of the dilation.
Blob< T > m_blobKernelShape
The spatial dimensions of the filter kernel.
int m_nWeightOffset
The weight offset used.
Blob< T > m_blobPad
The spatial dimensions of the padding.
int m_nNum
The number of items in the batch.
void forward_bias(long hOutput, int nOutputOffset, long hBias)
Helper function that abstracts away the column buffer and gemm arguments.
override bool setWorkspace(ulong lSizeInBytes)
If not already set, allocates the workspace needed in GPU memory.
void backward_gemm(long hOutput, int nOutputOffset, long hWeights, long hInput, int nInputOffset)
Helper function that abstracts away the column buffer and gemm arguments.
int input_shape(int i)
Returns the spatial dimensions of the input.
bool m_bBiasTerm
Whether or not to use bias.
The ConvolutionLayer convolves the input image with a bank of learned filters, and (optionally) adds biases.
override void dispose()
Releases all GPU and host resources used by the Layer.
void backward_cuda(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Run the Backward computation using the Engine.CAFFE mode as specified in the LayerParameter.
override bool reshapeNeeded(BlobCollection< T > colBottom, BlobCollection< T > colTop, bool bReset=true)
Tests the shapes of both the bottom and top blobs and if they are the same as the previous sizing,...
void backward_cudnn(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Run the Backward computation using the Engine CUDNN mode as specified in the LayerParameter.
void forward_cuda(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Run the Forward computation using the Engine.CAFFE mode as specified in the LayerParameter.
override bool reverse_dimensions()
Returns false, for we want convolution, not deconvolution.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Run the Backward computation using either the Engine.CAFFE or Engine.CUDNN mode as specified in the LayerParameter.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer for use with both Engine.CAFFE and Engine.CUDNN modes.
ConvolutionLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The ConvolutionLayer constructor.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Run the Forward computation using either the Engine.CAFFE or Engine.CUDNN mode as specified in the LayerParameter.
void forward_cudnn(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Run the Forward computation using the Engine CUDNN mode as specified in the LayerParameter.
override void compute_output_shape()
Computes the output shape used by the BaseConvolutionLayer.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
void setShapes(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Set the internal shape sizes - used when determining if a Reshape is necessary.
int val_at(T[] rg, int nIdx)
Returns the integer value at a given index in a generic array.
T m_tZero
Specifies a generic type equal to 0.0.
T m_tOne
Specifies a generic type equal to 1.0.
bool compareShapes(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Compare the shapes of the top and bottom and if the same, return true, otherwise false.
bool m_bUseHalfSize
Specifies that the half size of the top (if any) should be converted to the base size.
Size size_at(Blob< T > b)
Returns the Size of a given two element Blob, such as one that stores Blob size information.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
bool m_bReshapeOnForwardNeeded
Specifies whether or not the reshape on forward is needed or not.
LayerParameter.LayerType m_type
Specifies the Layer type.
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
bool cudnn_enable_tensor_cores
Specifies to enable the CUDA tensor cores when performing the convolution which is faster but not supported by all GPUs.
bool useCudnn(int nNumSpatialAxes=2)
Queries whether or not to use NVIDIA's cuDnn.
Specifies the base parameter for all layers.
ConvolutionParameter convolution_param
Returns the parameter set when initialized with LayerType.CONVOLUTION
LayerType
Specifies the layer type.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
CONV_BWD_FILTER_ALGO
Specifies the cuDnn convolution backward filter algorithm to use.
CONV_FWD_ALGO
Specifies the cuDnn convolution forward algorithm to use.
CONV_BWD_DATA_ALGO
Specifies the cuDnn convolution backward data algorithm to use.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-source project.