2using System.Collections.Generic;
51 double m_dfClippingThreshold;
66 Blob<T> m_blob_C_to_Gate =
null;
67 Blob<T> m_blobEOutputWhd =
null;
83 Blob<T> m_blobContextFull =
null;
151 dispose(ref m_blobContextFull);
153 dispose(ref m_blobBiasMultiplier);
175 col.
Add(m_blobBiasMultiplier);
177 col.
Add(m_blobPreGate);
183 col.
Add(m_blob_H_to_Gate);
184 col.
Add(m_blob_H_to_H);
187 if (m_blobEOutputWhd !=
null)
188 col.
Add(m_blobEOutputWhd);
190 if (m_attention !=
null)
192 col.
Add(m_blob_C_to_Gate);
193 col.
Add(m_blobPrevCt);
233 m_colInternalBottom.
Clear();
234 m_colInternalBottom.
Add(bottom);
236 m_colInternalTop.
Clear();
237 m_colInternalTop.
Add(top);
242 m_colInternalBottom.
Clear();
244 for (
int i = 0; i < rgBottom.Count; i++)
246 m_colInternalBottom.
Add(rgBottom[i]);
249 m_colInternalTop.
Clear();
250 m_colInternalTop.
Add(top);
264 m_log.
CHECK_GE(colBottom.
Count, 4,
"When using attention, four bottoms are required: x, xClip, encoding, encodingClip.");
265 m_log.
CHECK_LE(colBottom.
Count, 5,
"When using attention, four bottoms are required: x, xClip, encoding, encodingClip, vocabcount (optional).");
267 if (colBottom.
Count == 5)
275 m_log.
CHECK_GE(colBottom.
Count, 1,
"When not using attention, at least one bottom is required: x.");
276 m_log.
CHECK_LE(colBottom.
Count, 2,
"When not using attention, no more than two bottoms is required: x, clip.");
280 m_nN = colBottom[0].channels;
282 m_nI = colBottom[0].count(2);
298 List<int> rgShape1 =
new List<int>() { 4 * m_nH, m_nI };
305 blobWeights_I_H.
Reshape(rgShape1);
306 weight_filler.
Fill(blobWeights_I_H);
313 List<int> rgShape2 =
new List<int>() { 4 * m_nH, m_nH };
320 blobWeights_H_H.
Reshape(rgShape2);
321 weight_filler.
Fill(blobWeights_H_H);
327 List<int> rgShape3 =
new List<int>() { 4 * m_nH };
335 bias_filler.
Fill(blobBias);
347 for (
int i=m_nH; i<2*m_nH; i++)
364 blobWeightWhd.
Reshape(rgShapeWhd);
365 weight_filler.
Fill(blobWeightWhd);
377 blobWeightWhdb.
Reshape(rgShapeWhdb);
378 bias_filler.
Fill(blobWeightWhdb);
394 blobWeights_C_H.
Reshape(rgShape1);
395 weight_filler.
Fill(blobWeights_C_H);
404 List<int> rgCellShape =
new List<int>() { m_nN, m_nH };
405 m_blob_C_0.
Reshape(rgCellShape);
406 m_blob_H_0.
Reshape(rgCellShape);
407 m_blob_C_T.
Reshape(rgCellShape);
408 m_blob_H_T.
Reshape(rgCellShape);
409 m_blob_H_to_H.
Reshape(rgCellShape);
411 List<int> rgGateShape =
new List<int>() { m_nN, 4, m_nH };
412 m_blob_H_to_Gate.
Reshape(rgGateShape);
419 m_blob_C_to_Gate.
Reshape(rgGateShape);
422 m_blobContext.
Name =
"context_out";
425 m_blobContextFull.
Name =
"context_full";
428 m_blobPrevCt.
Name =
"prev_ct";
444 Blob<T> blobEncoding = colBottom[2];
445 Blob<T> blobEncodingClip = colBottom[3];
446 addInternal(
new List<
Blob<T>>() { blobEncoding, m_blob_C_T, blobEncodingClip }, m_blobContext);
447 m_attention.
Setup(m_colInternalBottom, m_colInternalTop);
466 m_nN = colBottom[0].channels;
476 m_nT = colBottom[0].num;
477 m_log.
CHECK_EQ(colBottom[0].count() / m_nT / m_nN, m_nI,
"The input size is incompatible with inner product parameters.");
480 List<int> rgGateShape =
new List<int>() { m_nT, m_nN, 4, m_nH };
481 m_blobPreGate.
Reshape(rgGateShape);
482 m_blobGate.
Reshape(rgGateShape);
483 m_blob_H_to_Gate.
Reshape(rgGateShape);
485 List<int> rgTopShape =
new List<int>() { m_nT, m_nN, m_nH };
486 m_blobCell.
Reshape(rgTopShape);
490 List<int> rgMultiplierShape =
new List<int>() { m_nT, m_nN };
491 m_blobBiasMultiplier.
Reshape(rgMultiplierShape);
492 m_blobBiasMultiplier.
SetData(1.0);
494 List<int> rgCellShape =
new List<int>() { m_nN, m_nH };
495 m_blob_C_0.
Reshape(rgCellShape);
496 m_blob_H_0.
Reshape(rgCellShape);
497 m_blob_C_T.
Reshape(rgCellShape);
498 m_blob_H_T.
Reshape(rgCellShape);
499 m_blob_H_to_H.
Reshape(rgCellShape);
501 if (colBottom.
Count > 1)
502 m_blobMaxT.
Reshape(
new List<int>() { 1, colBottom[1].channels });
507 m_blobEOutputWhd.
Reshape(rgIpShape);
514 m_blob_C_to_Gate.
Reshape(rgGateShape);
516 Blob<T> blobEncoding = colBottom[2];
517 Blob<T> blobEncodingClip = colBottom[3];
518 addInternal(
new List<
Blob<T>>() { blobEncoding, m_blob_C_T, blobEncodingClip }, m_blobContext);
519 m_attention.
Reshape(m_colInternalBottom, m_colInternalTop);
521 List<int> rgShape =
Utility.Clone<
int>(m_blobContext.
shape());
523 m_blobContextFull.
Reshape(rgShape);
530 private int calculate_maxT(
Blob<T> blob, out
int nInitialClip)
534 if (blob.
count() > 1)
538 for (
int t = 0; t < blob.
num; t++)
567 long hTopData = colTop[0].mutable_gpu_data;
568 long hBottomData = colBottom[0].gpu_data;
571 int nInitialClip = 0;
572 double dfOriginalClip = 0;
574 if (colBottom.
Count > 1)
576 hClipData = colBottom[1].gpu_data;
577 m_log.
CHECK_EQ(colBottom[0].count(0, 2), colBottom[1].count(),
"The bottom[1].count() should equal the bottom[0].count(0,2).");
579 m_nMaxT = calculate_maxT(colBottom[1], out nInitialClip);
580 nMaxT = m_nMaxT.Value;
583 long hWeight_i =
m_colBlobs[m_nWeightItoHidx].gpu_data;
584 long hWeight_h =
m_colBlobs[m_nWeightHtoHidx].gpu_data;
585 long hBias =
m_colBlobs[m_nWeightBiasidx].gpu_data;
591 long hCtoGateData = 0;
594 if (hClipData != 0 && nInitialClip != 0)
605 m_cuda.gemm(
false,
true, m_nT * m_nN, 4 * m_nH, m_nI,
m_tOne, hBottomData, hWeight_i,
m_tZero, hPreGateData);
611 if (nInitialClip == 0)
615 dfOriginalClip =
convertD(colBottom[1].GetData(0));
620 for (
int t = 0; t < nMaxT; t++)
622 int nTopOffset = colTop[0].offset(t);
623 int nCellOffset = m_blobCell.
offset(t);
624 int nPreGateOffset = m_blobPreGate.
offset(t);
625 int nGateOffset = m_blobGate.
offset(t);
626 int nClipOffset = (hClipData != 0) ? colBottom[1].offset(t) : 0;
643 nHT1Offset = -colTop[0].offset(1);
645 nCT1Offset = -m_blobCell.
offset(1);
650 Blob<T> blobEncoding = colBottom[2];
651 Blob<T> blobEncodingClip = colBottom[3];
653 addInternal(
new List<
Blob<T>>() { blobEncoding, m_blobPrevCt, blobEncodingClip }, m_blobContext);
654 m_attention.
Forward(m_colInternalBottom, m_colInternalTop);
658 int nCount = m_blobContext.
count();
697 int nM = m_nT * m_nN;
702 colTop[0].
CopyFrom(m_blobEOutputWhd);
708 colBottom[1].
SetData(dfOriginalClip, 0);
724 long hTopData = colTop[0].gpu_data;
725 long hBottomData = colBottom[0].gpu_data;
729 List<bool> rgbPropagate =
new List<bool>() {
true,
true };
731 if (colBottom.
Count > 1)
733 hClipData = colBottom[1].gpu_data;
734 m_cuda.sign(colBottom[1].count(), hClipData, hClipData);
735 m_log.
CHECK_EQ(colBottom[0].count(0, 2), colBottom[1].count(),
"The bottom[1].count() should equal the bottom[0].count(0,2).");
736 nMaxT = m_nMaxT.Value;
739 long hWeight_i =
m_colBlobs[m_nWeightItoHidx].gpu_data;
740 long hWeight_h =
m_colBlobs[m_nWeightHtoHidx].gpu_data;
741 long hGateData = m_blobGate.
gpu_data;
742 long hCellData = m_blobCell.
gpu_data;
744 long hTopDiff = colTop[0].mutable_gpu_diff;
757 long hContextData = 0;
758 long hContextDiff = 0;
762 int nM = m_nT * m_nN;
767 m_cuda.add(colTop[0].count(), colTop[0].gpu_diff,
m_colBlobs[m_nWeightWhdbidx].gpu_diff,
m_colBlobs[m_nWeightWhdbidx].mutable_gpu_diff);
778 hWeight_c =
m_colBlobs[m_nWeightCtoHidx].gpu_data;
779 hContextData = m_blobContext.
gpu_data;
781 m_cuda.sign(colBottom[3].count(), colBottom[3].gpu_data, colBottom[3].mutable_gpu_data);
787 for (
int t = nMaxT - 1; t >= 0; t--)
789 int nTopOffset = colTop[0].offset(t);
790 int nCellOffset = m_blobCell.
offset(t);
791 int nGateOffset = m_blobGate.
offset(t);
792 int nPreGateOffset = m_blobPreGate.
offset(t);
793 int nClipOffset = (hClipData == 0) ? 0 : colBottom[1].offset(t);
812 nCT1Offset = m_blobCell.
offset(t - 1);
813 hCT1Data = hCellData;
814 nDHT1Offset = colTop[0].offset(t - 1);
815 hDHT1Diff = hTopDiff;
816 nDCT1Offset = m_blobCell.
offset(t - 1);
817 hDCT1Diff = hCellDiff;
824 m_dfClippingThreshold,
850 Blob<T> blobEncoding = colBottom[2];
851 Blob<T> blobEncodingClip = colBottom[3];
852 addInternal(
new List<
Blob<T>>() { blobEncoding, m_blob_C_T, blobEncodingClip }, m_blobContext);
853 m_attention.
Backward(m_colInternalTop, rgbPropagate, m_colInternalBottom);
855 int nCount = m_blobContext.
count();
863 m_cuda.gemm(
true,
false, 4 * m_nH, m_nI, m_nT * m_nN,
m_tOne, hPreGateDiff, hBottomData,
m_tOne,
m_colBlobs[m_nWeightItoHidx].mutable_gpu_diff);
869 m_cuda.gemm(
true,
false, 4 * m_nH, m_nH, (m_nT - 1) * m_nN,
m_tOne, hPreGateDiff, hTopData,
m_tOne,
m_colBlobs[m_nWeightHtoHidx].mutable_gpu_diff, m_blobPreGate.
offset(1));
887 if (rgbPropagateDown[0])
890 m_cuda.gemm(
false,
false, m_nT * m_nN, m_nI, 4 * m_nH,
m_tOne, hPreGateDiff, hWeight_i,
m_tZero, colBottom[0].mutable_gpu_diff);
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
void CHECK_LE(double df1, double df2, string str)
Test whether one number is less than or equal to another.
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
The Utility class provides general utility functions.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void SetData(double df)
Set all blob data to the value specified.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
The Blob is the main holder of data that moves through the Layers of the Net.
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
T[] mutable_cpu_data
Get data from the GPU and bring it over to the host, or Set data from the Host and send it over to th...
static T One
Returns One (1) in type T.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
BLOB_TYPE type
Returns the BLOB_TYPE of the Blob.
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
static T Zero
Returns Zero (0) in type T.
T GetData(int nIdx)
Returns the data at a given flat index within the Blob.
int count()
Returns the total number of items in the Blob.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
int offset(int n, int c=0, int h=0, int w=0)
Returns the flat offset given the number, channel, height and width.
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Abstract Filler class used to fill blobs with values.
void Fill(Blob< T > b)
Fill the blob with values based on the actual filler used.
static Filler< T > Create(CudaDnn< T > cuda, Log log, FillerParameter p)
Create a new Filler instance.
[DEPRECATED] The AttentionLayer provides focus for LSTM based encoder/decoder models.
The LSTMAttentionLayer adds attention to the long-short term memory layer and is used in encoder/deco...
override void dispose()
Releases all GPU and host resources used by the Layer.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: output (ht).
override int MinBottomBlobs
Returns the minimum number of required bottom (input) Blobs: input
override int MaxBottomBlobs
Returns the maximum number of required bottom (input) Blobs: input, inputClip, encoding,...
LSTMAttentionLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The LSTMAttentionLayer constructor.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the inputs.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation.
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
T m_tZero
Specifies a generic type equal to 0.0.
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
T m_tOne
Specifies a generic type equal to 1.0.
bool shareParameter(Blob< T > b, List< int > rgMinShape, bool bAllowEndsWithComparison=false)
Attempts to share a parameter Blob if another parameter Blob with the same name and acceptable size i...
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
float convertF(T df)
Converts a generic to a float value.
abstract void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Adjust the shapes of top blobs and internal buffers to accommodate the shapes of the bottom blobs.
double convertD(T df)
Converts a generic to a double value.
virtual bool reshapeNeeded(BlobCollection< T > colBottom, BlobCollection< T > colTop, bool bReset=true)
Tests the shapes of both the bottom and top blobs and if they are the same as the previous sizing,...
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
LayerParameter.LayerType m_type
Specifies the Layer type.
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
BlobCollection< T > internal_blobs
Returns the collection of internal Blobs used by the Layer.
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
bool m_bNetReshapeRequest
Specifies whether the reshape is requested from a Net.Reshape call or not.
The LayerParameterEx class is used when sharing another Net to conserve GPU memory and extends the La...
BlobCollection< T > SharedBlobs
Returns the shared parameter Blobs.
Layer< T > SharedLayer
Returns the layer in the shared Net that matches this one.
BlobCollection< T > SharedLayerBlobs
Returns the shared Layer Blobs.
int axis
The axis along which to perform the softmax – may be negative to index from the end (e....
FillerParameter bias_filler
The filler for the bias.
FillerParameter weight_filler
The filler for the weights.
uint dim
Specifies the dim of the attention unit which should match the LSTM output size.
Specifies the parameters for the LSTMAttentionLayer that provides an attention based LSTM layer used ...
double clipping_threshold
Specifies the gradient clipping threshold, default = 0.0 (i.e. no clipping).
bool enable_clockwork_forgetgate_bias
When enabled, the forget gate bias is set to 5.0.
uint num_output_ip
Specifies the number of IP outputs for the layer. Note, when 0, no inner product is performed.
FillerParameter bias_filler
Specifies the filler parameters for the bias filler.
FillerParameter weight_filler
Specifies the filler parameters for the weight filler.
uint num_output
Specifies the number of outputs for the layer.
bool enable_attention
(default=false) When enabled, attention is applied to the input state on each cycle through the LSTM....
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
AttentionParameter attention_param
Returns the parameter set when initialized with LayerType.ATTENTION
LayerType
Specifies the layer type.
LSTMAttentionParameter lstm_attention_param
Returns the parameter set when initialized with LayerType.LSTM_ATTENTION
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
BLOB_TYPE
Defines the type of data held by a given Blob.
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers.beta namespace contains all beta stage layers.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...