using System.Collections.Generic;

List<int> m_rgShapeMask;
Blob<T> m_blobAttnScoresAllHeads;
Blob<T> m_blobAttnOutputAllHeads;
List<int> m_rgShape = new List<int>(4);
// Constructor: allocate the internal working blobs.
m_blobQ = new Blob<T>(cuda, log);
m_blobK = new Blob<T>(cuda, log);
m_blobV = new Blob<T>(cuda, log);
m_blobIpQ = new Blob<T>(cuda, log);
m_blobIpK = new Blob<T>(cuda, log);
m_blobIpV = new Blob<T>(cuda, log);
m_blobMask = new Blob<T>(cuda, log, false);
m_blobIpVfull = new Blob<T>(cuda, log);
m_blobIpVfull.Name = p.name + ".ipvfull";
m_blobIpQt = new Blob<T>(cuda, log);
m_blobIpKt = new Blob<T>(cuda, log);
m_blobIpKt1 = new Blob<T>(cuda, log);
m_blobIpKt1.Name = p.name + ".ipkt1";
m_blobIpVt = new Blob<T>(cuda, log);
m_blobAttnScores1 = new Blob<T>(cuda, log);
m_blobAttnScores1.Name = p.name + ".attn_scores";
m_blobAttnScoresAllHeads = new Blob<T>(cuda, log);
m_blobAttnScoresAllHeads.Name = p.name + ".attn_scr_allhd";
m_blobAttnOutputAllHeads = new Blob<T>(cuda, log);
m_blobAttnOutputAllHeads.Name = p.name + ".attn_out_allhd";
m_blobWork = new Blob<T>(cuda, log);
// dispose(): release the internal blobs.
dispose(ref m_blobAttnScores1);
dispose(ref m_blobAttnScoresAllHeads);
dispose(ref m_blobAttnOutputAllHeads);

// setup_internal_blobs(): expose the internal blobs to the provided collection.
col.Add(m_blobIpVfull);
col.Add(m_blobIpKt1);
col.Add(m_blobAttnScores1);
col.Add(m_blobAttnScoresAllHeads);
col.Add(m_blobAttnOutputAllHeads);
private void reshapeRepeat(Blob<T> b, List<int> rgShape, int nRepeat)
    m_rgShape.AddRange(rgShape);
    m_rgShape[3] *= nRepeat;

private void reshapeFwd(Blob<T> b, int nNumHeads, List<int> rgShape = null)
    m_rgShape.Add(rgShape[0]);
    m_rgShape.Add(rgShape[1]);
    m_rgShape.Add(nNumHeads);
    m_rgShape.Add(rgShape[2] / nNumHeads);

private void reshapeBwd(Blob<T> b, int nNumHeads, List<int> rgShape = null)
    m_rgShape.Add(rgShape[0]);
    m_rgShape.Add(rgShape[1]);
    m_rgShape.Add(rgShape[2] * rgShape[3]);

private void reshapeSansHead(Blob<T> b, List<int> rgShape)
    m_rgShape.AddRange(rgShape);
    m_rgShape.RemoveAt(1);
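For reference, the head split/merge arithmetic used by reshapeFwd and reshapeBwd can be checked in isolation. The sketch below is a standalone illustration, not the layer's code: the class and method names are hypothetical, and it assumes the incoming blob shape is [batch, time, d_model] with d_model evenly divisible by the head count.

using System;
using System.Collections.Generic;

public static class HeadShapeSketch
{
    // Forward split: mirrors the Add() calls in reshapeFwd.
    public static List<int> SplitHeads(List<int> rgShape, int nNumHeads)
    {
        return new List<int> { rgShape[0], rgShape[1], nNumHeads, rgShape[2] / nNumHeads };
    }

    // Backward merge: mirrors the Add() calls in reshapeBwd.
    public static List<int> MergeHeads(List<int> rgShape)
    {
        return new List<int> { rgShape[0], rgShape[1], rgShape[2] * rgShape[3] };
    }

    public static void Main()
    {
        List<int> rgIn = new List<int> { 16, 120, 64 };      // batch=16, time=120, d_model=64
        List<int> rgSplit = SplitHeads(rgIn, 4);             // -> [16, 120, 4, 16]
        List<int> rgMerged = MergeHeads(rgSplit);            // -> [16, 120, 64]
        Console.WriteLine(string.Join(",", rgSplit) + "  " + string.Join(",", rgMerged));
    }
}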
private void calculateChannelMeanAcrossChannelsFwd(Blob<T> bBtm, Blob<T> bTop)
    int nSpatialDim = bBtm.count(2);
    int nSpatialDimDst = bTop.count(1);
    m_log.CHECK_EQ(nSpatialDim, bTop.count(1), "Both src and dst must have the same spatial dim.");
    for (int i = 0; i < nC; i++)

private void calculateChannelMeanAcrossChannelsBwd(Blob<T> bBtm, Blob<T> bTop)
    int nSpatialDim = bBtm.count(2);
    m_log.CHECK_EQ(nSpatialDim, bTop.count(1), "Both src and dst must have the same spatial dim.");
    for (int i = 0; i < nC; i++)
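These helpers reduce a [num, heads, spatial] blob to [num, spatial] by averaging over the head axis, which is consistent with TFT-style interpretable multi-head attention, where head outputs are averaged rather than concatenated. Below is a minimal standalone sketch of the forward mean over a flat float array; the names and the flat [N, H, S] layout are assumptions for illustration, and for a mean the backward pass simply hands each head the top gradient scaled by 1/H.

using System;

public static class HeadMeanSketch
{
    // dst[n, s] = (1/H) * sum over h of src[n, h, s], with src flat in [N, H, S] order.
    public static float[] MeanAcrossHeads(float[] rgSrc, int nNum, int nHeads, int nSpatialDim)
    {
        float[] rgDst = new float[nNum * nSpatialDim];
        for (int n = 0; n < nNum; n++)
        {
            for (int h = 0; h < nHeads; h++)
            {
                for (int s = 0; s < nSpatialDim; s++)
                    rgDst[n * nSpatialDim + s] += rgSrc[(n * nHeads + h) * nSpatialDim + s];
            }
            for (int s = 0; s < nSpatialDim; s++)
                rgDst[n * nSpatialDim + s] /= nHeads;
        }
        return rgDst;
    }

    public static void Main()
    {
        float[] rgSrc = { 1, 2, 3, 4, 5, 6 };               // N=1, H=3, S=2
        float[] rgDst = MeanAcrossHeads(rgSrc, 1, 3, 2);    // -> [3, 4]
        Console.WriteLine(rgDst[0] + "," + rgDst[1]);
    }
}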
private void generate_mask(Blob<T> mask)
    m_rgShape.Add(m_nNumFut);
    m_rgShape.Add(m_nNumFut + m_nNumHist);
    int nRow = m_nNumFut + m_nNumHist;
    int nOutSeqLen = m_nNumFut;
    float[] rgData = new float[mask.count()];
    for (int i = 0; i < m_nNumFut; i++)
        for (int j = 0; j < m_nNumHist + nOutSeqLen; j++)
            int nIdx = i * nRow + j;
            if (j > m_nNumHist && j - m_nNumHist > i)
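The condition above masks key positions that lie beyond the current future step i, so each decoding step can attend to all of the history plus the future steps up to and including itself. The excerpt does not show what value is written for masked entries, so the standalone sketch below uses 1 for masked and 0 for visible as an assumed encoding; the actual layer may instead store large negative values to add directly to the attention scores.

using System;

public static class CausalMaskSketch
{
    // Build an [nFut x (nHist + nFut)] mask; 1 marks a key position the i-th future
    // step is not allowed to attend to (a future step beyond its own position).
    public static float[] BuildMask(int nNumHist, int nNumFut)
    {
        int nRow = nNumHist + nNumFut;
        float[] rgData = new float[nNumFut * nRow];
        for (int i = 0; i < nNumFut; i++)
        {
            for (int j = 0; j < nRow; j++)
            {
                int nIdx = i * nRow + j;
                if (j > nNumHist && j - nNumHist > i)
                    rgData[nIdx] = 1;   // masked (assumed encoding)
            }
        }
        return rgData;
    }

    public static void Main()
    {
        float[] rgMask = BuildMask(4, 3);   // 4 history steps, 3 future steps
        for (int i = 0; i < 3; i++)
        {
            string strRow = "";
            for (int j = 0; j < 7; j++)
                strRow += rgMask[i * 7 + j] + " ";
            Console.WriteLine(strRow);
        }
    }
}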
// LayerSetUp: validate the bottom configuration and set up the attention dimensions.
// Self-attention path:
m_log.CHECK_EQ(colBottom.Count, 1, "When using self-attention, there should only be one bottom.");
// Separate q, k, v bottoms:
m_log.CHECK_EQ(colBottom.Count, 3, "When not using self-attention, there should be three bottom values: q, k, v");

m_nAllHeadsDim = m_nNumHeads * m_nDModel;
m_dfScale = 1.0 / Math.Sqrt(m_nDModel);

m_log.CHECK(colBottom.Count == 1 || colBottom.Count == 4, "The bottom count must be 1 (input -> q,k,v, mask generated) or 4 for q, k, v, mask");
m_log.CHECK_GT(m_nNumFut, 0, "The number of future steps must be greater than zero.");
m_log.CHECK_GT(m_nNumHist, 0, "The number of historical steps must be greater than zero.");
m_log.CHECK_EQ(m_nNumFut + m_nNumHist, colBottom[0].channels, "The number of future + historical steps must equal the bottom(0).channels.");
m_log.CHECK_EQ(m_nNumHist % m_nNumFut, 0, "The historical steps must be a multiple of the future steps! For example, historical steps = 90 and future steps = 30.");
m_nBlocks = (m_nNumHist + m_nNumFut) / m_nNumFut;
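Plugging in the example from the check above (90 historical and 30 future steps): m_nBlocks = (90 + 30) / 30 = 4, that is, three blocks of history plus one block of future steps.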
if (colBottom.Count == 1)
    generate_mask(m_blobMask);

if (m_ipQLayer == null)
    if (colBottom.Count == 1)
        m_rgShape.Add(colBottom[0].num);
        m_rgShape.Add(m_nNumFut);
        m_rgShape.Add(colBottom[0].count(2));

    addBtmTop(m_blobQ, m_blobIpQ);
    m_ipQLayer.Setup(m_colBtm, m_colTop);

if (m_ipKLayer == null)
    addBtmTop(m_blobK, m_blobIpK);
    m_ipKLayer.Setup(m_colBtm, m_colTop);

if (m_ipVLayer == null)
    addBtmTop(m_blobV, m_blobIpV);
    m_ipVLayer.Setup(m_colBtm, m_colTop);

if (m_transpose == null)
    reshapeFwd(m_blobIpQ, m_nNumHeads);
    reshapeFwd(m_blobIpK, m_nNumHeads);
    reshapeFwd(m_blobIpV, m_nNumHeads);
    reshapeRepeat(m_blobIpVfull, m_blobIpV.shape(), m_nNumHeads);

    addBtmTop(m_blobIpQ, m_blobIpQt);
    m_transpose.Setup(m_colBtm, m_colTop);
    addBtmTop(m_blobIpK, m_blobIpKt);
    m_transpose.Setup(m_colBtm, m_colTop);
    addBtmTop(m_blobIpVfull, m_blobIpVt);
    m_transpose.Setup(m_colBtm, m_colTop);

if (m_blobIpKt1.count() == 0)
    List<int> rgShape = Utility.Clone<int>(m_blobIpKt.shape());
    int nTemp = rgShape[2];
    rgShape[2] = rgShape[3];

m_blobAttnScores1.MatMul(m_blobIpQt, m_blobIpKt1, true);

if (m_softmax == null)
    addBtmTop(m_blobAttnScores1, m_blobAttnScoresAllHeads);
    m_softmax.Setup(m_colBtm, m_colTop);

m_blobAttnOutputAllHeads.MatMul(m_blobAttnScoresAllHeads, m_blobIpVt, true);

if (m_ipOutLayer == null)
    reshapeSansHead(colTop[1], m_blobAttnOutputAllHeads.shape());
    reshapeSansHead(colTop[2], m_blobAttnScoresAllHeads.shape());
    addBtmTop(colTop[1], colTop[0]);
    m_ipOutLayer.Setup(m_colBtm, m_colTop);
// reshapeNeeded(): detect whether any bottom shape has changed since the last reshape.
bool bShapeQDirty = m_rgShapeQ == null || !colBottom[0].CompareShape(m_rgShapeQ);
bool bShapeKDirty = (colBottom.Count == 1) ? bShapeQDirty : m_rgShapeK == null || !colBottom[1].CompareShape(m_rgShapeK);
bool bShapeVDirty = (colBottom.Count == 1) ? bShapeQDirty : m_rgShapeV == null || !colBottom[2].CompareShape(m_rgShapeV);
bool bShapeMaskDirty = false;

m_rgShapeQ = Utility.Clone<int>(colBottom[0].shape());
m_rgShapeK = m_rgShapeQ;
m_rgShapeV = m_rgShapeQ;

if (colBottom.Count > 1)
    m_rgShapeK = Utility.Clone<int>(colBottom[1].shape());
if (colBottom.Count > 2)
    m_rgShapeV = Utility.Clone<int>(colBottom[2].shape());
if (colBottom.Count > 3)
    bShapeMaskDirty = m_rgShapeMask == null || !colBottom[3].CompareShape(m_rgShapeMask);
    m_rgShapeMask = Utility.Clone<int>(colBottom[3].shape());

if (bShapeQDirty || bShapeKDirty || bShapeVDirty || bShapeMaskDirty)
// Reshape: size the internal blobs to match the (possibly new) bottom shapes.
if (colBottom.Count == 1)
    m_rgShape.Add(colBottom[0].num);
    m_rgShape.Add(m_nNumFut);
    m_rgShape.Add(colBottom[0].count(2));

addBtmTop(m_blobQ, m_blobIpQ);
m_ipQLayer.Reshape(m_colBtm, m_colTop);
addBtmTop(m_blobK, m_blobIpK);
m_ipKLayer.Reshape(m_colBtm, m_colTop);
addBtmTop(m_blobV, m_blobIpV);
m_ipVLayer.Reshape(m_colBtm, m_colTop);

reshapeFwd(m_blobIpQ, m_nNumHeads);
reshapeFwd(m_blobIpK, m_nNumHeads);
reshapeFwd(m_blobIpV, m_nNumHeads);
reshapeRepeat(m_blobIpVfull, m_blobIpV.shape(), m_nNumHeads);

addBtmTop(m_blobIpQ, m_blobIpQt);
m_transpose.Reshape(m_colBtm, m_colTop);
addBtmTop(m_blobIpK, m_blobIpKt);
m_transpose.Reshape(m_colBtm, m_colTop);
addBtmTop(m_blobIpVfull, m_blobIpVt);
m_transpose.Reshape(m_colBtm, m_colTop);

List<int> rgShape = Utility.Clone<int>(m_blobIpKt.shape());
int nTemp = rgShape[2];
rgShape[2] = rgShape[3];

m_blobAttnScores1.MatMul(m_blobIpQt, m_blobIpKt1, true);

addBtmTop(m_blobAttnScores1, m_blobAttnScoresAllHeads);
m_softmax.Reshape(m_colBtm, m_colTop);

colTop[1].MatMul(m_blobAttnScoresAllHeads, m_blobIpVt, true);

reshapeSansHead(colTop[1], m_blobAttnOutputAllHeads.shape());
reshapeSansHead(colTop[2], m_blobAttnScoresAllHeads.shape());

addBtmTop(colTop[1], colTop[0]);
m_ipOutLayer.Reshape(m_colBtm, m_colTop);
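To make the shape flow through Reshape concrete, the standalone sketch below traces hypothetical dimensions through the pipeline. It assumes the transpose layer swaps the time and head axes so the batched MatMul runs per head, and it does not pin down the per-head value width, which the excerpt leaves open; queries cover only the m_nNumFut future steps while keys and values cover history plus future.

using System;

public static class AttentionShapeSketch
{
    public static void Main()
    {
        // Hypothetical dimensions: batch N, future (query) steps Tq,
        // history + future (key) steps Tk, model width D, heads H.
        int N = 16, Tq = 30, Tk = 120, D = 64, H = 4;
        int dHead = D / H;   // per-head width after reshapeFwd
        int dVal = dHead;    // assumed per-head value width (not pinned down by the excerpt)

        int[] qt = { N, H, Tq, dHead };    // ipqt: transposed query
        int[] kt1 = { N, H, dHead, Tk };   // ipkt1: key with its last two axes swapped
        int[] scores = { N, H, Tq, Tk };   // attn_scores_all_heads = Qt x Kt1
        int[] vt = { N, H, Tk, dVal };     // ipvt: transposed (repeated) value
        int[] output = { N, H, Tq, dVal }; // attn_output_all_heads = scores x Vt

        Console.WriteLine("scores: [" + string.Join(",", scores) + "]  output: [" + string.Join(",", output) + "]");
    }
}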
private void copy_to_q_fwd(int nCount, Blob<T> bBtm, Blob<T> bTop)
    int nOuterNum = bBtm.num;
    int nChannels = m_nBlocks;

private void copy_to_q_bwd(int nCount, Blob<T> bBtm, Blob<T> bTop)
    int nOuterNum = bBtm.num;
    int nChannels = m_nBlocks;
// forward: compute the Q, K, V projections, the multi-head attention, and the output projection.
copy_to_q_fwd(colBottom.Count, colBottom[0], m_blobQ);

addBtmTop(m_blobQ, m_blobIpQ);
m_ipQLayer.Forward(m_colBtm, m_colTop);

m_blobK.CopyFrom((colBottom.Count == 1) ? colBottom[0] : colBottom[1]);
addBtmTop(m_blobK, m_blobIpK);
m_ipKLayer.Forward(m_colBtm, m_colTop);

m_blobV.CopyFrom((colBottom.Count == 1) ? colBottom[0] : colBottom[2]);
addBtmTop(m_blobV, m_blobIpV);
m_ipVLayer.Forward(m_colBtm, m_colTop);

reshapeFwd(m_blobIpQ, m_nNumHeads);
reshapeFwd(m_blobIpK, m_nNumHeads);
reshapeFwd(m_blobIpV, m_nNumHeads);
reshapeRepeat(m_blobIpVfull, m_blobIpV.shape(), m_nNumHeads);

int nInnerNum = m_blobIpV.count(2);
for (int i = 0; i < m_nNumHeads; i++)

addBtmTop(m_blobIpQ, m_blobIpQt);
m_transpose.Forward(m_colBtm, m_colTop);
addBtmTop(m_blobIpK, m_blobIpKt);
m_transpose.Forward(m_colBtm, m_colTop);
addBtmTop(m_blobIpVfull, m_blobIpVt);
m_transpose.Forward(m_colBtm, m_colTop);

m_blobAttnScores1.MatMul(m_blobIpQt, m_blobIpKt1, true);

if (m_blobMask != null)

addBtmTop(m_blobAttnScores1, m_blobAttnScoresAllHeads);
m_softmax.Forward(m_colBtm, m_colTop);

m_blobAttnOutputAllHeads.MatMul(m_blobAttnScoresAllHeads, m_blobIpVt, true);

calculateChannelMeanAcrossChannelsFwd(m_blobAttnOutputAllHeads, colTop[1]);
calculateChannelMeanAcrossChannelsFwd(m_blobAttnScoresAllHeads, colTop[2]);

addBtmTop(colTop[1], colTop[0]);
m_ipOutLayer.Forward(m_colBtm, m_colTop);
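Taken together, the forward pass computes, per head, softmax(mask(Q·Kᵀ·scale))·V, with m_dfScale = 1.0 / Math.Sqrt(m_nDModel) set in LayerSetUp (the exact points where the scale and mask are applied are elided in this excerpt). The standalone sketch below illustrates the single-head computation on plain arrays; the class name, array layout, and the large negative fill for masked positions are assumptions for illustration, not the GPU code path.

using System;

public static class ScaledDotProductSketch
{
    // rgQ: [tq, d], rgK and rgV: [tk, d], rgMask: [tq, tk] with nonzero = masked (or null).
    public static float[,] Attend(float[,] rgQ, float[,] rgK, float[,] rgV, float[,] rgMask)
    {
        int tq = rgQ.GetLength(0);
        int tk = rgK.GetLength(0);
        int d = rgQ.GetLength(1);
        double dfScale = 1.0 / Math.Sqrt(d);
        float[,] rgOut = new float[tq, d];

        for (int i = 0; i < tq; i++)
        {
            // Scaled dot-product scores for query row i; masked positions get a large negative value.
            double[] rgScore = new double[tk];
            double dfMax = double.MinValue;
            for (int j = 0; j < tk; j++)
            {
                double dfS = 0;
                for (int m = 0; m < d; m++)
                    dfS += rgQ[i, m] * rgK[j, m];
                dfS *= dfScale;
                if (rgMask != null && rgMask[i, j] != 0)
                    dfS = -1e9;
                rgScore[j] = dfS;
                dfMax = Math.Max(dfMax, dfS);
            }

            // Softmax over the key axis.
            double dfSum = 0;
            for (int j = 0; j < tk; j++)
            {
                rgScore[j] = Math.Exp(rgScore[j] - dfMax);
                dfSum += rgScore[j];
            }

            // Weighted sum of the value rows.
            for (int j = 0; j < tk; j++)
            {
                double dfW = rgScore[j] / dfSum;
                for (int m = 0; m < d; m++)
                    rgOut[i, m] += (float)(dfW * rgV[j, m]);
            }
        }
        return rgOut;
    }

    public static void Main()
    {
        float[,] rgQ = { { 1, 0 }, { 0, 1 } };
        float[,] rgK = { { 1, 0 }, { 0, 1 }, { 1, 1 } };
        float[,] rgV = { { 1, 0 }, { 0, 1 }, { 0.5f, 0.5f } };
        float[,] rgOut = Attend(rgQ, rgK, rgV, null);
        Console.WriteLine(rgOut[0, 0] + "," + rgOut[0, 1]);
    }
}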
// backward: propagate gradients back through the output projection, attention, and Q/K/V projections.
addBtmTop(colTop[1], colTop[0]);
m_ipOutLayer.Backward(m_colTop, rgbPropagateDown, m_colBtm);

calculateChannelMeanAcrossChannelsBwd(m_blobAttnOutputAllHeads, colTop[1]);

m_blobAttnOutputAllHeads.MatMulGrad(m_blobAttnScoresAllHeads, m_blobIpVt, m_blobWork);

addBtmTop(m_blobAttnScores1, m_blobAttnScoresAllHeads);
m_softmax.Backward(m_colTop, rgbPropagateDown, m_colBtm);

m_blobAttnScores1.MatMulGrad(m_blobIpQt, m_blobIpKt1, m_blobWork, m_dfScale);

addBtmTop(m_blobIpQ, m_blobIpQt);
m_transpose.Backward(m_colTop, rgbPropagateDown, m_colBtm);
addBtmTop(m_blobIpK, m_blobIpKt);
m_transpose.Backward(m_colTop, rgbPropagateDown, m_colBtm);
addBtmTop(m_blobIpVfull, m_blobIpVt);
m_transpose.Backward(m_colTop, rgbPropagateDown, m_colBtm);

int nOuterNum = m_blobIpVfull.count(0, 2);
for (int i = 1; i < m_nNumHeads; i++)

reshapeBwd(m_blobIpQ, m_nNumHeads);
reshapeBwd(m_blobIpK, m_nNumHeads);
reshapeBwd(m_blobIpV, m_nNumHeads);

addBtmTop(m_blobQ, m_blobIpQ);
m_ipQLayer.Backward(m_colTop, rgbPropagateDown, m_colBtm);
addBtmTop(m_blobK, m_blobIpK);
m_ipKLayer.Backward(m_colTop, rgbPropagateDown, m_colBtm);
addBtmTop(m_blobV, m_blobIpV);
m_ipVLayer.Backward(m_colTop, rgbPropagateDown, m_colBtm);

if (colBottom.Count == 1)
    copy_to_q_bwd(colBottom.Count, colBottom[0], m_blobQ);
    m_cuda.add(colBottom[0].count(), colBottom[0].gpu_diff, m_blobK.gpu_diff, colBottom[0].mutable_gpu_diff);
    m_cuda.add(colBottom[0].count(), colBottom[0].gpu_diff, m_blobV.gpu_diff, colBottom[0].mutable_gpu_diff);
else
    colBottom[0].CopyFrom(m_blobQ, true);
    colBottom[1].CopyFrom(m_blobK, true);
    colBottom[2].CopyFrom(m_blobV, true);
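In the self-attention case a single bottom feeds q, k, and v, so its gradient is the sum of the three branch gradients: copy_to_q_bwd scatters dQ into the bottom diff and the two m_cuda.add calls accumulate dK and dV on top of it. A minimal standalone sketch of that accumulation (the names and the pre-filled dQ are assumptions for illustration):

using System;

public static class SelfAttentionGradSketch
{
    // rgBtmDiff already holds dQ; add dK and dV element-wise,
    // mirroring bottom.diff += k.diff and bottom.diff += v.diff.
    public static void AccumulateQkvGrads(float[] rgBtmDiff, float[] rgKDiff, float[] rgVDiff)
    {
        for (int i = 0; i < rgBtmDiff.Length; i++)
            rgBtmDiff[i] += rgKDiff[i] + rgVDiff[i];
    }

    public static void Main()
    {
        float[] rgBtm = { 1, 1 };   // already holds dQ
        AccumulateQkvGrads(rgBtm, new float[] { 2, 2 }, new float[] { 3, 3 });
        Console.WriteLine(rgBtm[0] + "," + rgBtm[1]);   // 6,6
    }
}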
The Log class provides general output in text form.
void CHECK(bool b, string str)
Test a flag for true.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
The Utility class provides general utility functions.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void SetDiff(double df)
Set all blob diff to the value specified.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
The Blob is the main holder of data that moves through the Layers of the Net.
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
void ShareData(Blob< T > b)
Set the data to point to the data of the other blob – useful in Layers which simply perform a copy in...
void MatMul(Blob< T > blobA, Blob< T > blobB, bool bReshape=false, bool bTransA=false, bool bTransB=false, double dfScale=1.0, bool bADiff=false, bool bBDiff=false, bool bCDiff=false)
MatMul blobA with blobB and place the result in this blob (e.g. this = matmul(A, B))....
void MatMulGrad(Blob< T > blobA, Blob< T > blobB, Blob< T > blobWork, double dfScale=1.0)
Calculates and propagates the gradient for blobA and BlobB given the input gradient in this blob's di...
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
void CopyFromAndTransposeHeightWidth(Blob< T > blobSrc, bool bCopyDiff=false, bool bUseCuda=true)
Copy from a source Blob and transpose the height and width of the copy.
T[] mutable_cpu_data
Get data from the GPU and bring it over to the host, or Set data from the Host and send it over to th...
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
void scale_data(double df)
Scale the data by a scaling factor.
int width
DEPRECATED; legacy shape accessor width: use shape(3) instead.
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
int count()
Returns the total number of items in the Blob.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
void scale_diff(double df)
Scale the diff by a scaling factor.
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
abstract void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Adjust the shapes of top blobs and internal buffers to accommodate the shapes of the bottom blobs.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
static Layer< T > Create(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db=null, TransferInput trxinput=null)
Create a new Layer based on the LayerParameter.
LayerParameter.LayerType m_type
Specifies the Layer type.
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
LayerParameter convertLayerParam(LayerParameter pChild, LayerParameter pParent)
Called to convert a parent LayerParameterEx, used in blob sharing, with a child layer parameter.
The MultiHeadAttentionInterpLayer implements the Multi-head Attention Interpretive Layer
override void dispose()
Releases all GPU and host resources used by the Layer.
override int MaxBottomBlobs
Returns the max number of required bottom (input) Blobs: q, k, v, mask
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' provided.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the stacked embedding numeric and categorical value inputs.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override bool reshapeNeeded(BlobCollection< T > colBottom, BlobCollection< T > colTop, bool bReset=false)
Determines if a reshape is needed or not.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the top (output) blobs.
override int MinBottomBlobs
Returns the min number of required bottom (input) Blobs: input -> q,k,v, mask is generated
MultiHeadAttentionInterpLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The constructor.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: y, attn_out, attn_scores
Specifies whether to use the NVIDIA cuDnn version or Caffe version of a given forward/backward operat...
Engine engine
Specifies the Engine in use.
Engine
Defines the type of engine to use.
double sigma_init
Specifies the initialization value for the sigma weight and sigma bias used when 'enable_noise' = tru...
FillerParameter weight_filler
The filler for the weights.
int axis
Specifies the first axis to be lumped into a single inner product computation; all preceding axes are...
bool enable_noise
Enable/disable noise in the inner-product layer (default = false).
double bias_grad_scale
Specifies a scaling value applied to the bias multiplier and then unapplied after calculating the bia...
FillerParameter bias_filler
The filler for the bias.
uint num_output
The number of outputs for the layer.
bool bias_term
Whether to have bias terms or not.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
SoftmaxParameter softmax_param
Returns the parameter set when initialized with LayerType.SOFTMAX
MultiHeadAttentionInterpParameter multihead_attention_interp_param
Returns the parameter set when initialized with LayerType.MULTIHEAD_ATTENTION_INTERP
InnerProductParameter inner_product_param
Returns the parameter set when initialized with LayerType.INNERPRODUCT
TransposeParameter transpose_param
Returns the parameter set when initialized with LayerType.TRANSPOSE
LayerType
Specifies the layer type.
int axis
The axis along which to perform the softmax – may be negative to index from the end (e....
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
DIR
Defines the direction of data flow.
BLOB_TYPE
Defines the type of data held by a given Blob.
The MyCaffe.layers.tft namespace contains all TFT related layers.
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...