using System;
using System.Collections.Generic;

// Field excerpt: persistent reshape vector { batch, block_size, embed } for the positional-encoding blob.
List<int> m_rgShape = new List<int>() { 1, 1, 1 };

// Constructor excerpt: the embedding scale is sqrt(embed), and the internal
// positional-encoding blob is named after the layer.
m_dfScale = Math.Sqrt(m_nEmbed);
m_blobPosEnc.Name = p.name + " posenc";

// setup_internal_blobs(col) excerpt: expose the positional-encoding blob.
col.Add(m_blobPosEnc);
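The constant computed in the constructor excerpt above is the usual Transformer embedding scale,

\[
\texttt{m\_dfScale} = \sqrt{d_{\text{embed}}},
\]

which (per the forward pass further down) amplifies the token embedding relative to the unit-amplitude sinusoidal table before the two are added.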
// Reshape(colBottom, colTop) excerpt: rebuild the positional-encoding table
// whenever the required (batch, block_size, embed) shape changes.
int nBatch = colBottom[0].num;
m_rgShape[0] = nBatch;
m_rgShape[1] = m_nBlockSize;
m_rgShape[2] = m_nEmbed;

if (!m_blobPosEnc.CompareShape(m_rgShape, true))
{
    // Size for the full batch, then temporarily view a single
    // (1, block_size, embed, 1) slice while filling the table on the CPU.
    m_blobPosEnc.Reshape(m_rgShape);
    m_blobPosEnc.Reshape(1, m_rgShape[1], m_rgShape[2], 1);
    int nDim = m_nBlockSize * m_nEmbed;

    if (typeof(T) == typeof(float))
    {
        float[] rgPosEnc1 = new float[nDim];

        for (int pos = 0; pos < m_nBlockSize; pos++)
        {
            for (int i = 0; i < m_nEmbed; i++)
            {
                int nIdx = pos * m_nEmbed + i;
                double df1 = 2 * i / (double)m_nEmbed;
                double dfPow = Math.Pow(10000, df1);
                double dfPos = pos / dfPow;

                if (i % 2 == 0)  // even embedding indices take the sine
                {
                    double dfSin = Math.Sin(dfPos);
                    rgPosEnc1[nIdx] = (float)dfSin;
                }
                else             // odd embedding indices take the cosine
                {
                    double dfCos = Math.Cos(dfPos);
                    rgPosEnc1[nIdx] = (float)dfCos;
                }
            }
        }

        m_blobPosEnc.mutable_cpu_data = convert(rgPosEnc1);
    }
    else
    {
        double[] rgPosEnc1 = new double[nDim];

        for (int pos = 0; pos < m_nBlockSize; pos++)
        {
            for (int i = 0; i < m_nEmbed; i++)
            {
                int nIdx = pos * m_nEmbed + i;
                double df1 = 2 * i / (double)m_nEmbed;
                double dfPow = Math.Pow(10000, df1);
                double dfPos = pos / dfPow;

                if (i % 2 == 0)
                {
                    double dfSin = Math.Sin(dfPos);
                    rgPosEnc1[nIdx] = dfSin;
                }
                else
                {
                    double dfCos = Math.Cos(dfPos);
                    rgPosEnc1[nIdx] = dfCos;
                }
            }
        }

        m_blobPosEnc.mutable_cpu_data = convert(rgPosEnc1);
    }

    // Restore the full batch shape and replicate the table into each batch slot.
    m_blobPosEnc.Reshape(m_rgShape);

    for (int i = 1; i < nBatch; i++)
    {
        m_cuda.copy(nDim, m_blobPosEnc.gpu_data, m_blobPosEnc.mutable_gpu_data, 0, i * nDim);
    }
}
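For reference, the table filled by the nested loops above is the sinusoidal positional encoding of the original Transformer. With pos the position within the block, i the embedding index and d_embed = m_nEmbed, each entry is

\[
\mathrm{PE}(pos,\,i) \;=\;
\begin{cases}
\sin\!\bigl(pos \,/\, 10000^{\,2i/d_{\text{embed}}}\bigr), & i \text{ even},\\[4pt]
\cos\!\bigl(pos \,/\, 10000^{\,2i/d_{\text{embed}}}\bigr), & i \text{ odd},
\end{cases}
\]

matching dfPos = pos / Math.Pow(10000, 2 * i / (double)m_nEmbed) in the listing. The even/odd branch condition is not visible in the excerpt and is reconstructed here on the assumption that the standard alternation of sine and cosine over embedding indices is intended.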
// forward(colBottom, colTop) excerpt: add the positional table to the scaled input embedding.
long hBottomData = colBottom[0].gpu_data;
long hTopData = colTop[0].mutable_gpu_data;
int nCount = colBottom[0].count();

m_cuda.add(nCount, m_blobPosEnc.gpu_data, hBottomData, hTopData, m_dfScale);
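The exact argument convention of m_cuda.add is not shown on this page, but combined with the backward pass below (which scales the incoming gradient by m_dfScale), the forward pass is consistent with the standard Transformer input stage,

\[
\text{top} \;=\; \sqrt{d_{\text{embed}}}\cdot \text{bottom} \;+\; \mathrm{PE},
\]

i.e. the embedding is scaled up before the constant positional table is added element-wise.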
// backward(colTop, rgbPropagateDown, colBottom) excerpt: the table is constant,
// so the input gradient is the top gradient scaled by m_dfScale.
long hTopDiff = colTop[0].gpu_diff;
long hBottomDiff = colBottom[0].mutable_gpu_diff;
int nCount = colBottom[0].count();

m_cuda.scale(nCount, m_dfScale, hTopDiff, hBottomDiff);
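Because the positional table does not depend on the input, backpropagation through the layer reduces to scaling the top gradient,

\[
\frac{\partial L}{\partial\,\text{bottom}} \;=\; \sqrt{d_{\text{embed}}}\cdot\frac{\partial L}{\partial\,\text{top}},
\]

which is exactly the single m_cuda.scale call above. As a sanity check for the CUDA path, the following self-contained C# sketch recomputes the same table and forward/backward math on the CPU. It is illustrative only and independent of MyCaffe; the names PosEncReference, BuildTable, Forward and Backward are hypothetical.

using System;

// Minimal CPU reference for a sinusoidal positional-encoding layer
// (illustrative sketch; not part of MyCaffe).
public static class PosEncReference
{
    // Builds the (blockSize x embed) table: sin on even indices, cos on odd.
    public static double[] BuildTable(int nBlockSize, int nEmbed)
    {
        double[] rgPosEnc = new double[nBlockSize * nEmbed];

        for (int pos = 0; pos < nBlockSize; pos++)
        {
            for (int i = 0; i < nEmbed; i++)
            {
                int nIdx = pos * nEmbed + i;
                double dfPos = pos / Math.Pow(10000, 2.0 * i / nEmbed);
                rgPosEnc[nIdx] = (i % 2 == 0) ? Math.Sin(dfPos) : Math.Cos(dfPos);
            }
        }

        return rgPosEnc;
    }

    // top = sqrt(embed) * bottom + posEnc, element-wise over one block.
    public static double[] Forward(double[] rgBottom, double[] rgPosEnc, int nEmbed)
    {
        double dfScale = Math.Sqrt(nEmbed);
        double[] rgTop = new double[rgBottom.Length];

        for (int i = 0; i < rgBottom.Length; i++)
        {
            rgTop[i] = dfScale * rgBottom[i] + rgPosEnc[i];
        }

        return rgTop;
    }

    // bottomDiff = sqrt(embed) * topDiff; the table contributes no gradient.
    public static double[] Backward(double[] rgTopDiff, int nEmbed)
    {
        double dfScale = Math.Sqrt(nEmbed);
        double[] rgBottomDiff = new double[rgTopDiff.Length];

        for (int i = 0; i < rgTopDiff.Length; i++)
        {
            rgBottomDiff[i] = dfScale * rgTopDiff[i];
        }

        return rgBottomDiff;
    }
}

For example, Forward applied to an all-zero input simply reproduces BuildTable's output, which is a quick way to eyeball the sin/cos pattern.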
The Log class provides general output in text form.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
The Blob is the main holder of data that moves through the Layers of the Net.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
bool shareLayerBlob(Blob< T > b, List< int > rgMinShape)
Attempts to share a Layer Blob if another parameter Blob with the same name and acceptable size is found.
BlobCollection< T > m_colInternalBlobs
Specifies internal blobs used by the layer.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
LayerParameter.LayerType m_type
Specifies the Layer type.
The PositionalEncodingLayer is a neuron layer that adds positional encoding to the input (a usage sketch appears at the end of this section).
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' provided.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override int ExactNumBottomBlobs
Returns the exact number of required bottom (input) Blobs: embed
override void dispose()
Release any resources used.
PositionalEncodingLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The PositionalEncodingLayer constructor.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: embed
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the data as needed by the layer.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the PositionalEncoder value inputs.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
PositionalEncoderParameter positional_encoder_param
Returns the parameter set when initialized with LayerType.POSITIONAL_ENCODER.
LayerType
Specifies the layer type.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
The MyCaffe.layers.gpt namespace contains all GPT related layers.
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-source project.
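Putting the members above together, here is a hedged sketch of driving the layer directly. Everything used is taken from the documentation on this page (the constructor, name, positional_encoder_param, LayerSetUp, Reshape and forward), except the PositionalEncoderParameter property names embed and block_size and the LayerParameter constructor overload taking a LayerType, which are assumptions made for illustration. The CudaDnn connection, Log and input/output Blobs are passed in rather than constructed, and if forward is protected in the Layer<T> base class, the corresponding public wrapper would be called instead.

using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;
using MyCaffe.layers.gpt;

public static class PosEncUsage
{
    // Drives PositionalEncodingLayer<float> on an existing CudaDnn connection.
    // blobEmbed holds the (batch, block_size, embed) token embedding; blobOut receives the result.
    public static void Run(CudaDnn<float> cuda, Log log, Blob<float> blobEmbed, Blob<float> blobOut)
    {
        LayerParameter p = new LayerParameter(LayerParameter.LayerType.POSITIONAL_ENCODER); // assumed ctor overload
        p.name = "posenc";
        p.positional_encoder_param.embed = 192;        // assumed property name
        p.positional_encoder_param.block_size = 128;   // assumed property name

        PositionalEncodingLayer<float> layer = new PositionalEncodingLayer<float>(cuda, log, p);

        BlobCollection<float> colBottom = new BlobCollection<float>();
        colBottom.Add(blobEmbed);
        BlobCollection<float> colTop = new BlobCollection<float>();
        colTop.Add(blobOut);

        layer.LayerSetUp(colBottom, colTop);
        layer.Reshape(colBottom, colTop);
        layer.forward(colBottom, colTop);   // blobOut = sqrt(embed) * blobEmbed + positional table
    }
}

After forward, blobOut holds the scaled embedding plus the sinusoidal table, replicated across the batch as done in the Reshape excerpt above.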