using System;
using System.Collections.Generic;

// Field excerpt: persistent reshape vector { batch, block_size, embed } for the positional-encoding blob.
List<int> m_rgShape = new List<int>() { 1, 1, 1 };

// Constructor excerpt: the embedding scale is sqrt(embed), and the internal
// positional-encoding blob is named after the layer.
m_dfScale = Math.Sqrt(m_nEmbed);
m_blobPosEnc.Name = p.name + " posenc";

// setup_internal_blobs(col) excerpt: expose the positional-encoding blob.
col.Add(m_blobPosEnc);
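The constant computed in the constructor excerpt above is the usual Transformer embedding scale,

\[
\texttt{m\_dfScale} = \sqrt{d_{\text{embed}}},
\]

which (per the forward pass further down) amplifies the token embedding relative to the unit-amplitude sinusoidal table before the two are added.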
// Reshape(colBottom, colTop) excerpt: rebuild the positional-encoding table
// whenever the required (batch, block_size, embed) shape changes.
int nBatch = colBottom[0].num;
m_rgShape[0] = nBatch;
m_rgShape[1] = m_nBlockSize;
m_rgShape[2] = m_nEmbed;

if (!m_blobPosEnc.CompareShape(m_rgShape, true))
{
    // Size for the full batch, then temporarily view a single
    // (1, block_size, embed, 1) slice while filling the table on the CPU.
    m_blobPosEnc.Reshape(m_rgShape);
    m_blobPosEnc.Reshape(1, m_rgShape[1], m_rgShape[2], 1);
    int nDim = m_nBlockSize * m_nEmbed;

    if (typeof(T) == typeof(float))
    {
        float[] rgPosEnc1 = new float[nDim];

        for (int pos = 0; pos < m_nBlockSize; pos++)
        {
            for (int i = 0; i < m_nEmbed; i++)
            {
                int nIdx = pos * m_nEmbed + i;
                double df1 = 2 * i / (double)m_nEmbed;
                double dfPow = Math.Pow(10000, df1);
                double dfPos = pos / dfPow;

                if (i % 2 == 0)  // even embedding indices take the sine
                {
                    double dfSin = Math.Sin(dfPos);
                    rgPosEnc1[nIdx] = (float)dfSin;
                }
                else             // odd embedding indices take the cosine
                {
                    double dfCos = Math.Cos(dfPos);
                    rgPosEnc1[nIdx] = (float)dfCos;
                }
            }
        }

        m_blobPosEnc.mutable_cpu_data = convert(rgPosEnc1);
    }
    else
    {
        double[] rgPosEnc1 = new double[nDim];

        for (int pos = 0; pos < m_nBlockSize; pos++)
        {
            for (int i = 0; i < m_nEmbed; i++)
            {
                int nIdx = pos * m_nEmbed + i;
                double df1 = 2 * i / (double)m_nEmbed;
                double dfPow = Math.Pow(10000, df1);
                double dfPos = pos / dfPow;

                if (i % 2 == 0)
                {
                    double dfSin = Math.Sin(dfPos);
                    rgPosEnc1[nIdx] = dfSin;
                }
                else
                {
                    double dfCos = Math.Cos(dfPos);
                    rgPosEnc1[nIdx] = dfCos;
                }
            }
        }

        m_blobPosEnc.mutable_cpu_data = convert(rgPosEnc1);
    }

    // Restore the full batch shape and replicate the table into each batch slot.
    m_blobPosEnc.Reshape(m_rgShape);

    for (int i = 1; i < nBatch; i++)
    {
        m_cuda.copy(nDim, m_blobPosEnc.gpu_data, m_blobPosEnc.mutable_gpu_data, 0, i * nDim);
    }
}
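For reference, the table filled by the nested loops above is the sinusoidal positional encoding of the original Transformer. With pos the position within the block, i the embedding index and d_embed = m_nEmbed, each entry is

\[
\mathrm{PE}(pos,\,i) \;=\;
\begin{cases}
\sin\!\bigl(pos \,/\, 10000^{\,2i/d_{\text{embed}}}\bigr), & i \text{ even},\\[4pt]
\cos\!\bigl(pos \,/\, 10000^{\,2i/d_{\text{embed}}}\bigr), & i \text{ odd},
\end{cases}
\]

matching dfPos = pos / Math.Pow(10000, 2 * i / (double)m_nEmbed) in the listing. The even/odd branch condition is not visible in the excerpt and is reconstructed here on the assumption that the standard alternation of sine and cosine over embedding indices is intended.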
// forward(colBottom, colTop) excerpt: add the positional table to the scaled input embedding.
long hBottomData = colBottom[0].gpu_data;
long hTopData = colTop[0].mutable_gpu_data;
int nCount = colBottom[0].count();

m_cuda.add(nCount, m_blobPosEnc.gpu_data, hBottomData, hTopData, m_dfScale);
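The exact argument convention of m_cuda.add is not shown on this page, but combined with the backward pass below (which scales the incoming gradient by m_dfScale), the forward pass is consistent with the standard Transformer input stage,

\[
\text{top} \;=\; \sqrt{d_{\text{embed}}}\cdot \text{bottom} \;+\; \mathrm{PE},
\]

i.e. the embedding is scaled up before the constant positional table is added element-wise.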
// backward(colTop, rgbPropagateDown, colBottom) excerpt: the table is constant,
// so the input gradient is the top gradient scaled by m_dfScale.
long hTopDiff = colTop[0].gpu_diff;
long hBottomDiff = colBottom[0].mutable_gpu_diff;
int nCount = colBottom[0].count();

m_cuda.scale(nCount, m_dfScale, hTopDiff, hBottomDiff);
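Because the positional table does not depend on the input, backpropagation through the layer reduces to scaling the top gradient,

\[
\frac{\partial L}{\partial\,\text{bottom}} \;=\; \sqrt{d_{\text{embed}}}\cdot\frac{\partial L}{\partial\,\text{top}},
\]

which is exactly the single m_cuda.scale call above. As a sanity check for the CUDA path, the following self-contained C# sketch recomputes the same table and forward/backward math on the CPU. It is illustrative only and independent of MyCaffe; the names PosEncReference, BuildTable, Forward and Backward are hypothetical.

using System;

// Minimal CPU reference for a sinusoidal positional-encoding layer
// (illustrative sketch; not part of MyCaffe).
public static class PosEncReference
{
    // Builds the (blockSize x embed) table: sin on even indices, cos on odd.
    public static double[] BuildTable(int nBlockSize, int nEmbed)
    {
        double[] rgPosEnc = new double[nBlockSize * nEmbed];

        for (int pos = 0; pos < nBlockSize; pos++)
        {
            for (int i = 0; i < nEmbed; i++)
            {
                int nIdx = pos * nEmbed + i;
                double dfPos = pos / Math.Pow(10000, 2.0 * i / nEmbed);
                rgPosEnc[nIdx] = (i % 2 == 0) ? Math.Sin(dfPos) : Math.Cos(dfPos);
            }
        }

        return rgPosEnc;
    }

    // top = sqrt(embed) * bottom + posEnc, element-wise over one block.
    public static double[] Forward(double[] rgBottom, double[] rgPosEnc, int nEmbed)
    {
        double dfScale = Math.Sqrt(nEmbed);
        double[] rgTop = new double[rgBottom.Length];

        for (int i = 0; i < rgBottom.Length; i++)
        {
            rgTop[i] = dfScale * rgBottom[i] + rgPosEnc[i];
        }

        return rgTop;
    }

    // bottomDiff = sqrt(embed) * topDiff; the table contributes no gradient.
    public static double[] Backward(double[] rgTopDiff, int nEmbed)
    {
        double dfScale = Math.Sqrt(nEmbed);
        double[] rgBottomDiff = new double[rgTopDiff.Length];

        for (int i = 0; i < rgTopDiff.Length; i++)
        {
            rgBottomDiff[i] = dfScale * rgTopDiff[i];
        }

        return rgBottomDiff;
    }
}

For example, Forward applied to an all-zero input simply reproduces BuildTable's output, which is a quick way to eyeball the sin/cos pattern.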
The Log class provides general output in text form.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
The Blob is the main holder of data that moves through the Layers of the Net.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
bool shareLayerBlob(Blob< T > b, List< int > rgMinShape)
Attempts to share a Layer Blob if another parameter Blob with the same name and acceptable size is found.
BlobCollection< T > m_colInternalBlobs
Specifies internal blobs used by the layer.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
LayerParameter.LayerType m_type
Specifies the Layer type.
The PositionalEncodingLayer is a neuron layer that adds positional encoding to the input (a usage sketch appears at the end of this section).
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' provided.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override int ExactNumBottomBlobs
Returns the exact number of required bottom (input) Blobs: embed
override void dispose()
Release any resources used.
PositionalEncodingLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The PositionalEncodingLayer constructor.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: embed
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the data as needed by the layer.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the PositionalEncoder value inputs.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
PositionalEncoderParameter positional_encoder_param
Returns the parameter set when initialized with LayerType.POSITIONAL_ENCODER.
LayerType
Specifies the layer type.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
The MyCaffe.layers.gpt namespace contains all GPT related layers.
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-source project.
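Putting the members above together, here is a hedged sketch of driving the layer directly. Everything used is taken from the documentation on this page (the constructor, name, positional_encoder_param, LayerSetUp, Reshape and forward), except the PositionalEncoderParameter property names embed and block_size and the LayerParameter constructor overload taking a LayerType, which are assumptions made for illustration. The CudaDnn connection, Log and input/output Blobs are passed in rather than constructed, and if forward is protected in the Layer<T> base class, the corresponding public wrapper would be called instead.

using MyCaffe.basecode;
using MyCaffe.common;
using MyCaffe.param;
using MyCaffe.layers.gpt;

public static class PosEncUsage
{
    // Drives PositionalEncodingLayer<float> on an existing CudaDnn connection.
    // blobEmbed holds the (batch, block_size, embed) token embedding; blobOut receives the result.
    public static void Run(CudaDnn<float> cuda, Log log, Blob<float> blobEmbed, Blob<float> blobOut)
    {
        LayerParameter p = new LayerParameter(LayerParameter.LayerType.POSITIONAL_ENCODER); // assumed ctor overload
        p.name = "posenc";
        p.positional_encoder_param.embed = 192;        // assumed property name
        p.positional_encoder_param.block_size = 128;   // assumed property name

        PositionalEncodingLayer<float> layer = new PositionalEncodingLayer<float>(cuda, log, p);

        BlobCollection<float> colBottom = new BlobCollection<float>();
        colBottom.Add(blobEmbed);
        BlobCollection<float> colTop = new BlobCollection<float>();
        colTop.Add(blobOut);

        layer.LayerSetUp(colBottom, colTop);
        layer.Reshape(colBottom, colTop);
        layer.forward(colBottom, colTop);   // blobOut = sqrt(embed) * blobEmbed + positional table
    }
}

After forward, blobOut holds the scaled embedding plus the sinusoidal table, replicated across the batch as done in the Reshape excerpt above.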