using System.Collections.Generic;

// Field declarations.
Blob<T> m_blobGateAddResidual = null;
List<int> m_rgShape = new List<int>(4);

// Constructor: create and name the internal blobs.
m_blobDrop = new Blob<T>(cuda, log);
m_blobResidual = new Blob<T>(cuda, log);
m_blobResidual.Name = p.name + ".residual";
m_blobGate = new Blob<T>(cuda, log);
m_blobGateAddResidual = new Blob<T>(cuda, log);
m_blobGateAddResidual.Name = p.name + ".gateres";

// dispose: release the internal blobs.
dispose(ref m_blobGateAddResidual);

// setup_internal_blobs: add the internal blobs to 'col'.
if (m_blobDrop != null)
col.Add(m_blobResidual);

// LayerSetUp: set up the internal dropout, gate (GLU) and layer-norm layers.
Blob<T> blobBtm = colBottom[0];
if (colBottom.Count > 1)
if (colBottom[1].channels % nDiff != 0)
    m_log.FAIL("The bottom(1).channels must be divisible by bottom(1).channels - residual_channel_offset. For example, if bottom(1).channels = 120 and residual_channel_offset = 90, the difference = 30, which is a factor of both 120 and 90.");
if (m_dropout == null)
addBtmTop(colBottom[0], m_blobDrop);
m_dropout.Setup(m_colBtm, m_colTop);
blobBtm = m_blobDrop;
addBtmTop(blobBtm, m_blobGate);
m_gate.Setup(m_colBtm, m_colTop);
if (m_layerNorm == null)
addBtmTop(m_blobGate, colTop[0]);
m_layerNorm.Setup(m_colBtm, m_colTop);

// Reshape: size the residual blob and reshape the internal layers.
Blob<T> blobBtm = colBottom[0];
if (colBottom.Count > 1)
m_log.CHECK_EQ(colBottom[1].channels % nDiff, 0, "The bottom(1).channels must be divisible by bottom(1).channels - residual_channel_offset!");
m_nBlocks = colBottom[1].channels / nDiff;
int nQTimeSteps = nDiff;
m_rgShape.Add(colBottom[0].num);
m_rgShape.Add(nQTimeSteps);
m_rgShape.Add(colBottom[0].count(2));
m_blobResidual.Reshape(m_rgShape);
if (m_dropout != null)
addBtmTop(colBottom[0], m_blobDrop);
m_dropout.Reshape(m_colBtm, m_colTop);
blobBtm = m_blobDrop;
addBtmTop(blobBtm, m_blobGate);
m_gate.Reshape(m_colBtm, m_colTop);
addBtmTop(m_blobGate, colTop[0]);
m_layerNorm.Reshape(m_colBtm, m_colTop);

// Copy helpers (copy_to_fwd / copy_to_bwd): guard the bottom index and read the blob dimensions.
if (nIdx >= colBtm.Count)
int nOuterNum = bBtm.num;
int nChannels = m_nBlocks;
if (nIdx >= colBtm.Count)
int nOuterNum = bBtm.num;
int nChannels = m_nBlocks;
if (nIdx >= colBtm.Count)
int nOuterNum = bBtm.num;
int nChannels = m_nBlocks;

// forward: dropout -> GLU gate -> residual add -> layer norm.
Blob<T> blobBtm = colBottom[0];
copy_to_fwd(colBottom, 1, m_blobResidual);
if (m_dropout != null)
addBtmTop(colBottom[0], m_blobDrop);
m_dropout.Forward(m_colBtm, m_colTop);
blobBtm = m_blobDrop;
addBtmTop(blobBtm, m_blobGate);
m_gate.Forward(m_colBtm, m_colTop);
if (colBottom.Count > 1)
m_blobGateAddResidual.CopyFrom(m_blobGate);
addBtmTop(m_blobGateAddResidual, colTop[0]);
m_layerNorm.Forward(m_colBtm, m_colTop);

// backward: propagate the gradients back through layer norm, the residual, the GLU gate and dropout.
addBtmTop(m_blobGateAddResidual, colTop[0]);
m_layerNorm.Backward(m_colTop, rgbPropagateDown, m_colBtm);
copy_to_bwd(colBottom, 1, m_blobGateAddResidual);
m_blobGate.CopyFrom(m_blobGateAddResidual, true);
if (colBottom.Count > 1)
    m_blobResidual.CopyFrom(m_blobGateAddResidual, true);
addBtmTop(colBottom[0], m_blobGate);
m_gate.Backward(m_colTop, rgbPropagateDown, m_colBtm);
if (m_dropout != null)
addBtmTop(m_blobDrop, colBottom[0]);
m_dropout.Backward(m_colTop, rgbPropagateDown, m_colBtm);
colBottom[0].CopyFrom(m_blobDrop, true);
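To make the channel check above concrete, here is a small illustrative sketch (not part of the layer source) that reproduces the arithmetic from the FAIL message, where nDiff is bottom(1).channels minus the residual channel offset:

// Illustrative values taken from the FAIL message above.
int nBottomChannels = 120;                            // bottom(1).channels
int nResidualChannelOffset = 90;                      // residual channel offset
int nDiff = nBottomChannels - nResidualChannelOffset; // 120 - 90 = 30
bool bDivisible = (nBottomChannels % nDiff == 0);     // 120 % 30 == 0 -> passes the check
int nBlocks = nBottomChannels / nDiff;                // m_nBlocks = 4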
The Log class provides general output in text form.
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
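A minimal usage sketch of the BlobCollection members listed above; it assumes an existing CudaDnn<float> connection named cuda and a Log named log (both placeholders here) and a parameterless BlobCollection constructor:

BlobCollection<float> colSrc = new BlobCollection<float>();
colSrc.Add(new Blob<float>(cuda, log));   // add a new Blob to the collection
colSrc[0].Reshape(2, 8, 4, 4);            // give the source a shape
BlobCollection<float> colDst = new BlobCollection<float>();
colDst.Add(new Blob<float>(cuda, log));
colDst.ReshapeLike(colSrc);               // reshape all blobs to the source sizes
colDst.CopyFrom(colSrc);                  // copy the data (diff copy off by default)
int nItems = colDst.Count;                // number of items in the collection
colDst.Clear(true);                       // remove all items and dispose them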
The Blob is the main holder of data that moves through the Layers of the Net.
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use the Reshape overload that takes a List<int> shape instead.
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
int count()
Returns the total number of items in the Blob.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
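A short sketch exercising the Blob members above, again assuming placeholder cuda and log objects:

Blob<float> b = new Blob<float>(cuda, log);
b.Name = "example.blob";                  // get/set the blob name
b.Reshape(2, 8, 4, 4);                    // legacy N,C,H,W reshape
int nTotal = b.count();                   // 2 * 8 * 4 * 4 = 256
int nNum = b.num;                         // shape(0) = 2
int nChannels = b.channels;               // shape(1) = 8
Blob<float> b2 = new Blob<float>(cuda, log);
b2.ReshapeLike(b);                        // same shape as b
b2.CopyFrom(b, 0, 0, b.count(), true, false);  // copy the data only, not the diff
long hData = b2.gpu_data;                 // GPU handle used by the CudaDnn connection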
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
abstract void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Adjust the shapes of the top blobs and internal buffers to accommodate the shapes of the bottom blobs.
BlobCollection< T > m_colInternalBlobs
Specifies internal blobs used by the layer.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
static Layer< T > Create(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db=null, TransferInput trxinput=null)
Create a new Layer based on the LayerParameter.
LayerParameter.LayerType m_type
Specifies the Layer type.
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
LayerParameter convertLayerParam(LayerParameter pChild, LayerParameter pParent)
Called to convert a parent LayerParameterEx, used in blob sharing, with a child layer parameter.
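The Layer members above combine into the usual create/setup/reshape/forward/backward lifecycle. The sketch below is hedged: it assumes a LayerParameter constructor taking a LayerType and name, a CancelEvent from MyCaffe.basecode, and pre-populated colBottom/colTop collections:

LayerParameter p = new LayerParameter(LayerParameter.LayerType.GATEADDNORM, "gateaddnorm1");
Layer<float> layer = Layer<float>.Create(cuda, log, p, new CancelEvent());
layer.Setup(colBottom, colTop);                       // common Layer setup functionality
layer.Reshape(colBottom, colTop);                     // re-run whenever the bottom shapes change
double dfLoss = layer.Forward(colBottom, colTop);     // compute the top (output) blobs and loss
List<bool> rgbPropagateDown = new List<bool>() { true };
layer.Backward(colTop, rgbPropagateDown, colBottom);  // compute the bottom error gradients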
The GateAddNormLayer implements the Dropout, Gated Linear Unit (GLU), and LayerNorm layers while adding in the residual.
override int MaxBottomBlobs
Returns the max number of required bottom (input) Blobs: x, residual
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the top (output) blobs.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation
GateAddNormLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The constructor.
override int MinBottomBlobs
Returns the min number of required bottom (input) Blobs: x
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' provided.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the bottom (input) Blobs.
override void dispose()
Releases all GPU and host resources used by the Layer.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: y
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
double dropout_ratio
Specifies the dropout ratio (i.e. the probability that values will be dropped out and set to zero).
override void Copy(LayerParameterBase src)
Copy one parameter to another.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
LayerNormParameter layer_norm_param
Returns the parameter set when initialized with LayerType.LAYERNORM
GluParameter glu_param
Returns the parameter set when initialized with LayerType.GLU
GateAddNormParameter gateaddnorm_param
Returns the parameter set when initialized with LayerType.GATEADDNORM
LayerType
Specifies the layer type.
DropoutParameter dropout_param
Returns the parameter set when initialized with LayerType.DROPOUT
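Putting the parameter pieces together, a hedged configuration sketch for a GATEADDNORM layer; the gateaddnorm_param accessor and its dropout_ratio follow the listings above, while the residual_channel_offset field is an assumption based on the channel checks in LayerSetUp/Reshape:

LayerParameter p = new LayerParameter(LayerParameter.LayerType.GATEADDNORM, "gateaddnorm1");
p.gateaddnorm_param.dropout_ratio = 0.1;              // probability that values are zeroed
// p.gateaddnorm_param.residual_channel_offset = 90;  // assumed field, see the channel check above
// The parameter can then be passed to Layer<T>.Create as in the lifecycle sketch above.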
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
DIR
Defines the direction of data flow.
The MyCaffe.layers.tft namespace contains all TFT related layers.
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-source project.