2using System.Collections.Generic;
51 double m_dfClippingThreshold;
66 Blob<T> m_blob_C_to_Gate =
null;
67 Blob<T> m_blobEOutputWhd =
null;
83 Blob<T> m_blobContextFull =
null;
151 dispose(ref m_blobContextFull);
153 dispose(ref m_blobBiasMultiplier);
175 col.
Add(m_blobBiasMultiplier);
177 col.
Add(m_blobPreGate);
183 col.
Add(m_blob_H_to_Gate);
184 col.
Add(m_blob_H_to_H);
187 if (m_blobEOutputWhd !=
null)
188 col.
Add(m_blobEOutputWhd);
190 if (m_attention !=
null)
192 col.
Add(m_blob_C_to_Gate);
193 col.
Add(m_blobPrevCt);
233 m_colInternalBottom.
Clear();
234 m_colInternalBottom.
Add(bottom);
236 m_colInternalTop.
Clear();
237 m_colInternalTop.
Add(top);
242 m_colInternalBottom.
Clear();
244 for (
int i = 0; i < rgBottom.Count; i++)
246 m_colInternalBottom.
Add(rgBottom[i]);
249 m_colInternalTop.
Clear();
250 m_colInternalTop.
Add(top);
264 m_log.
CHECK_GE(colBottom.
Count, 4,
"When using attention, four bottoms are required: x, xClip, encoding, encodingClip.");
265 m_log.
CHECK_LE(colBottom.
Count, 5,
"When using attention, four bottoms are required: x, xClip, encoding, encodingClip, vocabcount (optional).");
267 if (colBottom.
Count == 5)
275 m_log.
CHECK_GE(colBottom.
Count, 1,
"When not using attention, at least one bottom is required: x.");
276 m_log.
CHECK_LE(colBottom.
Count, 2,
"When not using attention, no more than two bottoms is required: x, clip.");
280 m_nN = colBottom[0].channels;
282 m_nI = colBottom[0].count(2);
298 List<int> rgShape1 =
new List<int>() { 4 * m_nH, m_nI };
305 blobWeights_I_H.
Reshape(rgShape1);
306 weight_filler.
Fill(blobWeights_I_H);
313 List<int> rgShape2 =
new List<int>() { 4 * m_nH, m_nH };
320 blobWeights_H_H.
Reshape(rgShape2);
321 weight_filler.
Fill(blobWeights_H_H);
327 List<int> rgShape3 =
new List<int>() { 4 * m_nH };
335 bias_filler.
Fill(blobBias);
347 for (
int i=m_nH; i<2*m_nH; i++)
364 blobWeightWhd.
Reshape(rgShapeWhd);
365 weight_filler.
Fill(blobWeightWhd);
377 blobWeightWhdb.
Reshape(rgShapeWhdb);
378 bias_filler.
Fill(blobWeightWhdb);
394 blobWeights_C_H.
Reshape(rgShape1);
395 weight_filler.
Fill(blobWeights_C_H);
404 List<int> rgCellShape =
new List<int>() { m_nN, m_nH };
405 m_blob_C_0.
Reshape(rgCellShape);
406 m_blob_H_0.
Reshape(rgCellShape);
407 m_blob_C_T.
Reshape(rgCellShape);
408 m_blob_H_T.
Reshape(rgCellShape);
409 m_blob_H_to_H.
Reshape(rgCellShape);
411 List<int> rgGateShape =
new List<int>() { m_nN, 4, m_nH };
412 m_blob_H_to_Gate.
Reshape(rgGateShape);
419 m_blob_C_to_Gate.
Reshape(rgGateShape);
422 m_blobContext.
Name =
"context_out";
425 m_blobContextFull.
Name =
"context_full";
428 m_blobPrevCt.
Name =
"prev_ct";
444 Blob<T> blobEncoding = colBottom[2];
445 Blob<T> blobEncodingClip = colBottom[3];
446 addInternal(
new List<
Blob<T>>() { blobEncoding, m_blob_C_T, blobEncodingClip }, m_blobContext);
447 m_attention.
Setup(m_colInternalBottom, m_colInternalTop);
466 m_nN = colBottom[0].channels;
476 m_nT = colBottom[0].num;
477 m_log.
CHECK_EQ(colBottom[0].count() / m_nT / m_nN, m_nI,
"The input size is incompatible with inner product parameters.");
480 List<int> rgGateShape =
new List<int>() { m_nT, m_nN, 4, m_nH };
481 m_blobPreGate.
Reshape(rgGateShape);
482 m_blobGate.
Reshape(rgGateShape);
483 m_blob_H_to_Gate.
Reshape(rgGateShape);
485 List<int> rgTopShape =
new List<int>() { m_nT, m_nN, m_nH };
486 m_blobCell.
Reshape(rgTopShape);
490 List<int> rgMultiplierShape =
new List<int>() { m_nT, m_nN };
491 m_blobBiasMultiplier.
Reshape(rgMultiplierShape);
492 m_blobBiasMultiplier.
SetData(1.0);
494 List<int> rgCellShape =
new List<int>() { m_nN, m_nH };
495 m_blob_C_0.
Reshape(rgCellShape);
496 m_blob_H_0.
Reshape(rgCellShape);
497 m_blob_C_T.
Reshape(rgCellShape);
498 m_blob_H_T.
Reshape(rgCellShape);
499 m_blob_H_to_H.
Reshape(rgCellShape);
501 if (colBottom.
Count > 1)
502 m_blobMaxT.
Reshape(
new List<int>() { 1, colBottom[1].channels });
507 m_blobEOutputWhd.
Reshape(rgIpShape);
514 m_blob_C_to_Gate.
Reshape(rgGateShape);
516 Blob<T> blobEncoding = colBottom[2];
517 Blob<T> blobEncodingClip = colBottom[3];
518 addInternal(
new List<
Blob<T>>() { blobEncoding, m_blob_C_T, blobEncodingClip }, m_blobContext);
519 m_attention.
Reshape(m_colInternalBottom, m_colInternalTop);
521 List<int> rgShape =
Utility.Clone<
int>(m_blobContext.
shape());
523 m_blobContextFull.
Reshape(rgShape);
530 private int calculate_maxT(
Blob<T> blob, out
int nInitialClip)
534 if (blob.
count() > 1)
538 for (
int t = 0; t < blob.
num; t++)
567 long hTopData = colTop[0].mutable_gpu_data;
568 long hBottomData = colBottom[0].gpu_data;
571 int nInitialClip = 0;
572 double dfOriginalClip = 0;
574 if (colBottom.
Count > 1)
576 hClipData = colBottom[1].gpu_data;
577 m_log.
CHECK_EQ(colBottom[0].count(0, 2), colBottom[1].count(),
"The bottom[1].count() should equal the bottom[0].count(0,2).");
579 m_nMaxT = calculate_maxT(colBottom[1], out nInitialClip);
580 nMaxT = m_nMaxT.Value;
583 long hWeight_i =
m_colBlobs[m_nWeightItoHidx].gpu_data;
584 long hWeight_h =
m_colBlobs[m_nWeightHtoHidx].gpu_data;
585 long hBias =
m_colBlobs[m_nWeightBiasidx].gpu_data;
591 long hCtoGateData = 0;
594 if (hClipData != 0 && nInitialClip != 0)
605 m_cuda.gemm(
false,
true, m_nT * m_nN, 4 * m_nH, m_nI,
m_tOne, hBottomData, hWeight_i,
m_tZero, hPreGateData);
611 if (nInitialClip == 0)
615 dfOriginalClip =
convertD(colBottom[1].GetData(0));
620 for (
int t = 0; t < nMaxT; t++)
622 int nTopOffset = colTop[0].offset(t);
623 int nCellOffset = m_blobCell.
offset(t);
624 int nPreGateOffset = m_blobPreGate.
offset(t);
625 int nGateOffset = m_blobGate.
offset(t);
626 int nClipOffset = (hClipData != 0) ? colBottom[1].offset(t) : 0;
643 nHT1Offset = -colTop[0].offset(1);
645 nCT1Offset = -m_blobCell.
offset(1);
650 Blob<T> blobEncoding = colBottom[2];
651 Blob<T> blobEncodingClip = colBottom[3];
653 addInternal(
new List<
Blob<T>>() { blobEncoding, m_blobPrevCt, blobEncodingClip }, m_blobContext);
654 m_attention.
Forward(m_colInternalBottom, m_colInternalTop);
658 int nCount = m_blobContext.
count();
697 int nM = m_nT * m_nN;
702 colTop[0].
CopyFrom(m_blobEOutputWhd);
708 colBottom[1].
SetData(dfOriginalClip, 0);
724 long hTopData = colTop[0].gpu_data;
725 long hBottomData = colBottom[0].gpu_data;
729 List<bool> rgbPropagate =
new List<bool>() {
true,
true };
731 if (colBottom.
Count > 1)
733 hClipData = colBottom[1].gpu_data;
734 m_cuda.sign(colBottom[1].count(), hClipData, hClipData);
735 m_log.
CHECK_EQ(colBottom[0].count(0, 2), colBottom[1].count(),
"The bottom[1].count() should equal the bottom[0].count(0,2).");
736 nMaxT = m_nMaxT.Value;
739 long hWeight_i =
m_colBlobs[m_nWeightItoHidx].gpu_data;
740 long hWeight_h =
m_colBlobs[m_nWeightHtoHidx].gpu_data;
741 long hGateData = m_blobGate.
gpu_data;
742 long hCellData = m_blobCell.
gpu_data;
744 long hTopDiff = colTop[0].mutable_gpu_diff;
757 long hContextData = 0;
758 long hContextDiff = 0;
762 int nM = m_nT * m_nN;
767 m_cuda.add(colTop[0].count(), colTop[0].gpu_diff,
m_colBlobs[m_nWeightWhdbidx].gpu_diff,
m_colBlobs[m_nWeightWhdbidx].mutable_gpu_diff);
778 hWeight_c =
m_colBlobs[m_nWeightCtoHidx].gpu_data;
779 hContextData = m_blobContext.
gpu_data;
781 m_cuda.sign(colBottom[3].count(), colBottom[3].gpu_data, colBottom[3].mutable_gpu_data);
787 for (
int t = nMaxT - 1; t >= 0; t--)
789 int nTopOffset = colTop[0].offset(t);
790 int nCellOffset = m_blobCell.
offset(t);
791 int nGateOffset = m_blobGate.
offset(t);
792 int nPreGateOffset = m_blobPreGate.
offset(t);
793 int nClipOffset = (hClipData == 0) ? 0 : colBottom[1].offset(t);
812 nCT1Offset = m_blobCell.
offset(t - 1);
813 hCT1Data = hCellData;
814 nDHT1Offset = colTop[0].offset(t - 1);
815 hDHT1Diff = hTopDiff;
816 nDCT1Offset = m_blobCell.
offset(t - 1);
817 hDCT1Diff = hCellDiff;
824 m_dfClippingThreshold,
850 Blob<T> blobEncoding = colBottom[2];
851 Blob<T> blobEncodingClip = colBottom[3];
852 addInternal(
new List<
Blob<T>>() { blobEncoding, m_blob_C_T, blobEncodingClip }, m_blobContext);
853 m_attention.
Backward(m_colInternalTop, rgbPropagate, m_colInternalBottom);
855 int nCount = m_blobContext.
count();
863 m_cuda.gemm(
true,
false, 4 * m_nH, m_nI, m_nT * m_nN,
m_tOne, hPreGateDiff, hBottomData,
m_tOne,
m_colBlobs[m_nWeightItoHidx].mutable_gpu_diff);
869 m_cuda.gemm(
true,
false, 4 * m_nH, m_nH, (m_nT - 1) * m_nN,
m_tOne, hPreGateDiff, hTopData,
m_tOne,
m_colBlobs[m_nWeightHtoHidx].mutable_gpu_diff, m_blobPreGate.
offset(1));
887 if (rgbPropagateDown[0])
890 m_cuda.gemm(
false,
false, m_nT * m_nN, m_nI, 4 * m_nH,
m_tOne, hPreGateDiff, hWeight_i,
m_tZero, colBottom[0].mutable_gpu_diff);
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
void CHECK_LE(double df1, double df2, string str)
Test whether one number is less than or equal to another.
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
The Utility class provides general utility functions.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void SetData(double df)
Set all blob data to the value specified.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
The Blob is the main holder of data that moves through the Layers of the Net.
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
T[] mutable_cpu_data
Get data from the GPU and bring it over to the host, or Set data from the Host and send it over to th...
static T One
Returns One (1) in type T.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
BLOB_TYPE type
Returns the BLOB_TYPE of the Blob.
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
static T Zero
Returns Zero (0) in type T.
T GetData(int nIdx)
Returns the data at a given flat index within the Blob.
int count()
Returns the total number of items in the Blob.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
int offset(int n, int c=0, int h=0, int w=0)
Returns the flat offset given the number, channel, height and width.
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Abstract Filler class used to fill blobs with values.
void Fill(Blob< T > b)
Fill the blob with values based on the actual filler used.
static Filler< T > Create(CudaDnn< T > cuda, Log log, FillerParameter p)
Create a new Filler instance.
[DEPRECATED] The AttentionLayer provides focus for LSTM based encoder/decoder models.
The LSTMAttentionLayer adds attention to the long-short term memory layer and is used in encoder/deco...
override void dispose()
Releases all GPU and host resources used by the Layer.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: output (ht).
override int MinBottomBlobs
Returns the minimum number of required bottom (input) Blobs: input
override int MaxBottomBlobs
Returns the maximum number of required bottom (input) Blobs: input, inputClip, encoding,...
LSTMAttentionLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The LSTMAttentionLayer constructor.
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Computes the error gradient w.r.t. the inputs.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation.
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
T m_tZero
Specifies a generic type equal to 0.0.
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
T m_tOne
Specifies a generic type equal to 1.0.
bool shareParameter(Blob< T > b, List< int > rgMinShape, bool bAllowEndsWithComparison=false)
Attempts to share a parameter Blob if another parameter Blob with the same name and acceptable size i...
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
float convertF(T df)
Converts a generic to a float value.
abstract void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Adjust the shapes of top blobs and internal buffers to accommodate the shapes of the bottom blobs.
double convertD(T df)
Converts a generic to a double value.
virtual bool reshapeNeeded(BlobCollection< T > colBottom, BlobCollection< T > colTop, bool bReset=true)
Tests the shapes of both the bottom and top blobs and if they are the same as the previous sizing,...
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
LayerParameter.LayerType m_type
Specifies the Layer type.
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
BlobCollection< T > internal_blobs
Returns the collection of internal Blobs used by the Layer.
BlobCollection< T > m_colBlobs
Specifies the learnable parameter Blobs of the Layer.
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
bool m_bNetReshapeRequest
Specifies whether the reshape is requested from a Net.Reshape call or not.
The LayerParameterEx class is used when sharing another Net to conserve GPU memory and extends the La...
BlobCollection< T > SharedBlobs
Returns the shared parameter Blobs.
Layer< T > SharedLayer
Returns the layer in the shared Net that matches this one.
BlobCollection< T > SharedLayerBlobs
Returns the shared Layer Blobs.
int axis
The axis along which to perform the softmax – may be negative to index from the end (e....
FillerParameter bias_filler
The filler for the bias.
FillerParameter weight_filler
The filler for the weights.
uint dim
Specifies the dim of the attention unit which should match the LSTM output size.
Specifies the parameters for the LSTMAttentionLayer that provides an attention based LSTM layer used ...
double clipping_threshold
Specifies the gradient clipping threshold, default = 0.0 (i.e. no clipping).
bool enable_clockwork_forgetgate_bias
When enabled, the forget gate bias is set to 5.0.
uint num_output_ip
Specifies the number of IP outputs for the layer. Note, when 0, no inner product is performed.
FillerParameter bias_filler
Specifies the filler parameters for the bias filler.
FillerParameter weight_filler
Specifies the filler parameters for the weight filler.
uint num_output
Specifies the number of outputs for the layer.
bool enable_attention
(default=false) When enabled, attention is applied to the input state on each cycle through the LSTM....
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
AttentionParameter attention_param
Returns the parameter set when initialized with LayerType.ATTENTION
LayerType
Specifies the layer type.
LSTMAttentionParameter lstm_attention_param
Returns the parameter set when initialized with LayerType.LSTM_ATTENTION
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
BLOB_TYPE
Defines the type of data held by a given Blob.
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers.beta namespace contains all beta stage layers.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...