using System.Collections.Generic;
using System.Threading.Tasks;

Net<T> m_unrolledNet = null;
int m_nLastLayerIndex;
bool m_bExposeHiddenInput;
bool m_bExposeHiddenOutput;
long m_hDropoutStates;
ulong m_nWorkspaceSizeInBytes;
bool m_bWorkspaceOwned = true;
ulong m_nReservedSizeInBytes;
bool m_bReservedOwned = true;
bool m_bUseTensors = false;
List<int> m_rgShape = new List<int>(4);
bool m_bWarningShown = false;
bool m_bCudnnRnn8Supported = false;
bool m_bUseCudnnRnn8 = false;

m_evtCancel = evtCancel;
 
private void free_tensor(ref long h)

if (m_unrolledNet != null)
    m_unrolledNet = null;

free_tensor(ref m_hHxDesc);
free_tensor(ref m_hCxDesc);
free_tensor(ref m_hHyDesc);
free_tensor(ref m_hCyDesc);

if (m_hWeightDesc != 0)
    m_cuda.FreeFilterDesc(m_hWeightDesc);

m_cuda.FreeRnnDesc(m_hRnnDesc);

if (m_hDropoutDesc != 0)
    m_cuda.FreeDropoutDesc(m_hDropoutDesc);

if (m_hDropoutStates != 0)
    m_cuda.FreeMemory(m_hDropoutStates);
    m_hDropoutStates = 0;

m_cuda.FreeRnnDataDesc(m_hXDesc);
m_cuda.FreeRnnDataDesc(m_hYDesc);

if (m_hWorkspace != 0)
    if (m_bWorkspaceOwned)
        m_cuda.FreeMemory(m_hWorkspace);

if (m_hReserved != 0)
    if (m_bReservedOwned)
        m_cuda.FreeMemory(m_hReserved);

m_cuda.FreeCuDNN(m_hCuDnn);

if (m_transposeData != null)
    m_transposeData.Dispose();
    m_transposeData = null;

if (m_transposeClip != null)
    m_transposeClip = null;
 
if (m_unrolledNet == null)

base.ResetOnDebug(fn);

if (m_unrolledNet == null)
 
Blob<T> blobBtm0 = colBottom[0];
Blob<T> blobBtm1 = colBottom[1];

m_bWarningShown = false;
m_bCudnnRnn8Supported = m_cuda.IsRnn8Supported();
    m_bUseCudnnRnn8 = true;

addBtmTop(colBottom[0], m_blobBtmData);
m_transposeData.Setup(m_colBtm, m_colTop);
blobBtm0 = m_blobBtmData;

addBtmTop(colBottom[1], m_blobBtmClip);
m_transposeClip.Setup(m_colBtm, m_colTop);
m_rgShape.Add(m_blobBtmClip.num);
m_rgShape.Add(m_blobBtmClip.channels);
m_blobBtmClip.Reshape(m_rgShape);
blobBtm1 = m_blobBtmClip;

m_log.CHECK_GE(blobBtm0.num_axes, 2, "Bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)");
m_nInputSize = colBottom[0].count(2);
m_log.WriteLine("Initializing recurrent layer: assuming input batch contains " + m_nT.ToString() + " timesteps of " + m_nN.ToString() + " independent streams.");
m_log.CHECK_EQ(blobBtm1.num_axes, 2, "Bottom[1] must have exactly 2 axes -- (#timesteps, #streams)");

layerSetUpCuDnn(colBottom, colTop);
layerSetUpCaffe(colBottom, colTop);

layerSetupCudnnRnn8(colBottom, colTop);
layerSetupCudnnRnn(colBottom, colTop);
 
private void setupSharedWorkspaceAndReserved(ulong ulWsInBytes, ulong ulResInBytes)

m_nWorkspaceSizeInBytes = ulWsInBytes;
m_bWorkspaceOwned = true;
m_nReservedSizeInBytes = ulResInBytes;
m_bReservedOwned = true;

    m_hWorkspace = m_cuda.AllocMemory((long)m_nWorkspaceSizeInBytes);

if (ulResInBytes > 0)
    m_hReserved = m_cuda.AllocMemory((long)ulResInBytes);
 
m_log.WriteLine("WARNING: RNN8 currently does not support Tensor Cores, disabling Tensor Cores for RNN8.");

m_hCuDnn = m_cuda.CreateCuDNN();

blobs.Add(m_blobWts);

m_hRnn8 = m_cuda.CreateRnn8();
    m_nHiddenSize * nBidirectionalScale,

Blob<T> blobBtm0 = colBottom[0];
    blobBtm0 = m_blobBtmData;

m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);

ulong ulWorkspaceSizeInBytes;
ulong ulReservedSizeInBytes;
m_cuda.GetRnn8MemorySizes(m_hCuDnn, m_hRnn8, out szWtCount, out ulWorkspaceSizeInBytes, out ulReservedSizeInBytes);

List<int> rgWtShape = new List<int>() { (int)szWtCount, 1, 1 };

setupSharedWorkspaceAndReserved(ulWorkspaceSizeInBytes, ulReservedSizeInBytes);

    throw new Exception("Currently the RNN2 weights only support 'constant' and 'xavier' fillers.");

double dfBiasVal = 0;
double dfBiasVal2 = 0;

    throw new Exception("Currently the RNN2 bias' only support 'constant' and 'xavier' fillers.");

m_cuda.InitializeRnn8Weights(m_hCuDnn, m_hRnn8, m_blobWts.mutable_gpu_data, ftWt, dfWtVal, dfWtVal2, ftBias, dfBiasVal, dfBiasVal2);

catch (Exception excpt)
 
m_hCuDnn = m_cuda.CreateCuDNN();

blobs.Add(m_blobWts);

m_hXDesc = m_cuda.CreateRnnDataDesc();
m_hYDesc = m_cuda.CreateRnnDataDesc();

m_hHxDesc = m_cuda.CreateTensorDesc();
m_hCxDesc = m_cuda.CreateTensorDesc();
m_hHyDesc = m_cuda.CreateTensorDesc();
m_hCyDesc = m_cuda.CreateTensorDesc();

m_hRnnDesc = m_cuda.CreateRnnDesc();
m_hWeightDesc = m_cuda.CreateFilterDesc();
m_hDropoutDesc = m_cuda.CreateDropoutDesc();

Blob<T> blobBtm0 = colBottom[0];
    blobBtm0 = m_blobBtmData;

m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);

int[] rgDimA = new int[3];
int[] rgStrideA = new int[3];

rgDimA[2] = m_nHiddenSize;

rgStrideA[0] = rgDimA[2] * rgDimA[1];
rgStrideA[1] = rgDimA[2];

m_cuda.SetTensorNdDesc(m_hHxDesc, rgDimA, rgStrideA);
m_cuda.SetTensorNdDesc(m_hCxDesc, rgDimA, rgStrideA);
m_cuda.SetTensorNdDesc(m_hHyDesc, rgDimA, rgStrideA);
m_cuda.SetTensorNdDesc(m_hCyDesc, rgDimA, rgStrideA);

ulong ulReservedCount;
m_cuda.GetDropoutInfo(m_hCuDnn, 0, out ulStateCount, out ulReservedCount);
m_hDropoutStates = m_cuda.AllocMemory((long)ulStateCount);

m_cuda.SetRnnDesc(m_hCuDnn, m_hRnnDesc, m_nHiddenSize, m_nNumLayers, m_hDropoutDesc, m_rnnMode, m_bUseTensors, dir);

int nCount = m_cuda.GetRnnParamCount(m_hCuDnn, m_hRnnDesc, m_hXDesc);
List<int> rgWtShape = new List<int>() { nCount, 1, 1 };

int[] rgDimW = new int[3];
m_cuda.SetFilterNdDesc(m_hWeightDesc, rgDimW);

ulong ulReservedSizeInBytes;
ulong ulWorkspaceSizeInBytes = m_cuda.GetRnnWorkspaceCount(m_hCuDnn, m_hRnnDesc, m_hXDesc, out ulReservedSizeInBytes);

setupSharedWorkspaceAndReserved(ulWorkspaceSizeInBytes, ulReservedSizeInBytes);

int nNumLinearLayers = (m_rnnMode == RNN_MODE.LSTM) ? 8 : 2;

for (int i = 0; i < m_nNumLayers * nBidir; i++)
    for (int j = 0; j < nNumLinearLayers; j++)
        m_cuda.GetRnnLinLayerParams(m_hCuDnn, m_hRnnDesc, i, m_hXDesc, m_hWeightDesc, m_blobWts.gpu_data, j, out nWtCount, out hWt, out nBiasCount, out hBias);

        if (nWtCount % 2 != 0)

        fillerWt.Fill(nWtCount, hWt);

        if (nBiasCount % 2 != 0)

        fillerBias.Fill(nBiasCount, hBias);

        m_cuda.FreeMemoryPointer(hWt);
        m_cuda.FreeMemoryPointer(hBias);

catch (Exception excpt)
 
m_log.FAIL("The 'auto_repeat_hidden_states_across_layers' setting is not supported in the Caffe implementation, use the cuDNN implementation instead.");

Blob<T> blobBtm0 = colBottom[0];
Blob<T> blobBtm1 = colBottom[1];
    blobBtm0 = m_blobBtmData;
    blobBtm1 = m_blobBtmClip;

List<string> rgOutputNames = new List<string>();
List<string> rgRecurInputNames = new List<string>();
List<string> rgRecurOutputNames = new List<string>();

int nNumRecurBlobs = rgRecurInputNames.Count;
m_log.CHECK_EQ(nNumRecurBlobs, rgRecurOutputNames.Count, "The number of recurrent input names must equal the number of recurrent output names.");

int nNumHiddenExposed = (m_bExposeHiddenOutput) ? nNumRecurBlobs : 0;
int nBottomCount = (m_bExposeHiddenInput) ? 4 : 2;

m_log.CHECK_GE(colBottom[2].num_axes, 1, "When static input is present, the bottom[2].num_axes must be >= 1");
m_log.CHECK_EQ(m_nN, colBottom[2].shape(1), "When static input is present, the bottom[2].shape(1) must = N which is " + m_nN.ToString());

input_layer.top.Add("x");
for (int i = 0; i < blobBtm0.num_axes; i++)
    input_shape1.dim.Add(blobBtm0.shape(i));

input_layer.top.Add("cont");
for (int i = 0; i < blobBtm1.num_axes; i++)
    input_shape2.dim.Add(blobBtm1.shape(i));

input_layer.top.Add("x_static");
for (int i = 0; i < colBottom[2].num_axes; i++)
    input_shape3.dim.Add(colBottom[2].shape(i));

net_param.layer.Add(input_layer);

if (strLayerName.Length > 0)
    for (int i = 0; i < net_param.layer.Count; i++)
        layer.name = strLayerName + "_" + layer.name;

List<string> rgPseudoLosses = new List<string>();
for (int i = 0; i < rgOutputNames.Count; i++)
    rgPseudoLosses.Add(rgOutputNames[i] + "_pseudoloss");
    layer.bottom.Add(rgOutputNames[i]);
    layer.top.Add(rgPseudoLosses[i]);
    net_param.layer.Add(layer);

if (m_param is LayerParameterEx<T>)
    RecurrentLayer<T> sharedLayer = ((LayerParameterEx<T>)m_param).SharedLayer as RecurrentLayer<T>;
    if (sharedLayer != null)
        sharedNet = sharedLayer.m_unrolledNet;

m_blobContInputBlob = m_unrolledNet.blob_by_name("cont");
m_blobXStaticInputBlob = m_unrolledNet.blob_by_name("x_static");

for (int i = 0; i < nNumRecurBlobs; i++)
    m_colRecurInputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurInputNames[i]));
    m_colRecurOutputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurOutputNames[i]));

m_log.CHECK_EQ(colTop.Count() - nNumHiddenExposed, rgOutputNames.Count, "OutputBlobNames must provide output blob name for each top.");

for (int i = 0; i < rgOutputNames.Count; i++)

m_log.CHECK_EQ(2 + nNumRecurBlobs + nStaticInput, m_unrolledNet.input_blobs.Count, "The unrolled net input count should equal 2 + number of recurrent blobs (" + nNumRecurBlobs.ToString() + ") + static inputs (" + nStaticInput.ToString() + ")");

for (int i = 0; i < m_unrolledNet.parameters.Count; i++)

for (int i = 0; i < m_unrolledNet.layers.Count; i++)
    for (int j = 0; j < m_unrolledNet.layers[i].blobs.Count; j++)
        m_log.CHECK(m_unrolledNet.layers[i].param_propagate_down(j), "param_propagate_down not set for layer " + i.ToString() + ", param " + j.ToString());

for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
    m_colRecurOutputBlobs[i].SetDiff(0);

List<string> rgLayerNames = m_unrolledNet.layer_names;
m_nLastLayerIndex = rgLayerNames.Count - 1 - rgPseudoLosses.Count;

for (int i = m_nLastLayerIndex + 1, j = 0; i < rgLayerNames.Count; i++, j++)
    m_log.CHECK(rgLayerNames[i] == rgPseudoLosses[j], "The last layer at idx " + i.ToString() + " should be the pseudo layer named " + rgPseudoLosses[j]);

blob = m_colRecurInputBlobs[0];
if (m_colRecurInputBlobs.Count > 1)
    blob = m_colRecurInputBlobs[1];

blob = m_colRecurOutputBlobs[0];
if (m_colRecurOutputBlobs.Count > 1)
    blob = m_colRecurOutputBlobs[1];
 
Blob<T> blobBtm0 = colBottom[0];
Blob<T> blobBtm1 = colBottom[1];

addBtmTop(colBottom[0], m_blobBtmData);
m_transposeData.Reshape(m_colBtm, m_colTop);
blobBtm0 = m_blobBtmData;

addBtmTop(colBottom[1], m_blobBtmClip);
m_transposeClip.Reshape(m_colBtm, m_colTop);
m_rgShape.Add(m_blobBtmClip.num);
m_rgShape.Add(m_blobBtmClip.channels);
m_blobBtmClip.Reshape(m_rgShape);
blobBtm1 = m_blobBtmClip;

m_log.CHECK_GE(blobBtm0.num_axes, 2, "bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)");
m_log.CHECK_EQ(blobBtm1.num_axes, 2, "bottom[1] must have exactly 2 axes -- (#timesteps, #streams)");

reshapeCuDnn(colBottom, colTop);
reshapeCaffe(colBottom, colTop);

addBtmTop(m_blobTopData, colTop[0]);
m_transposeData.Reshape(m_colBtm, m_colTop);

reshapeCudnnRnn8(colBottom, colTop);
reshapeCudnnRnn(colBottom, colTop);
 
Blob<T> blobBtm0 = colBottom[0];
    blobBtm0 = m_blobBtmData;
    blobTop0 = m_blobTopData;

m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);

colTop[1].ShareData(m_blobHy);
colTop[1].ShareDiff(m_blobHy);
colTop[2].ShareData(m_blobCy);
colTop[2].ShareDiff(m_blobCy);

Blob<T> blobBtm0 = colBottom[0];
    blobBtm0 = m_blobBtmData;
    blobTop0 = m_blobTopData;

m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);

colTop[1].ShareData(m_blobHy);
colTop[1].ShareDiff(m_blobHy);
colTop[2].ShareData(m_blobCy);
colTop[2].ShareDiff(m_blobCy);
 
Blob<T> blobBtm0 = colBottom[0];
Blob<T> blobBtm1 = colBottom[1];
    blobBtm0 = m_blobBtmData;
    blobBtm1 = m_blobBtmClip;
    blobTop0 = m_blobTopData;

List<int> rgContShape = blobBtm1.shape();
m_blobContInputBlob.Reshape(rgContShape);

List<BlobShape> rgRecurInputShapes = new List<BlobShape>();
m_log.CHECK_EQ(rgRecurInputShapes.Count, m_colRecurInputBlobs.Count, "The number of recurrent input shapes must equal the number of recurrent input blobs!");

for (int i = 0; i < rgRecurInputShapes.Count; i++)
    m_colRecurInputBlobs[i].Reshape(rgRecurInputShapes[i]);

m_blobContInputBlob.ShareData(blobBtm1);

int nStaticInput = 0;
    m_blobXStaticInputBlob.ShareData(colBottom[2]);
    m_blobXStaticInputBlob.ShareDiff(colBottom[2]);

if (m_bExposeHiddenInput)
    int nBottomOffset = 2 + nStaticInput;
    for (int i = nBottomOffset, j = 0; i < colBottom.Count; i++, j++)
        m_log.CHECK(Utility.Compare<int>(m_colRecurInputBlobs[j].shape(), colBottom[i].shape()), "Shape mismatch - recur_input_blobs_[" + j.ToString() + "]: '" + m_colRecurInputBlobs[j].shape_string + "' vs. bottom[" + i.ToString() + "]: '" + colBottom[i].shape_string + "'");
        m_colRecurInputBlobs[j].ShareData(colBottom[i]);

for (int i = 0; i < m_colOutputBlobs.Count; i++)
    blobTop0.ShareData(m_colOutputBlobs[i]);
    blobTop0.ShareDiff(m_colOutputBlobs[i]);

    colTop[i].ShareData(m_colOutputBlobs[i]);
    colTop[i].ShareDiff(m_colOutputBlobs[i]);

if (m_bExposeHiddenOutput)
    int nTopOffset = m_colOutputBlobs.Count;
    for (int i = nTopOffset, j = 0; i < colTop.Count; i++, j++)
        colTop[i].ShareData(m_colRecurOutputBlobs[j]);
        colTop[i].ShareDiff(m_colRecurOutputBlobs[j]);

for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
    m_colRecurOutputBlobs[i].SetData(0);
 
int nMinBottoms = 2;

List<string> rgInputs = new List<string>();
nMinBottoms += rgInputs.Count;

List<string> rgOutputs = new List<string>();
nNumTops += rgOutputs.Count;

return (nBottomIdx != 1) ? true : false;

if (m_blobCx != null)
if (m_blobHx != null)
if (m_blobCy != null)
if (m_blobHy != null)
 
addBtmTop(colBottom[0], m_blobBtmData);
m_transposeData.Forward(m_colBtm, m_colTop);
addBtmTop(colBottom[1], m_blobBtmClip);
m_transposeClip.Forward(m_colBtm, m_colTop);

forward_cudnn(colBottom, colTop);
forward_cuda(colBottom, colTop);

addBtmTop(m_blobTopData, colTop[0]);
m_transposeData.Forward(m_colBtm, m_colTop);

for (int i = 1; i < bTop.num; i++)

for (int i = 1; i < bTop.num; i++)

if (m_bUseCudnnRnn8)
    forward_cudnnRnn8(colBottom, colTop);
    forward_cudnnRnn(colBottom, colTop);
 
if (colBottom.Count > 2)
    if (colBottom.Count > 2)
        copy_or_repeat_fwd(colBottom[2], m_blobHx);
    if (colBottom.Count > 3)
        copy_or_repeat_fwd(colBottom[3], m_blobCx);

m_cuda.Rnn8Forward(m_hCuDnn,

Blob<T> blobBtm1 = colBottom[1];
    blobBtm1 = m_blobBtmClip;

if (dfClip > 0 || colBottom.Count > 2)
    if (colBottom.Count > 2)
        copy_or_repeat_fwd(colBottom[2], m_blobHy);
    if (colBottom.Count > 3)
        copy_or_repeat_fwd(colBottom[3], m_blobCy);

m_cuda.RnnForward(m_hCuDnn,
                  m_nWorkspaceSizeInBytes,
                  m_nReservedSizeInBytes,

m_log.CHECK_EQ(m_colRecurInputBlobs.Count, m_colRecurOutputBlobs.Count, "The recurrent input and output blobs must have the same count.");

if (!m_bExposeHiddenInput)
    for (int i = 0; i < m_colRecurInputBlobs.Count; i++)
        int nCount = m_colRecurInputBlobs[i].count();
        m_log.CHECK_EQ(nCount, m_colRecurOutputBlobs[i].count(), "The input and output blob at " + i.ToString() + " must have the same count.");
        long hTimestep_T_Data = m_colRecurOutputBlobs[i].gpu_data;
        long hTimestep_0_Data = m_colRecurInputBlobs[i].mutable_gpu_data;
        m_cuda.copy(nCount, hTimestep_T_Data, hTimestep_0_Data);
 
addBtmTop(m_blobTopData, colTop[0]);
m_transposeData.Backward(m_colTop, rgbPropagateDown, m_colBtm);

backward_cudnn(colTop, rgbPropagateDown, colBottom);
backward_cuda(colTop, rgbPropagateDown, colBottom);

addBtmTop(colBottom[0], m_blobBtmData);
m_transposeData.Backward(m_colTop, rgbPropagateDown, m_colBtm);

if (m_bUseCudnnRnn8)
    backward_cudnnRnn8(colTop, rgbPropagateDown, colBottom);
    backward_cudnnRnn(colTop, rgbPropagateDown, colBottom);
 
if (colTop.Count > 2)
    if (colTop.Count > 1)
        m_log.CHECK_EQ(colTop[1].count(), m_blobHy.count(), "The bottom(1) should have the same shape as 'hy' which has a shape = " + m_blobHy.shape_string);
        m_blobHy.CopyFrom(colTop[1], true);

    if (colTop.Count > 2)
        m_log.CHECK_EQ(colTop[2].count(), m_blobCy.count(), "The bottom(2) should have the same shape as 'cy' which has a shape = " + m_blobCy.shape_string);
        m_blobCy.CopyFrom(colTop[2], true);

m_cuda.Rnn8Backward(m_hCuDnn,

if (colBottom.Count > 2)
    if (colBottom.Count > 2)
        copy_or_repeat_bwd(colBottom[2], m_blobHx);
    if (colBottom.Count > 3)
        copy_or_repeat_bwd(colBottom[3], m_blobCx);
 
if (rgbPropagateDown[1] && !m_bWarningShown)
    m_log.WriteLine("WARNING: Cannot backpropagate to sequence indicators, sequence backprop will be ignored.");
    m_bWarningShown = true;

if (colTop.Count > 2)
    if (colTop.Count > 1)
        m_log.CHECK_EQ(colTop[1].count(), m_blobHy.count(), "The bottom(1) should have the same shape as 'hy' which has a shape = " + m_blobHy.shape_string);
        m_blobHy.CopyFrom(colTop[1], true);

    if (colTop.Count > 2)
        m_log.CHECK_EQ(colTop[2].count(), m_blobCy.count(), "The bottom(2) should have the same shape as 'cy' which has a shape = " + m_blobCy.shape_string);
        m_blobCy.CopyFrom(colTop[2], true);

m_cuda.RnnBackwardData(m_hCuDnn,
                  m_nWorkspaceSizeInBytes,
                  m_nReservedSizeInBytes);

m_cuda.RnnBackwardWeights(m_hCuDnn,
                  m_nWorkspaceSizeInBytes,
                  m_nReservedSizeInBytes);

if (colBottom.Count > 2)
    if (colBottom.Count > 2)
        copy_or_repeat_bwd(colBottom[2], m_blobHx);
    if (colBottom.Count > 3)
        copy_or_repeat_bwd(colBottom[3], m_blobCx);
 
m_log.CHECK(!rgbPropagateDown[1], "Cannot backpropagate to sequence indicators.");

m_unrolledNet.Backward(m_nLastLayerIndex);

if (colBottom.Count > nCount)
    if (colBottom.Count > nCount)
        m_log.CHECK_EQ(colBottom[nCount].count(), m_blobHx.count(), "The bottom(" + nCount.ToString() + ") should have the same shape as 'hx' which has a shape = " + m_blobHx.shape_string);
        colBottom[nCount].CopyFrom(m_blobHx, true);

    if (colBottom.Count > nCount+1)
        m_log.CHECK_EQ(colBottom[nCount + 1].count(), m_blobCx.count(), "The bottom(" + (nCount + 1).ToString() + ") should have the same shape as 'cx' which has a shape = " + m_blobCx.shape_string);
        colBottom[nCount + 1].CopyFrom(m_blobCx, true);
 
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manual cancel event.
 
The Log class provides general output in text form.
 
void CHECK(bool b, string str)
Test a flag for true.
 
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
 
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
 
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
 
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
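
A minimal sketch of how a layer typically uses these Log helpers, in the style of the checks in the listing above; the blobs are assumed to already exist and the messages are illustrative.

    void validateRecurrentInputs(Log log, Blob<float> blobData, Blob<float> blobClip)
    {
        // Data input must be shaped (#timesteps, #streams, ...).
        log.CHECK_GE(blobData.num_axes, 2, "bottom[0] must have at least 2 axes.");
        // Clip input must be shaped (#timesteps, #streams).
        log.CHECK_EQ(blobClip.num_axes, 2, "bottom[1] must have exactly 2 axes.");
        // Both inputs must agree on the number of timesteps.
        log.CHECK(blobData.shape(0) == blobClip.shape(0), "The timestep counts must match.");
        log.WriteLine("Recurrent inputs validated: " + blobData.shape_string);
    }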
 
The Utility class provides general utility functions.
 
The BlobCollection contains a list of Blobs.
 
BlobCollection()
The BlobCollection constructor.
 
void Add(Blob< T > b)
Add a new Blob to the collection.
 
int Count
Returns the number of items in the collection.
 
void Clear(bool bDispose=false)
Remove all items from the collection.
 
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
 
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
 
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
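
A small sketch of building a bottom BlobCollection with the members documented above; the two blobs and their shapes are assumed/illustrative.

    BlobCollection<float> buildBottom(Blob<float> blobData, Blob<float> blobClip)
    {
        BlobCollection<float> colBtm = new BlobCollection<float>();
        colBtm.Add(blobData);   // bottom[0]: data, shaped (T, N, ...)
        colBtm.Add(blobClip);   // bottom[1]: sequence continuation indicators, shaped (T, N)
        return colBtm;          // colBtm.Count == 2
    }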
 
The Blob is the main holder of data that moves through the Layers of the Net.
 
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
 
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
 
void ShareData(Blob< T > b)
Set the data to point to the data of the other blob – useful in Layers which simply perform a copy in their forward pass.
 
int num_axes
Returns the number of axes in the Blob.
 
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
 
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
 
string shape_string
Returns a string describing the Blob's shape.
 
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use the Reshape(List<int> rgShape) overload instead.
 
bool reshape_when_sharing
When true, this Blob is reshaped to the source when sharing the source data (default = false).
 
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
 
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
 
T GetData(int nIdx)
Returns the data at a given flat index within the Blob.
 
int count()
Returns the total number of items in the Blob.
 
void ShareDiff(Blob< T > b)
Set the diff to point to the diff of the other blob – useful in Layers which simply perform a copy in their forward pass.
 
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
 
string Name
Get/set the name of the Blob.
 
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
 
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single indexed item.
 
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
 
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
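
A sketch of sharing a hidden-state blob with an exposed top blob, mirroring the colTop[1].ShareData(m_blobHy) / ShareDiff(m_blobHy) calls in the listing above; the blobs are assumed given.

    void exposeHiddenState(Log log, Blob<float> blobTop, Blob<float> blobHy)
    {
        log.CHECK_EQ(blobTop.count(), blobHy.count(), "The top and 'hy' blobs must have the same count.");
        blobTop.ShareData(blobHy);   // the top now points at hy's data (no copy)
        blobTop.ShareDiff(blobHy);   // and at hy's diff, so gradients flow back into hy
        log.WriteLine("Exposed hidden state with shape " + blobHy.shape_string);
    }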
 
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
 
The GetWorkBlobArgs are passed to the Layer::OnGetWorkBlob event which is supported for debugging only.
 
Connects Layers together into a directed acyclic graph (DAG) specified by a NetParameter
 
List< Layer< T > > layers
Returns the layers.
 
void Reshape()
Reshape all layers from the bottom to the top.
 
double ForwardFromTo(int nStart=0, int nEnd=int.MaxValue)
The FromTo variant of forward and backward operate on the (topological) ordering by which the net is specified.
 
BlobCollection< T > parameters
Returns the parameters.
 
List< string > layer_names
Returns the layer names.
 
BlobCollection< T > input_blobs
Returns the collection of input Blobs.
 
void set_debug_info(bool bVal)
Sets the debug information flag.
 
void Backward(int nStart=int.MaxValue, int nEnd=0)
The network backward should take no input and output, since it solely computes the gradient w.r.t. the parameters.
 
virtual void Dispose(bool bDisposing)
Releases all resources (GPU and Host) used by the Net.
 
Blob< T > blob_by_name(string strName, bool bThrowExceptionOnError=true)
Returns a blob given its name.
 
List< int > param_owners
Returns the list of parameter owner indexes.
 
List< string > param_display_names
Returns the list of parameter display names.
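
A sketch of how the layer above works with its unrolled Net, using only the members documented here; the net and the last-layer index are assumed to already exist, and the 'cont' blob name comes from the listing.

    void backwardThroughUnrolledNet(Net<float> unrolledNet, Log log, int nLastLayerIndex)
    {
        Blob<float> blobCont = unrolledNet.blob_by_name("cont");
        log.WriteLine("cont shape: " + blobCont.shape_string +
                      ", input blobs: " + unrolledNet.input_blobs.Count.ToString() +
                      ", layers: " + unrolledNet.layers.Count.ToString());
        // Run backward from the last non pseudo-loss layer down to the first layer.
        unrolledNet.Backward(nLastLayerIndex);
    }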
 
Abstract Filler class used to fill blobs with values.
 
void Fill(Blob< T > b)
Fill the blob with values based on the actual filler used.
 
static Filler< T > Create(CudaDnn< T > cuda, Log log, FillerParameter p)
Create a new Filler instance.
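
A sketch of creating a filler and filling a weight blob with it, as done for the RNN weight blob in the listing above; the parameterless FillerParameter constructor is assumed, and the 'xavier' type string comes from the listing's exception messages.

    void fillWeights(CudaDnn<float> cuda, Log log, Blob<float> blobWts)
    {
        FillerParameter fp = new FillerParameter();   // assumed default constructor
        fp.type = "xavier";                           // type of filler to create
        Filler<float> filler = Filler<float>.Create(cuda, log, fp);
        filler.Fill(blobWts);                         // fills the blob in place
    }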
 
An interface for the units of computation which can be composed into a Net.
 
Log m_log
Specifies the Log for output.
 
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
 
virtual void SetOnDebug(EventHandler< GetWorkBlobArgs< T > > fn)
Set the OnDebug event.
 
bool shareParameter(Blob< T > b, List< int > rgMinShape, bool bAllowEndsWithComparison=false)
Attempts to share a parameter Blob if another parameter Blob with the same name and acceptable size is found.
 
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
 
abstract void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Adjust the shapes of top blobs and internal buffers to accommodate the shapes of the bottom blobs.
 
void Dispose()
Releases all GPU and host resources used by the Layer.
 
Phase m_phase
Specifies the Phase under which the Layer is run.
 
virtual void ResetOnDebug(EventHandler< GetWorkBlobArgs< T > > fn)
Reset the OnDebug event, disabling it.
 
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
 
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
 
static Layer< T > Create(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db=null, TransferInput trxinput=null)
Create a new Layer based on the LayerParameter.
 
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
 
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
 
LayerParameter convertLayerParam(LayerParameter pChild, LayerParameter pParent)
Called to convert a parent LayerParameterEx, used in blob sharing, with a child layer parameter.
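
A sketch of the documented Layer lifecycle (Create, Setup, Forward, Dispose); the CudaDnn connection, Log, LayerParameter, CancelEvent and blob collections are assumed to already exist.

    double runLayerOnce(CudaDnn<float> cuda, Log log, LayerParameter p, CancelEvent evtCancel,
                        BlobCollection<float> colBottom, BlobCollection<float> colTop)
    {
        Layer<float> layer = Layer<float>.Create(cuda, log, p, evtCancel);
        layer.Setup(colBottom, colTop);                    // common layer setup
        double dfLoss = layer.Forward(colBottom, colTop);  // computes the tops and returns the loss
        layer.Dispose();                                   // release all GPU and host resources
        return dfLoss;
    }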
 
The RecurrentLayer is an abstract class for implementing recurrent behavior inside of an unrolled network.
 
RecurrentLayer(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel)
The RecurrentLayer constructor.
 
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs.
 
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Performs the forward calculation.
 
override int MaxBottomBlobs
Returns the maximum number of required bottom (input) Blobs: min+1
 
abstract void RecurrentInputShapes(List< BlobShape > rgShapes)
Fills shapes with the shapes of the recurrent input Blobs. Subclasses should define this – see RNNLayer and LSTMLayer for examples.
 
abstract void RecurrentOutputBlobNames(List< string > rgNames)
Fills names with the names of the Tth timestep recurrent output Blobs. Subclasses should define this – see RNNLayer and LSTMLayer for examples.
 
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Backward computation.
 
abstract void RecurrentInputBlobNames(List< string > rgNames)
Fills names with the names of the 0th timestep recurrent input Blobs. Subclasses should define this – see RNNLayer and LSTMLayer for examples.
 
override void SetOnDebug(EventHandler< GetWorkBlobArgs< T > > fn)
Set the OnDebug event on the unrolled net.
 
abstract void FillUnrolledNet(NetParameter net_param)
Fills net_param with the recurrent network architecture. Subclasses should define this – see RNNLayer and LSTMLayer for examples.
 
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
 
abstract void OutputBlobNames(List< string > rgNames)
Fills names with the names of the output blobs, concatenated across all timesteps.
 
override int MinBottomBlobs
Returns the minimum number of required bottom (input) Blobs.
 
virtual void Reset()
Reset the hidden state of the net by zeroing out all recurrent outputs.
 
int m_nN
The number of independent streams to process simultaneously.
 
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
 
int m_nT
The number of timesteps in the layer's input, and the number of timesteps over which to backpropagate through time.
 
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
 
override void ResetOnDebug(EventHandler< GetWorkBlobArgs< T > > fn)
Reset the OnDebug event, disabling it on the unrolled net.
 
bool m_bStaticInput
Whether the layer has a 'static' input copied across all timesteps.
 
override bool AllowForceBackward(int nBottomIdx)
Returns true for all but the bottom index = 1, for you can't propagate to the sequence continuation indicators.
 
override void dispose()
Releases all GPU and host resources used by the Layer.
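
A hedged sketch of the abstract members a RecurrentLayer subclass supplies; the class name, blob names, shapes and member access levels are illustrative assumptions, not taken from the source.

    class SimpleRecurrentLayer<T> : RecurrentLayer<T>   // hypothetical subclass
    {
        public SimpleRecurrentLayer(CudaDnn<T> cuda, Log log, LayerParameter p, CancelEvent evtCancel)
            : base(cuda, log, p, evtCancel)
        {
        }

        protected override void RecurrentInputBlobNames(List<string> rgNames)
        {
            rgNames.Clear();
            rgNames.Add("h_0");                    // hidden state fed into timestep 0
        }

        protected override void RecurrentOutputBlobNames(List<string> rgNames)
        {
            rgNames.Clear();
            rgNames.Add("h_" + m_nT.ToString());   // hidden state produced at timestep T
        }

        protected override void RecurrentInputShapes(List<BlobShape> rgShapes)
        {
            BlobShape shape = new BlobShape();
            shape.dim.Add(1);                      // a single timestep
            shape.dim.Add(m_nN);                   // N independent streams
            shape.dim.Add((int)m_param.recurrent_param.num_output);
            rgShapes.Add(shape);
        }

        protected override void OutputBlobNames(List<string> rgNames)
        {
            rgNames.Clear();
            rgNames.Add("o");                      // output concatenated across all timesteps
        }

        protected override void FillUnrolledNet(NetParameter net_param)
        {
            // Add the per-timestep layers to net_param.layer here --
            // see the RNNLayer/LSTMLayer references above.
        }
    }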
 
Specifies the shape of a Blob.
 
BlobShape()
The BlobShape constructor.
 
List< int > dim
The blob shape dimensions.
 
Specifies the filler parameters used to create each Filler.

double value
Specifies the value used by 'constant' filler.
 
double mean
Specifies the mean value to use with the 'gaussian' filler.
 
string type
Specifies the type of filler to use.
 
double std
Specifies the standard deviation value to use with the 'gaussian' filler.
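
A sketch of 'constant' and 'gaussian' filler configurations using the fields documented above; the values are illustrative and the parameterless constructor is assumed.

    void configureFillers()
    {
        FillerParameter fpConstant = new FillerParameter();  // assumed default constructor
        fpConstant.type = "constant";
        fpConstant.value = 0.0;       // every element is set to 'value'

        FillerParameter fpGaussian = new FillerParameter();
        fpGaussian.type = "gaussian";
        fpGaussian.mean = 0.0;        // mean of the gaussian
        fpGaussian.std = 0.01;        // standard deviation of the gaussian
    }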
 
Specifies the base parameter for all layers.
 
string name
Specifies the name of this LayerParameter.
 
List< double > loss_weight
Specifies the loss weight.
 
LayerType type
Specifies the type of this LayerParameter.
 
InputParameter input_param
Returns the parameter set when initialized with LayerType.INPUT
 
List< string > top
Specifies the active top connections (in the bottom, out the top)
 
TransposeParameter transpose_param
Returns the parameter set when initialized with LayerType.TRANSPOSE
 
RecurrentParameter recurrent_param
Returns the parameter set when initialized with LayerType.RECURRENT
 
List< string > bottom
Specifies the active bottom connections (in the bottom, out the top).
 
LayerType
Specifies the layer type.
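
A sketch of declaring the unrolled net's inputs the way the listing above does, using the LayerParameter members documented here; the LayerParameter constructor taking a LayerType and the net_param variable are assumptions.

    // Declare the "x" and "cont" inputs of the unrolled net.
    LayerParameter input_layer = new LayerParameter(LayerParameter.LayerType.INPUT);  // assumed constructor
    input_layer.name = "input";
    input_layer.top.Add("x");      // data input, shaped (T, N, ...)
    input_layer.top.Add("cont");   // sequence continuation indicators, shaped (T, N)
    net_param.layer.Add(input_layer);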
 
Specifies the parameters used to create a Net
 
List< LayerParameter > layer
The layers that make up the net. Each of their configurations, including connectivity and behavior, is specified as a LayerParameter.
 
bool use_cudnn_rnn8_if_supported
Specifies to use cuDnn RNN8 if supported (requires cuDnn 8.0 or higher), (default = false).
 
bool debug_info
Whether to enable displaying debug info in the unrolled recurrent net.
 
uint num_layers
The number of LSTM layers to implement.
 
uint num_output
The dimension of the output (and usually hidden state) representation – must be explicitly set to non-zero.
 
FillerParameter weight_filler
The filler for the weights.
 
bool expose_hidden_output
Whether to add as additional outputs (tops) the final timestep hidden state blobs.
 
bool bidirectional
Specifies whether the network is bidirectional (true) or unidirectional (false - default).
 
bool batch_first
The inputs and outputs are shaped with the batch in the first dimension.
 
long dropout_seed
Specifies the seed used by cuDnn for random number generation.
 
bool useCudnn()
Queries whether or not to use NVIDIA's cuDnn.
 
double dropout_ratio
Specifies the dropout ratio (e.g. the probability that values will be dropped out and set to zero).
 
FillerParameter bias_filler
The filler for the bias.
 
bool cudnn_enable_tensor_cores
Specifies to enable the CUDA tensor cores when performing the rnn operations, which is faster but not supported on all GPUs.
 
bool auto_repeat_hidden_states_across_layers
Auto repeat the hidden and cell states so that a separate state is fed to each layer.
 
bool expose_hidden_input
Whether to add as additional inputs (bottoms) the initial hidden state blobs. The number of additional inputs required depends on the recurrent architecture.
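
A sketch of configuring the recurrent parameter fields documented above on a LayerParameter p; all values are illustrative.

    p.recurrent_param.num_output = 256;                    // hidden/output representation size
    p.recurrent_param.num_layers = 2;                      // number of stacked layers
    p.recurrent_param.dropout_ratio = 0.1;                 // probability a value is dropped
    p.recurrent_param.bidirectional = false;               // unidirectional (the default)
    p.recurrent_param.use_cudnn_rnn8_if_supported = true;  // prefer RNN8 when cuDnn 8+ is available
    p.recurrent_param.expose_hidden_input = false;         // no extra hidden-state bottoms
    p.recurrent_param.expose_hidden_output = true;         // add the final hidden state(s) as extra tops
    bool bUseCudnn = p.recurrent_param.useCudnn();         // engine selection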
 
The MyCaffe.basecode namespace contains all generic types used throughout MyCaffe.
 
Phase
Defines the Phase under which to run a Net.
 
The MyCaffe.common namespace contains common MyCaffe classes.
 
RNN_DATALAYOUT
Specifies the RNN data layout of the data input.
 
DIR
Defines the direction of data flow.
 
RNN_MODE
Specifies the RNN mode to use with the Recurrent Layer when using the cuDNN engine.
 
RNN_BIAS_MODE
Specifies the RNN bias mode to use with the Recurrent Layer when using the cuDNN engine.
 
RNN_DIRECTION
Specifies the RNN direction used.
 
RNN_FILLER_TYPE
Defines the filler types used to fill the RNN8 weights.
 
The MyCaffe.db.image namespace contains all image database related classes.
 
The MyCaffe.fillers namespace contains all fillers including the Filler class.
 
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
 
The MyCaffe.param namespace contains parameters used to create models.
 
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-source project.