using System.Collections.Generic;
using System.Threading.Tasks;
Net<T> m_unrolledNet = null;
int m_nLastLayerIndex;
bool m_bExposeHiddenInput;
bool m_bExposeHiddenOutput;
long m_hDropoutStates;
ulong m_nWorkspaceSizeInBytes;
bool m_bWorkspaceOwned = true;
ulong m_nReservedSizeInBytes;
bool m_bReservedOwned = true;
bool m_bUseTensors = false;
List<int> m_rgShape = new List<int>(4);
bool m_bWarningShown = false;
bool m_bCudnnRnn8Supported = false;
bool m_bUseCudnnRnn8 = false;
m_evtCancel = evtCancel;

private void free_tensor(ref long h)

if (m_unrolledNet != null)
    m_unrolledNet = null;

free_tensor(ref m_hHxDesc);
free_tensor(ref m_hCxDesc);
free_tensor(ref m_hHyDesc);
free_tensor(ref m_hCyDesc);

if (m_hWeightDesc != 0)
    m_cuda.FreeFilterDesc(m_hWeightDesc);

m_cuda.FreeRnnDesc(m_hRnnDesc);

if (m_hDropoutDesc != 0)
    m_cuda.FreeDropoutDesc(m_hDropoutDesc);

if (m_hDropoutStates != 0)
{
    m_cuda.FreeMemory(m_hDropoutStates);
    m_hDropoutStates = 0;
}

m_cuda.FreeRnnDataDesc(m_hXDesc);
m_cuda.FreeRnnDataDesc(m_hYDesc);

if (m_hWorkspace != 0)
{
    if (m_bWorkspaceOwned)
        m_cuda.FreeMemory(m_hWorkspace);
}

if (m_hReserved != 0)
{
    if (m_bReservedOwned)
        m_cuda.FreeMemory(m_hReserved);
}

m_cuda.FreeCuDNN(m_hCuDnn);

if (m_transposeData != null)
{
    m_transposeData.Dispose();
    m_transposeData = null;
}

if (m_transposeClip != null)
    m_transposeClip = null;

if (m_unrolledNet == null)

base.ResetOnDebug(fn);

if (m_unrolledNet == null)
Blob<T> blobBtm0 = colBottom[0];
Blob<T> blobBtm1 = colBottom[1];

m_bWarningShown = false;
m_bCudnnRnn8Supported = m_cuda.IsRnn8Supported();
m_bUseCudnnRnn8 = true;

addBtmTop(colBottom[0], m_blobBtmData);
m_transposeData.Setup(m_colBtm, m_colTop);
blobBtm0 = m_blobBtmData;

addBtmTop(colBottom[1], m_blobBtmClip);
m_transposeClip.Setup(m_colBtm, m_colTop);
m_rgShape.Add(m_blobBtmClip.num);
m_rgShape.Add(m_blobBtmClip.channels);
m_blobBtmClip.Reshape(m_rgShape);
blobBtm1 = m_blobBtmClip;

m_log.CHECK_GE(blobBtm0.num_axes, 2, "Bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)");

m_nInputSize = colBottom[0].count(2);

m_log.WriteLine("Initializing recurrent layer: assuming input batch contains " + m_nT.ToString() + " timesteps of " + m_nN.ToString() + " independent streams.");

m_log.CHECK_EQ(blobBtm1.num_axes, 2, "Bottom[1] must have exactly 2 axes -- (#timesteps, #streams)");

layerSetUpCuDnn(colBottom, colTop);
layerSetUpCaffe(colBottom, colTop);

layerSetupCudnnRnn8(colBottom, colTop);
layerSetupCudnnRnn(colBottom, colTop);
private void setupSharedWorkspaceAndReserved(ulong ulWsInBytes, ulong ulResInBytes)

m_nWorkspaceSizeInBytes = ulWsInBytes;
m_bWorkspaceOwned = true;
m_nReservedSizeInBytes = ulResInBytes;
m_bReservedOwned = true;

m_hWorkspace = m_cuda.AllocMemory((long)m_nWorkspaceSizeInBytes);
if (ulResInBytes > 0)
    m_hReserved = m_cuda.AllocMemory((long)ulResInBytes);

m_log.WriteLine("WARNING: RNN8 currently does not support Tensor Cores, disabling Tensor Cores for RNN8.");
m_hCuDnn = m_cuda.CreateCuDNN();

blobs.Add(m_blobWts);

m_hRnn8 = m_cuda.CreateRnn8();

m_nHiddenSize * nBidirectionalScale,

Blob<T> blobBtm0 = colBottom[0];
blobBtm0 = m_blobBtmData;

m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);

ulong ulWorkspaceSizeInBytes;
ulong ulReservedSizeInBytes;
m_cuda.GetRnn8MemorySizes(m_hCuDnn, m_hRnn8, out szWtCount, out ulWorkspaceSizeInBytes, out ulReservedSizeInBytes);

List<int> rgWtShape = new List<int>() { (int)szWtCount, 1, 1 };

setupSharedWorkspaceAndReserved(ulWorkspaceSizeInBytes, ulReservedSizeInBytes);

throw new Exception("Currently the RNN2 weights only support 'constant' and 'xavier' fillers.");

double dfBiasVal = 0;
double dfBiasVal2 = 0;

throw new Exception("Currently the RNN2 bias' only support 'constant' and 'xavier' fillers.");

m_cuda.InitializeRnn8Weights(m_hCuDnn, m_hRnn8, m_blobWts.mutable_gpu_data, ftWt, dfWtVal, dfWtVal2, ftBias, dfBiasVal, dfBiasVal2);

catch (Exception excpt)
m_hCuDnn = m_cuda.CreateCuDNN();

blobs.Add(m_blobWts);

m_hXDesc = m_cuda.CreateRnnDataDesc();
m_hYDesc = m_cuda.CreateRnnDataDesc();

m_hHxDesc = m_cuda.CreateTensorDesc();
m_hCxDesc = m_cuda.CreateTensorDesc();
m_hHyDesc = m_cuda.CreateTensorDesc();
m_hCyDesc = m_cuda.CreateTensorDesc();

m_hRnnDesc = m_cuda.CreateRnnDesc();
m_hWeightDesc = m_cuda.CreateFilterDesc();
m_hDropoutDesc = m_cuda.CreateDropoutDesc();

Blob<T> blobBtm0 = colBottom[0];
blobBtm0 = m_blobBtmData;

m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);
m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, nDir);

int[] rgDimA = new int[3];
int[] rgStrideA = new int[3];

rgDimA[2] = m_nHiddenSize;

rgStrideA[0] = rgDimA[2] * rgDimA[1];
rgStrideA[1] = rgDimA[2];

m_cuda.SetTensorNdDesc(m_hHxDesc, rgDimA, rgStrideA);
m_cuda.SetTensorNdDesc(m_hCxDesc, rgDimA, rgStrideA);
m_cuda.SetTensorNdDesc(m_hHyDesc, rgDimA, rgStrideA);
m_cuda.SetTensorNdDesc(m_hCyDesc, rgDimA, rgStrideA);

ulong ulReservedCount;
m_cuda.GetDropoutInfo(m_hCuDnn, 0, out ulStateCount, out ulReservedCount);
m_hDropoutStates = m_cuda.AllocMemory((long)ulStateCount);

m_cuda.SetRnnDesc(m_hCuDnn, m_hRnnDesc, m_nHiddenSize, m_nNumLayers, m_hDropoutDesc, m_rnnMode, m_bUseTensors, dir);

int nCount = m_cuda.GetRnnParamCount(m_hCuDnn, m_hRnnDesc, m_hXDesc);
List<int> rgWtShape = new List<int>() { nCount, 1, 1 };

int[] rgDimW = new int[3];
m_cuda.SetFilterNdDesc(m_hWeightDesc, rgDimW);

ulong ulReservedSizeInBytes;
ulong ulWorkspaceSizeInBytes = m_cuda.GetRnnWorkspaceCount(m_hCuDnn, m_hRnnDesc, m_hXDesc, out ulReservedSizeInBytes);

setupSharedWorkspaceAndReserved(ulWorkspaceSizeInBytes, ulReservedSizeInBytes);

int nNumLinearLayers = (m_rnnMode == RNN_MODE.LSTM) ? 8 : 2;

for (int i = 0; i < m_nNumLayers * nBidir; i++)
{
    for (int j = 0; j < nNumLinearLayers; j++)
    {
        m_cuda.GetRnnLinLayerParams(m_hCuDnn, m_hRnnDesc, i, m_hXDesc, m_hWeightDesc, m_blobWts.gpu_data, j, out nWtCount, out hWt, out nBiasCount, out hBias);

        if (nWtCount % 2 != 0)

        fillerWt.Fill(nWtCount, hWt);

        if (nBiasCount % 2 != 0)

        fillerBias.Fill(nBiasCount, hBias);

        m_cuda.FreeMemoryPointer(hWt);
        m_cuda.FreeMemoryPointer(hBias);
    }
}

catch (Exception excpt)
m_log.FAIL("The 'auto_repeat_hidden_states_across_layers' setting is not supported in the Caffe implementation, use the cuDNN implementation instead.");

Blob<T> blobBtm0 = colBottom[0];
Blob<T> blobBtm1 = colBottom[1];

blobBtm0 = m_blobBtmData;
blobBtm1 = m_blobBtmClip;

List<string> rgOutputNames = new List<string>();
List<string> rgRecurInputNames = new List<string>();
List<string> rgRecurOutputNames = new List<string>();

int nNumRecurBlobs = rgRecurInputNames.Count;
m_log.CHECK_EQ(nNumRecurBlobs, rgRecurOutputNames.Count, "The number of recurrent input names must equal the number of recurrent output names.");

int nNumHiddenExposed = (m_bExposeHiddenOutput) ? nNumRecurBlobs : 0;
int nBottomCount = (m_bExposeHiddenInput) ? 4 : 2;

m_log.CHECK_GE(colBottom[2].num_axes, 1, "When static input is present, the bottom[2].num_axes must be >= 1");
m_log.CHECK_EQ(m_nN, colBottom[2].shape(1), "When static input is present, the bottom[2].shape(1) must = N which is " + m_nN.ToString());

input_layer.top.Add("x");
for (int i = 0; i < blobBtm0.num_axes; i++)
    input_shape1.dim.Add(blobBtm0.shape(i));

input_layer.top.Add("cont");
for (int i = 0; i < blobBtm1.num_axes; i++)
    input_shape2.dim.Add(blobBtm1.shape(i));

input_layer.top.Add("x_static");
for (int i = 0; i < colBottom[2].num_axes; i++)
    input_shape3.dim.Add(colBottom[2].shape(i));

net_param.layer.Add(input_layer);

if (strLayerName.Length > 0)
{
    for (int i = 0; i < net_param.layer.Count; i++)
        layer.name = strLayerName + "_" + layer.name;
}

List<string> rgPseudoLosses = new List<string>();
for (int i = 0; i < rgOutputNames.Count; i++)
{
    rgPseudoLosses.Add(rgOutputNames[i] + "_pseudoloss");

    layer.bottom.Add(rgOutputNames[i]);
    layer.top.Add(rgPseudoLosses[i]);

    net_param.layer.Add(layer);
}

if (m_param is LayerParameterEx<T>)
{
    RecurrentLayer<T> sharedLayer = ((LayerParameterEx<T>)m_param).SharedLayer as RecurrentLayer<T>;
    if (sharedLayer != null)
        sharedNet = sharedLayer.m_unrolledNet;
}
m_blobContInputBlob = m_unrolledNet.blob_by_name("cont");
m_blobXStaticInputBlob = m_unrolledNet.blob_by_name("x_static");

for (int i = 0; i < nNumRecurBlobs; i++)
{
    m_colRecurInputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurInputNames[i]));
    m_colRecurOutputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurOutputNames[i]));
}

m_log.CHECK_EQ(colTop.Count() - nNumHiddenExposed, rgOutputNames.Count, "OutputBlobNames must provide output blob name for each top.");

for (int i = 0; i < rgOutputNames.Count; i++)

m_log.CHECK_EQ(2 + nNumRecurBlobs + nStaticInput, m_unrolledNet.input_blobs.Count, "The unrolled net input count should equal 2 + number of recurrent blobs (" + nNumRecurBlobs.ToString() + ") + static inputs (" + nStaticInput.ToString() + ")");

for (int i = 0; i < m_unrolledNet.parameters.Count; i++)

for (int i = 0; i < m_unrolledNet.layers.Count; i++)
{
    for (int j = 0; j < m_unrolledNet.layers[i].blobs.Count; j++)
        m_log.CHECK(m_unrolledNet.layers[i].param_propagate_down(j), "param_propagate_down not set for layer " + i.ToString() + ", param " + j.ToString());
}

for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
    m_colRecurOutputBlobs[i].SetDiff(0);

List<string> rgLayerNames = m_unrolledNet.layer_names;
m_nLastLayerIndex = rgLayerNames.Count - 1 - rgPseudoLosses.Count;

for (int i = m_nLastLayerIndex + 1, j = 0; i < rgLayerNames.Count; i++, j++)
    m_log.CHECK(rgLayerNames[i] == rgPseudoLosses[j], "The last layer at idx " + i.ToString() + " should be the pseudo layer named " + rgPseudoLosses[j]);

blob = m_colRecurInputBlobs[0];

if (m_colRecurInputBlobs.Count > 1)
    blob = m_colRecurInputBlobs[1];

blob = m_colRecurOutputBlobs[0];

if (m_colRecurOutputBlobs.Count > 1)
    blob = m_colRecurOutputBlobs[1];
Blob<T> blobBtm0 = colBottom[0];
Blob<T> blobBtm1 = colBottom[1];

addBtmTop(colBottom[0], m_blobBtmData);
m_transposeData.Reshape(m_colBtm, m_colTop);
blobBtm0 = m_blobBtmData;

addBtmTop(colBottom[1], m_blobBtmClip);
m_transposeClip.Reshape(m_colBtm, m_colTop);

m_rgShape.Add(m_blobBtmClip.num);
m_rgShape.Add(m_blobBtmClip.channels);
m_blobBtmClip.Reshape(m_rgShape);
blobBtm1 = m_blobBtmClip;

m_log.CHECK_GE(blobBtm0.num_axes, 2, "bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)");
m_log.CHECK_EQ(blobBtm1.num_axes, 2, "bottom[1] must have exactly 2 axes -- (#timesteps, #streams)");

reshapeCuDnn(colBottom, colTop);
reshapeCaffe(colBottom, colTop);

addBtmTop(m_blobTopData, colTop[0]);
m_transposeData.Reshape(m_colBtm, m_colTop);

reshapeCudnnRnn8(colBottom, colTop);
reshapeCudnnRnn(colBottom, colTop);
Blob<T> blobBtm0 = colBottom[0];

blobBtm0 = m_blobBtmData;
blobTop0 = m_blobTopData;

m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);

colTop[1].ShareData(m_blobHy);
colTop[1].ShareDiff(m_blobHy);
colTop[2].ShareData(m_blobCy);
colTop[2].ShareDiff(m_blobCy);

Blob<T> blobBtm0 = colBottom[0];

blobBtm0 = m_blobBtmData;
blobTop0 = m_blobTopData;

m_blobHx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobCx.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);
m_blobCy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);

colTop[1].ShareData(m_blobHy);
colTop[1].ShareDiff(m_blobHy);
colTop[2].ShareData(m_blobCy);
colTop[2].ShareDiff(m_blobCy);
Blob<T> blobBtm0 = colBottom[0];
Blob<T> blobBtm1 = colBottom[1];

blobBtm0 = m_blobBtmData;
blobBtm1 = m_blobBtmClip;
blobTop0 = m_blobTopData;

List<int> rgContShape = blobBtm1.shape();
m_blobContInputBlob.Reshape(rgContShape);

List<BlobShape> rgRecurInputShapes = new List<BlobShape>();
m_log.CHECK_EQ(rgRecurInputShapes.Count, m_colRecurInputBlobs.Count, "The number of recurrent input shapes must equal the number of recurrent input blobs!");

for (int i = 0; i < rgRecurInputShapes.Count; i++)
    m_colRecurInputBlobs[i].Reshape(rgRecurInputShapes[i]);

m_blobContInputBlob.ShareData(blobBtm1);

int nStaticInput = 0;

m_blobXStaticInputBlob.ShareData(colBottom[2]);
m_blobXStaticInputBlob.ShareDiff(colBottom[2]);

if (m_bExposeHiddenInput)
{
    int nBottomOffset = 2 + nStaticInput;
    for (int i = nBottomOffset, j = 0; i < colBottom.Count; i++, j++)
    {
        m_log.CHECK(Utility.Compare<int>(m_colRecurInputBlobs[j].shape(), colBottom[i].shape()), "Shape mismatch - recur_input_blobs_[" + j.ToString() + "]: '" + m_colRecurInputBlobs[j].shape_string + "' vs. bottom[" + i.ToString() + "]: '" + colBottom[i].shape_string + "'");
        m_colRecurInputBlobs[j].ShareData(colBottom[i]);
    }
}

for (int i = 0; i < m_colOutputBlobs.Count; i++)
{
    blobTop0.ShareData(m_colOutputBlobs[i]);
    blobTop0.ShareDiff(m_colOutputBlobs[i]);

    colTop[i].ShareData(m_colOutputBlobs[i]);
    colTop[i].ShareDiff(m_colOutputBlobs[i]);
}

if (m_bExposeHiddenOutput)
{
    int nTopOffset = m_colOutputBlobs.Count;
    for (int i = nTopOffset, j = 0; i < colTop.Count; i++, j++)
    {
        colTop[i].ShareData(m_colRecurOutputBlobs[j]);
        colTop[i].ShareDiff(m_colRecurOutputBlobs[j]);
    }
}

for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
    m_colRecurOutputBlobs[i].SetData(0);
int nMinBottoms = 2;

List<string> rgInputs = new List<string>();
nMinBottoms += rgInputs.Count;

List<string> rgOutputs = new List<string>();
nNumTops += rgOutputs.Count;

return (nBottomIdx != 1) ? true : false;

if (m_blobCx != null)

if (m_blobHx != null)

if (m_blobCy != null)

if (m_blobHy != null)
addBtmTop(colBottom[0], m_blobBtmData);
m_transposeData.Forward(m_colBtm, m_colTop);
addBtmTop(colBottom[1], m_blobBtmClip);
m_transposeClip.Forward(m_colBtm, m_colTop);

forward_cudnn(colBottom, colTop);
forward_cuda(colBottom, colTop);

addBtmTop(m_blobTopData, colTop[0]);
m_transposeData.Forward(m_colBtm, m_colTop);

for (int i = 1; i < bTop.num; i++)

for (int i = 1; i < bTop.num; i++)

if (m_bUseCudnnRnn8)
    forward_cudnnRnn8(colBottom, colTop);
else
    forward_cudnnRnn(colBottom, colTop);

if (colBottom.Count > 2)

if (colBottom.Count > 2)
    copy_or_repeat_fwd(colBottom[2], m_blobHx);

if (colBottom.Count > 3)
    copy_or_repeat_fwd(colBottom[3], m_blobCx);

m_cuda.Rnn8Forward(m_hCuDnn,

Blob<T> blobBtm1 = colBottom[1];
blobBtm1 = m_blobBtmClip;

if (dfClip > 0 || colBottom.Count > 2)

if (colBottom.Count > 2)
    copy_or_repeat_fwd(colBottom[2], m_blobHy);

if (colBottom.Count > 3)
    copy_or_repeat_fwd(colBottom[3], m_blobCy);

m_cuda.RnnForward(m_hCuDnn,
    m_nWorkspaceSizeInBytes,
    m_nReservedSizeInBytes,
m_log.CHECK_EQ(m_colRecurInputBlobs.Count, m_colRecurOutputBlobs.Count, "The recurrent input and output blobs must have the same count.");

if (!m_bExposeHiddenInput)
{
    for (int i = 0; i < m_colRecurInputBlobs.Count; i++)
    {
        int nCount = m_colRecurInputBlobs[i].count();
        m_log.CHECK_EQ(nCount, m_colRecurOutputBlobs[i].count(), "The input and output blob at " + i.ToString() + " must have the same count.");

        long hTimestep_T_Data = m_colRecurOutputBlobs[i].gpu_data;
        long hTimestep_0_Data = m_colRecurInputBlobs[i].mutable_gpu_data;
        m_cuda.copy(nCount, hTimestep_T_Data, hTimestep_0_Data);
    }
}
addBtmTop(m_blobTopData, colTop[0]);
m_transposeData.Backward(m_colTop, rgbPropagateDown, m_colBtm);

backward_cudnn(colTop, rgbPropagateDown, colBottom);
backward_cuda(colTop, rgbPropagateDown, colBottom);

addBtmTop(colBottom[0], m_blobBtmData);
m_transposeData.Backward(m_colTop, rgbPropagateDown, m_colBtm);

if (m_bUseCudnnRnn8)
    backward_cudnnRnn8(colTop, rgbPropagateDown, colBottom);
else
    backward_cudnnRnn(colTop, rgbPropagateDown, colBottom);

if (colTop.Count > 2)

if (colTop.Count > 1)
{
    m_log.CHECK_EQ(colTop[1].count(), m_blobHy.count(), "The bottom(1) should have the same shape as 'hy' which has a shape = " + m_blobHy.shape_string);
    m_blobHy.CopyFrom(colTop[1], true);
}

if (colTop.Count > 2)
{
    m_log.CHECK_EQ(colTop[2].count(), m_blobCy.count(), "The bottom(2) should have the same shape as 'cy' which has a shape = " + m_blobCy.shape_string);
    m_blobCy.CopyFrom(colTop[2], true);
}

m_cuda.Rnn8Backward(m_hCuDnn,

if (colBottom.Count > 2)

if (colBottom.Count > 2)
    copy_or_repeat_bwd(colBottom[2], m_blobHx);

if (colBottom.Count > 3)
    copy_or_repeat_bwd(colBottom[3], m_blobCx);
if (rgbPropagateDown[1] && !m_bWarningShown)
{
    m_log.WriteLine("WARNING: Cannot backpropagate to sequence indicators, sequence backprop will be ignored.");
    m_bWarningShown = true;
}

if (colTop.Count > 2)

if (colTop.Count > 1)
{
    m_log.CHECK_EQ(colTop[1].count(), m_blobHy.count(), "The bottom(1) should have the same shape as 'hy' which has a shape = " + m_blobHy.shape_string);
    m_blobHy.CopyFrom(colTop[1], true);
}

if (colTop.Count > 2)
{
    m_log.CHECK_EQ(colTop[2].count(), m_blobCy.count(), "The bottom(2) should have the same shape as 'cy' which has a shape = " + m_blobCy.shape_string);
    m_blobCy.CopyFrom(colTop[2], true);
}

m_cuda.RnnBackwardData(m_hCuDnn,
    m_nWorkspaceSizeInBytes,
    m_nReservedSizeInBytes);

m_cuda.RnnBackwardWeights(m_hCuDnn,
    m_nWorkspaceSizeInBytes,
    m_nReservedSizeInBytes);

if (colBottom.Count > 2)

if (colBottom.Count > 2)
    copy_or_repeat_bwd(colBottom[2], m_blobHx);

if (colBottom.Count > 3)
    copy_or_repeat_bwd(colBottom[3], m_blobCx);
m_log.CHECK(!rgbPropagateDown[1], "Cannot backpropagate to sequence indicators.");

m_unrolledNet.Backward(m_nLastLayerIndex);

if (colBottom.Count > nCount)

if (colBottom.Count > nCount)
{
    m_log.CHECK_EQ(colBottom[nCount].count(), m_blobHx.count(), "The bottom(" + nCount.ToString() + ") should have the same shape as 'hx' which has a shape = " + m_blobHx.shape_string);
    colBottom[nCount].CopyFrom(m_blobHx, true);
}

if (colBottom.Count > nCount + 1)
{
    m_log.CHECK_EQ(colBottom[nCount + 1].count(), m_blobCx.count(), "The bottom(" + (nCount + 1).ToString() + ") should have the same shape as 'cx' which has a shape = " + m_blobCx.shape_string);
    colBottom[nCount + 1].CopyFrom(m_blobCx, true);
}
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
The Log class provides general output in text form.
void CHECK(bool b, string str)
Test a flag for true.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
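A minimal sketch of how these Log checks are typically used from layer code; the constructor argument and messages below are illustrative assumptions, not taken from this source.
Log log = new Log("RecurrentLayer-example");   // assumed constructor taking a source name
log.CHECK_GE(blobBtm0.num_axes, 2, "Bottom[0] must have at least 2 axes.");  // throws on failure
log.CHECK_EQ(blobBtm1.num_axes, 2, "Bottom[1] must have exactly 2 axes.");
log.WriteLine("Initializing recurrent layer.");
// log.FAIL("...") unconditionally throws an exception with the descriptive text.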
The Utility class provides general utility functions.
The BlobCollection contains a list of Blobs.
BlobCollection()
The BlobCollection constructor.
void Add(Blob< T > b)
Add a new Blob to the collection.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
void ReshapeLike(BlobCollection< T > src)
Reshapes all blobs in the collection to the sizes of the source.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
void CopyFrom(BlobCollection< T > bSrc, bool bCopyDiff=false)
Copy the data or diff from another BlobCollection into this one.
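A short sketch of the BlobCollection<T> calls listed above, assuming it runs inside layer code where a blob such as m_blobHx already exists; the shape values are illustrative.
BlobCollection<T> col = new BlobCollection<T>();
col.Add(m_blobHx);                          // add an existing Blob<T>
col.Reshape(new int[] { 2, 32, 256, 1 });   // reshape every blob in the collection
int nBlobCount = col.Count;                 // number of blobs held
col.Clear();                                // remove all items (optionally disposing them)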
The Blob is the main holder of data that moves through the Layers of the Net.
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
void ShareData(Blob< T > b)
Set the data to point to the data of the other blob – useful in Layers which simply perform a copy in...
int num_axes
Returns the number of axes in the Blob.
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
string shape_string
Returns a string describing the Blob's shape.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
bool reshape_when_sharing
When true, this Blob is reshaped to the source when sharing the source data (default = false).
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
T GetData(int nIdx)
Returns the data at a given flat index within the Blob.
int count()
Returns the total number of items in the Blob.
void ShareDiff(Blob< T > b)
Set the diff to point to the diff of the other blob – useful in Layers which simply perform a copy in...
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
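A brief sketch of the Blob<T> members the layer relies on above (Reshape, ShareData/ShareDiff, count); the (cuda, log) constructor form shown is an assumption.
Blob<T> blobHy = new Blob<T>(m_cuda, m_log);            // assumed constructor
blobHy.Reshape(m_nNumLayers, m_nN, m_nHiddenSize, 1);   // num, channels, height, width
colTop[1].ShareData(blobHy);                            // top points at hy data, no copy
colTop[1].ShareDiff(blobHy);                            // and at its diff
int nCount = blobHy.count();                            // total element count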
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
The GetWorkBlobArgs are passed to the Layer::OnGetWorkBlob event which is supported for debugging onl...
Connects Layers together into a directed acyclic graph (DAG) specified by a NetParameter.
List< Layer< T > > layers
Returns the layers.
void Reshape()
Reshape all layers from the bottom to the top.
double ForwardFromTo(int nStart=0, int nEnd=int.MaxValue)
The FromTo variant of forward and backward operate on the (topological) ordering by which the net is ...
BlobCollection< T > parameters
Returns the parameters.
List< string > layer_names
Returns the layer names.
BlobCollection< T > input_blobs
Returns the collection of input Blobs.
void set_debug_info(bool bVal)
Sets the debug information flag.
void Backward(int nStart=int.MaxValue, int nEnd=0)
The network backward should take no input and output, since it solely computes the gradient w....
virtual void Dispose(bool bDisposing)
Releases all resources (GPU and Host) used by the Net.
Blob< T > blob_by_name(string strName, bool bThrowExceptionOnError=true)
Returns a blob given its name.
List< int > param_owners
Returns the list of parameter owner indexes.
List< string > param_display_names
Returns the list of parameter display names.
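A sketch of how the unrolled Net<T> is queried and driven, using only the members documented above; the blob names 'cont' and 'x_static' come from the listing.
Blob<T> blobCont = m_unrolledNet.blob_by_name("cont");
Blob<T> blobStatic = m_unrolledNet.blob_by_name("x_static", false);  // do not throw if missing
m_unrolledNet.ForwardFromTo(0, m_nLastLayerIndex);                   // run the unrolled timesteps
m_unrolledNet.Backward(m_nLastLayerIndex);                           // backprop, skipping the pseudo-loss layers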
Abstract Filler class used to fill blobs with values.
void Fill(Blob< T > b)
Fill the blob with values based on the actual filler used.
static Filler< T > Create(CudaDnn< T > cuda, Log log, FillerParameter p)
Create a new Filler instance.
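A sketch of creating and applying a filler as the setup code does above; the FillerParameter constructor taking a type name is an assumption.
FillerParameter fp = new FillerParameter("xavier");        // assumed constructor taking the filler type
Filler<T> fillerWt = Filler<T>.Create(m_cuda, m_log, fp);
fillerWt.Fill(m_blobWts);                                  // fill the weight blob in place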
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
virtual void SetOnDebug(EventHandler< GetWorkBlobArgs< T > > fn)
Set the OnDebug event.
bool shareParameter(Blob< T > b, List< int > rgMinShape, bool bAllowEndsWithComparison=false)
Attempts to share a parameter Blob if another parameter Blob with the same name and acceptable size i...
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
abstract void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Adjust the shapes of top blobs and internal buffers to accommodate the shapes of the bottom blobs.
void Dispose()
Releases all GPU and host resources used by the Layer.
Phase m_phase
Specifies the Phase under which the Layer is run.
virtual void ResetOnDebug(EventHandler< GetWorkBlobArgs< T > > fn)
Reset the OnDebug event, disabling it.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
static Layer< T > Create(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db=null, TransferInput trxinput=null)
Create a new Layer based on the LayerParameter.
BlobCollection< T > blobs
Returns the collection of learnable parameter Blobs for the Layer.
DictionaryMap< bool > m_rgbParamPropagateDown
Specifies whether or not to compute the learnable diff of each parameter Blob.
LayerParameter convertLayerParam(LayerParameter pChild, LayerParameter pParent)
Called to convert a parent LayerParameterEx, used in blob sharing, with a child layer parameter.
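A sketch of the generic Layer<T> lifecycle the unrolled net is built from; the LayerParameter constructor form and the chosen layer type are illustrative assumptions.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.LSTM, "lstm1");  // assumed constructor
Layer<T> layer = Layer<T>.Create(m_cuda, m_log, p, m_evtCancel);
layer.Setup(colBottom, colTop);                     // common setup plus shape checks
double dfLoss = layer.Forward(colBottom, colTop);   // compute tops (and loss, if any)
layer.Dispose();                                    // release GPU and host resources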
The RecurrentLayer is an abstract class for implementing recurrent behavior inside of an unrolled network.
RecurrentLayer(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel)
The RecurrentLayer constructor.
override int ExactNumTopBlobs
Returns the min number of required top (output) Blobs.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Performs the forward calculation.
override int MaxBottomBlobs
Returns the maximum number of required bottom (input) Blobs: min+1
abstract void RecurrentInputShapes(List< BlobShape > rgShapes)
Fills shapes with the shapes of the recurrent input Blobs. Subclasses should define this – see RNNL...
abstract void RecurrentOutputBlobNames(List< string > rgNames)
Fills names with the names of the Tth timestep recurrent output Blobs. Subclasses should define this...
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Backward computation.
abstract void RecurrentInputBlobNames(List< string > rgNames)
Fills names with the names of the 0th timestep recurrent input Blobs. Subclasses should define this ...
override void SetOnDebug(EventHandler< GetWorkBlobArgs< T > > fn)
Set the OnDebug event on the unrolled net.
abstract void FillUnrolledNet(NetParameter net_param)
Fills net_param with the recurrent network architecture. Subclasses should define this – see RNNLayer...
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
abstract void OutputBlobNames(List< string > rgNames)
Fills names with the names of the output blobs, concatenated across all timesteps....
override int MinBottomBlobs
Returns the minimum number of required bottom (input) Blobs.
virtual void Reset()
Reset the hidden state of the net by zeroing out all recurrent outputs.
int m_nN
The number of independent streams to process simultaneously.
override void setup_internal_blobs(BlobCollection< T > col)
Derived layers should add all internal blobs to the 'col' provided.
int m_nT
The number of timesteps in the layer's input, and the number of timesteps over which to backpropagate...
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override void ResetOnDebug(EventHandler< GetWorkBlobArgs< T > > fn)
Reset the OnDebug event, disabling it on the unrolled net.
bool m_bStaticInput
Whether the layer has a 'static' input that is copied across all timesteps.
override bool AllowForceBackward(int nBottomIdx)
Returns true for all but the bottom index = 1, since you can't propagate to the sequence continuation indicators.
override void dispose()
Releases all GPU and host resources used by the Layer.
Specifies the shape of a Blob.
BlobShape()
The BlobShape constructor.
List< int > dim
The blob shape dimensions.
double value
Specifies the value used by 'constant' filler.
double mean
Specifies the mean value to use with the 'gaussian' filler.
string type
Specifies the type of filler to use.
double std
Specifies the standard deviation value to use with the 'gaussian' filler.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
List< double > loss_weight
Specifies the loss weight.
LayerType type
Specifies the type of this LayerParameter.
InputParameter input_param
Returns the parameter set when initialized with LayerType.INPUT
List< string > top
Specifies the active top connections (in the bottom, out the top)
TransposeParameter transpose_param
Returns the parameter set when initialized with LayerType.TRANSPOSE
RecurrentParameter recurrent_param
Returns the parameter set when initialized with LayerType.RECURRENT
List< string > bottom
Specifies the active bottom connections (in the bottom, out the top).
LayerType
Specifies the layer type.
Specifies the parameters used to create a Net.
List< LayerParameter > layer
The layers that make up the net. Each of their configurations, including connectivity and behavior,...
bool use_cudnn_rnn8_if_supported
Specifies to use cuDnn RNN8 if supported (requires cuDnn 8.0 or higher), (default = false).
bool debug_info
Whether to enable displaying debug info in the unrolled recurrent net.
uint num_layers
The number of LSTM layers to implement.
uint num_output
The dimension of the output (and usually hidden state) representation – must be explicitly set to a non-zero value.
FillerParameter weight_filler
The filler for the weights.
bool expose_hidden_output
Whether to add as additional outputs (tops) the final timestep hidden state blobs....
bool bidirectional
Specifies whether the network is bidirectional (true) or unidirectional (false - default).
bool batch_first
The inputs and outputs are shaped with the batch in the first dimension.
long dropout_seed
Specifies the seed used by cuDnn for random number generation.
bool useCudnn()
Queries whether or not to use NVIDIA's cuDnn.
double dropout_ratio
Specifies the dropout ratio. (e.g. the probability that values will be dropped out and set to zero....
FillerParameter bias_filler
The filler for the bias.
bool cudnn_enable_tensor_cores
Specifies to enable the CUDA tensor cores when performing the rnn operations which is faster but not ...
bool auto_repeat_hidden_states_across_layers
Auto repeat the hidden and cell states so that a separate state is fed to each layer.
bool expose_hidden_input
Whether to add as additional inputs (bottoms) the initial hidden state blobs. The number of addition...
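A sketch of configuring the recurrent parameters documented above; the specific values, and the LayerParameter and FillerParameter constructor forms, are illustrative assumptions.
LayerParameter p = new LayerParameter(LayerParameter.LayerType.LSTM);  // assumed constructor
p.recurrent_param.num_output = 256;                     // hidden/output dimension, must be non-zero
p.recurrent_param.num_layers = 2;
p.recurrent_param.dropout_ratio = 0.1;
p.recurrent_param.bidirectional = false;
p.recurrent_param.batch_first = true;
p.recurrent_param.use_cudnn_rnn8_if_supported = true;   // requires cuDNN 8.0 or higher
p.recurrent_param.expose_hidden_input = false;
p.recurrent_param.weight_filler = new FillerParameter("xavier");  // assumed constructor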
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Phase
Defines the Phase under which to run a Net.
The MyCaffe.common namespace contains common MyCaffe classes.
RNN_DATALAYOUT
Specifies the RNN data layout of the data input.
DIR
Defines the direction of data flow.
RNN_MODE
Specifies the RNN mode to use with the Recurrent Layer when using the cuDNN engine.
RNN_BIAS_MODE
Specifies the RNN bias mode to use with the Recurrent Layer when using the cuDNN engine.
RNN_DIRECTION
Specifies the RNN directional used.
RNN_FILLER_TYPE
Defines the filler types used to fill the RNN8 weights.
The MyCaffe.db.image namespace contains all image database related classes.
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...