using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading.Tasks;
List<Tuple<string, Action>> m_rgBackprop = new List<Tuple<string, Action>>();
Dictionary<string, Blob<T>[]> m_rgDebug = new Dictionary<string, Blob<T>[]>();
bool m_bNeedsBackprop = true;
bool m_bCheckForNans = false;
bool m_bClipGradients = false;
bool m_bAddDebug = false;
string m_strMarker = null;
public ComputeGraph(CudaDnn<T> cuda, Log log, int nAxis, bool bNeedsBackprop = true, bool bClipGradients = false, bool bCheckNans = false, bool bAddDebug = false)
{
    m_blobWork = new Blob<T>(cuda, log);
    m_bNeedsBackprop = bNeedsBackprop;
    m_bCheckForNans = bCheckNans;
    m_bClipGradients = bClipGradients;
    m_bAddDebug = bAddDebug;
    // ...
}
// From Dispose():
if (m_blobWork != null)
{
    // ... (elided)
}
private void add_debug(string str, params Blob<T>[] rg)
{
    string strName = m_rgDebug.Count.ToString() + "_" + str;
    m_rgDebug.Add(strName, rg);
}
public Dictionary<string, Blob<T>[]> Debug
{
    get { return m_rgDebug; }
}

public string marker
{
    get { return m_strMarker; }
    set { m_strMarker = value; }
}

public bool needs_backprop
{
    get { return m_bNeedsBackprop; }
    set { m_bNeedsBackprop = value; }
}

public int axis
{
    get { return m_nAxis; }
}

private Blob<T> work
{
    get { return m_blobWork; }
}
private int input_count(Blob<T> b)
{
    if (b.num_axes <= m_nAxis + 2)
    {
        // ... (elided)
    }
    return b.count(m_nAxis + 2);
}
private void clip_gradient1(Blob<T> b)
{
    float[] rg = Utility.ConvertVecF<T>(b.mutable_cpu_diff);
    for (int i = 0; i < rg.Length; i++)
    {
        if (Math.Abs(rg[i]) < 0.000001)
        {
            // ... (elided)
        }
        rg[i] = (float)Math.Round(rg[i], 7);
    }
    // ...
}
private void clip_gradient(params Blob<T>[] rg)
{
    foreach (Blob<T> b in rg)
        clip_gradient1(b);  // per-blob helper shown above
}
private T[] round(T[] rgData1, int nDecimals)
{
    float[] rgData = Utility.ConvertVecF<T>(rgData1);
    for (int i = 0; i < rgData.Length; i++)
        rgData[i] = (float)Math.Round(rgData[i], nDecimals);
    // ... (elided: convert back to T[] and return)
}
private void check_nan(params Blob<T>[] rg)
{
    for (int i = 0; i < rg.Length; i++)
    {
        work.ReshapeLike(rg[i]);
        Tuple<double, double, double, double> data = rg[i].minmax_data(work, true);
        Tuple<double, double, double, double> diff = rg[i].minmax_diff(work, true);
        double dfDataNanCount = data.Item3;
        double dfDataInfCount = data.Item4;
        double dfDiffNanCount = diff.Item3;
        double dfDiffInfCount = diff.Item4;

        if (dfDataNanCount > 0 || dfDataInfCount > 0)
            throw new Exception("NAN or INF detected in " + rg[i].Name + " data!");

        // Bug fix: the second check must test the diff counts, not the data counts again.
        if (dfDiffNanCount > 0 || dfDiffInfCount > 0)
            throw new Exception("NAN or INF detected in " + rg[i].Name + " diff!");
    }
}
private void apply(Blob<T> work, Blob<T> btm)
{
    // Accumulate the gradient staged in 'work' into the bottom blob's diff.
    m_cuda.add(btm.count(), work.gpu_diff, btm.gpu_diff, btm.mutable_gpu_diff);
}
// From DebugOp(params Blob<T>[] rgB):
string strMarker = marker;
Action backward = () =>
{
    string str = "";
    for (int i = 0; i < rgB.Length; i++)
        str += rgB[i].Name + ",";
    str = str.TrimEnd(',');
    Trace.WriteLine("Debugging at " + strMarker + " blobs: " + str);
};
m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        // ... (elided)
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// ...
string strMarker = marker;
List<int> rgShape = new List<int>() { 1, 1 };
rgShape[1] = btm.count(m_nAxis + 1);
// ...
int nSpatialDim = btm.count(m_nAxis + 1);
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        // ... (elided)
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// ...
string strMarker = marker;
int nSpatialDim = btm.count(m_nAxis);
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        // ... (elided)
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// ...
string strMarker = marker;
int nSpatialDim = btm.count(m_nAxis);
// ...
if (btm.count() == 0)
    top.SetData(0, nSpatialDim * ix, nSpatialDim);
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        // ... (elided)
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// ...
string strMarker = marker;
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        // ... (elided)
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// From tanh(btm, top):
string strMarker = marker;
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        work.ReshapeLike(btm);
        // ... (elided)
        if (m_bClipGradients)
        {
            // ... (elided)
        }
        if (m_bAddDebug)
            add_debug(strMarker + " - tanh", btm, top);
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// From sigmoid(btm, top):
string strMarker = marker;
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        work.ReshapeLike(btm);
        // ... (elided)
        if (m_bClipGradients)
        {
            // ... (elided)
        }
        if (m_bAddDebug)
            add_debug(strMarker + " - sigmoid", btm, top);
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// From eltmul(btm1, btm2, top):
string strMarker = marker;
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        work.ReshapeLike(btm1);
        // ... (elided)
        work.ReshapeLike(btm2);
        // ... (elided)
        if (m_bClipGradients)
            clip_gradient(btm1, btm2);
        if (m_bCheckForNans)
            check_nan(btm1, btm2);
        if (m_bAddDebug)
            add_debug(strMarker + " - eltmul", btm1, btm2, top);
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// From scalemul(btm1, btm2, top, nIdx):
string strMarker = marker;
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        work.ReshapeLike(btm1);
        // ... (elided)
        work.ReshapeLike(btm2);
        // ... (elided: fDot computed here)
        work.SetDiff(fDot, nIdx);   // gradient w.r.t. the scalar at 'nIdx'
        // ... (elided)
        if (m_bClipGradients)
            clip_gradient(btm1, btm2);
        if (m_bCheckForNans)
            check_nan(btm1, btm2);
        if (m_bAddDebug)
            add_debug(strMarker + " - scalemul", btm1, btm2, top);
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// From mul(btm1, btm2, top, bAccumulateGrad):
string strMarker = marker;
int nM = btm1.shape(m_nAxis);
int nN = btm2.count(m_nAxis + 1);
int nK = btm1.count(m_nAxis + 1);
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        // ... (elided gemm calls)
        if (m_bClipGradients)
            clip_gradient(btm1, btm2);
        if (m_bCheckForNans)
            check_nan(btm1, btm2);
        if (m_bAddDebug)
            add_debug(strMarker + " - mul", btm1, btm2, top);
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// From add(btm1, btm2, top, bAccumulateGrad):
string strMarker = marker;
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        if (!bAccumulateGrad)
        {
            // ... (elided)
        }
        // ... (elided)
        if (m_bClipGradients)
            clip_gradient(btm1, btm2);
        if (m_bCheckForNans)
            check_nan(btm1, btm2);
        if (m_bAddDebug)
            add_debug(strMarker + " - add", btm1, btm2, top);
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// ...
if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        // ... (elided)
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
// From softmax(btm, top):
string strMarker = marker;
// ...
int nOuterNum = btm.count(0, m_nAxis);
int nInnerNum = btm.count(m_nAxis + 1);
int nChannels = top.shape(m_nAxis);
int nCount = btm.count();
// ...
work.ReshapeLike(top);
// ...
m_cuda.channel_max(nOuterNum * nInnerNum, nOuterNum, nChannels, nInnerNum, top.gpu_data, work.mutable_gpu_data);
// ... (elided: presumably channel_sub of the per-channel max, then exp)
m_cuda.channel_sum(nOuterNum * nInnerNum, nOuterNum, nChannels, nInnerNum, top.gpu_data, work.mutable_gpu_data);
// ... (elided: presumably channel_div by the per-channel sum)

if (m_bNeedsBackprop)
{
    Action backward = () =>
    {
        work.ReshapeLike(top);
        m_cuda.copy(nCount, top.gpu_diff, work.mutable_gpu_diff);
        // ... (elided: presumably channel_dot of top.diff and top.data into work.data)
        m_cuda.channel_sub(nCount, nOuterNum, nChannels, nInnerNum, work.gpu_data, work.mutable_gpu_diff);
        // ... (elided)
        m_cuda.mul(nCount, work.gpu_diff, top.gpu_data, work.mutable_gpu_diff);
        // ... (elided)
        if (m_bClipGradients)
        {
            // ... (elided)
        }
        if (m_bAddDebug)
            add_debug(strMarker + " - softmax", btm, top);
    };
    m_rgBackprop.Add(new Tuple<string, Action>(m_strMarker, backward));
}
public int BackwardCount
{
    get { return m_rgBackprop.Count; }
}

// From BackwardOne(int nIdx):
m_rgBackprop[nIdx].Item2();

// From Backward(bool bClear = false): replay the recorded actions last-to-first.
for (int i = m_rgBackprop.Count - 1; i >= 0; i--)
    m_rgBackprop[i].Item2();
if (bClear)
    m_rgBackprop.Clear();

// From Clear():
m_rgBackprop.Clear();
// From the Cache<T> constructor:
m_blobCache = new Blob<T>(cuda, log, false);
m_blobCache.Name = "cache";

// From Dispose():
if (m_blobCache != null)
{
    // ... (elided)
}

public void Create(int nCount, List<int> rgItemShape)
{
    List<int> rgShape = new List<int>(rgItemShape);
    rgShape.Insert(0, nCount);
    // ...
}

// From CopyToCache(Blob<T> b, int nAxis):
int nSpatialDim = b.count(nAxis);
// ...
if (m_nCacheIdx >= m_blobCache.num)
    throw new Exception("The cache is full!");

// From CopyFromCache(Blob<T> b, int nAxis):
int nSpatialDim = b.count(nAxis);
// ...
throw new Exception("The cache is empty!");
The Log class provides general output in text form.
The Utility class provides general utility functions.
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of double.
The BlobCollection contains a list of Blobs.
The Blob is the main holder of data that moves through the Layers of the Net.
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
T[] mutable_cpu_diff
Get diff from the GPU and bring it over to the host, or Set diff from the Host and send it over to the GPU.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
T[] mutable_cpu_data
Get data from the GPU and bring it over to the host, or Set data from the Host and send it over to the GPU.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use the List<int>-based Reshape overload instead.
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
static T Zero
Returns Zero (0) in type T.
T GetData(int nIdx)
Returns the data at a given flat index within the Blob.
int count()
Returns the total number of items in the Blob.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single item at the given index.
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The Cache class is used to cache blobs over time.
void CopyFromCache(Blob< T > b, int nAxis)
Copies a value from the current location in the cache to the blob.
Cache(CudaDnn< T > cuda, Log log)
The constructor.
void Dispose()
Release any resources used.
void Reset()
Resets the cache.
void CopyToCache(Blob< T > b, int nAxis)
Copies a blob to the current location in the cache.
void Create(int nCount, List< int > rgItemShape)
Create the cache memory.
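A minimal usage sketch of the Cache API documented above; the step count, item shape and blob name are illustrative assumptions:

    // Hypothetical: store a hidden-state blob once per time step.
    Cache<float> cache = new Cache<float>(cuda, log);
    cache.Create(nSteps, new List<int>() { 1, nHidden });  // one cache slot per step
    cache.CopyToCache(blobHidden, 1);    // write the current state at the cache position
    cache.Reset();                       // reset the cache position
    cache.CopyFromCache(blobHidden, 1);  // read the cached state back out
    cache.Dispose();                     // release GPU resources when done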
The ComputeGraph class provides a simple computation graph of operations used in a forward pass that are then played back in reverse order to implement the backward pass.
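Below is a minimal usage sketch; the blob names and the axis value are illustrative assumptions, not taken from the library:

    // Record forward ops; each one queues its own backward Action.
    ComputeGraph<float> g = new ComputeGraph<float>(cuda, log, 1);  // ops run on axis 1
    g.marker = "step1";              // tag subsequent ops in debug output
    g.mul(blobA, blobB, blobC);      // C = A x B (gemm); backward Action recorded
    g.softmax(blobC, blobD);         // forward softmax; backward Action recorded
    g.Backward(true);                // replay recorded Actions last-to-first, then clear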
Blob< T > mul(Blob< T > btm1, Blob< T > btm2, Blob< T > top, bool bAccumulateGrad=true)
'mul' operation performs a blas gemm operation on the 'btm1' matrix with the 'btm2' matrix and places the results in 'top'.
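Given the nM/nN/nK values computed in the source listing above, the whole operation plausibly reduces to three gemm calls; this sketch applies the standard matrix-product gradient rules and is an assumption rather than the library's verbatim code:

    // Forward: top = btm1 x btm2, with btm1 of shape (nM x nK) and btm2 of shape (nK x nN).
    m_cuda.gemm(false, false, nM, nN, nK, 1.0, btm1.gpu_data, btm2.gpu_data, 0.0, top.mutable_gpu_data);
    // Backward: d(btm1) += d(top) x btm2^T and d(btm2) += btm1^T x d(top);
    // beta = 1.0 accumulates into the existing diff, matching bAccumulateGrad = true.
    m_cuda.gemm(false, true, nM, nK, nN, 1.0, top.gpu_diff, btm2.gpu_data, 1.0, btm1.mutable_gpu_diff);
    m_cuda.gemm(true, false, nK, nN, nM, 1.0, btm1.gpu_data, top.gpu_diff, 1.0, btm2.mutable_gpu_diff);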
void DebugOp(params Blob< T >[] rgB)
DebugOp operation places a debug stub in the backpropagation chain for debugging only.
void CopyToCache(Blob< T > btm, Cache< T > cache)
CopyToCache operation copies the blob into the cache.
Blob< T > clear_grad(Blob< T > b)
'clear_grad' operation only runs on the backward pass and zeros out the gradients on an input.
void PeekItem(Blob< T > btm, Blob< T > top, int ix)
PeekItem operation copies a single item from the bottom to the top.
Blob< T > add(Blob< T > btm1, Blob< T > btm2, Blob< T > top, bool bAccumulateGrad=true)
'add' operation adds each element of 'btm1' to 'btm2' and places the results in 'top'.
Dictionary< string, Blob< T >[]> Debug
Returns a dictionary of Blobs used during each operation, only filled when 'bAddDebug' = true in the constructor.
void Clear()
Clears all backward operations from the list.
int BackwardCount
Returns the backward operation count.
Blob< T > sigmoid(Blob< T > btm, Blob< T > top)
'sigmoid' operation runs the sigmoid on each item in the btm and places the results in the top.
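The body of the backward Action is elided in the source listing above, but it presumably wraps sigmoid_bwd (documented below), which applies the standard rule d(bottom) = d(top) * y * (1 - y) for y = sigmoid(x). A sketch, not verbatim source:

    work.ReshapeLike(btm);  // stage the gradient in the work blob
    m_cuda.sigmoid_bwd(btm.count(), top.gpu_diff, top.gpu_data, work.mutable_gpu_diff);
    apply(work, btm);       // accumulate work.diff into btm.diff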
Blob< T > CopyToRow(Blob< T > btm, Blob< T > top, int ix, bool bCopyDiff=false)
CopyToRow operation copies the bottom vector into the top matrix.
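A hypothetical way to combine CopyToRow with PeekRow when assembling a sequence matrix one step at a time (the blob names and nSteps are invented; 'g' is the hypothetical graph from the earlier sketch):

    // Scatter per-step vectors into rows of a sequence matrix, then read one back.
    for (int t = 0; t < nSteps; t++)
        g.CopyToRow(blobStep, blobSeq, t);   // write the step vector into row t
    g.PeekRow(blobSeq, blobRow, 2);          // read row 2 (data and diff) back out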
void clear_grad(BlobCollection< T > rg)
'clear_grad' operation only runs on the backward pass and zeros out the gradients of the inputs.
void Backward(bool bClear=false)
Runs a backward operation on all items starting from the last and running through the first.
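The same chain can be stepped manually with BackwardCount and BackwardOne, which is useful when isolating a single failing operation (again using the hypothetical graph 'g'):

    for (int i = g.BackwardCount - 1; i >= 0; i--)
        g.BackwardOne(i);   // run one recorded backward Action, newest first
    g.Clear();              // same effect as passing bClear = true to Backward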
string marker
Get/set a string marker added to the debug information and used to indicate where in the code a given operation takes place.
Blob< T > Round(Blob< T > b, int nDecimals=6)
Round operation, rounds the values to the nearest specified decimal.
Blob< T > tanh(Blob< T > btm, Blob< T > top)
'tanh' operation runs the tanh on each item in the btm and places the results in the top.
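As with sigmoid, the recorded backward Action presumably delegates to tanh_bwd (documented below), which implements d(bottom) = d(top) * (1 - y^2) for y = tanh(x). A sketch only:

    work.ReshapeLike(btm);
    m_cuda.tanh_bwd(btm.count(), top.gpu_diff, top.gpu_data, work.mutable_gpu_diff);
    apply(work, btm);   // accumulate the staged gradient into btm.diff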
Blob< T > eltmul(Blob< T > btm1, Blob< T > btm2, Blob< T > top)
'eltmul' operation multiplies each element of 'btm1' with 'btm2' and places the results in 'top'.
Blob< T > softmax(Blob< T > btm, Blob< T > top)
'softmax' operation runs the softmax on each item in the btm and places the results in the top.
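The channel_* calls in the source listing implement the numerically stable form of softmax. Forward, per channel: y_i = exp(x_i - max_j x_j) / sum_j exp(x_j - max_j x_j), which is why channel_max runs before the elided channel_sub/exp and channel_sum/channel_div steps. Backward, the copy, channel_sub and mul sequence computes the standard softmax gradient d(x_i) = y_i * (d(y_i) - sum_j d(y_j) * y_j), with the per-channel dot product sum_j d(y_j) * y_j presumably produced by channel_dot in the elided lines.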
ComputeGraph(CudaDnn< T > cuda, Log log, int nAxis, bool bNeedsBackprop=true, bool bClipGradients=false, bool bCheckNans=false, bool bAddDebug=false)
The constructor.
void BackwardOne(int nIdx)
Runs a backward operation at a given index.
Blob< T > scalemul(Blob< T > btm1, Blob< T > btm2, Blob< T > top, int nIdx=0)
'scalemul' operation multiplies each element of 'btm1' with the first item within 'btm2' and places the results in 'top'.
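The work.SetDiff(fDot, nIdx) call in the source listing matches the standard gradient for scaling by a scalar s = btm2[nIdx]: the scalar's gradient is the dot product d(s) = sum_i d(top_i) * btm1_i (presumably the fDot value produced by dot_float), while each element gradient is d(btm1_i) = s * d(top_i).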
void Dispose()
Release all resources used.
bool needs_backprop
Get/set whether or not to back propagate.
int axis
Returns the axis on which all operations are performed.
Blob< T > PeekRow(Blob< T > btm, Blob< T > top, int ix)
PeekRow operation copies data and diffs from one row within the bottom matrix and places it in the top vector.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
void copy(int nCount, long hSrc, long hDst, int nSrcOffset=0, int nDstOffset=0, long hStream=-1, bool? bSrcHalfSizeOverride=null, bool? bDstHalfSizeOverride=null)
Copy data from one block of GPU memory to another.
void channel_sub(int nCount, int nOuterNum, int nChannels, int nInnerNum, long hA, long hX, long hY)
Subtracts the values across the channels of X from A and places the result in Y.
void tanh_fwd(int nCount, long hBottomData, long hTopData)
Performs a TanH forward pass in Cuda.
void channel_dot(int nCount, int nOuterNum, int nChannels, int nInnerNum, long hX, long hA, long hY)
Calculates the dot product of the values within each channel of X and places the result in Y.
void add(int n, long hA, long hB, long hC, long hY)
Adds A, B and C and places the result in Y.
void channel_max(int nCount, int nOuterNum, int nChannels, int nInnerNum, long hX, long hY, bool bReturnIdx=false)
Calculates the maximum value within each channel of X and places the result in Y.
void sigmoid_fwd(int nCount, long hBottomData, long hTopData)
Performs a Sigmoid forward pass in Cuda.
void gemm(bool bTransA, bool bTransB, int m, int n, int k, double fAlpha, long hA, long hB, double fBeta, long hC)
Perform a matrix-matrix multiplication operation: C = alpha * transA(A) * transB(B) + beta * C
void scale(int n, double fAlpha, long hX, long hY)
Scales the values in X and places them in Y.
void exp(int n, long hA, long hY)
Calculates the exponential of the values in A and places the result in Y.
void channel_sum(int nCount, int nOuterNum, int nChannels, int nInnerNum, long hX, long hY, bool bSumAcrossChannels=true, DIR dir=DIR.FWD, int nChannelsY=-1)
Calculates the sum of the values either across or within each channel (depending on bSumAcrossChannels) of X and places the result in Y.
float dot_float(int n, long hX, long hY)
Computes the dot product of X and Y.
void mul(int n, long hA, long hB, long hY, int nAOff=0, int nBOff=0, int nYOff=0)
Multiplies each element of A with each element of B and places the result in Y.
void channel_div(int nCount, int nOuterNum, int nChannels, int nInnerNum, long hX, long hY, int nMethod=1)
Divides the values of the channels from X and places the result in Y.
void sigmoid_bwd(int nCount, long hTopDiff, long hTopData, long hBottomDiff)
Performs a Sigmoid backward pass in Cuda.
void tanh_bwd(int nCount, long hTopDiff, long hTopData, long hBottomDiff)
Performs a TanH backward pass in Cuda.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-source project.