10using System.Collections.Generic;
15using System.Threading.Tasks;
49 m_icallback = icallback;
51 m_properties = properties;
80 if (m_mycaffe !=
null)
86 m_icallback.OnShutdown();
91 private void wait(
int nWait)
96 while (nTotalWait < nWait)
98 m_icallback.OnWait(
new WaitArgs(nWaitInc));
99 nTotalWait += nWaitInc;
128 byte[] rgResults = agent.
Run(nN, out type);
143 string strProp = m_properties.
ToString();
146 strProp +=
"EnableNumSkip=False;";
170 agent.
Run(
Phase.TRAIN, nN, type, step);
188 float m_fGamma = 0.95f;
189 bool m_bUseRawInput =
true;
190 int m_nMaxMemory = 10000;
191 int m_nTrainingUpdateFreq = 1000;
192 int m_nExplorationNum = 50000;
194 double m_dfEpsStart = 0;
195 double m_dfEpsEnd = 0;
196 double m_dfEpsDelta = 0;
197 double m_dfExplorationRate = 0;
198 STATE m_state = STATE.EXPLORING;
199 double m_dfBetaStart = 0.4;
200 int m_nBetaFrames = 1000;
201 int m_nMemorySize = 10000;
202 float m_fPriorityAlpha = 0.6f;
222 m_icallback = icallback;
223 m_brain =
new Brain<T>(mycaffe, properties, random, phase);
224 m_properties = properties;
230 m_nTrainingUpdateFreq = properties.
GetPropertyAsInt(
"TrainingUpdateFreq", m_nTrainingUpdateFreq);
231 m_nExplorationNum = properties.
GetPropertyAsInt(
"ExplorationNum", m_nExplorationNum);
235 m_dfEpsDelta = (m_dfEpsStart - m_dfEpsEnd) / m_nEpsSteps;
236 m_dfExplorationRate = m_dfEpsStart;
238 if (m_dfEpsStart < 0 || m_dfEpsStart > 1)
239 throw new Exception(
"The 'EpsStart' is out of range - please specify a real number in the range [0,1]");
241 if (m_dfEpsEnd < 0 || m_dfEpsEnd > 1)
242 throw new Exception(
"The 'EpsEnd' is out of range - please specify a real number in the range [0,1]");
244 if (m_dfEpsEnd > m_dfEpsStart)
245 throw new Exception(
"The 'EpsEnd' must be less than the 'EpsStart' value.");
263 m_icallback.OnGetData(args);
275 case STATE.EXPLORING:
276 return m_random.
Next(nActionCount);
279 if (m_dfExplorationRate > m_dfEpsEnd)
280 m_dfExplorationRate -= m_dfEpsDelta;
282 if (m_random.
NextDouble() < m_dfExplorationRate)
283 return m_random.
Next(nActionCount);
288 return m_brain.
act(sd, sdClip, nActionCount);
291 private void updateStatus(
int nIteration,
int nEpisodeCount,
double dfRewardSum,
double dfRunningReward,
double dfLoss,
double dfLearningRate,
bool bModelUpdated)
293 GetStatusArgs args =
new GetStatusArgs(0, nIteration, nEpisodeCount, 1000000, dfRunningReward, dfRewardSum, m_dfExplorationRate, 0, dfLoss, dfLearningRate, bModelUpdated);
294 m_icallback.OnUpdateStatus(args);
303 public byte[]
Run(
int nIterations, out
string type)
306 if (icallback ==
null)
307 throw new Exception(
"The Run method requires an IxTrainerCallbackRNN interface to convert the results into the native format!");
311 List<float> rgResults =
new List<float>();
314 while (!m_brain.
Cancel.
WaitOne(0) && (nIterations == -1 || nIteration < nIterations))
324 rgResults.Add(action);
329 s = getData(
Phase.RUN, action, nIteration);
339 private bool isAtIteration(
int nN,
ITERATOR_TYPE type,
int nIteration,
int nEpisode)
360 private double beta_by_frame(
int nFrameIdx)
362 return Math.Min(1.0, m_dfBetaStart + nFrameIdx * (1.0 - m_dfBetaStart) / m_nBetaFrames);
381 double dfRunningReward = 0;
382 double dfEpisodeReward = 0;
384 bool bDifferent =
false;
386 StateBase state = getData(phase, -1, -1);
393 while (!m_brain.
Cancel.
WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
395 if (nIteration > m_nExplorationNum && iMemory.
Count > m_brain.
BatchSize)
396 m_state = STATE.TRAINING;
399 int action = getAction(nIteration, x, state.
Clip, state.
ActionCount, step);
402 StateBase state_next = getData(phase, action, nIteration);
407 m_brain.
Log.
WriteLine(
"WARNING: The current state is the same as the previous state!");
410 iMemory.
Add(
new MemoryItem(state, x, action, state_next, x_next, state_next.
Reward, state_next.
Done, nIteration, nEpisode));
411 dfEpisodeReward += state_next.
Reward;
414 if (m_state == STATE.TRAINING)
416 double dfBeta = beta_by_frame(nIteration + 1);
419 iMemory.
Update(rgSamples);
421 if (nIteration % m_nTrainingUpdateFreq == 0)
428 dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;
431 updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward, 0, 0, m_brain.
GetModelUpdated());
433 state = getData(phase, -1, -1);
434 x = m_brain.
Preprocess(state, m_bUseRawInput, out bDifferent,
true);
467 Blob<T> m_blobNextQValue =
null;
468 Blob<T> m_blobExpectedQValue =
null;
473 bool m_bUseAcceleratedTraining =
false;
474 double m_dfLearningRate;
475 int m_nMiniBatch = 1;
476 float m_fGamma = 0.99f;
477 int m_nFramesPerX = 4;
478 int m_nStackPerX = 4;
479 int m_nBatchSize = 32;
481 int m_nActionCount = 3;
482 bool m_bModelUpdated =
false;
484 Dictionary<Color, Tuple<Brush, Brush, Pen, Brush>> m_rgStyle =
new Dictionary<Color, Tuple<Brush, Brush, Pen, Brush>>();
485 List<SimpleDatum> m_rgX =
new List<SimpleDatum>();
486 float[] m_rgOverlay =
null;
501 m_netTarget =
new Net<T>(m_mycaffe.Cuda, m_mycaffe.Log, m_netOutput.
net_param, m_mycaffe.CancelEvent,
null, phase);
502 m_properties = properties;
507 m_mycaffe.Log.FAIL(
"Missing the expected input 'data' blob!");
510 m_nBatchSize = data.
num;
514 m_mycaffe.Log.FAIL(
"Missing the expected input 'logits' blob!");
519 if (m_transformer ==
null)
522 int nC = m_mycaffe.CurrentProject.Dataset.TrainingSource.Channels;
523 int nH = m_mycaffe.CurrentProject.Dataset.TrainingSource.Height;
524 int nW = m_mycaffe.CurrentProject.Dataset.TrainingSource.Width;
525 m_transformer =
new DataTransformer<T>(m_mycaffe.Cuda, m_mycaffe.Log, trans_param, phase, nC, nH, nW);
528 for (
int i = 0; i < m_nFramesPerX; i++)
536 m_blobActions =
new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log,
false);
537 m_blobQValue =
new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
538 m_blobNextQValue =
new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
539 m_blobExpectedQValue =
new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
540 m_blobDone =
new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log,
false);
541 m_blobLoss =
new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
542 m_blobWeights =
new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log,
false);
547 if (m_memLoss ==
null)
548 m_mycaffe.Log.FAIL(
"Missing the expected MEMORY_LOSS layer!");
552 m_dfLearningRate = dfRate.Value;
555 m_bUseAcceleratedTraining = properties.
GetPropertyAsBool(
"UseAcceleratedTraining",
false);
557 if (m_nMiniBatch > 1)
560 m_colAccumulatedGradients.
SetDiff(0);
564 private void dispose(ref
Blob<T> b)
578 dispose(ref m_blobActions);
579 dispose(ref m_blobQValue);
580 dispose(ref m_blobNextQValue);
581 dispose(ref m_blobExpectedQValue);
582 dispose(ref m_blobDone);
583 dispose(ref m_blobLoss);
584 dispose(ref m_blobWeights);
586 if (m_colAccumulatedGradients !=
null)
588 m_colAccumulatedGradients.
Dispose();
589 m_colAccumulatedGradients =
null;
592 if (m_netTarget !=
null)
604 foreach (KeyValuePair<Color, Tuple<Brush, Brush, Pen, Brush>> kv
in m_rgStyle)
606 kv.Value.Item1.Dispose();
607 kv.Value.Item2.Dispose();
608 kv.Value.Item3.Dispose();
609 kv.Value.Item4.Dispose();
623 bool bReset = (nAction == -1) ?
true :
false;
624 return new GetDataArgs(phase, 0, m_mycaffe, m_mycaffe.Log, m_mycaffe.CancelEvent, bReset, nAction,
true,
false,
false,
this);
632 get {
return m_nFramesPerX; }
640 get {
return m_nBatchSize; }
648 get {
return m_mycaffe.
Log; }
678 if (m_sdLast ==
null)
681 bDifferent = sd.
Sub(m_sdLast);
694 m_rgX =
new List<SimpleDatum>();
696 for (
int i = 0; i < m_nFramesPerX * m_nStackPerX; i++)
709 for (
int i=0; i<m_nStackPerX; i++)
711 int nIdx = ((m_nStackPerX - i) * m_nFramesPerX) - 1;
712 rgSd[i] = m_rgX[nIdx];
728 setData(m_netOutput, sd, sdClip);
733 throw new Exception(
"Missing expected 'logits' blob!");
745 bool bModelUpdated = m_bModelUpdated;
746 m_bModelUpdated =
false;
747 return bModelUpdated;
755 m_mycaffe.Log.Enable =
false;
758 m_mycaffe.Log.Enable =
true;
759 m_bModelUpdated =
true;
770 m_rgSamples = rgSamples;
772 if (m_nActionCount != nActionCount)
773 throw new Exception(
"The logit output of '" + m_nActionCount.ToString() +
"' does not match the action count of '" + nActionCount.ToString() +
"'!");
776 m_mycaffe.Log.Enable =
false;
777 setNextStateData(m_netTarget, rgSamples);
780 setCurrentStateData(m_netOutput, rgSamples);
781 m_memLoss.
OnGetLoss += m_memLoss_ComputeTdLoss;
783 if (m_nMiniBatch == 1)
789 m_solver.
Step(1,
TRAIN_STEP.NONE,
true, m_bUseAcceleratedTraining,
true,
true);
792 if (nIteration % m_nMiniBatch == 0)
795 m_colAccumulatedGradients.
SetDiff(0);
796 m_dfLearningRate = m_solver.
ApplyUpdate(nIteration);
801 m_memLoss.
OnGetLoss -= m_memLoss_ComputeTdLoss;
802 m_mycaffe.Log.Enable =
true;
804 resetNoise(m_netOutput);
805 resetNoise(m_netTarget);
827 reduce_sum_axis1(m_blobQValue);
830 m_blobNextQValue.
CopyFrom(next_q_values,
false,
true);
831 reduce_argmax_axis1(m_blobNextQValue);
843 m_mycaffe.Cuda.mul_scalar(m_blobExpectedQValue.
count(), m_fGamma, m_blobExpectedQValue.
mutable_gpu_diff);
844 m_mycaffe.Cuda.add(m_blobExpectedQValue.
count(), m_blobExpectedQValue.
gpu_diff, m_blobExpectedQValue.
gpu_data, m_blobExpectedQValue.
gpu_data);
861 for (
int i = 0; i < rgPrios.Length; i++)
873 double dfGradient = 1.0;
878 dfGradient /= m_blobLoss.
count();
879 m_blobLoss.
SetDiff(dfGradient);
891 mul(m_blobLoss, m_blobActions, e.
Bottom[0]);
893 e.
Loss = reduce_mean(m_blobLoss,
false);
897 private void resetNoise(
Net<T> net)
913 float[] rgResult =
new float[rgActions.Length];
915 for (
int i = 0; i < actions.
num; i++)
917 float fPred = rgVal[i];
919 for (
int j = 0; j < actions.
channels; j++)
921 int nIdx = (i * actions.
channels) + j;
922 rgResult[nIdx] = rgActions[nIdx] * fPred;
929 private float reduce_mean(
Blob<T> b,
bool bDiff)
932 float fSum = rg.Sum(p => p);
933 return fSum / rg.Length;
936 private void reduce_sum_axis1(
Blob<T> b)
938 int nNum = b.
shape(0);
939 int nActions = b.
shape(1);
940 int nInnerCount = b.
count(2);
942 float[] rgSum =
new float[nNum * nInnerCount];
944 for (
int i = 0; i < nNum; i++)
946 for (
int j = 0; j < nInnerCount; j++)
950 for (
int k = 0; k < nActions; k++)
952 int nIdx = (i * nActions * nInnerCount) + (k * nInnerCount);
953 fSum += rg[nIdx + j];
956 int nIdxR = i * nInnerCount;
957 rgSum[nIdxR + j] = fSum;
961 b.
Reshape(nNum, nInnerCount, 1, 1);
965 private void reduce_argmax_axis1(
Blob<T> b)
967 int nNum = b.
shape(0);
968 int nActions = b.
shape(1);
969 int nInnerCount = b.
count(2);
971 float[] rgMax =
new float[nNum * nInnerCount];
973 for (
int i = 0; i < nNum; i++)
975 for (
int j = 0; j < nInnerCount; j++)
977 float fMax = -
float.MaxValue;
979 for (
int k = 0; k < nActions; k++)
981 int nIdx = (i * nActions * nInnerCount) + (k * nInnerCount);
982 fMax = Math.Max(fMax, rg[nIdx + j]);
985 int nIdxR = i * nInnerCount;
986 rgMax[nIdxR + j] = fMax;
990 b.
Reshape(nNum, nInnerCount, 1, 1);
994 private int argmax(
float[] rgProb,
int nActionCount,
int nSampleIdx)
996 float[] rgfProb =
new float[nActionCount];
998 for (
int j = 0; j < nActionCount; j++)
1000 int nIdx = (nSampleIdx * nActionCount) + j;
1001 rgfProb[j] = rgProb[nIdx];
1004 return argmax(rgfProb);
1007 private int argmax(
float[] rgfAprob)
1009 double fMax = -
float.MaxValue;
1012 for (
int i = 0; i < rgfAprob.Length; i++)
1014 if (rgfAprob[i] == fMax)
1019 else if (fMax < rgfAprob[i])
1037 setData(net, rgData, rgClip);
1046 SimpleDatum[] rgClip = (rgClip0 !=
null) ? rgClip0.ToArray() :
null;
1048 setData(net, rgData, rgClip);
1057 SimpleDatum[] rgClip = (rgClip1 !=
null) ? rgClip1.ToArray() :
null;
1059 setData(net, rgData, rgClip);
1067 m_transformer.
Transform(rgData, data, m_mycaffe.Cuda, m_mycaffe.Log);
1075 clip.
Reshape(rgClip.Length, rgClip[0].
Channels, rgClip[0].Height, rgClip[0].Width);
1076 m_transformer.
Transform(rgClip, clip, m_mycaffe.Cuda, m_mycaffe.Log,
true);
1091 if (logits.
num == 1)
1094 if (m_rgOverlay ==
null)
1097 using (Graphics g = Graphics.FromImage(e.
DisplayImage))
1101 int nWid1 = nWid / m_rgOverlay.Length;
1106 float fMax = -
float.MaxValue;
1108 float fMin1 = m_rgOverlay.Min(p => p);
1109 float fMax1 = m_rgOverlay.Max(p => p);
1111 for (
int i=0; i<m_rgOverlay.Length; i++)
1113 if (fMin1 < 0 || fMax1 > 1)
1114 m_rgOverlay[i] = (m_rgOverlay[i] - fMin1) / (fMax1 - fMin1);
1116 if (m_rgOverlay[i] > fMax)
1118 fMax = m_rgOverlay[i];
1123 for (
int i = 0; i < m_rgOverlay.Length; i++)
1125 drawProbabilities(g, nX, nY, nWid1, nHt1, i, m_rgOverlay[i], fMin1, fMax1, clrMap.
GetColor(i + 1), (i == nMaxIdx) ?
true :
false);
1131 private void drawProbabilities(Graphics g,
int nX,
int nY,
int nWid,
int nHt,
int nAction,
float fProb,
float fMin,
float fMax, Color clr,
bool bMax)
1136 m_font =
new Font(
"Century Gothic", 9.0f);
1138 if (!m_rgStyle.ContainsKey(clr))
1140 Color clr1 = Color.FromArgb(128, clr);
1141 Brush br1 =
new SolidBrush(clr1);
1142 Color clr2 = Color.FromArgb(64, clr);
1143 Pen pen =
new Pen(clr2, 1.0f);
1144 Brush br2 =
new SolidBrush(clr2);
1145 Brush brBright =
new SolidBrush(clr);
1146 m_rgStyle.Add(clr,
new Tuple<Brush, Brush, Pen, Brush>(br1, br2, pen, brBright));
1149 Brush brBack = m_rgStyle[clr].Item1;
1150 Brush brFront = m_rgStyle[clr].Item2;
1151 Brush brTop = m_rgStyle[clr].Item4;
1152 Pen penLine = m_rgStyle[clr].Item3;
1154 if (fMin != 0 || fMax != 0)
1156 str =
"Action " + nAction.ToString() +
" (" + fProb.ToString(
"N7") +
")";
1160 str =
"Action " + nAction.ToString() +
" - No Probabilities";
1163 SizeF sz = g.MeasureString(str, m_font);
1165 int nY1 = (int)(nY + (nHt - sz.Height));
1166 int nX1 = (int)(nX + (nWid / 2) - (sz.Width / 2));
1167 g.DrawString(str, m_font, (bMax) ? brTop : brFront,
new Point(nX1, nY1));
1169 if (fMin != 0 || fMax != 0)
1173 nHt -= (int)sz.Height;
1175 float fHt = nHt * fProb;
1176 float fHt1 = nHt - fHt;
1177 RectangleF rc1 =
new RectangleF(fX, nY + fHt1, fWid, fHt);
1178 g.FillRectangle(brBack, rc1);
1179 g.DrawRectangle(penLine, rc1.X, rc1.Y, rc1.Width, rc1.Height);
The MyCaffeControl is the main object used to manage all training, testing and running of the MyCaffe...
CancelEvent CancelEvent
Returns the CancelEvent used.
Net< T > GetInternalNet(Phase phase=Phase.RUN)
Returns the internal net based on the Phase specified: TRAIN, TEST or RUN.
Solver< T > GetInternalSolver()
Get the internal solver.
ProjectEx CurrentProject
Returns the name of the currently loaded project.
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
void Reset()
Resets the event clearing any signaled state.
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
CancelEvent()
The CancelEvent constructor.
void Set()
Sets the event to the signaled state.
The ColorMapper maps a value within a number range, to a Color within a color scheme.
Color GetColor(double dfVal)
Find the color using a binary search algorithm.
The CryptoRandom is a random number generator that can use either the standard .Net Random object or t...
int Next(int nMinVal, int nMaxVal, bool bMaxInclusive=true)
Returns a random int within the range
double NextDouble()
Returns a random double within the range.
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Log(string strSrc)
The Log constructor.
double? GetSolverSettingAsNumeric(string strParam)
Get a setting from the solver descriptor as a double value.
Specifies a key-value pair of properties.
int GetPropertyAsInt(string strName, int nDefault=0)
Returns a property as an integer value.
bool GetPropertyAsBool(string strName, bool bDefault=false)
Returns a property as a boolean value.
double GetPropertyAsDouble(string strName, double dfDefault=0)
Returns a property as an double value.
override string ToString()
Returns the string representation of the properties.
The SimpleDatum class holds a data input within host memory.
float GetDataAtF(int nIdx)
Returns the item at a specified index in the float type.
bool Sub(SimpleDatum sd, bool bSetNegativeToZero=false)
Subtract the data of another SimpleDatum from this one, so this = this - sd.
void Zero()
Zero out all data in the datum but keep the size and other settings.
DateTime TimeStamp
Get/set the Timestamp.
object Tag
Specifies user data associated with the SimpleDatum.
int Channels
Return the number of channels of the data.
int Index
Returns the index of the SimpleDatum.
The Utility class provides general utility functions.
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of generics.
The BlobCollection contains a list of Blobs.
void Dispose()
Release all resource used by the collection and its Blobs.
void Accumulate(CudaDnn< T > cuda, BlobCollection< T > src, bool bAccumulateDiff)
Accumulate the diffs from one BlobCollection into another.
void SetDiff(double df)
Set all blob diff to the value specified.
The Blob is the main holder of data that moves through the Layers of the Net.
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
int height
DEPRECATED; legacy shape accessor height: use shape(2) instead.
long mutable_gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
T[] mutable_cpu_diff
Get diff from the GPU and bring it over to the host, or Set diff from the Host and send it over to th...
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
T[] mutable_cpu_data
Get data from the GPU and bring it over to the host, or Set data from the Host and send it over to th...
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
int width
DEPRECATED; legacy shape accessor width: use shape(3) instead.
List< int > shape()
Returns an array where each element contains the shape of an axis of the Blob.
int count()
Returns the total number of items in the Blob.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
long gpu_diff
Returns the diff GPU handle used by the CudaDnn connection.
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Connects Layers together into a directed acyclic graph (DAG) specified by a NetParameter
List< Layer< T > > layers
Returns the layers.
double ForwardFromTo(int nStart=0, int nEnd=int.MaxValue)
The FromTo variant of forward and backward operate on the (topological) ordering by which the net is ...
void CopyInternalBlobsTo(Net< T > dstNet)
Copy the internal blobs from one net to another.
void CopyTrainedLayersTo(Net< T > dstNet)
Copies the trained layer of this Net to another Net.
Layer< T > FindLastLayer(LayerParameter.LayerType type)
Find the last layer with the matching type.
virtual void Dispose(bool bDisposing)
Releases all resources (GPU and Host) used by the Net.
void ClearParamDiffs()
Zero out the diffs of all network parameters. This should be run before Backward.
BlobCollection< T > learnable_parameters
Returns the learnable parameters.
NetParameter net_param
Returns the net parameter.
Blob< T > blob_by_name(string strName, bool bThrowExceptionOnError=true)
Returns a blob given its name.
The ResultCollection contains the result of a given CaffeControl::Run.
The InnerProductLayer, also known as a 'fully-connected' layer, computes the inner product with a set ...
An interface for the units of computation which can be composed into a Net.
LayerParameter.LayerType type
Returns the LayerType of this Layer.
LayerParameter layer_param
Returns the LayerParameter for this Layer.
The MemoryLossLayerGetLossArgs class is passed to the OnGetLoss event.
bool EnableLossUpdate
Get/set enabling the loss update within the backpropagation pass.
double Loss
Get/set the externally calculated total loss.
BlobCollection< T > Bottom
Specifies the bottom passed in during the forward pass.
The MemoryLossLayer provides a method of performing a custom loss functionality. Similar to the Memor...
EventHandler< MemoryLossLayerGetLossArgs< T > > OnGetLoss
The OnGetLoss event fires during each forward pass. The value returned is saved, and applied on the b...
bool enable_noise
Enable/disable noise in the inner-product layer (default = false).
Specifies the base parameter for all layers.
List< double > loss_weight
Specifies the loss weight.
InnerProductParameter inner_product_param
Returns the parameter set when initialized with LayerType.INNERPRODUCT
LayerType
Specifies the layer type.
An interface for classes that perform optimization on Nets - this class serves as the base class for ...
bool Step(int nIters, TRAIN_STEP step=TRAIN_STEP.NONE, bool bZeroDiffs=true, bool bApplyUpdates=true, bool bDisableOutput=false, bool bDisableProgress=false, double? dfLossOverride=null, bool? bAllowSnapshot=null)
Steps a set of iterations through a training cycle.
abstract double ApplyUpdate(int nIterationOverride=-1)
Make and apply the update value for the current iteration.
The ConvertOutputArgs is passed to the OnConvertOutput event.
byte[] RawOutput
Specifies the raw output byte stream.
string RawType
Specifies the type of the raw output byte stream.
The GetDataArgs is passed to the OnGetData event to retrieve data.
StateBase State
Specifies the state data of the observations.
The InitializeArgs is passed to the OnInitialize event.
The OverlayArgs is passed to the OnOverlay event, optionally fired just before displaying a gym image...
Bitmap DisplayImage
Get/set the display image.
The StateBase is the base class for the state of each observation - this is defined by actual trainer...
bool Done
Get/set whether the state is done or not.
double Reward
Get/set the reward of the state.
SimpleDatum Data
Returns other data associated with the state.
int ActionCount
Returns the number of actions.
SimpleDatum Clip
Returns the clip data associated with the state.
The WaitArgs is passed to the OnWait event.
The MemoryCollectionFactory is used to create various memory collection types.
static IMemoryCollection CreateMemory(MEMTYPE type, int nMax, float fAlpha=0, string strFile=null)
CreateMemory creates the memory collection type based on the MEMTYPE parameter.
The memory collection stores a set of memory items.
float[] GetInvertedDoneAsOneHotVector()
Returns the inverted done (1 - done) values as a one-hot vector.
List< SimpleDatum > GetNextStateClip()
Returns the list of clip items associated with the next state.
double[] Priorities
Get/set the priorities associated with the collection (if any).
List< SimpleDatum > GetCurrentStateData()
Returns the list of data items associated with the current state.
float[] GetActionsAsOneHotVector(int nActionCount)
Returns the action items as a set of one-hot vectors.
List< SimpleDatum > GetCurrentStateClip()
Returns the list of clip items associated with the current state.
float[] GetRewards()
Returns the rewards as a vector.
List< SimpleDatum > GetNextStateData()
Returns the list of data items associated with the next state.
The MemoryItem stores the information about a given cycle.
The Brain uses the instance of MyCaffe (e.g. the open project) to run new actions and train the netwo...
CancelEvent Cancel
Returns the Cancel event used to cancel all MyCaffe tasks.
void OnOverlay(OverlayArgs e)
The OnOverlay callback is called just before displaying the gym image, thus allowing for an overlay t...
void UpdateTargetModel()
The UpdateTargetModel transfers the trained layers from the active Net to the target Net.
bool GetModelUpdated()
Get whether or not the model has been updated.
Brain(MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
The constructor.
GetDataArgs getDataArgs(Phase phase, int nAction)
Returns the GetDataArgs used to retrieve new data from the environment implemented by derived parent ...
int FrameStack
Specifies the number of frames per X value.
SimpleDatum Preprocess(StateBase s, bool bUseRawInput, out bool bDifferent, bool bReset=false)
Preprocesses the data.
int act(SimpleDatum sd, SimpleDatum sdClip, int nActionCount)
Returns the action from running the model. The action returned is either randomly selected (when usin...
int BatchSize
Returns the batch size defined by the model.
void Dispose()
Release all resources used by the Brain.
void Train(int nIteration, MemoryCollection rgSamples, int nActionCount)
Train the model at the current iteration.
Log Log
Returns the output log.
The DqnAgent both builds episodes from the environment and trains on them using the Brain.
DqnAgent(IxTrainerCallback icallback, MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
The constructor.
void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
The Run method provides the main loop that performs the following steps: 1.) get state 2....
byte[] Run(int nIterations, out string type)
Run the action on a set number of iterations and return the results with no training.
void Dispose()
Release all resources used.
The TrainerNoisyDqn implements the Noisy-DQN algorithm as described by Google Dopamine DQNAgent,...
byte[] Run(int nN, PropertySet runProp, out string type)
Run a set of iterations and return the results.
TrainerNoisyDqn(MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, IxTrainerCallback icallback)
The constructor.
bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
Train the network using a modified PG training algorithm optimized for GPU use.
void Dispose()
Release all resources used.
bool Shutdown(int nWait)
Shutdown the trainer.
bool Initialize()
Initialize the trainer.
ResultCollection RunOne(int nDelay=1000)
Run a single cycle on the environment after the delay.
bool Test(int nN, ITERATOR_TYPE type)
Run the test cycle - currently this is not implemented.
The IxTrainerCallback provides functions used by each trainer to 'call-back' to the parent for inform...
The IxTrainerCallbackRNN provides functions used by each trainer to 'call-back' to the parent for inf...
void OnConvertOutput(ConvertOutputArgs e)
The OnConvertOutput callback fires from within the Run method and is used to convert the network's ou...
The IxTrainerGetDataCallback interface is called right after rendering the output image and just befo...
The IxTrainerRL interface is implemented by each RL Trainer.
The IMemoryCollection interface is implemented by all memory collection types.
void Update(MemoryCollection rgSamples)
Updates the memory collection - currently only used by the Prioritized memory collection to update it...
int Count
Returns the number of items in the memory collection.
void CleanUp()
Performs final clean-up tasks.
void Add(MemoryItem m)
Add a new item to the memory collection.
MemoryCollection GetSamples(CryptoRandom random, int nCount, double dfBeta)
Retrieve a set of samples from the collection.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Phase
Defines the Phase under which to run a Net.
The MyCaffe.common namespace contains common MyCaffe classes.
TRAIN_STEP
Defines the training stepping method (if any).
The MyCaffe.data namespace contains dataset creators used to create common testing datasets such as M...
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe.solvers namespace contains all solver classes, including the base Solver.
MEMTYPE
Specifies the type of memory collection to use.
The MyCaffe.trainers namespace contains all reinforcement and recurrent learning trainers.
ITERATOR_TYPE
Specifies the iterator type to use.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...