using System.Collections.Generic;
using System.Threading.Tasks;
List<int> m_rgGpuID = new List<int>();

m_icallback = icallback;
m_properties = properties;

m_rgGpuID.Add(m_mycaffe.Cuda.GetDeviceID());

string strGpuID = m_properties.GetProperty("GPUIDs", false);
if (strGpuID != null && m_nThreads > 1)
{
    int nDeviceCount = m_mycaffe.Cuda.GetDeviceCount();
    string[] rgstrGpuIDs = strGpuID.Split(',');

    foreach (string strID in rgstrGpuIDs)
    {
        int nDevId = int.Parse(strID);

        if (nDevId < 0 || nDevId >= nDeviceCount)
            throw new Exception("Invalid device ID - value must be within the range [0," + (nDeviceCount - 1).ToString() + "].");

        m_rgGpuID.Add(nDevId);
    }
}
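The GPUIDs property parsed above is a comma-separated device list used to spread the agent threads across multiple GPUs. A minimal usage sketch (hedged: the PropertySet string constructor is assumed from its ToString round-trip shown later; the Threads key is illustrative only):

// Hypothetical setup: run the trainer with three agents spread across GPUs 0, 1 and 2.
PropertySet properties = new PropertySet("Threads=3;GPUIDs=0,1,2;");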
private void wait(int nWait)

while (nTotalWait < nWait)
{
    m_icallback.OnWait(new WaitArgs(nWaitInc));
    nTotalWait += nWaitInc;
}
if (m_mycaffe != null)

m_icallback.OnShutdown();
Tuple<int, int> res = agent.Run(nDelay);

List<Result> rgActions = new List<Result>();
for (int i = 0; i < res.Item2; i++)

rgActions.Add(new Result(i, 1.0));

rgActions.Add(new Result(i, 0.0));
byte[] rgResults = agent.Run(nN, out type);

string strProp = m_properties.ToString();

strProp += "EnableNumSkip=False;";
List<Agent<T>> rgAgents = new List<Agent<T>>();

for (int i = 0; i < m_nThreads; i++)

int nGpuID = m_rgGpuID[nGpuIdx];

Agent<T> agent = new Agent<T>(i, m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN, nGpuID, m_nThreads);

if (nGpuIdx == m_rgGpuID.Count)

if (m_optimizer != null)

foreach (Agent<T> agent in rgAgents)

foreach (Agent<T> agent in rgAgents)

if (m_optimizer != null)
    m_optimizer.Stop(1000);

if (m_optimizer != null)
m_nCycleDelay = nCycleDelay;

get { return m_step; }

get { return m_nCycleDelay; }

get { return m_phase; }

get { return m_type; }
protected ManualResetEvent m_evtDone = new ManualResetEvent(false);

protected virtual void doWork(object arg)
double m_dfLearningRate;
AutoResetEvent m_evtApplyUpdates = new AutoResetEvent(false);
ManualResetEvent m_evtDoneApplying = new ManualResetEvent(false);
object m_syncObj = new object();

m_mycaffePrimary = mycaffePrimary;
protected override void doWork(object arg)

m_mycaffePrimary.Cuda.SetDeviceID();

List<WaitHandle> rgWait = new List<WaitHandle>();
rgWait.Add(m_evtApplyUpdates);
rgWait.AddRange(m_mycaffePrimary.CancelEvent.Handles);

int nWait = WaitHandle.WaitAny(rgWait.ToArray());

m_mycaffePrimary.CopyGradientsFrom(m_mycaffeWorker);
m_mycaffePrimary.Log.Enable = false;
m_dfLearningRate = m_mycaffePrimary.ApplyUpdate(m_nIteration);
m_mycaffePrimary.Log.Enable = true;

m_evtDoneApplying.Set();

nWait = WaitHandle.WaitAny(rgWait.ToArray());
m_mycaffeWorker = mycaffeWorker;
m_nIteration = nIteration;

m_evtDoneApplying.Reset();
m_evtApplyUpdates.Set();

List<WaitHandle> rgWait = new List<WaitHandle>();
rgWait.Add(m_evtDoneApplying);
rgWait.AddRange(m_mycaffePrimary.CancelEvent.Handles);

int nWait = WaitHandle.WaitAny(rgWait.ToArray());

return m_dfLearningRate;
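ApplyUpdates and doWork above form a two-event handshake between the calling agent thread and the optimizer thread: the caller resets the done event, signals the apply event, and blocks; the optimizer wakes, copies and applies the gradients on the primary device, and signals completion. A minimal self-contained sketch of the same pattern (simplified names, not the MyCaffe types; cancellation is elided):

using System.Threading;

class UpdateHandshake
{
    readonly AutoResetEvent m_evtApply = new AutoResetEvent(false);
    readonly ManualResetEvent m_evtDone = new ManualResetEvent(false);

    // Optimizer side: wait for a request, apply the update, signal completion.
    public void WorkLoop()
    {
        while (true)
        {
            m_evtApply.WaitOne();   // block until a worker requests an update
            // ... copy gradients and apply the update on the primary device ...
            m_evtDone.Set();        // release the waiting worker
        }
    }

    // Worker side: request an update and block until it has been applied.
    public void RequestUpdate()
    {
        m_evtDone.Reset();
        m_evtApply.Set();
        m_evtDone.WaitOne();
    }
}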
bool m_bAllowDiscountReset = false;
bool m_bUseRawInput = false;

double m_dfEpsStart = 0;
double m_dfEpsEnd = 0;
double m_dfExplorationRate = 0;
int m_nEpisodeBatchSize = 1;
double m_dfEpisodeElitePercentile = 1;
static object m_syncObj = new object();
bool m_bShowActionProb = false;
bool m_bVerbose = false;
m_icallback = icallback;
m_brain = new Brain<T>(mycaffe, properties, random, phase, nGpuID, nThreadCount);

m_properties = properties;

m_bAllowDiscountReset = properties.GetPropertyAsBool("AllowDiscountReset", false);
if (m_dfEpsStart < 0 || m_dfEpsStart > 1)
    throw new Exception("The 'EpsStart' is out of range - please specify a real number in the range [0,1]");

if (m_dfEpsEnd < 0 || m_dfEpsEnd > 1)
    throw new Exception("The 'EpsEnd' is out of range - please specify a real number in the range [0,1]");

if (m_dfEpsEnd > m_dfEpsStart)
    throw new Exception("The 'EpsEnd' must be less than the 'EpsStart' value.");
protected override void doWork(object arg)

catch (Exception excpt)

private double getEpsilon(int nEpisode)

if (m_nEpsSteps == 0)

if (nEpisode >= m_nEpsSteps)

return m_dfEpsStart + (double)(nEpisode * (m_dfEpsEnd - m_dfEpsStart) / m_nEpsSteps);
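getEpsilon linearly anneals the exploration rate from EpsStart toward EpsEnd over EpsSteps episodes, with the guard clauses above handling the endpoints. For example, with EpsStart = 1.0, EpsEnd = 0.1 and EpsSteps = 1000, episode 500 gives 1.0 + 500 * (0.1 - 1.0) / 1000 = 0.55, and every episode from 1000 onward stays at 0.1.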
private StateBase getData(Phase phase, int nIdx, int nAction, bool? bResetOverride = null)

m_icallback.OnGetData(args);
m_dfExplorationRate = getEpsilon(nEpisode);

if (m_dfExplorationRate > 0 && m_random.NextDouble() < m_dfExplorationRate)

rgfAprob = new float[nActionCount];
int nAction = m_random.Next(nActionCount);
rgfAprob[nAction] = 1.0f;

return m_brain.act(sd, sdClip, out rgfAprob);
private int updateStatus(int nIteration, int nEpisodeCount, double dfRunningReward, double dfRewardSum, double dfLoss, double dfLearningRate)

GetStatusArgs args = new GetStatusArgs(m_nIndex, nIteration, nEpisodeCount, 1000000, dfRunningReward, dfRewardSum, m_dfExplorationRate, 0, dfLoss, dfLearningRate);
m_icallback.OnUpdateStatus(args);
return args.NewFrameCount;
public Tuple<int, int> Run(int nDelay = 1000)

Thread.Sleep(nDelay);

int a = m_brain.act(state.Data, state.Clip, out rgfAprob);
public byte[] Run(int nIterations, out string type)

if (icallback == null)
    throw new Exception("The Run method requires an IxTrainerCallbackRNN interface to convert the results into the native format!");

List<float> rgResults = new List<float>();

while (!m_brain.Cancel.WaitOne(0) && (nIterations == -1 || nIteration < nIterations))

int nAction = m_brain.act(x, s.Clip, out rgfAprob);

if (m_bShowActionProb && m_bVerbose)

string strOut = "Action Prob: " + Utility.ToString<float>(rgfAprob.ToList(), 4) + " -> " + nAction.ToString();

int nData1Idx = s.Data.ItemCount - (nItemLen * (nLookahead + 1));

rgResults.Add(nAction);
private bool isAtIteration(int nN, ITERATOR_TYPE type, int nIteration, int nEpisode)

double? dfRunningReward = null;
double dfEpisodeReward = 0;
while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))

int action = getAction(nIteration, x, s.Clip, s.ActionCount, step, out rgfAprob);

if (m_bShowActionProb && m_bVerbose)

string strOut = "Action Prob: " + Utility.ToString<float>(rgfAprob.ToList(), 4) + " -> " + action.ToString();

dfEpisodeReward += s_.Reward;

if (phase == Phase.TRAIN)

if (rgMemoryCache.Add(rgMemory))

if (m_bShowActionProb)

for (int i = 0; i < rgMemoryCache.Count; i++)

Memory rgMemory1 = rgMemoryCache[i];

List<Datum> rgData = rgMemory1.GetData();
List<Datum> rgClip = rgMemory1.GetClip();

m_brain.SetData(rgData, rgClip);

bool bApplyGradients = (i == rgMemoryCache.Count - 1);
m_brain.Train(nIteration, step, bApplyGradients);

if (!dfRunningReward.HasValue)
    dfRunningReward = dfEpisodeReward;
else
    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

nEpisode = updateStatus(nIteration, nEpisode, dfRunningReward.Value, dfEpisodeReward, m_brain.LastLoss, m_brain.LearningRate);

rgMemoryCache.Clear();

if (!dfRunningReward.HasValue)
    dfRunningReward = dfEpisodeReward;
else
    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

nEpisode = updateStatus(nIteration, nEpisode, dfRunningReward.Value, dfEpisodeReward, m_brain.LastLoss, m_brain.LearningRate);
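The running reward is an exponential moving average, seeded with the first episode's reward and then updated as runningReward = 0.99 * runningReward + 0.01 * episodeReward. For example, an episode reward of 10 against a running value of 1 moves the average only to 0.99 * 1 + 0.01 * 10 = 1.09, so the reported trend is robust to single noisy episodes.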
bool m_bSoftmaxCeSetup = false;

int m_nMiniBatch = 10;

double m_dfLastLoss = 0;
double m_dfLearningRate = 0;

int m_nThreadCount = 1;
bool m_bCreated = false;
bool m_bUseAcceleratedTraining = false;
int m_nRecurrentSequenceLength = 0;
List<Datum> m_rgData = null;
List<Datum> m_rgClip = null;
m_properties = properties;

m_nThreadCount = nThreadCount;
m_mycaffePrimary = mycaffe;

m_nMiniBatch = nMiniBatch;

m_dfLearningRate = dfRate.Value;

m_bUseAcceleratedTraining = properties.GetPropertyAsBool("UseAcceleratedTraining", false);
m_mycaffePrimary.Log.Enable = false;

if (m_nThreadCount == 1)
{
    m_mycaffeWorker = m_mycaffePrimary;
    m_mycaffePrimary.Cuda.SetDeviceID();
}
else
{
    m_mycaffeWorker = m_mycaffePrimary.Clone(m_nGpuID);
}

m_mycaffePrimary.Log.Enable = true;

m_mycaffeWorker.Cuda.SetDeviceID();
if (m_memData == null)
    throw new Exception("Could not find the MemoryData Layer!");

if (m_memLoss == null && m_phase != Phase.RUN)
    throw new Exception("Could not find the MemoryLoss Layer!");

if (m_memLoss != null)
    m_memLoss.OnGetLoss += memLoss_OnGetLoss;
m_blobDiscountedR = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
m_blobPolicyGradient = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
m_blobActionOneHot = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
m_blobDiscountedR1 = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
m_blobPolicyGradient1 = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
m_blobActionOneHot1 = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
m_blobLoss = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
m_blobAprobLogit = new Blob<T>(m_mycaffeWorker.Cuda, m_mycaffeWorker.Log);
if (m_softmax != null)

m_colAccumulatedGradients.SetDiff(0);

private void dispose(ref Blob<T> b)
if (m_memLoss != null)
    m_memLoss.OnGetLoss -= memLoss_OnGetLoss;

if (m_memData != null)

dispose(ref m_blobDiscountedR);
dispose(ref m_blobPolicyGradient);
dispose(ref m_blobActionOneHot);
dispose(ref m_blobDiscountedR1);
dispose(ref m_blobPolicyGradient1);
dispose(ref m_blobActionOneHot1);
dispose(ref m_blobLoss);
dispose(ref m_blobAprobLogit);

if (m_colAccumulatedGradients != null)
{
    m_colAccumulatedGradients.Dispose();
    m_colAccumulatedGradients = null;
}

if (m_mycaffeWorker != m_mycaffePrimary && m_mycaffeWorker != null)
    m_mycaffeWorker.Dispose();

m_mycaffeWorker = null;
get { return m_nRecurrentSequenceLength; }

get { return m_mycaffePrimary.Log; }

get { return (m_softmax != null); }
int nNum = mem.Count;
int nChannels = mem[0].Data.Channels;
int nHeight = mem[0].Data.Height;
int nWidth = mem[0].Data.Width;
int nActionProbs = 1;
nActionProbs = Math.Max(nCh, nActionProbs);

throw new Exception("Could not find a non-loss output! Your model should output the loss and the action probabilities.");

m_blobDiscountedR.Reshape(nNum, nActionProbs, 1, 1);
m_blobPolicyGradient.Reshape(nNum, nActionProbs, 1, 1);
m_blobActionOneHot.Reshape(nNum, nActionProbs, 1, 1);
m_blobDiscountedR1.Reshape(nNum, nActionProbs, 1, 1);
m_blobPolicyGradient1.Reshape(nNum, nActionProbs, 1, 1);
m_blobActionOneHot1.Reshape(nNum, nActionProbs, 1, 1);
m_blobLoss.Reshape(1, 1, 1, 1);

return nActionProbs;
double dfMean = m_blobDiscountedR.mean(rg);
double dfStd = m_blobDiscountedR.std(dfMean, rg);
int nC = m_blobDiscountedR.channels;

List<float> rgR = new List<float>();

for (int i = 0; i < rg.Length; i++)

for (int j = 0; j < nC; j++)
public void SetData(List<Datum> rgData, List<Datum> rgClip)

if (m_nRecurrentSequenceLength != 1 && rgData.Count > 1 && rgClip != null)

bool bReset = (nAction == -1);
return new GetDataArgs(phase, nIdx, m_mycaffePrimary, m_mycaffePrimary.Log, m_mycaffePrimary.CancelEvent, bReset, nAction, false, true);
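Per the glossary entry for getDataArgs below, passing nAction == -1 flags the args as a reset request (bReset == true above). A hedged usage sketch tying it to the OnGetData callback seen earlier:

// Hedged sketch: request an environment reset at the start of an episode.
GetDataArgs args = m_brain.getDataArgs(Phase.TRAIN, 0, -1); // nAction == -1 -> bReset == true
m_icallback.OnGetData(args);
StateBase state = args.State; // initial observation returned by the environment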
get { return m_dfLastLoss; }

get { return m_dfLearningRate; }

get { return m_mycaffePrimary.Log; }
if (m_sdLast == null)

List<Datum> rgData = new List<Datum>();
rgData.Add(new Datum(sd));
List<Datum> rgClip = null;

rgClip = new List<Datum>();
rgClip.Add(new Datum(sdClip));

float fRandom = (float)m_random.NextDouble();

m_bSkipLoss = false;

for (int i = 0; i < res.Count; i++)

if (m_nRecurrentSequenceLength > 1 && res[i].num > 1)

int nCount = res[i].count();
int nOutput = nCount / res[i].num;
nStart = nCount - nOutput;

throw new Exception("The start must be zero or greater!");

rgfAprob = Utility.ConvertVecF<T>(res[i].update_cpu_data(), nStart);

if (rgfAprob == null)
    throw new Exception("Could not find a non-loss output! Your model should output the loss and the action probabilities.");

for (int i = 0; i < rgfAprob.Length; i++)

fSum += rgfAprob[i];

if (rgfAprob.Length == 1)

return rgfAprob.Length - 1;
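The fRandom/fSum fragments above implement inverse-CDF sampling over the action probabilities: a uniform draw is compared against the running sum of rgfAprob until it is crossed, with the last action as a round-off fallback. A standalone sketch of that selection, assuming a normalized probability vector:

// Hedged sketch of the action selection used by act(); rgfAprob is assumed normalized.
static int SampleAction(float[] rgfAprob, Random random)
{
    float fRandom = (float)random.NextDouble();
    float fSum = 0;

    for (int i = 0; i < rgfAprob.Length; i++)
    {
        fSum += rgfAprob[i];

        if (fRandom < fSum)
            return i;
    }

    // Floating-point round-off can leave fSum slightly below 1; fall back to the last action.
    return rgfAprob.Length - 1;
}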
int nCount = dst.count();
dst.CopyFrom(src, nIdx * nCount, 0, nCount, true, false);
if (m_nRecurrentSequenceLength != 1 && m_rgData != null && m_rgData.Count > 1 && m_rgClip != null)

prepareBlob(m_blobActionOneHot1, m_blobActionOneHot);
prepareBlob(m_blobDiscountedR1, m_blobDiscountedR);
prepareBlob(m_blobPolicyGradient1, m_blobPolicyGradient);

for (int i = 0; i < m_rgData.Count; i++)

copyBlob(i, m_blobActionOneHot1, m_blobActionOneHot);
copyBlob(i, m_blobDiscountedR1, m_blobDiscountedR);
copyBlob(i, m_blobPolicyGradient1, m_blobPolicyGradient);

List<Datum> rgData1 = new List<Datum>() { m_rgData[i] };
List<Datum> rgClip1 = new List<Datum>() { m_rgClip[i] };

m_solver.Step(1, step, true, m_bUseAcceleratedTraining, true, true);

m_blobActionOneHot.ReshapeLike(m_blobActionOneHot1);
m_blobDiscountedR.ReshapeLike(m_blobDiscountedR1);
m_blobPolicyGradient.ReshapeLike(m_blobPolicyGradient1);

m_solver.Step(1, step, true, m_bUseAcceleratedTraining, true, true);

if (nIteration % m_nMiniBatch == 0 || bApplyGradients || step == TRAIN_STEP.BACKWARD || step == TRAIN_STEP.BOTH)

m_colAccumulatedGradients.SetDiff(0);

if (m_mycaffePrimary == m_mycaffeWorker)

m_dfLearningRate = m_solver.ApplyUpdate(nIteration);
private T[] unpackLabel(Datum d)
List<int> rgDataShape = e.Data.shape();
List<int> rgClipShape = e.Clip.shape();
List<int> rgLabelShape = e.Label.shape();

int nSeqLen = rgDataShape[0];

e.Data.Log.CHECK_GT(nSeqLen, 0, "The sequence length must be greater than zero!");
e.Data.Log.CHECK_EQ(nBatch, e.ClipItems.Count, "The data and clip should have the same number of items.");
e.Data.Log.CHECK_EQ(nSeqLen, rgClipShape[0], "The data and clip should have the same sequence count.");

rgDataShape[1] = nBatch;
rgClipShape[1] = nBatch;
rgLabelShape[1] = nBatch;

e.Data.Reshape(rgDataShape);
e.Clip.Reshape(rgClipShape);
e.Label.Reshape(rgLabelShape);

T[] rgRawData = new T[e.Data.count()];
T[] rgRawClip = new T[e.Clip.count()];
T[] rgRawLabel = new T[e.Label.count()];

int nDataSize = e.Data.count(2);
T[] rgDataItem = new T[nDataSize];

for (int i = 0; i < nBatch; i++)

T[] rgLabel = unpackLabel(data);

for (int j = 0; j < nSeqLen; j++)

dfClip = clip.GetDataAt<T>(j);

for (int k = 0; k < nDataSize; k++)
    rgDataItem[k] = data.GetDataAt<T>(j * nDataSize + k);

nIdx = nBatch * j + i;

nIdx = i * nBatch + j;

Array.Copy(rgDataItem, 0, rgRawData, nIdx * nDataSize, nDataSize);
rgRawClip[nIdx] = dfClip;

if (rgLabel != null)

if (rgLabel.Length == nSeqLen)
    rgRawLabel[nIdx] = rgLabel[j];
else if (rgLabel.Length == 1)

if (j == nSeqLen - 1)
    rgRawLabel[0] = rgLabel[0];

throw new Exception("The Solver SequenceLength parameter does not match the actual sequence length! The label length '" + rgLabel.Length.ToString() + "' must be either '1' for SINGLE labels, or the sequence length of '" + nSeqLen.ToString() + "' for MULTI labels. Stopping training.");

e.Data.mutable_cpu_data = rgRawData;
e.Clip.mutable_cpu_data = rgRawClip;
e.Label.mutable_cpu_data = rgRawLabel;
m_nRecurrentSequenceLength = nSeqLen;
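The two nIdx formulas above select the memory layout by LSTM type (see LstmType in the reference section below): nIdx = nBatch * j + i packs the data time-major, interleaving the batch at every time step, while nIdx = i * nBatch + j groups a batch item's steps together. For example, with nBatch = 2 and nSeqLen = 3, time-major ordering lays items out as (j=0,i=0), (j=0,i=1), (j=1,i=0), (j=1,i=1), (j=2,i=0), (j=2,i=1).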
int nCount = m_blobActionOneHot.count();
long hActionOneHot = m_blobActionOneHot.gpu_data;
long hPolicyGrad = 0;
long hDiscountedR = m_blobDiscountedR.gpu_data;

int nDataSize = e.Bottom[0].count(1);
bool bUsingEndData = false;

if (m_nRecurrentSequenceLength > 1)

List<int> rgShape = e.Bottom[0].shape();

e.Bottom[0].Reshape(rgShape);
e.Bottom[0].CopyFrom(m_blobAprobLogit, (m_blobAprobLogit.num - 1) * nDataSize, 0, nDataSize, true, true);
bUsingEndData = true;

long hBottomDiff = e.Bottom[0].mutable_gpu_diff;

if (m_softmax != null)

colBottom.Add(m_blobActionOneHot);
colTop.Add(m_blobLoss);
colTop.Add(m_blobPolicyGradient);

if (!m_bSoftmaxCeSetup)
{
    m_softmaxCe.Setup(colBottom, colTop);
    m_bSoftmaxCeSetup = true;
}

dfLoss = m_softmaxCe.Forward(colBottom, colTop);
m_softmaxCe.Backward(colTop, new List<bool>() { true, false }, colBottom);
hPolicyGrad = colBottom[0].gpu_diff;

m_mycaffeWorker.Cuda.add_scalar(nCount, -1.0, hActionOneHot);
m_mycaffeWorker.Cuda.abs(nCount, hActionOneHot, hActionOneHot);
m_mycaffeWorker.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad);
m_mycaffeWorker.Cuda.add(nCount, hActionOneHot, hPolicyGrad, hPolicyGrad);

m_mycaffeWorker.Cuda.mul_scalar(nCount, -1.0, hPolicyGrad);

m_mycaffeWorker.Cuda.mul(nCount, hPolicyGrad, hDiscountedR, hPolicyGrad);
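These in-place CUDA calls assemble the policy gradient for the non-softmax path, following the Karpathy-style REINFORCE recipe the reference section below credits as this trainer's inspiration: the difference between the network's action probability and the one-hot encoding of the action actually taken is formed first, and the final Cuda.mul then modulates that gradient by the discounted returns in hDiscountedR, so actions followed by high returns have their probability pushed up and actions followed by low returns pushed down.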
if (hPolicyGrad != hBottomDiff)
    m_mycaffeWorker.Cuda.copy(nCount, hPolicyGrad, hBottomDiff);

if (m_nRecurrentSequenceLength > 1 && bUsingEndData)

m_blobAprobLogit.CopyFrom(e.Bottom[0], 0, (m_blobAprobLogit.num - 1) * nDataSize, nDataSize, false, true);
e.Bottom[0].CopyFrom(m_blobAprobLogit, false, true);
e.Bottom[0].CopyFrom(m_blobAprobLogit, true);

m_dfLastLoss = e.Loss;
List<Memory> m_rgMemory = new List<Memory>();

get { return m_rgMemory.Count; }

get { return m_rgMemory[nIdx]; }

m_rgMemory.Add(mem);

if (m_rgMemory.Count == m_nMax)

if (dfElitePercent <= 0.0 || dfElitePercent >= 1.0)

double dfMin = m_rgMemory.Min(p => p.RewardSum);
double dfMax = m_rgMemory.Max(p => p.RewardSum);
double dfRange = dfMax - dfMin;
double dfCutoff = dfMin + ((1.0 - dfElitePercent) * dfRange);
List<Memory> rgMem = m_rgMemory.OrderByDescending(p => p.RewardSum).ToList();
List<Memory> rgElite = new List<Memory>();

for (int i = 0; i < rgMem.Count; i++)

double dfSum = rgMem[i].RewardSum;

if (dfSum >= dfCutoff)
    rgElite.Add(rgMem[i]);

m_rgMemory = rgElite;
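PurgeNonElite keeps only the episodes whose reward sum falls in the top dfElitePercent of the observed reward range. For example, with reward sums spanning dfMin = -10 to dfMax = 10 and dfElitePercent = 0.2, the cutoff is -10 + (1 - 0.2) * 20 = 6, so only episodes totaling 6 or more survive the purge.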
return m_rgMemory.GetEnumerator();

IEnumerator IEnumerable.GetEnumerator()
{
    return m_rgMemory.GetEnumerator();
}
List<MemoryItem> m_rgItems = new List<MemoryItem>();
int m_nEpisodeNumber = 0;
double m_dfRewardSum = 0;

get { return m_rgItems.Count; }

m_dfRewardSum += item.Reward;
m_rgItems.Add(item);

get { return m_rgItems[nIdx]; }
set { m_rgItems[nIdx] = value; }

get { return m_nEpisodeNumber; }
set { m_nEpisodeNumber = value; }

get { return m_dfRewardSum; }
set { m_dfRewardSum = value; }
float[] rgR = m_rgItems.Select(p => p.Reward).ToArray();
float fRunningAdd = 0;
float[] rgDiscountedR = new float[rgR.Length];

for (int t = Count - 1; t >= 0; t--)

if (bAllowReset && rgR[t] != 0)

fRunningAdd = fRunningAdd * fGamma + rgR[t];
rgDiscountedR[t] = fRunningAdd;

return rgDiscountedR;
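GetDiscountedRewards walks the episode backwards, accumulating fRunningAdd = fRunningAdd * fGamma + rgR[t]; with bAllowReset set, the accumulation restarts at non-zero rewards (e.g. game boundaries; that branch's body is elided in the listing above). A hedged standalone sketch with a worked example:

// Hedged sketch of the backward discounting recurrence; the reset body is assumed.
static float[] GetDiscountedRewards(float[] rgR, float fGamma, bool bAllowReset)
{
    float fRunningAdd = 0;
    float[] rgDiscountedR = new float[rgR.Length];

    for (int t = rgR.Length - 1; t >= 0; t--)
    {
        if (bAllowReset && rgR[t] != 0)
            fRunningAdd = 0; // assumed: restart the discount sum at a scoring boundary

        fRunningAdd = fRunningAdd * fGamma + rgR[t];
        rgDiscountedR[t] = fRunningAdd;
    }

    return rgDiscountedR;
}

// Example: rewards { 0, 0, 1 } with fGamma = 0.99f yield { 0.9801f, 0.99f, 1f }.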
List<float> rgfAprob = new List<float>();

for (int i = 0; i < m_rgItems.Count; i++)
    rgfAprob.AddRange(m_rgItems[i].Aprob);

return rgfAprob.ToArray();
List<float> rgfAonehot = new List<float>();

for (int i = 0; i < m_rgItems.Count; i++)
{
    float[] rgfOneHot = new float[m_rgItems[0].Aprob.Length];

    if (rgfOneHot.Length == 1)
        rgfOneHot[0] = m_rgItems[i].Action;
    else
        rgfOneHot[m_rgItems[i].Action] = 1;

    rgfAonehot.AddRange(rgfOneHot);
}

return rgfAonehot.ToArray();
List<Datum> rgData = new List<Datum>();

for (int i = 0; i < m_rgItems.Count; i++)
    rgData.Add(new Datum(m_rgItems[i].Data));
if (m_rgItems.Count == 0)

if (m_rgItems[0].State.Clip == null)

List<Datum> rgData = new List<Datum>();

for (int i = 0; i < m_rgItems.Count; i++)

if (m_rgItems[i].State.Clip == null)

rgData.Add(new Datum(m_rgItems[i].State.Clip));
m_nAction = nAction;
m_rgfAprob = rgfAprob;
m_fReward = fReward;

get { return m_state; }

get { return m_nAction; }

get { return m_fReward; }

get { return m_rgfAprob; }

return "action = " + m_nAction.ToString() + " reward = " + m_fReward.ToString("N2") + " aprob = " + tostring(m_rgfAprob);
private string tostring(float[] rg)

for (int i = 0; i < rg.Length; i++)

str += rg[i].ToString("N5");

str = str.TrimEnd(',');
The MyCaffeControl is the main object used to manage all training, testing and running of the MyCaffe...
CancelEvent CancelEvent
Returns the CancelEvent used.
Net< T > GetInternalNet(Phase phase=Phase.RUN)
Returns the internal net based on the Phase specified: TRAIN, TEST or RUN.
void CopyWeightsFrom(MyCaffeControl< T > src)
Copy the learnable parameter data from the source MyCaffeControl into this one.
Solver< T > GetInternalSolver()
Get the internal solver.
MyCaffeControl< T > Clone(int nGpuID)
Clone the current instance of the MyCaffeControl creating a second instance.
Log Log
Returns the Log (for output) used.
CudaDnn< T > Cuda
Returns the CudaDnn connection used.
ProjectEx CurrentProject
Returns the currently loaded project.
The BinaryData class is used to pack and unpack DataCriteria binary data, optionally stored within ea...
static List< double > UnPackDoubleList(byte[] rg, DATA_FORMAT fmtExpected)
Unpack the byte array into a list of double values.
static List< float > UnPackFloatList(byte[] rg, DATA_FORMAT fmtExpected)
Unpack the byte array into a list of float values.
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manua...
void Reset()
Resets the event clearing any signaled state.
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
CancelEvent()
The CancelEvent constructor.
void Set()
Sets the event to the signaled state.
The CryptoRandom is a random number generator that can use either the standard .Net Random object or t...
int Next(int nMinVal, int nMaxVal, bool bMaxInclusive=true)
Returns a random int within the specified range.
double NextDouble()
Returns a random double within the range [0, 1).
The Datum class is a simple wrapper to the SimpleDatum class to ensure compatibility with the origina...
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
double Progress
Get/set the progress associated with the Log.
void WriteError(Exception e)
Write an error as output.
Log(string strSrc)
The Log constructor.
double? GetSolverSettingAsNumeric(string strParam)
Get a setting from the solver descriptor as a double value.
int GetBatchSize(Phase phase)
Returns the batch size of the project used in a given Phase.
Specifies a key-value pair of properties.
string GetProperty(string strName, bool bThrowExceptions=true)
Returns a property as a string value.
int GetPropertyAsInt(string strName, int nDefault=0)
Returns a property as an integer value.
bool GetPropertyAsBool(string strName, bool bDefault=false)
Returns a property as a boolean value.
double GetPropertyAsDouble(string strName, double dfDefault=0)
Returns a property as a double value.
override string ToString()
Returns the string representation of the properties.
The Result class contains a single result.
The SimpleDatum class holds a data input within host memory.
void Copy(SimpleDatum d, bool bCopyData, int? nHeight=null, int? nWidth=null)
Copy another SimpleDatum into this one.
float GetDataAtF(int nIdx)
Returns the item at a specified index in the float type.
bool Sub(SimpleDatum sd, bool bSetNegativeToZero=false)
Subtract the data of another SimpleDatum from this one, so this = this - sd.
void Zero()
Zero out all data in the datum but keep the size and other settings.
int ItemCount
Returns the number of data items.
DateTime TimeStamp
Get/set the Timestamp.
byte[] DataCriteria
Get/set data criteria associated with the data.
DATA_FORMAT
Defines the data format of the DebugData and DataCriteria when specified.
DATA_FORMAT DataCriteriaFormat
Get/set the data format of the data criteria.
The Utility class provides general utility functions.
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of generics.
The BlobCollection contains a list of Blobs.
void Dispose()
Release all resource used by the collection and its Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void Accumulate(CudaDnn< T > cuda, BlobCollection< T > src, bool bAccumulateDiff)
Accumulate the diffs from one BlobCollection into another.
void SetDiff(double df)
Set all blob diff to the value specified.
int Count
Returns the number of items in the collection.
The Blob is the main holder of data that moves through the Layers of the Net.
int channels
DEPRECATED; legacy shape accessor channels: use shape(1) instead.
void SetData(T[] rgData, int nCount=-1, bool bSetCount=true)
Sets a number of items within the Blob's data.
int height
DEPRECATED; legacy shape accessor height: use shape(2) instead.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
double std(double? dfMean=null, float[] rgDf=null)
Calculate the standard deviation of the blob data.
double mean(float[] rgDf=null, bool bDiff=false)
Calculate the mean of the blob data.
void CopyFrom(Blob< T > src, int nSrcOffset, int nDstOffset, int nCount, bool bCopyData, bool bCopyDiff)
Copy from a source Blob.
int width
DEPRECATED; legacy shape accessor width: use shape(3) instead.
T sumsq_data()
Calculate the sum of squares (L2 norm squared) of the data.
void NormalizeData(double? dfMean=null, double? dfStd=null)
Normalize the blob data by subtracting the mean and dividing by the standard deviation.
int count()
Returns the total number of items in the Blob.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
void SetDiff(double dfVal, int nIdx=-1)
Either sets all of the diff items in the Blob to a given value, or alternatively only sets a single i...
int num
DEPRECATED; legacy shape accessor num: use shape(0) instead.
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
Connects Layers together into a directed acyclic graph (DAG) specified by a NetParameter.
BlobCollection< T > Forward()
Run forward with the input Blobs already fed separately.
Layer< T > FindLayer(LayerParameter.LayerType? type, string strName)
Find the layer with the matching type, name and or both.
BlobCollection< T > output_blobs
Returns the collection of output Blobs.
void ClearParamDiffs()
Zero out the diffs of all network parameters. This should be run before Backward.
BlobCollection< T > learnable_parameters
Returns the learnable parameters.
The ResultCollection contains the result of a given CaffeControl::Run.
void Backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Given the top Blob error gradients, compute the bottom Blob error gradients.
double Forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Given the bottom (input) Blobs, this function computes the top (output) Blobs and the loss.
void Setup(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Implements common Layer setup functionality.
The MemoryDataLayer provides data to the Net from memory. This layer is initialized with the MyCaffe....
virtual void AddDatumVector(Datum[] rgData, Datum[] rgClip=null, int nLblAxis=1, bool bReset=false, bool bResizeBatch=false)
This method is used to add a list of Datums to the memory.
EventHandler< MemoryDataLayerPackDataArgs< T > > OnDataPack
The OnDataPack event fires from within the AddDatumVector method and is used to pack the data into a ...
The MemoryDataLayerPackDataArgs is passed to the OnDataPack event which fires each time the data rece...
Blob< T > Label
Returns the label data to fill with ordered label information.
Blob< T > Clip
Returns the clip data to fill with ordered data for clipping.
List< Datum > ClipItems
Returns the raw clip items to use to fill.
LayerParameter.LayerType LstmType
Returns the LSTM type.
Blob< T > Data
Returns the blob data to fill with ordered data.
List< Datum > DataItems
Returns the raw data items to use to fill.
The MemoryLossLayerGetLossArgs class is passed to the OnGetLoss event.
bool EnableLossUpdate
Get/set enabling the loss update within the backpropagation pass.
double Loss
Get/set the externally calculated total loss.
BlobCollection< T > Bottom
Specifies the bottom passed in during the forward pass.
The MemoryLossLayer provides a method of performing a custom loss functionality. Similar to the Memor...
EventHandler< MemoryLossLayerGetLossArgs< T > > OnGetLoss
The OnGetLoss event fires during each forward pass. The value returned is saved, and applied on the b...
The SoftmaxCrossEntropyLossLayer computes the cross-entropy (logistic) loss and is often used for pr...
The SoftmaxLayer computes the softmax function. This layer is initialized with the MyCaffe....
Specifies the base parameter for all layers.
List< double > loss_weight
Specifies the loss weight.
LayerType
Specifies the layer type.
LossParameter loss_param
Returns the parameter set when initialized with LayerType.LOSS
Stores the parameters used by loss layers.
NormalizationMode
How to normalize the loss for loss layers that aggregate across batches, spatial dimensions,...
NormalizationMode? normalization
Specifies the normalization mode (default = VALID).
An interface for classes that perform optimization on Nets - this class serves as the base class for ...
bool Step(int nIters, TRAIN_STEP step=TRAIN_STEP.NONE, bool bZeroDiffs=true, bool bApplyUpdates=true, bool bDisableOutput=false, bool bDisableProgress=false, double? dfLossOverride=null, bool? bAllowSnapshot=null)
Steps a set of iterations through a training cycle.
abstract double ApplyUpdate(int nIterationOverride=-1)
Make and apply the update value for the current iteration.
The ApplyUpdateArgs is passed to the OnApplyUpdates event.
int Iteration
Returns the iteration from which the gradients are to be applied.
double LearningRate
Returns the learning rate at the time the gradients were applied.
MyCaffeControl< T > MyCaffeWorker
Returns the MyCaffe worker instance whose gradients are to be applied.
The ConvertOutputArgs is passed to the OnConvertOutput event.
byte[] RawOutput
Specifies the raw output byte stream.
string RawType
Specifies the type of the raw output byte stream.
The GetDataArgs is passed to the OnGetData event to retrieve data.
StateBase State
Specifies the state data of the observations.
The InitializeArgs is passed to the OnInitialize event.
The StateBase is the base class for the state of each observation - this is defined by actual trainer...
bool Done
Get/set whether the state is done or not.
double Reward
Get/set the reward of the state.
SimpleDatum Data
Returns other data associated with the state.
int ActionCount
Returns the number of actions.
SimpleDatum Clip
Returns the clip data associated with the state.
The WaitArgs is passed to the OnWait event.
The Agent both builds episodes from the environment and trains on them using the Brain.
void Dispose()
Release all resources used.
byte[] Run(int nIterations, out string type)
Run the action on a set number of iterations and return the results with no training.
override void doWork(object arg)
This is the main agent thread that runs the agent.
Agent(int nIdx, IxTrainerCallback icallback, MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, Phase phase, int nGpuID, int nThreadCount)
The constructor.
EventHandler< ApplyUpdateArgs< T > > OnApplyUpdates
The OnApplyUpdates event fires each time the Agent needs to apply its updates to the primary instance...
void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
The Run method provides the main 'actor' loop that performs the following steps: 1....
Tuple< int, int > Run(int nDelay=1000)
Run a single action on the model.
The Brain uses the instance of MyCaffe (e.g. the open project) to run new actions and train the netwo...
void SetDiscountedR(float[] rg)
Sets the discounted returns in the Discounted Returns Blob.
EventHandler< ApplyUpdateArgs< T > > OnApplyUpdate
The OnApplyUpdate event fires when the Brain needs to apply its gradients to the primary instance of ...
double LearningRate
Return the learning rate used.
double LastLoss
Return the last loss received.
int Reshape(Memory mem)
Reshape all Blobs used based on the Memory specified.
GetDataArgs getDataArgs(Phase phase, int nIdx, int nAction, bool? bResetOverride=null)
Returns the GetDataArgs used to retrieve new data from the environment implemented by derived parent ...
void Train(int nIteration, TRAIN_STEP step, bool bApplyGradients=true)
Train the model at the current iteration.
int act(SimpleDatum sd, SimpleDatum sdClip, out float[] rgfAprob)
Returns the action from running the model. The action returned is either randomly selected (when usin...
void SetData(List< Datum > rgData, List< Datum > rgClip)
Add the data to the model by adding it to the MemoryData layer.
CancelEvent Cancel
Returns the Cancel event used to cancel all MyCaffe tasks.
bool? UsesSoftMax
Returns true if the current model uses a SoftMax, false otherwise.
Brain(MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, Phase phase, int nGpuID, int nThreadCount)
The constructor.
Log OutputLog
Returns the primary MyCaffe output log for writing output information.
int RecurrentSequenceLength
Returns the recurrent sequence length detected when training a recurrent network, otherwise 0 is retu...
void Dispose()
Release all resources used by the Brain.
void SetActionProbabilities(float[] rg)
Set the action probabilities in the Policy Gradient Blob.
void Create()
Create the Brain CUDA objects - this is called on the thread from which the Brain runs.
SimpleDatum Preprocess(StateBase s, bool bUseRawInput)
Preprocesses the data.
void SetActionOneHotVectors(float[] rg)
Set the action one-hot vectors in the Action OneHot Vector Blob.
Contains the best memory episodes (best by highest total rewards)
void Clear()
Clear all items from the memory cache.
bool Add(Memory mem)
Add a new episode to the memory cache.
void PurgeNonElite(double dfElitePercent)
Purge all non elite episodes.
int Count
Returns the number of items in the cache.
MemoryCache(int nMax)
Constructor.
IEnumerator< Memory > GetEnumerator()
Returns the enumerator.
Specifies a single Memory (e.g. an episode).
int EpisodeNumber
Get/set the episode number of this memory.
void Add(MemoryItem item)
Add a new item to the memory.
float[] GetActionProbabilities()
Retrieve the action probabilities of the episode.
double RewardSum
Get/set the reward sum of this memory.
float[] GetDiscountedRewards(float fGamma, bool bAllowReset)
Retrieve the discounted rewards for this episode.
void Clear()
Remove all items in the list.
int Count
Returns the number of memory items in the memory.
List< Datum > GetClip()
Returns the clip data if it exists, or null.
float[] GetActionOneHotVectors()
Retrieve the action one-hot vectors for the episode.
List< Datum > GetData()
Retrieve the data of each step in the episode.
The MemoryItem stores the information for one step in an episode.
float[] Aprob
Returns the action probabilities which are only used with non-Softmax models.
int Action
Returns the action of this episode step.
float Reward
Returns the reward for taking the action in this episode step.
SimpleDatum Data
Returns the pre-processed data (run through the model) of this episode step.
MemoryItem(StateBase s, SimpleDatum x, int nAction, float[] rgfAprob, float fReward)
The constructor.
StateBase State
Returns the state and data of this episode step.
override string ToString()
Returns the string representation of this episode step.
The Optimizer manages a single thread used to apply updates to the primary instance of MyCaffe....
Optimizer(MyCaffeControl< T > mycaffePrimary)
The constructor.
void Dispose()
Release all resources used.
double ApplyUpdates(MyCaffeControl< T > mycaffeWorker, int nIteration)
The ApplyUpdates function sets the parameters, signals the Apply Updates thread, blocks for the opera...
override void doWork(object arg)
This override is the thread used to apply all updates, its CUDA DeviceID is set to the same device ID...
The TrainerPG implements a simple Policy Gradient trainer inspired by Andrej Karpathy's blog post re...
bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
Train the network using a modified PG training algorithm optimized for GPU use.
void Dispose()
Releases all resources used.
TrainerPG(MyCaffeControl< T > mycaffe, PropertySet properties, CryptoRandom random, IxTrainerCallback icallback)
The constructor.
ResultCollection RunOne(int nDelay=1000)
Run a single cycle on the environment after the delay.
byte[] Run(int nN, PropertySet runProp, out string type)
Run a set of iterations and return the results.
bool Shutdown(int nWait)
Shutdown the trainer.
bool Initialize()
Initialize the trainer.
bool Test(int nN, ITERATOR_TYPE type)
Run the test cycle - currently this is not implemented.
The Worker class provides the base class for both the Environment and Optimizer and provides the basi...
AutoResetEvent m_evtCancel
Specifies the cancel event used to cancel this worker.
ManualResetEvent m_evtDone
Specifies the done event set when this worker completes.
int m_nIndex
Specifies the index of this worker.
virtual void doWork(object arg)
This is the actual thread function that is overridden by each derivative class.
Task m_workTask
Specifies the worker task that runs the thread function.
void Start(WorkerStartArgs args)
Start running the thread.
void Stop(int nWait)
Stop running the thread.
Worker(int nIdx)
The constructor.
The WorkerStartArgs provides the arguments used when starting the agent thread.
WorkerStartArgs(int nCycleDelay, Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
The constructor.
TRAIN_STEP Step
Returns the training step to take (if any). This is used for debugging.
int CycleDelay
Returns the cycle delay which specifies the amount of time to wait for a cancel.
Phase Phase
Return the phase on which to run.
ITERATOR_TYPE IterationType
Returns the iteration type.
int N
Returns the maximum number of episodes to run.
The IxTrainerCallback provides functions used by each trainer to 'call-back' to the parent for inform...
The IxTrainerCallbackRNN provides functions used by each trainer to 'call-back' to the parent for inf...
void OnConvertOutput(ConvertOutputArgs e)
The OnConvertOutput callback fires from within the Run method and is used to convert the network's ou...
The IxTrainerRL interface is implemented by each RL Trainer.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Phase
Defines the Phase under which to run a Net.
The MyCaffe.common namespace contains common MyCaffe classes.
BLOB_TYPE
Defines the type of data held by a given Blob.
TRAIN_STEP
Defines the training stepping method (if any).
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers namespace contains all layers that have a solidified code base,...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe.solvers namespace contains all solver classes, including the base Solver.
ITERATOR_TYPE
Specifies the iterator type to use.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...