2using System.Collections.Generic;
8using System.Threading.Tasks;
66 double m_dfExplorationRate = 0;
67 double m_dfOptimalSelectionRate = 0;
68 double m_dfGlobalRewards = 0;
69 double m_dfGlobalRewardsAve = 0;
70 double m_dfGlobalRewardsMax = -
double.MaxValue;
71 int m_nGlobalEpisodeCount = 0;
72 int m_nGlobalEpisodeMax = 0;
75 REWARD_TYPE m_rewardType = REWARD_TYPE.MAXIMUM;
76 TRAINER_TYPE m_trainerType = TRAINER_TYPE.PG_ST;
77 int m_nItertions = -1;
80 bool m_bSnapshot =
false;
81 object m_syncObj =
new object();
102 InitializeComponent();
113 InitializeComponent();
123 get {
return "MyCaffe RL Trainer"; }
171 switch (m_trainerType)
173 case TRAINER_TYPE.PG_SIMPLE:
176 case TRAINER_TYPE.PG_ST:
179 case TRAINER_TYPE.PG_MT:
183 throw new Exception(
"Unknown trainer type '" + m_trainerType.ToString() +
"'!");
204 switch (m_trainerType)
206 case TRAINER_TYPE.PG_SIMPLE:
209 case TRAINER_TYPE.PG_ST:
212 case TRAINER_TYPE.PG_MT:
216 throw new Exception(
"Unknown trainer type '" + m_trainerType.ToString() +
"'!");
283 #region IXMyCaffeCustomTrainer Interface
290 get {
return Stage.RL; }
362 private void cleanup(
int nWait,
bool bCallShutdown =
false)
366 if (m_itrainer !=
null)
385 m_icallback = icallback;
390 if (strRewardType ==
null)
391 strRewardType =
"VAL";
393 strRewardType = strRewardType.ToUpper();
395 if (strRewardType ==
"VAL" || strRewardType ==
"VALUE")
396 m_rewardType = REWARD_TYPE.VALUE;
397 else if (strRewardType ==
"AVE" || strRewardType ==
"AVERAGE")
398 m_rewardType = REWARD_TYPE.AVERAGE;
402 switch (strTrainerType)
405 m_trainerType = TRAINER_TYPE.PG_SIMPLE;
409 m_trainerType = TRAINER_TYPE.PG_ST;
414 m_trainerType = TRAINER_TYPE.PG_MT;
418 throw new Exception(
"Unknown trainer type '" + strTrainerType +
"'!");
444 if (m_itrainer ==
null)
445 m_itrainer = createTrainer(mycaffe);
462 if (m_itrainer ==
null)
463 m_itrainer = createTrainer(mycaffe);
467 if (icallback !=
null)
470 byte[] rgResults = m_itrainer.
Run(nN, runProp, out type);
484 if (m_itrainer ==
null)
485 m_itrainer = createTrainer(mycaffe);
487 if (nIterationOverride == -1)
488 nIterationOverride = m_nItertions;
490 m_itrainer.
Test(nIterationOverride, type);
503 if (m_itrainer ==
null)
504 m_itrainer = createTrainer(mycaffe);
506 if (nIterationOverride == -1)
507 nIterationOverride = m_nItertions;
509 m_itrainer.
Train(nIterationOverride, type, step);
545 m_dfGlobalRewardsMax = Math.Max(m_dfGlobalRewardsMax, e.
TotalReward);
546 m_dfGlobalRewardsAve = (1.0 / (double)m_nThreads) * e.
TotalReward + ((m_nThreads - 1) / (
double)m_nThreads) * m_dfGlobalRewardsAve;
551 m_nGlobalEpisodeCount++;
553 m_nGlobalEpisodeCount = e.
Frames;
558 if (m_icallback !=
null)
560 Dictionary<string, double> rgValues =
new Dictionary<string, double>();
565 rgValues.Add(
"Threads", m_nThreads);
571 if (e.
Index == 0 && m_nSnapshot > 0 && m_nGlobalEpisodeCount > 0 && (m_nGlobalEpisodeCount % m_nSnapshot) == 0)
580 Thread.Sleep(e.
Wait);
595 case "GlobalRewards":
598 case "GlobalEpisodeCount":
601 case "ExplorationRate":
605 throw new Exception(
"The property '" + strProp +
"' is not supported by the MyCaffeTrainerRNN.");
622 switch (m_rewardType)
624 case REWARD_TYPE.VALUE:
625 return m_dfGlobalRewards;
627 case REWARD_TYPE.AVERAGE:
628 return m_dfGlobalRewardsAve;
631 return (m_dfGlobalRewardsMax == -
double.MaxValue) ? 0 : m_dfGlobalRewardsMax;
641 get {
return m_dfLoss; }
649 get {
return m_nGlobalEpisodeCount; }
657 get {
return m_nGlobalEpisodeMax; }
665 get {
return m_dfExplorationRate; }
673 get {
return m_dfOptimalSelectionRate; }
The MyCaffeControl is the main object used to manage all training, testing and running of the MyCaffe...
ConnectInfo DatasetConnectInfo
Returns the dataset connection information, if used (default = null).
ProjectEx CurrentProject
Returns the name of the currently loaded project.
The ConnectInfo class specifies the server, database and username/password used to connect to a datab...
The CryptoRandom is a random number generator that can use either the standard .Net Random object or t...
string GetSolverSetting(string strParam)
Get a setting from the solver descriptor.
int OriginalID
Get/set the original project ID.
Specifies a key-value pair of properties.
string GetProperty(string strName, bool bThrowExceptions=true)
Returns a property as a string value.
int GetPropertyAsInt(string strName, int nDefault=0)
Returns a property as an integer value.
The DatasetDescriptor class describes a dataset which contains both a training data source and testin...
The ResultCollection contains the result of a given CaffeControl::Run.
The GetDataArgs is passed to the OnGetData event to retrieve data.
The GetStatusArgs is passed to the OnGetStatus event.
double Loss
Returns the loss value.
double OptimalSelectionCoefficient
Returns the optimal selection coefficient.
int MaxFrames
Returns the maximum frame count.
int Frames
Returns the total frame count across all agents.
int NewFrameCount
Get/set the new frame count.
double ExplorationRate
Returns the current exploration rate.
double TotalReward
Returns the total rewards.
int Index
Returns the index of the caller.
double LearningRate
Returns the current learning rate.
The InitializeArgs is passed to the OnInitialize event.
(Deprecated - use MyCaffeTrainerDual instead.) The MyCaffeTrainerRL is used to perform reinforceme...
void Test(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type=ITERATOR_TYPE.ITERATION)
Create a new trainer and use it to run a test cycle.
void OpenUi()
Open the user interface for the trainer, if one exists.
double OptimalSelectionRate
Returns the rate of selection from the optimal set with the highest reward (this setting is optional,...
void Train(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type=ITERATOR_TYPE.ITERATION, TRAIN_STEP step=TRAIN_STEP.NONE)
Create a new trainer and use it to run a training cycle.
void OnShutdown()
The OnShutdown callback fires when shutting down the trainer.
void OnUpdateStatus(GetStatusArgs e)
The OnGetStatus callback fires on each iteration within the Train method.
double GetProperty(string strProp)
Return a property value from the trainer.
virtual void shutdown()
Override called from within the CleanUp method.
DatasetDescriptor GetDatasetOverride(int nProjectID, ConnectInfo ci=null)
Returns a dataset override to use (if any) instead of the project's dataset. If there is no dataset o...
ResultCollection RunOne(Component mycaffe, int nDelay=1000)
Create a new trainer and use it to run a single run cycle.
double GlobalLoss
Return the global loss.
PropertySet m_properties
Specifies the properties parsed from the key-value pair passed to the Initialize method.
double ExplorationRate
Returns the current exploration rate.
void CleanUp()
Releases any resources used by the component.
ConnectInfo m_dsCi
Optionally, specifies the dataset connection info, or null.
bool IsRunningSupported
Returns whether or not Running is supported.
virtual IxTrainerRL create_trainerF(Component caffe)
Optionally overridden to return a new type of trainer.
virtual TRAINING_CATEGORY category
Override when using a training method other than the REINFORCEMENT method (the default).
void OnWait(WaitArgs e)
The OnWait callback fires when waiting for a shutdown.
void OnGetData(GetDataArgs e)
The OnGetData callback fires from within the Train method and is used to get a new observation data.
string Information
Returns information describing the trainer.
MyCaffeTrainerRL()
The constructor.
virtual IxTrainerRL create_trainerD(Component caffe)
Optionally overridden to return a new type of trainer.
bool IsTrainingSupported
Returns whether or not Training is supported.
virtual string name
Overridden to give the actual name of the custom trainer.
double? GlobalRewards
Returns the global rewards based on the reward type specified by the 'RewardType' property.
virtual bool getData(GetDataArgs e)
Override called by the OnGetData event fired by the Trainer to retrieve a new set of observation coll...
virtual void initialize(InitializeArgs e)
Override called by the Initialize method of the trainer.
virtual void dispose()
Override to dispose of resources used.
int GlobalEpisodeCount
Returns the global episode count.
void Initialize(string strProperties, IXMyCaffeCustomTrainerCallback icallback)
Initializes a new custom trainer by loading the key-value pair of properties into the property set.
TRAINING_CATEGORY TrainingCategory
Returns the training category of the custom trainer (default = REINFORCEMENT).
CryptoRandom m_random
Random number generator used to get initial actions, etc.
int m_nProjectID
Specifies the project ID of the project held by the instance of MyCaffe.
void OnInitialize(InitializeArgs e)
The OnInitialize callback fires when initializing the trainer.
bool IsTestingSupported
Returns whether or not Testing is supported.
virtual bool get_update_snapshot(out int nIteration, out double dfAccuracy)
Returns true when the training is ready for a snap-shot, false otherwise.
virtual DatasetDescriptor get_dataset_override(int nProjectID, ConnectInfo ci=null)
Returns a dataset override to use (if any) instead of the project's dataset. If there is no dataset o...
MyCaffeTrainerRL(IContainer container)
The constructor.
bool GetUpdateSnapshot(out int nIteration, out double dfAccuracy)
Returns true when the training is ready for a snap-shot, false otherwise.
virtual void openUi()
Called by OpenUi, override this when a UI (via WCF) should be displayed.
string Name
Returns the name of the custom trainer. This method calls the 'name' override.
byte[] Run(Component mycaffe, int nN, out string type)
Run the network using the run technique implemented by this trainer.
virtual string get_information()
Returns information describing the specific trainer, such as the gym used, if any.
int GlobalEpisodeMax
Returns the maximum global episode count.
The WaitArgs is passed to the OnWait event.
int Wait
Returns the amount of time to wait in milliseconds.
The Component class is a standard Microsoft.NET class that implements the IComponent interface and is...
The IXMyCaffeCustomTrainerCallback interface is used to call back to the parent running the custom tr...
void Update(TRAINING_CATEGORY cat, Dictionary< string, double > rgValues)
The Update method updates the parent with the global iteration, reward and loss.
The IXMyCaffeCustomTrainerCallbackRNN interface is used to call back to the parent running the custom...
PropertySet GetRunProperties()
The GetRunProperties method is used to query the properties used when Running, if any.
The IXMyCaffeCustomTrainer interface is used by the MyCaffeCustomTrainer components that provide va...
The IxTrainerCallback provides functions used by each trainer to 'call-back' to the parent for inform...
bool Initialize()
Initialize the trainer.
bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
Train the network.
bool Test(int nN, ITERATOR_TYPE type)
Test the network.
bool Shutdown(int nWait)
Shutdown the trainer.
The IxTrainerRL interface is implemented by each RL Trainer.
ResultCollection RunOne(int nDelay=1000)
Run a single cycle on the trainer.
byte[] Run(int nN, PropertySet runProp, out string type)
Run a number of 'nN' samples on the trainer.
The descriptors namespace contains all descriptors used to describe various items stored within the da...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
TRAINING_CATEGORY
Defines the category of training.
Stage
Specifies the stage under which to run a custom trainer.
The MyCaffe.common namespace contains common MyCaffe classes.
TRAIN_STEP
Defines the training stepping method (if any).
The MyCaffe.gym namespace contains all classes related to the Gyms supported by MyCaffe.
The MyCaffe.trainers namespace contains all reinforcement and recurrent learning trainers.
ITERATOR_TYPE
Specifies the iterator type to use.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...