4using System.Collections.Generic;
7using System.Drawing.Drawing2D;
11using System.Threading.Tasks;
28 string m_strName =
"Cart-Pole";
29 double m_dfGravity = 9.8;
30 double m_dfMassCart = 1.0;
31 double m_dfMassPole = 0.1;
33 double m_dfLength = 0.5;
34 double m_dfPoleMassLength;
35 double m_dfForce = 10;
36 bool m_bAdditive =
false;
37 double m_dfTau = 0.02;
38 Dictionary<string, int> m_rgActionSpace;
46 double m_dfThetaThreshold = CartPoleState.MAX_THETA;
47 double m_dfXThreshold = CartPoleState.MAX_X;
49 Random m_random =
new Random();
50 CartPoleState m_state =
new CartPoleState();
51 int? m_nStepsBeyondDone =
null;
74 m_dfTotalMass = m_dfMassPole + m_dfMassCart;
75 m_dfPoleMassLength = m_dfMassPole * m_dfLength;
77 m_rgActionSpace =
new Dictionary<string, int>();
78 m_rgActionSpace.Add(
"MoveLeft", 0);
79 m_rgActionSpace.Add(
"MoveRight", 1);
97 if (properties !=
null)
118 if (properties !=
null)
129 get {
return false; }
153 get {
return m_strName; }
178 return m_rgActionSpace;
181 private void processAction(
ACTION? a)
188 m_state.ForceMag = (m_state.ForceMag * ((m_bAdditive) ? 1 : 0)) + m_dfForce * -1;
192 m_state.ForceMag = (m_state.ForceMag * ((m_bAdditive) ? 1 : 0)) + m_dfForce * 1;
/// <summary>
/// Render the gym's current state by packing it into a data array and
/// delegating to the Render overload that accepts explicit data.
/// </summary>
/// <param name="bShowUi">Forwarded unchanged to the data-driven overload.</param>
/// <param name="nWidth">Forwarded unchanged to the data-driven overload.</param>
/// <param name="nHeight">Forwarded unchanged to the data-driven overload.</param>
/// <param name="bGetAction">Forwarded unchanged to the data-driven overload.</param>
/// <returns>Whatever the data-driven Render overload returns.</returns>
public Tuple<Bitmap, SimpleDatum> Render(bool bShowUi, int nWidth, int nHeight, bool bGetAction)
{
    // Slot order matters: [0]=X, [1]=XDot, [2]=Theta, [3]=ThetaDot,
    // [4]=ForceMag, [5]=step count.
    double[] rgState = new double[]
    {
        m_state.X,
        m_state.XDot,
        m_state.Theta,
        m_state.ThetaDot,
        m_state.ForceMag,
        m_nSteps
    };

    return Render(bShowUi, nWidth, nHeight, rgState, bGetAction);
}
236 public Tuple<Bitmap, SimpleDatum>
Render(
bool bShowUi,
int nWidth,
int nHeight,
double[] rgData,
bool bGetAction)
238 Bitmap bmp =
new Bitmap(nWidth, nHeight);
240 double dfX = rgData[0];
241 double dfTheta = rgData[2];
242 double dfThetaInDegrees = dfTheta * (180.0 / Math.PI);
243 double dfForceMag = rgData[4];
244 int nSteps = (int)rgData[5];
247 m_nMaxSteps = Math.Max(nSteps, m_nMaxSteps);
249 using (Graphics g = Graphics.FromImage(bmp))
251 Rectangle rc =
new Rectangle(0, 0, bmp.Width, bmp.Height);
252 g.FillRectangle(Brushes.White, rc);
254 float fScreenWidth = g.VisibleClipBounds.Width;
255 float fScreenHeight = g.VisibleClipBounds.Height;
256 float fWorldWidth = (float)(m_dfXThreshold * 2);
257 float fScale = fScreenWidth / fWorldWidth;
259 float fPoleWidth = 10;
260 float fPoleLen = fScale * 1.0f;
261 float fCartWidth = 50;
262 float fCartHeight = 30;
264 float fL = -fCartWidth / 2;
265 float fR = fCartWidth / 2;
266 float fT = fCartHeight / 2;
267 float fB = -fCartHeight / 2;
268 float fAxleOffset = 0;
269 GeomCart cart =
new GeomCart(fL, fR, fT, fB, Color.SkyBlue, Color.Black);
// Reuse the edge variables (previously used to build the cart) for the pole's bounds.
fL = -fPoleWidth / 2;
fT = fPoleLen - fPoleWidth / 2;
// The bottom edge is half a pole-width on the negative side, mirroring fL.
// A plain unary minus is required: '--fPoleWidth' would pre-decrement the
// width variable and produce a positive bottom edge instead.
fB = -fPoleWidth / 2;
GeomPole pole = new GeomPole(fL, fR, fT, fB, Color.Tan, Color.Black);
288 if (m_clrMap ==
null)
289 m_clrMap =
new ColorMapper(fL, fR, Color.Fuchsia, Color.Red);
293 float fCartX = (float)dfX * fScale + fScreenWidth / 2;
294 cart.SetLocation(fCartX, fCartY);
295 pole.SetRotation((
float)-dfThetaInDegrees);
296 cart.Attach(pole, fAxleOffset);
300 view.
RenderText(g,
"Current Force = " + dfForceMag.ToString(), 10, 10);
301 view.
RenderText(g,
"X = " + dfX.ToString(
"N02"), 10, 24);
302 view.
RenderText(g,
"Theta = " + dfTheta.ToString(
"N02") +
" radians", 10, 36);
303 view.
RenderText(g,
"Theta = " + dfThetaInDegrees.ToString(
"N02") +
" degrees", 10, 48);
315 sdAction = getActionData(fCartX, fCartY, bmp);
319 return new Tuple<Bitmap, SimpleDatum>(bmp, sdAction);
323 private SimpleDatum getActionData(
float fX,
float fY, Bitmap bmpSrc)
327 double dfX = fX - (dfWid * 0.5);
328 double dfY = (bmpSrc.Height - fY) - (dfHt * 0.75);
330 RectangleF rc =
new RectangleF((
float)dfX, (
float)dfY, (
float)dfWid, (
float)dfHt);
331 Bitmap bmp =
new Bitmap((
int)dfWid, (
int)dfHt);
333 using (Graphics g = Graphics.FromImage(bmp))
335 RectangleF rc1 =
new RectangleF(0, 0, (
float)dfWid, (
float)dfHt);
336 g.FillRectangle(Brushes.Black, rc1);
337 g.DrawImage(bmpSrc, rc1, rc, GraphicsUnit.Pixel);
351 double dfX = randomUniform(-0.05, 0.05);
352 double dfXDot = randomUniform(-0.05, 0.05);
353 double dfTheta = randomUniform(-0.05, 0.05);
354 double dfThetaDot = randomUniform(-0.05, 0.05);
355 m_nStepsBeyondDone =
null;
358 m_state =
new CartPoleState(dfX, dfXDot, dfTheta, dfThetaDot);
359 return new Tuple<State, double, bool>(m_state.Clone(), 1,
false);
/// <summary>
/// Produce a random value by scaling a sample from m_random.NextDouble()
/// onto the span between dfMin and dfMax.
/// </summary>
/// <param name="dfMin">Value returned when the underlying sample is 0.</param>
/// <param name="dfMax">Upper end of the span used to scale the sample.</param>
/// <returns>dfMin plus the sample multiplied by (dfMax - dfMin).</returns>
private double randomUniform(double dfMin, double dfMax)
{
    double dfSample = m_random.NextDouble();
    return dfMin + (dfSample * (dfMax - dfMin));
}
375 public Tuple<State, double, bool>
Step(
int nAction,
bool bGetLabel,
PropertySet propExtra =
null)
377 CartPoleState state =
new CartPoleState(m_state);
380 processAction((
ACTION)nAction);
382 double dfX = state.X;
383 double dfXDot = state.XDot;
384 double dfTheta = state.Theta;
385 double dfThetaDot = state.ThetaDot;
386 double dfForce = m_state.ForceMag;
387 double dfCosTheta = Math.Cos(dfTheta);
388 double dfSinTheta = Math.Sin(dfTheta);
389 double dfTemp = (dfForce + m_dfPoleMassLength * dfThetaDot * dfThetaDot * dfSinTheta) / m_dfTotalMass;
390 double dfThetaAcc = (m_dfGravity * dfSinTheta - dfCosTheta * dfTemp) / (m_dfLength * ((4.0 / 3.0) - m_dfMassPole * dfCosTheta * dfCosTheta / m_dfTotalMass));
391 double dfXAcc = dfTemp - m_dfPoleMassLength * dfThetaAcc * dfCosTheta / m_dfTotalMass;
393 dfX += m_dfTau * dfXDot;
394 dfXDot += m_dfTau * dfXAcc;
395 dfTheta += m_dfTau * dfThetaDot;
396 dfThetaDot += m_dfTau * dfThetaAcc;
398 CartPoleState stateOut = m_state;
399 m_state =
new CartPoleState(dfX, dfXDot, dfTheta, dfThetaDot);
403 if (dfX < -m_dfXThreshold || dfX > m_dfXThreshold ||
404 dfTheta < -m_dfThetaThreshold || dfTheta > m_dfThetaThreshold)
411 else if (!m_nStepsBeyondDone.HasValue)
414 m_nStepsBeyondDone = 0;
419 if (m_nStepsBeyondDone.Value == 0)
420 m_log.
WriteLine(
"WARNING: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()'");
422 m_nStepsBeyondDone++;
427 m_nMaxSteps = Math.Max(m_nMaxSteps, m_nSteps);
429 stateOut.Steps = m_nSteps;
430 return new Tuple<State, double, bool>(stateOut.Clone(), dfReward, bDone);
465 class GeomCart : GeomPolygon
469 public GeomCart(
float fL,
float fR,
float fT,
float fB, Color clrFill, Color clrBorder)
470 : base(fL, fR, fT, fB, clrFill, clrBorder)
474 public void Attach(GeomPole pole,
float fXOffset)
477 m_pole.SetLocation(Location.X + fXOffset, Location.Y);
480 public override void Render(Graphics g)
487 class GeomPole : GeomPolygon
/// <summary>
/// Build the pole polygon and the ellipse object kept in m_axis.
/// </summary>
/// <param name="fL">Left bound, passed to the base polygon and to the axis ellipse.</param>
/// <param name="fR">Right bound, passed to the base polygon and to the axis ellipse.</param>
/// <param name="fT">Top bound, passed to the base polygon.</param>
/// <param name="fB">Bottom bound, passed to the base polygon and used as the axis ellipse's last bound.</param>
/// <param name="clrFill">Fill color passed to the base polygon.</param>
/// <param name="clrBorder">Border color passed to the base polygon.</param>
public GeomPole(float fL, float fR, float fT, float fB, Color clrFill, Color clrBorder)
    : base(fL, fR, fT, fB, clrFill, clrBorder)
{
    // The axis ellipse shares the pole's left/right bounds; its third bound is
    // offset from fB by the pole's width (presumably making it as tall as it
    // is wide -- confirm against GeomEllipse's parameter order).
    float fPoleSpan = fR - fL;
    float fAxisEdge = fB - fPoleSpan;
    m_axis = new GeomEllipse(fL, fR, fAxisEdge, fB, Color.Brown, Color.Black);
}
/// <summary>
/// Move the pole to a new location, moving its axis object with it.
/// </summary>
/// <param name="fX">X location applied to both the axis and the pole.</param>
/// <param name="fY">Y location applied to both the axis and the pole.</param>
public override void SetLocation(float fX, float fY)
{
    // Axis first, then the pole itself -- same order as before.
    m_axis.SetLocation(fX, fY);

    base.SetLocation(fX, fY);
}
504 public override void Render(Graphics g)
511 class CartPoleState : State
515 double m_dfTheta = 0;
516 double m_dfThetaDot = 0;
517 double m_dfForceMag = 0;
520 public const double MAX_X = 2.4;
521 public const double MAX_THETA = 20 * (Math.PI/180);
523 public CartPoleState(
double dfX = 0,
double dfXDot = 0,
double dfTheta = 0,
double dfThetaDot = 0)
528 m_dfThetaDot = dfThetaDot;
532 public CartPoleState(CartPoleState s)
535 m_dfXDot = s.m_dfXDot;
536 m_dfTheta = s.m_dfTheta;
537 m_dfThetaDot = s.m_dfThetaDot;
538 m_dfForceMag = s.m_dfForceMag;
539 m_nSteps = s.m_nSteps;
544 get {
return m_nSteps; }
545 set { m_nSteps = value; }
/// <summary>
/// Get or set the force magnitude stored in m_dfForceMag.
/// </summary>
public double ForceMag
{
    get
    {
        return m_dfForceMag;
    }
    set
    {
        m_dfForceMag = value;
    }
}
556 get {
return m_dfX; }
557 set { m_dfX = value; }
562 get {
return m_dfXDot; }
563 set { m_dfXDot = value; }
568 get {
return m_dfTheta; }
569 set { m_dfTheta = value; }
/// <summary>
/// Get or set the value stored in m_dfThetaDot.
/// </summary>
public double ThetaDot
{
    get
    {
        return m_dfThetaDot;
    }
    set
    {
        m_dfThetaDot = value;
    }
}
578 public double ThetaInDegrees
582 return m_dfTheta * (180.0/Math.PI);
/// <summary>
/// Create a copy of this state using the CartPoleState copy constructor.
/// </summary>
/// <returns>A new CartPoleState constructed from this instance.</returns>
public override State Clone()
{
    CartPoleState copy = new CartPoleState(this);
    return copy;
}
591 public override SimpleDatum GetData(
bool bNormalize, out
int nDataLen)
597 data.
SetPixel(0, 0, getValue(m_dfX, -MAX_X, MAX_X, bNormalize));
598 data.
SetPixel(0, 1, getValue(m_dfXDot, -MAX_X * nScale, MAX_X * nScale, bNormalize));
599 data.
SetPixel(0, 2, getValue(m_dfTheta, -MAX_THETA, MAX_THETA, bNormalize));
600 data.
SetPixel(0, 3, getValue(m_dfThetaDot, -MAX_THETA * nScale * 2, MAX_THETA * nScale * 2, bNormalize));
601 data.
SetPixel(0, 4, getValue(m_dfForceMag, -100, 100, bNormalize));
607 private double getValue(
double dfVal,
double dfMin,
double dfMax,
bool bNormalize)
612 return (dfVal - dfMin) / (dfMax - dfMin);
The ColorMapper maps a value within a number range to a Color within a color scheme.
The ImageData class is a helper class used to convert between Datum, other raw data,...
static Datum GetImageDataD(Bitmap bmp, int nChannels, bool bDataIsReal, int nLabel, bool bUseLockBitmap=true, int[] rgFocusMap=null)
The GetImageDataD function converts a Bitmap into a Datum using the double type for real data.
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Specifies a key-value pair of properties.
double GetPropertyAsDouble(string strName, double dfDefault=0)
Returns a property as a double value.
The SimpleDatum class holds a data input within host memory.
The Realmap operates similar to a bitmap but is actually just an array of doubles.
void SetPixel(int nX, int nY, double clr)
Set a given pixel to a given color.
The DatasetDescriptor class describes a dataset which contains both a training data source and testin...
The SourceDescriptor class contains all information describing a data source.
The CartPole Gym provides a simulation of a cart with a balancing pole standing on top of it.
DATA_TYPE SelectedDataType
Returns the selected data type.
CartPoleGym()
The constructor.
double TestingPercent
Returns the testing percent of -1, which then uses the default of 0.2.
bool RequiresDisplayImage
Returns false indicating that this Gym does not require a display image.
void Initialize(Log log, PropertySet properties)
Initialize the gym with the specified properties.
DatasetDescriptor GetDataset(DATA_TYPE dt, Log log=null)
Returns the dataset descriptor of the dynamic dataset produced by the Gym.
Dictionary< string, int > GetActionSpace()
Returns the action space as a dictionary of name,actionid pairs.
void Close()
Shutdown and close the gym.
DATA_TYPE[] SupportedDataType
Returns the data types supported by this gym.
Tuple< State, double, bool > Step(int nAction, bool bGetLabel, PropertySet propExtra=null)
Step the gym one step in its simulation.
string Name
Returns the gym's name.
IXMyCaffeGym Clone(PropertySet properties=null)
Create a new copy of the gym.
Tuple< Bitmap, SimpleDatum > Render(bool bShowUi, int nWidth, int nHeight, bool bGetAction)
Render the gym's current state on a bitmap and SimpleDatum.
int UiDelay
Returns the delay to use (if any) when the user-display is visible.
Tuple< State, double, bool > Reset(bool bGetLabel, PropertySet props=null)
Reset the state of the gym.
Tuple< Bitmap, SimpleDatum > Render(bool bShowUi, int nWidth, int nHeight, double[] rgData, bool bGetAction)
Render the gym's specified data.
ACTION
Defines the actions to perform.
The GeomLine object is used to render a line.
The GeomEllipse object is used to render an ellipse.
The GeomView manages and renders a collection of Geometric objects.
void Render(Graphics g)
Renders the view.
void AddObject(GeomObj obj)
Add a new geometric object to the view.
void RenderText(Graphics g, string str, float fX, float fY, Brush br=null)
Render text at a location.
void RenderSteps(Graphics g, int nSteps, int nMax)
Renders the Gym step information.
The IXMyCaffeGym interface is used to interact with each Gym.
The descriptors namespace contains all descriptors used to describe various items stored within the da...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
GYM_TYPE
Defines the gym type (if any).
DATA_TYPE
Defines the gym data type.
The MyCaffe.gym namespace contains all classes related to the Gyms supported by MyCaffe.
GYM_SRC_TRAIN_ID
Defines the Standard GYM Training Data Source ID's.
GYM_DS_ID
Defines the Standard GYM Dataset ID's.
GYM_SRC_TEST_ID
Defines the Standard GYM Testing Data Source ID's.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...