2using System.Collections.Generic;
9using System.Runtime.Remoting.Messaging;
10using System.Security.Cryptography;
11using System.Security.Policy;
// Reusable shape buffer (pre-sized for up to 4 dims) -- presumably used when reshaping top blobs; TODO confirm (uses elided in this view).
37 List<int> m_rgShape =
new List<int>(4);
// Number of historical (past) time steps fed to the model.
39 uint m_nNumHistoricalSteps;
// Number of future time steps to predict.
40 uint m_nNumFutureSteps;
// [batch, 2] index pairs (item idx, value idx) of the last loaded batch; forwarded to m_data.Add on loss events.
43 int[,] m_rgIdx =
null;
// (ctor fragment) cache the cancel event used to abort background data loading.
56 m_evtCancel = evtCancel;
125 if (!m_data.
LoadData(phase,
m_param.
data_temporal_param.
source,
m_param.
data_temporal_param.
shuffle_data, (
int)
m_param.
data_temporal_param.
batch_size, (
int)m_nNumHistoricalSteps, (
int)m_nNumFutureSteps,
m_param.
data_temporal_param.
max_load_percent,
m_param.
data_temporal_param.
drip_refresh_rate_in_sec,
m_param.
data_temporal_param.
chunk_count,
m_log, m_evtCancel))
129 m_log.
CHECK_GE(nTotalSize, m_nBatchSize,
"There must be enough items for at least one batch - items found = " + nTotalSize.ToString() +
", batch size = " + m_nBatchSize.ToString());
141 if ((rgShape = m_data.
GetShape(DataNpy<T>.OUTPUT_TYPE.STATIC_NUMERIC)) !=
null)
144 if ((rgShape = m_data.
GetShape(DataNpy<T>.OUTPUT_TYPE.STATIC_CATEGORICAL)) !=
null)
147 if ((rgShape = m_data.
GetShape(DataNpy<T>.OUTPUT_TYPE.HISTORICAL_NUMERIC)) !=
null)
150 if ((rgShape = m_data.
GetShape(DataNpy<T>.OUTPUT_TYPE.HISTORICAL_CATEGORICAL)) !=
null)
153 if ((rgShape = m_data.
GetShape(DataNpy<T>.OUTPUT_TYPE.FUTURE_NUMERIC)) !=
null)
156 if ((rgShape = m_data.
GetShape(DataNpy<T>.OUTPUT_TYPE.FUTURE_CATEGORICAL)) !=
null)
159 if (colTop.
Count > 6)
161 if ((rgShape = m_data.
GetShape(DataNpy<T>.OUTPUT_TYPE.TARGET)) !=
null)
169 rgShape[1] = (int)m_nNumHistoricalSteps;
182 layer.
OnLoss += Layer_OnLoss;
185 private void Layer_OnLoss(
object sender,
LossArgs e)
188 m_data.
Add(e, m_rgIdx);
245 public RawData(uint? nSeed,
bool bOutputTargetHistorical)
277 public virtual bool LoadData(
Phase phase,
string strPath,
bool bShuffleData,
int nBatchSize,
int nHistoricalSteps,
int nFutureSteps,
double dfPctMaxLoad,
int nDripRefreshRateInSec, uint nChunkCount,
Log log,
CancelEvent evtCancel)
281 ManualResetEvent evtReady =
new ManualResetEvent(
false);
282 ManualResetEvent evtDone =
new ManualResetEvent(
false);
283 Thread threadLoad =
new Thread(
new ParameterizedThreadStart(
loadDataFunction));
284 threadLoad.Start(
new DataLoadParameters(phase, strPath, nHistoricalSteps, nFutureSteps, dfPctMaxLoad, nDripRefreshRateInSec, nChunkCount, bShuffleData, log, evtCancel, evtReady, evtDone));
286 while (!evtReady.WaitOne(1000))
316 return m_data.LoadBatch(nBatchSize, col, bEnableDebug, strDebugPath);
334 return m_data.GetTotalSize();
342 public virtual int[]
GetShape(DataNpy<T>.OUTPUT_TYPE ot)
344 return m_data.GetShape(ot);
// Number of historical steps requested at load time.
357 int m_nHistoricalSteps;
// NOTE(review): typo in field name -- "DropRefreshReate" should read "DripRefreshRate"; renaming requires touching uses outside this excerpt.
359 int m_nDropRefreshReateInSec;
// Cached per-batch flattened buffers, one per output type; lazily allocated on first LoadBatch (see null checks in the batch loop).
362 float[] m_rgStaticNum =
null;
363 float[] m_rgStaticCat =
null;
364 float[] m_rgHistoricalNum =
null;
365 float[] m_rgHistoricalCat =
null;
366 float[] m_rgFutureNum =
null;
367 float[] m_rgFutureCat =
null;
368 float[] m_rgTarget =
null;
369 float[] m_rgTargetHist =
null;
// [batch, 2] (item idx, value idx) pairs recorded for the last batch; fed back into m_batchPerfSet on loss events.
370 int[,] m_rgIdx =
null;
// Loss-driven sampling helper; created lazily (remains null until first loss event -- see the null check before Add).
371 BatchPerfSet m_batchPerfSet =
null;
402 public override bool LoadData(
Phase phase,
string strDataset,
bool bShuffleData,
int nBatchSize,
int nHistoricalSteps,
int nFutureSteps,
double dfPctMaxLoad,
int nDripRefreshRateInSec, uint nChunkCount,
Log log,
CancelEvent evtCancel)
416 prop.
SetProperty(
"HistoricalSteps", nHistoricalSteps.ToString());
417 prop.
SetProperty(
"FutureSteps", nFutureSteps.ToString());
422 m_ds = m_db.GetDatasetByName(strDataset);
425 m_log.
WriteLine(
"ERROR: Could not find the dataset '" + strDataset +
"'!");
430 m_db.InitializeWithDsName1(s, strDataset);
432 m_bShuffleData = bShuffleData;
434 m_nHistoricalSteps = nHistoricalSteps;
435 m_nFutureSteps = nFutureSteps;
436 m_nDropRefreshReateInSec = nDripRefreshRateInSec;
443 if (col.
Count <= nIdx)
446 int nItemCount = col[nIdx].count();
450 return new float[nItemCount];
468 if (m_batchPerfSet ==
null)
471 m_batchPerfSet.Add(e, rgIdx);
489 if (m_rgStaticNum ==
null)
490 m_rgStaticNum = getBuffer(col, 0);
491 if (m_rgStaticCat ==
null)
492 m_rgStaticCat = getBuffer(col, 1);
493 if (m_rgHistoricalNum ==
null)
494 m_rgHistoricalNum = getBuffer(col, 2);
495 if (m_rgHistoricalCat ==
null)
496 m_rgHistoricalCat = getBuffer(col, 3);
497 if (m_rgFutureNum ==
null)
498 m_rgFutureNum = getBuffer(col, 4);
499 if (m_rgFutureCat ==
null)
500 m_rgFutureCat = getBuffer(col, 5);
501 if (m_rgTarget ==
null)
502 m_rgTarget = getBuffer(col, 6);
503 if (m_rgTargetHist ==
null)
504 m_rgTargetHist = getBuffer(col, 7);
507 m_rgIdx =
new int[nBatchSize,2];
509 for (
int i = 0; i < nBatchSize; i++)
511 int? nItemIdx =
null;
512 int? nValueIdx =
null;
516 if (m_batchPerfSet !=
null)
517 m_batchPerfSet.Select(ref nItemIdx, ref nValueIdx);
519 SimpleTemporalDatumCollection rgData = m_db.QueryTemporalItem(i, src.
ID, ref nItemIdx, ref nValueIdx, itemSelection, valueSelection, bEnableDebug, strDebugPath);
523 m_rgIdx[i, 0] = nItemIdx.Value;
524 m_rgIdx[i, 1] = nValueIdx.Value;
536 if (m_rgStaticNum !=
null)
538 float[] rgRawData = sdStatNum.
Data;
539 Array.Copy(rgRawData, 0, m_rgStaticNum, i * rgRawData.Length, rgRawData.Length);
543 if (m_rgStaticCat !=
null)
545 float[] rgRawData = sdStatCat.
Data;
546 Array.Copy(rgRawData, 0, m_rgStaticCat, i * rgRawData.Length, rgRawData.Length);
550 if (m_rgHistoricalNum !=
null)
552 float[] rgRawData = sdHistNum.
Data;
553 Array.Copy(rgRawData, 0, m_rgHistoricalNum, i * rgRawData.Length, rgRawData.Length);
557 if (m_rgHistoricalCat !=
null)
559 float[] rgRawData = sdHistCat.
Data;
560 Array.Copy(rgRawData, 0, m_rgHistoricalCat, i * rgRawData.Length, rgRawData.Length);
564 if (m_rgFutureNum !=
null)
566 float[] rgRawData = sdFutureNum.
Data;
567 Array.Copy(rgRawData, 0, m_rgFutureNum, i * rgRawData.Length, rgRawData.Length);
571 if (m_rgFutureCat !=
null)
573 float[] rgRawData = sdFutureCat.
Data;
574 Array.Copy(rgRawData, 0, m_rgFutureCat, i * rgRawData.Length, rgRawData.Length);
578 if (m_rgTarget !=
null)
580 float[] rgRawData = sdTarget.
Data;
581 Array.Copy(rgRawData, 0, m_rgTarget, i * rgRawData.Length, rgRawData.Length);
585 if (m_rgTargetHist !=
null)
587 float[] rgRawData = sdTargetHist.
Data;
588 Array.Copy(rgRawData, 0, m_rgTargetHist, i * rgRawData.Length, rgRawData.Length);
592 setBuffer(col, 0, m_rgStaticNum);
593 setBuffer(col, 1, m_rgStaticCat);
594 setBuffer(col, 2, m_rgHistoricalNum);
595 setBuffer(col, 3, m_rgHistoricalCat);
596 setBuffer(col, 4, m_rgFutureNum);
597 setBuffer(col, 5, m_rgFutureCat);
598 setBuffer(col, 6, m_rgTarget);
599 setBuffer(col, 7, m_rgTargetHist);
610 return m_db.GetTotalSize(m_ds.
ID, m_phase, m_nHistoricalSteps, m_nFutureSteps);
618 public override int[]
GetShape(DataNpy<T>.OUTPUT_TYPE ot)
620 int nStaticNumCount = 0;
621 int nStaticCatCount = 0;
622 int nObservedNumCount = 0;
623 int nObservedCatCount = 0;
624 int nKnownNumCount = 0;
625 int nKnownCatCount = 0;
657 case Data<T>.OUTPUT_TYPE.STATIC_CATEGORICAL:
658 if (nStaticCatCount == 0)
662 case Data<T>.OUTPUT_TYPE.STATIC_NUMERIC:
663 if (nStaticNumCount == 0)
667 case Data<T>.OUTPUT_TYPE.HISTORICAL_SYNC:
668 return new int[] {
m_nBatchSize, m_nHistoricalSteps, 1 };
670 case Data<T>.OUTPUT_TYPE.HISTORICAL_CATEGORICAL:
671 if (nKnownCatCount + nObservedCatCount == 0)
673 return new int[] {
m_nBatchSize, m_nHistoricalSteps, nKnownCatCount + nObservedCatCount, 1 };
675 case Data<T>.OUTPUT_TYPE.HISTORICAL_NUMERIC:
676 if (nKnownNumCount + nObservedNumCount == 0)
678 return new int[] {
m_nBatchSize, m_nHistoricalSteps, nKnownNumCount + nObservedNumCount, 1 };
680 case Data<T>.OUTPUT_TYPE.FUTURE_SYNC:
683 case Data<T>.OUTPUT_TYPE.FUTURE_CATEGORICAL:
684 if (nKnownCatCount == 0)
686 return new int[] {
m_nBatchSize, m_nFutureSteps, nKnownCatCount, 1 };
688 case Data<T>.OUTPUT_TYPE.FUTURE_NUMERIC:
689 if (nKnownNumCount == 0)
691 return new int[] {
m_nBatchSize, m_nFutureSteps, nKnownNumCount, 1 };
693 case Data<T>.OUTPUT_TYPE.TARGET:
694 return new int[] {
m_nBatchSize, m_nFutureSteps, 1, 1 };
712 public RawFileData(uint? nSeed,
bool bOutputTargetHistorical) : base(nSeed, bOutputTargetHistorical)
725 string strType =
"train";
726 strPath = strPath.TrimEnd(
'\\',
'/');
729 if (phase ==
Phase.TEST)
731 else if (phase ==
Phase.RUN)
732 strType =
"validation";
734 strFile = strPath + strType +
"_sync.npy";
735 if (!File.Exists(strFile))
736 throw new Exception(
"Could not find the data file '" + strFile +
"'. You may need to run the SignalPop AI Designer Dataset Creator.");
738 strFile = strPath + strType +
"_schema.xml";
739 if (!File.Exists(strFile))
740 throw new Exception(
"Could not find the schema file '" + strFile +
"'. You may need to run the SignalPop AI Designer Dataset Creator.");
759 public override bool LoadData(
Phase phase,
string strPath,
bool bShuffleData,
int nBatchSize,
int nHistoricalSteps,
int nFutureSteps,
double dfPctMaxLoad,
int nDripRefreshRateInSec, uint nChunkCount,
Log log,
CancelEvent evtCancel)
763 return base.LoadData(phase, strPath, bShuffleData, nBatchSize, nHistoricalSteps, nFutureSteps, dfPctMaxLoad, nDripRefreshRateInSec, nChunkCount, log, evtCancel);
768 DataLoadParameters arg = obj as DataLoadParameters;
769 string strPath = arg.Path;
770 Phase phase = arg.Phase;
772 double dfMaxLoadPct = arg.MaxLoadPercent;
773 int nDripRefreshRateInSec = arg.DripRefreshRateInSec;
775 ManualResetEvent evtReady = arg.ReadyEvent;
776 ManualResetEvent evtDone = arg.DoneEvent;
777 DataNpy<T> dataChunk =
null;
782 string strType =
"train";
783 strPath = strPath.TrimEnd(
'\\',
'/');
786 if (phase ==
Phase.TEST)
788 else if (phase ==
Phase.RUN)
789 strType =
"validation";
791 dataChunk =
new DataNpy<T>(
m_data);
795 int nRowCount = dataChunk.RowCount;
796 int nMaxLoadCount = (int)(nRowCount * dfMaxLoadPct);
799 Stopwatch sw =
new Stopwatch();
804 bool bGoodData =
false;
806 while (dataChunk.Load(nRowIdx, out bGoodData))
814 bool bRefreshed =
m_data.Add(dataChunk, nMaxLoadCount);
821 if (sw.Elapsed.TotalMilliseconds > 1000)
825 log.
WriteLine(
"Background data loading for '" + strType +
"' aborted.");
829 double dfPct = (double)nRowIdx / (
double)nRowCount;
830 if (nMaxLoadCount > 0)
832 if (nRowIdx > nMaxLoadCount)
835 dfPct = (double)nRowIdx / (
double)nMaxLoadCount;
838 log.
WriteLine(
"Background data loading '" + strType +
"' data at " + dfPct.ToString(
"P") +
"...");
844 log.
WriteLine(
"Background data loading '" + strType +
"' refreshed...");
848 while (!evtCancel.
WaitOne(1000))
853 if (nWaitCount > nDripRefreshRateInSec)
857 if (nDripRefreshRateInSec == 0)
863 log.
WriteLine(
"Background data load completed.");
865 if (nDripRefreshRateInSec <= 0)
// NOTE(review): typo in the log message below -- "refresing" should be "refreshing". Left unchanged here (runtime string); fix as a code change.
869 log.
WriteLine(
"Starting drip refresing...");
873 while (!evtCancel.
WaitOne(1000))
878 if (nWaitCount > nDripRefreshRateInSec)
894#pragma warning disable 1591
// Parameter bundle passed to the background load thread (see Thread.Start(new DataLoadParameters(...)) above).
896 class DataLoadParameters
901 int m_nNumFutureSteps;
902 double m_dfMaxLoadPct;
// NOTE(review): typo in field name -- "Rrefresh" (double 'r'); renaming requires touching the matching property below as well.
903 int m_nDripRrefreshRateInSec;
908 ManualResetEvent m_evtReady;
909 ManualResetEvent m_evtDone;
// Captures all load settings; fields are exposed read-only via the properties below.
911 public DataLoadParameters(
Phase phase,
string strPath,
int nNumHistSteps,
int nNumFutureSteps,
double dfMaxLoadPct,
int nDripRefreshRateInSec, uint nChunkCount,
bool bShuffleData,
Log log,
CancelEvent evtCancel, ManualResetEvent evtReady, ManualResetEvent evtDone)
915 m_nNumHistSteps = nNumHistSteps;
916 m_nNumFutureSteps = nNumFutureSteps;
917 m_dfMaxLoadPct = dfMaxLoadPct;
918 m_nDripRrefreshRateInSec = nDripRefreshRateInSec;
919 m_nChunkCount = nChunkCount;
920 m_bShuffleData = bShuffleData;
922 m_evtCancel = evtCancel;
// NOTE(review): assignments of m_phase/m_strPath/m_log/m_evtDone are not visible in this excerpt (lines elided) -- confirm they are set in the full source.
923 m_evtReady = evtReady;
/// <summary>Gets the phase under which the data is loaded.</summary>
public Phase Phase => m_phase;
/// <summary>Gets the data path (or source name) to load from.</summary>
public string Path => m_strPath;
/// <summary>Gets the number of historical (past) steps.</summary>
public int HistoricalSteps => m_nNumHistSteps;
/// <summary>Gets the number of future steps.</summary>
public int FutureSteps => m_nNumFutureSteps;
/// <summary>Gets the maximum percentage of the data to load.</summary>
public double MaxLoadPercent => m_dfMaxLoadPct;
/// <summary>Gets the drip refresh rate, in seconds.</summary>
public int DripRefreshRateInSec => m_nDripRrefreshRateInSec;
/// <summary>Gets the chunk count.</summary>
public uint ChunkCount => m_nChunkCount;
/// <summary>Gets whether the data is to be shuffled.</summary>
public bool ShuffleData => m_bShuffleData;
/// <summary>Gets the output log.</summary>
public Log Log => m_log;
/// <summary>Gets the event the loader signals when ready (waited on by LoadData).</summary>
public ManualResetEvent ReadyEvent => m_evtReady;
/// <summary>Gets the event signaled when loading is done.</summary>
public ManualResetEvent DoneEvent => m_evtDone;
// Abstract base for temporal data containers (npy-file and in-memory variants derive from this).
941 abstract class Data<T> : IDisposable
943 protected Random m_random;
945 protected int m_nHistoricalSteps;
946 protected int m_nFutureSteps;
947 protected bool m_bShuffleData;
// When true, the historical portion of the target series is also output.
948 protected bool m_bOutputTargetHistorical =
false;
// Private gate object for lock() -- preferred over locking 'this'.
949 protected object m_syncObj =
new object();
950 protected int m_nRows = 0;
951 protected int m_nBatchSize = 0;
952 protected int m_nTotalSize = 0;
// (enum fragments -- interior members elided in this view)
960 OBSERVED_CATEGORICAL,
965 public enum OUTPUT_TYPE
970 HISTORICAL_CATEGORICAL,
979 public Data(Random random,
Log log,
int nHistoricalSteps,
int nFutureSteps,
bool bShuffleData,
bool bOutputTargetHistorical)
983 m_nHistoricalSteps = nHistoricalSteps;
984 m_nFutureSteps = nFutureSteps;
985 m_bShuffleData = bShuffleData;
986 m_bOutputTargetHistorical = bOutputTargetHistorical;
989 public Data(Data<T> data)
991 m_random = data.m_random;
993 m_nHistoricalSteps = data.m_nHistoricalSteps;
994 m_nFutureSteps = data.m_nFutureSteps;
995 m_bShuffleData = data.m_bShuffleData;
996 m_bOutputTargetHistorical = data.m_bOutputTargetHistorical;
999 public void Dispose()
1006 get {
return m_nRows; }
1009 public int GetTotalSize()
1011 return m_nTotalSize;
1016 get {
return GetTotalSize() >= m_nBatchSize; }
/// <summary>Opens the data of the given type located at the source for the specified batch size.</summary>
1019 public abstract void Open(
string strSrc,
string strType,
int nBatchSize);
/// <summary>Closes the data and releases its resources.</summary>
1021 public abstract void Close();
/// <summary>Loads one batch into 'col', returning the [batch, 2] (item idx, value idx) pairs used.</summary>
1023 public abstract int[,] LoadBatch(
int nBatchSize,
BlobCollection<T> col,
bool bEnableDebug,
string strDebugPath);
/// <summary>Returns the blob shape for the given output type, or null when that output is not present.</summary>
1025 public abstract int[] GetShape(OUTPUT_TYPE ot);
/// <summary>Merges a freshly loaded chunk, trimming to 'nMaxLoad' rows; returns true when a refresh occurred -- TODO confirm against DataNpy.Add.</summary>
1027 public abstract bool Add(DataNpy<T> data,
int nMaxLoad);
// Numpy-file backed temporal data container keyed by DATA_TYPE (files/raw arrays) and OUTPUT_TYPE (batch buffers).
1031 class DataNpy<T> : Data<T>
1033 DataSchema m_schema;
// Valid row ranges usable for sampling windows.
1034 Lookup m_validRanges =
new Lookup();
// Data-type -> .npy file path.
1035 Dictionary<DATA_TYPE, string> m_rgstrFiles =
new Dictionary<DATA_TYPE, string>();
// Per-type raw rows loaded so far: numeric as float[], categorical as long[].
1036 Dictionary<DATA_TYPE, List<float[]>> m_rgNumData =
new Dictionary<DATA_TYPE, List<float[]>>();
1037 Dictionary<DATA_TYPE, List<long[]>> m_rgCatData =
new Dictionary<DATA_TYPE, List<long[]>>();
// Open numpy file readers, numeric and categorical.
1038 Dictionary<DATA_TYPE, NumpyFile<float>> m_rgNumFiles =
new Dictionary<DATA_TYPE, NumpyFile<float>>();
1039 Dictionary<DATA_TYPE, NumpyFile<long>> m_rgCatFiles =
new Dictionary<DATA_TYPE, NumpyFile<long>>();
// Field (column) count per data type.
1040 Dictionary<DATA_TYPE, int> m_rgFields =
new Dictionary<DATA_TYPE, int>();
// Preallocated per-batch sync and output buffers (sized in Open).
1041 Dictionary<OUTPUT_TYPE, long[]> m_rgBatchSync =
new Dictionary<OUTPUT_TYPE, long[]>();
1042 Dictionary<OUTPUT_TYPE, float[]> m_rgBatchBuffers =
new Dictionary<OUTPUT_TYPE, float[]>();
// -1 until computed by getMaxRowIdx after a chunk is merged.
1043 int m_nMaxRowIdx = -1;
// Index of the TARGET field within the observed-numeric columns (set from the schema in Open).
1046 int m_nTargetFieldIdx = 0;
1047 int m_nIteration = 0;
1049 public DataNpy(Random random,
Log log,
int nHistoricalSteps,
int nFutureSteps,
bool bShuffleData,
bool bOutputTargetHistorical)
1050 : base(random, log, nHistoricalSteps, nFutureSteps, bShuffleData, bOutputTargetHistorical)
1054 public DataNpy(Data<T> data)
1059 public override void Open(
string strPath,
string strType,
int nBatchSize)
1062 m_schema = DataSchema.Load(strPath +
"\\" + strType +
"_schema.xml");
1063 m_nTargetFieldIdx = m_schema.Data.ObservedNum.FindFieldIndex(Field.INPUT_TYPE.TARGET);
1067 m_nBatchSize = nBatchSize;
1068 m_rgstrFiles.Add(
DATA_TYPE.SYNC, strPath +
"\\" + strType +
"_sync.npy");
1069 m_rgstrFiles.Add(
DATA_TYPE.STATIC_NUMERIC, strPath +
"\\" + strType +
"_static_num.npy");
1070 m_rgstrFiles.Add(
DATA_TYPE.STATIC_CATEGORICAL, strPath +
"\\" + strType +
"_static_cat.npy");
1071 m_rgstrFiles.Add(
DATA_TYPE.OBSERVED_NUMERIC, strPath +
"\\" + strType +
"_observed_num.npy");
1072 m_rgstrFiles.Add(
DATA_TYPE.OBSERVED_CATEGORICAL, strPath +
"\\" + strType +
"_observed_cat.npy");
1073 m_rgstrFiles.Add(
DATA_TYPE.KNOWN_NUMERIC, strPath +
"\\" + strType +
"_known_num.npy");
1074 m_rgstrFiles.Add(
DATA_TYPE.KNOWN_CATEGORICAL, strPath +
"\\" + strType +
"_known_cat.npy");
1077 if (!File.Exists(m_rgstrFiles[
DATA_TYPE.SYNC]))
1078 throw new Exception(
"Could not find the sync file '" + m_rgstrFiles[
DATA_TYPE.SYNC] +
"'.");
1082 m_rgCatFiles.Add(
DATA_TYPE.SYNC, npySync);
1083 m_rgCatData.Add(
DATA_TYPE.SYNC,
new List<
long[]>());
1086 nLen = nBatchSize * m_nHistoricalSteps * m_rgCatFiles[
DATA_TYPE.SYNC].Fields;
1087 m_rgBatchSync.Add(OUTPUT_TYPE.HISTORICAL_SYNC,
new long[nLen]);
1089 nLen = nBatchSize * m_nFutureSteps * m_rgCatFiles[
DATA_TYPE.SYNC].Fields;
1090 m_rgBatchSync.Add(OUTPUT_TYPE.FUTURE_SYNC,
new long[nLen]);
1092 if (!File.Exists(m_rgstrFiles[
DATA_TYPE.OBSERVED_NUMERIC]))
1093 throw new Exception(
"Could not find the sync file '" + m_rgstrFiles[
DATA_TYPE.OBSERVED_NUMERIC] +
"'.");
1097 m_rgNumFiles.Add(
DATA_TYPE.OBSERVED_NUMERIC, npyObsNum);
1098 m_rgNumData.Add(
DATA_TYPE.OBSERVED_NUMERIC,
new List<
float[]>());
1100 m_nRows = npyObsNum.
Rows;
1102 int nNumObsFields = m_schema.Data.ObservedNumExplicitCount;
1103 if (nNumObsFields != m_rgNumFiles[
DATA_TYPE.OBSERVED_NUMERIC].Fields && nNumObsFields != m_rgNumFiles[
DATA_TYPE.OBSERVED_NUMERIC].Fields - 1)
1104 throw new Exception(
"The number of observed numeric fields in the schema does not match the number of fields in the observed numeric data file.");
1106 nLen = nBatchSize * m_nHistoricalSteps * nNumObsFields;
1107 m_rgBatchBuffers.Add(OUTPUT_TYPE.HISTORICAL_NUMERIC,
new float[nLen]);
1109 nLen = nBatchSize * m_nFutureSteps * 1;
1110 m_rgBatchBuffers.Add(OUTPUT_TYPE.TARGET,
new float[nLen]);
1112 if (m_bOutputTargetHistorical)
1115 nLen = nBatchSize * m_nHistoricalSteps * 1;
1116 m_rgBatchBuffers.Add(OUTPUT_TYPE.HISTORICAL_TARGET,
new float[nLen]);
1119 if (File.Exists(m_rgstrFiles[
DATA_TYPE.OBSERVED_CATEGORICAL]))
1123 m_rgCatFiles.Add(
DATA_TYPE.OBSERVED_CATEGORICAL, npyObsCat);
1124 m_rgCatData.Add(
DATA_TYPE.OBSERVED_CATEGORICAL,
new List<
long[]>());
1127 nLen = nBatchSize * m_nHistoricalSteps * m_rgNumFiles[
DATA_TYPE.OBSERVED_CATEGORICAL].Fields;
1128 m_rgBatchBuffers.Add(OUTPUT_TYPE.HISTORICAL_CATEGORICAL,
new float[nLen]);
1131 if (File.Exists(m_rgstrFiles[
DATA_TYPE.KNOWN_NUMERIC]))
1135 m_rgNumFiles.Add(
DATA_TYPE.KNOWN_NUMERIC, npyKnownNum);
1136 m_rgNumData.Add(
DATA_TYPE.KNOWN_NUMERIC,
new List<
float[]>());
1140 nLen = nBatchSize * m_nHistoricalSteps * (m_rgNumFiles[
DATA_TYPE.OBSERVED_NUMERIC].Fields + m_rgNumFiles[
DATA_TYPE.KNOWN_NUMERIC].Fields);
1141 m_rgBatchBuffers[OUTPUT_TYPE.HISTORICAL_NUMERIC] =
new float[nLen];
1143 nLen = nBatchSize * m_nFutureSteps * m_rgNumFiles[
DATA_TYPE.KNOWN_NUMERIC].Fields;
1144 m_rgBatchBuffers.Add(OUTPUT_TYPE.FUTURE_NUMERIC,
new float[nLen]);
1147 if (File.Exists(m_rgstrFiles[
DATA_TYPE.KNOWN_CATEGORICAL]))
1151 m_rgCatFiles.Add(
DATA_TYPE.KNOWN_CATEGORICAL, npyKnownCat);
1152 m_rgCatData.Add(
DATA_TYPE.KNOWN_CATEGORICAL,
new List<
long[]>());
1155 nLen = nBatchSize * m_nHistoricalSteps * m_rgCatFiles[
DATA_TYPE.KNOWN_CATEGORICAL].Fields;
1156 m_rgBatchBuffers.Add(OUTPUT_TYPE.HISTORICAL_CATEGORICAL,
new float[nLen]);
1157 nLen = nBatchSize * m_nFutureSteps * m_rgCatFiles[
DATA_TYPE.KNOWN_CATEGORICAL].Fields;
1158 m_rgBatchBuffers.Add(OUTPUT_TYPE.FUTURE_CATEGORICAL,
new float[nLen]);
1161 if (File.Exists(m_rgstrFiles[
DATA_TYPE.STATIC_NUMERIC]))
1165 m_rgNumFiles.Add(
DATA_TYPE.STATIC_NUMERIC, npyStatNum);
1166 m_rgNumData.Add(
DATA_TYPE.STATIC_NUMERIC,
new List<
float[]>());
1169 nLen = nBatchSize * m_rgNumFiles[
DATA_TYPE.STATIC_NUMERIC].Fields;
1170 m_rgBatchBuffers.Add(OUTPUT_TYPE.STATIC_NUMERIC,
new float[nLen]);
1173 if (File.Exists(m_rgstrFiles[
DATA_TYPE.STATIC_CATEGORICAL]))
1177 m_rgCatFiles.Add(
DATA_TYPE.STATIC_CATEGORICAL, npyStatCat);
1178 m_rgCatData.Add(
DATA_TYPE.STATIC_CATEGORICAL,
new List<
long[]>());
1181 nLen = nBatchSize * m_rgCatFiles[
DATA_TYPE.STATIC_CATEGORICAL].Fields;
1182 m_rgBatchBuffers.Add(OUTPUT_TYPE.STATIC_CATEGORICAL,
new float[nLen]);
1186 public override void Close()
1198 m_rgCatFiles.Clear();
1199 m_rgNumFiles.Clear();
1200 m_rgCatData.Clear();
1201 m_rgNumData.Clear();
1202 m_rgBatchBuffers.Clear();
1203 m_rgBatchSync.Clear();
1207 private int getMaxRowIdx(
int nBatchSize)
1209 int nFields = m_rgFields[
DATA_TYPE.SYNC];
1210 int nCount = nBatchSize;
1212 for (
int i=m_rgCatData[
DATA_TYPE.SYNC].Count-1; i>=0; i--)
1214 nCount -= m_rgCatData[
DATA_TYPE.SYNC][i].Length / nFields;
1222 public bool Load(
int nRowIdx, out
bool bGoodData)
1226 if (nRowIdx >= m_nRows)
1229 int nStartIdx = m_schema.Lookups[0][nRowIdx].ValidRangeStartIndex;
1230 int nEndIdx = m_schema.Lookups[0][nRowIdx].ValidRangeEndIndex;
1231 int nFields = m_rgFields[
DATA_TYPE.SYNC];
1232 if (nStartIdx < 0 || nEndIdx < 0 || (nEndIdx - nStartIdx) < (m_nHistoricalSteps + m_nFutureSteps))
1235 Dictionary<DATA_TYPE, long[]> cat =
new Dictionary<DATA_TYPE, long[]>();
1238 int nStartIdx1 = (kvp.Key ==
DATA_TYPE.STATIC_CATEGORICAL) ? 0 : nStartIdx;
1239 int nEndIdx1 = (kvp.Key ==
DATA_TYPE.STATIC_CATEGORICAL) ? 0 : nEndIdx;
1240 long[] rgBuffer =
null;
1241 rgBuffer = kvp.Value.LoadRow(rgBuffer, nRowIdx, nStartIdx1, (nEndIdx1 - nStartIdx1) + 1);
1242 cat.Add(kvp.Key, rgBuffer);
1243 if (rgBuffer ==
null)
1247 Dictionary<DATA_TYPE, float[]> num =
new Dictionary<DATA_TYPE, float[]>();
1250 int nStartIdx1 = (kvp.Key ==
DATA_TYPE.STATIC_NUMERIC) ? 0 : nStartIdx;
1251 int nEndIdx1 = (kvp.Key ==
DATA_TYPE.STATIC_NUMERIC) ? 0 : nEndIdx;
1252 float[] rgBuffer =
null;
1253 rgBuffer = kvp.Value.LoadRow(rgBuffer, nRowIdx, nStartIdx1, (nEndIdx1 - nStartIdx1) + 1);
1254 num.Add(kvp.Key, rgBuffer);
1255 if (rgBuffer ==
null)
1259 foreach (KeyValuePair<
DATA_TYPE,
long[]> kvp
in cat)
1261 m_rgCatData[kvp.Key].Add(kvp.Value);
1264 foreach (KeyValuePair<
DATA_TYPE,
float[]> kvp
in num)
1266 m_rgNumData[kvp.Key].Add(kvp.Value);
1269 m_validRanges.Add(m_schema.Lookups[0][nRowIdx]);
1276 public override bool Add(DataNpy<T> data,
int nMaxLoad)
1278 bool bRefreshed =
false;
1282 foreach (KeyValuePair<
DATA_TYPE, List<
float[]>> kv
in data.m_rgNumData)
1284 if (!m_rgNumData.ContainsKey(kv.Key))
1285 m_rgNumData.Add(kv.Key,
new List<
float[]>());
1287 m_rgNumData[kv.Key].AddRange(kv.Value);
1288 data.m_rgNumData[kv.Key].Clear();
1290 while (m_rgNumData[kv.Key].Count > nMaxLoad)
1292 m_rgNumData[kv.Key].RemoveAt(0);
1297 foreach (KeyValuePair<
DATA_TYPE, List<
long[]>> kv
in data.m_rgCatData)
1299 if (!m_rgCatData.ContainsKey(kv.Key))
1300 m_rgCatData.Add(kv.Key,
new List<
long[]>());
1302 m_rgCatData[kv.Key].AddRange(kv.Value);
1303 data.m_rgCatData[kv.Key].Clear();
1305 while (m_rgCatData[kv.Key].Count > nMaxLoad)
1307 m_rgCatData[kv.Key].RemoveAt(0);
1311 foreach (KeyValuePair<DATA_TYPE, int> kv
in data.m_rgFields)
1313 if (!m_rgFields.ContainsKey(kv.Key))
1314 m_rgFields.Add(kv.Key, kv.Value);
1317 foreach (KeyValuePair<OUTPUT_TYPE,
long[]> kv
in data.m_rgBatchSync)
1319 m_rgBatchSync.Add(kv.Key, kv.Value);
1321 data.m_rgBatchSync.Clear();
1323 foreach (KeyValuePair<OUTPUT_TYPE,
float[]> kv
in data.m_rgBatchBuffers)
1325 m_rgBatchBuffers.Add(kv.Key, kv.Value);
1327 data.m_rgBatchBuffers.Clear();
1329 m_validRanges.Add(data.m_validRanges);
1330 data.m_validRanges.Clear();
1332 m_schema = data.m_schema;
1333 m_nBatchSize = data.m_nBatchSize;
1334 m_nMaxRowIdx = getMaxRowIdx(m_nBatchSize);
1335 m_nRows = m_rgCatData[
DATA_TYPE.SYNC].Count;
1336 m_nTargetFieldIdx = data.m_nTargetFieldIdx;
1337 int nFields = m_rgFields[
DATA_TYPE.SYNC];
1338 m_nTotalSize = m_rgCatData[
DATA_TYPE.SYNC].Sum(p => p.Length) / (m_nHistoricalSteps + m_nFutureSteps) * nFields;
1344 public override int[] GetShape(OUTPUT_TYPE ot)
1350 case OUTPUT_TYPE.STATIC_NUMERIC:
1351 if (m_rgFields.ContainsKey(
DATA_TYPE.STATIC_NUMERIC))
1352 return new int[] { m_nBatchSize, m_rgFields[
DATA_TYPE.STATIC_NUMERIC] };
1355 case OUTPUT_TYPE.STATIC_CATEGORICAL:
1356 if (m_rgFields.ContainsKey(
DATA_TYPE.STATIC_CATEGORICAL))
1357 return new int[] { m_nBatchSize, m_rgFields[
DATA_TYPE.STATIC_CATEGORICAL] };
1360 case OUTPUT_TYPE.HISTORICAL_NUMERIC:
1362 if (m_rgFields.ContainsKey(
DATA_TYPE.OBSERVED_NUMERIC))
1363 nFields += m_schema.Data.ObservedNumExplicitCount;
1364 if (m_rgFields.ContainsKey(
DATA_TYPE.KNOWN_NUMERIC))
1365 nFields += m_rgFields[
DATA_TYPE.KNOWN_NUMERIC];
1367 return new int[] { m_nBatchSize, m_nHistoricalSteps, nFields };
1370 case OUTPUT_TYPE.HISTORICAL_CATEGORICAL:
1372 if (m_rgFields.ContainsKey(
DATA_TYPE.OBSERVED_CATEGORICAL))
1373 nFields += m_rgFields[
DATA_TYPE.OBSERVED_CATEGORICAL];
1374 if (m_rgFields.ContainsKey(
DATA_TYPE.KNOWN_CATEGORICAL))
1375 nFields += m_rgFields[
DATA_TYPE.KNOWN_CATEGORICAL];
1377 return new int[] { m_nBatchSize, m_nHistoricalSteps, nFields };
1380 case OUTPUT_TYPE.FUTURE_NUMERIC:
1381 if (m_rgFields.ContainsKey(
DATA_TYPE.KNOWN_NUMERIC))
1382 return new int[] { m_nBatchSize, m_nFutureSteps, m_rgFields[
DATA_TYPE.KNOWN_NUMERIC] };
1385 case OUTPUT_TYPE.FUTURE_CATEGORICAL:
1386 if (m_rgFields.ContainsKey(
DATA_TYPE.KNOWN_CATEGORICAL))
1387 return new int[] { m_nBatchSize, m_nFutureSteps, m_rgFields[
DATA_TYPE.KNOWN_CATEGORICAL] };
1390 case OUTPUT_TYPE.TARGET:
1391 return new int[] { m_nBatchSize, m_nFutureSteps, 1 };
1393 case OUTPUT_TYPE.HISTORICAL_TARGET:
1394 return new int[] { m_nBatchSize, m_nHistoricalSteps, 1 };
1397 throw new Exception(
"Unknown output type '" + ot.ToString() +
"'!");
1403 private void stepNext()
1407 m_nRowIdx = m_random.Next(m_validRanges.Count);
1409 int nValidRangeCount = m_validRanges[m_nRowIdx].ValidRangeCount;
1411 while (nRetry < 5 && nValidRangeCount < (m_nHistoricalSteps + m_nFutureSteps))
1413 m_nRowIdx = m_random.Next(m_validRanges.Count);
1414 nValidRangeCount = m_validRanges[m_nRowIdx].ValidRangeCount;
1419 throw new Exception(
"Could not find a row with more than " + (m_nHistoricalSteps + m_nFutureSteps).ToString() +
" valid ranges!");
1421 m_nColIdx = m_random.Next(nValidRangeCount - (m_nHistoricalSteps + m_nFutureSteps));
1426 int nValidRangeCount = m_validRanges[m_nRowIdx].ValidRangeCount;
1427 if (m_nColIdx + m_nHistoricalSteps + m_nFutureSteps > nValidRangeCount)
1430 if (m_nRowIdx >= m_nMaxRowIdx)
1438 private float[] getBatch(OUTPUT_TYPE ot)
1440 if (!m_rgBatchBuffers.ContainsKey(ot))
1443 return m_rgBatchBuffers[ot];
1446 private bool loadSyncBatch(
int nIdx,
long[] rg,
int nStartIdx,
int nCount)
1451 int nStartIdx1 = m_nColIdx + nStartIdx;
1452 int nFields = m_rgFields[
DATA_TYPE.SYNC];
1453 long[] rgSrc = m_rgCatData[
DATA_TYPE.SYNC][m_nRowIdx];
1455 if (nStartIdx1 * nFields + nCount * nFields > rgSrc.Length)
1458 Array.Copy(rgSrc, nStartIdx1 * nFields, rg, nIdx * nCount * nFields, nCount * nFields);
1463 private void loadStaticCatBatch(
int nIdx,
float[] rg,
DATA_TYPE dt)
1468 int nFields = m_rgFields[dt];
1469 long[] rgSrc = m_rgCatData[dt][m_nRowIdx];
1471 Array.Copy(rgSrc, 0, rg, nIdx * nFields, nFields);
1474 private void loadStaticNumBatch(
int nIdx,
float[] rg,
DATA_TYPE dt)
1479 int nFields = m_rgFields[dt];
1480 float[] rgSrc = m_rgNumData[dt][m_nRowIdx];
1482 Array.Copy(rgSrc, 0, rg, nIdx * nFields, nFields);
1485 private void loadCatBatch(
int nIdx,
float[] rg,
int nStartIdx,
int nCount,
DATA_TYPE dt)
1490 int nStartIdx1 = m_nColIdx + nStartIdx;
1491 int nFields = m_rgFields[dt];
1492 long[] rgSrc = m_rgCatData[dt][m_nRowIdx];
1493 Array.Copy(rgSrc, nStartIdx1 * nFields, rg, nIdx * nCount * nFields, nCount * nFields);
1496 private void loadCatBatch(
int nIdx,
float[] rg,
int nStartIdx,
int nCount,
DATA_TYPE dt1,
DATA_TYPE dt2)
1501 int nStartIdx1 = m_nColIdx + nStartIdx;
1502 int nFields1 = (m_rgFields.ContainsKey(dt1)) ? m_rgFields[dt1] : 0;
1503 long[] rgSrc1 = (m_rgFields.ContainsKey(dt1)) ? m_rgCatData[dt1][m_nRowIdx] :
null;
1504 int nFields2 = (m_rgFields.ContainsKey(dt2)) ? m_rgFields[dt2] : 0;
1505 long[] rgSrc2 = (m_rgFields.ContainsKey(dt2)) ? m_rgCatData[dt2][m_nRowIdx] :
null;
1506 int nFields = nFields1 + nFields2;
1508 for (
int j = nStartIdx1; j < nStartIdx1 + nCount; j++)
1510 for (
int k = 0; k < nFields1; k++)
1512 int nSrcIdx = j * nFields1 + k;
1513 int nDstIdx = nIdx * nCount * nFields + (j - nStartIdx1) * nFields + k;
1514 rg[nDstIdx] = rgSrc1[nSrcIdx];
1516 for (
int k = 0; k < nFields2; k++)
1518 int nSrcIdx = j * nFields2 + k;
1519 int nDstIdx = nIdx * nCount * nFields + (j - nStartIdx1) * nFields + k + nFields1;
1520 rg[nDstIdx] = rgSrc2[nSrcIdx];
1525 private void loadNumBatch(
int nIdx,
float[] rg,
int nStartIdx,
int nCount,
DATA_TYPE dt)
1530 int nStartIdx1 = m_nColIdx + nStartIdx;
1531 int nFields = m_rgFields[dt];
1532 float[] rgSrc = m_rgNumData[dt][m_nRowIdx];
1533 Array.Copy(rgSrc, nStartIdx1 * nFields, rg, nIdx * nCount * nFields, nCount * nFields);
1536 private void loadNumBatch(
int nIdx,
float[] rg,
int nStartIdx,
int nCount,
int nFieldIdx,
DATA_TYPE dt)
1541 int nStartIdx1 = m_nColIdx + nStartIdx;
1542 int nFields = m_rgFields[dt];
1543 float[] rgSrc = m_rgNumData[dt][m_nRowIdx];
1545 for (
int i = 0; i < nCount; i++)
1547 int nSrcIdx = nStartIdx1 * nFields + i * nFields + nFieldIdx;
1548 int nDstIdx = nIdx * nCount + i;
1550 rg[nDstIdx] = rgSrc[nSrcIdx];
1557 return m_rgFields[dt];
1559 return m_schema.Data.ObservedNumExplicitCount;
1562 private void loadNumBatch(
int nIdx,
float[] rg,
int nStartIdx,
int nCount,
DATA_TYPE dt1,
DATA_TYPE dt2)
1567 int nStartIdx1 = m_nColIdx + nStartIdx;
1568 int nFields1Explicit = m_rgFields.ContainsKey(dt1) ? getNumFields(dt1) : 0;
1569 int nFields1 = (m_rgFields.ContainsKey(dt1)) ? m_rgFields[dt1] : 0;
1570 float[] rgSrc1 = (m_rgFields.ContainsKey(dt1)) ? m_rgNumData[dt1][m_nRowIdx] :
null;
1571 int nFields2 = (m_rgFields.ContainsKey(dt2)) ? m_rgFields[dt2] : 0;
1572 float[] rgSrc2 = (m_rgFields.ContainsKey(dt2)) ? m_rgNumData[dt2][m_nRowIdx] :
null;
1573 int nFields = nFields1Explicit + nFields2;
1575 for (
int j = nStartIdx1; j < nStartIdx1 + nCount; j++)
1577 int nDstIdx = nIdx * nCount * nFields + (j - nStartIdx1) * nFields;
1578 int nDstIdx1 = nDstIdx;
1580 for (
int k = 0; k < nFields1; k++)
1582 int nSrcIdx = j * nFields1 + k;
1584 if (m_schema.Data.IsObservedNum(k))
1586 rg[nDstIdx1] = rgSrc1[nSrcIdx];
1591 for (
int k = 0; k < nFields2; k++)
1593 int nSrcIdx = j * nFields2 + k;
1594 nDstIdx = nIdx * nCount * nFields + (j - nStartIdx1) * nFields + nFields1Explicit + k;
1595 rg[nDstIdx] = rgSrc2[nSrcIdx];
1600 public override int[,] LoadBatch(
int nBatchSize,
BlobCollection<T> col,
bool bEnableDebug,
string strDebugPath)
1604 long[] rgHistSync = m_rgBatchSync[OUTPUT_TYPE.HISTORICAL_SYNC];
1605 long[] rgFutSync = m_rgBatchSync[OUTPUT_TYPE.FUTURE_SYNC];
1606 float[] rgStatCat = getBatch(OUTPUT_TYPE.STATIC_CATEGORICAL);
1607 float[] rgStatNum = getBatch(OUTPUT_TYPE.STATIC_NUMERIC);
1608 float[] rgHistCat = getBatch(OUTPUT_TYPE.HISTORICAL_CATEGORICAL);
1609 float[] rgHistNum = getBatch(OUTPUT_TYPE.HISTORICAL_NUMERIC);
1610 float[] rgFutCat = getBatch(OUTPUT_TYPE.FUTURE_CATEGORICAL);
1611 float[] rgFutNum = getBatch(OUTPUT_TYPE.FUTURE_NUMERIC);
1612 float[] rgTarget = getBatch(OUTPUT_TYPE.TARGET);
1613 float[] rgHistTarget = getBatch(OUTPUT_TYPE.HISTORICAL_TARGET);
1615 for (
int i = 0; i < nBatchSize; i++)
1617 if (loadSyncBatch(i, rgHistSync, 0, m_nHistoricalSteps) &&
1618 loadSyncBatch(i, rgFutSync, m_nHistoricalSteps, m_nFutureSteps))
1620 loadStaticCatBatch(i, rgStatCat,
DATA_TYPE.STATIC_CATEGORICAL);
1621 loadStaticNumBatch(i, rgStatNum,
DATA_TYPE.STATIC_NUMERIC);
1623 loadCatBatch(i, rgHistCat, 0, m_nHistoricalSteps,
DATA_TYPE.OBSERVED_CATEGORICAL,
DATA_TYPE.KNOWN_CATEGORICAL);
1624 loadNumBatch(i, rgHistNum, 0, m_nHistoricalSteps,
DATA_TYPE.OBSERVED_NUMERIC,
DATA_TYPE.KNOWN_NUMERIC);
1626 loadCatBatch(i, rgFutCat, m_nHistoricalSteps, m_nFutureSteps,
DATA_TYPE.KNOWN_CATEGORICAL);
1627 loadNumBatch(i, rgFutNum, m_nHistoricalSteps, m_nFutureSteps,
DATA_TYPE.KNOWN_NUMERIC);
1629 loadNumBatch(i, rgHistTarget, 0, m_nHistoricalSteps, m_nTargetFieldIdx,
DATA_TYPE.OBSERVED_NUMERIC);
1630 loadNumBatch(i, rgTarget, m_nHistoricalSteps, m_nFutureSteps, m_nTargetFieldIdx,
DATA_TYPE.OBSERVED_NUMERIC);
1636 if (rgStatNum !=
null)
1639 if (rgStatCat !=
null)
1642 if (rgHistNum !=
null)
1645 if (rgHistCat !=
null)
1648 if (rgFutNum !=
null)
1651 if (rgFutCat !=
null)
1654 if (rgTarget !=
null)
1657 if (rgHistTarget !=
null)
1662 if (Directory.Exists(strDebugPath))
// Double-buffered performance lists: one fills while the other is selected from (see Add's load-index swap).
1720 BatchPerf[] m_rgBatchPerf =
new BatchPerf[2];
1721 int m_nSelectIdx = 0;
// Loss-based selection is applied every m_nSelectFrequency calls (see Select's modulo check).
1723 int m_nSelectFrequency = 1;
1724 int m_nSelectCount = 0;
// Fraction of the worst-performing items considered for re-selection.
1726 double m_dfPctTopSelectionPct = 0.25;
1727 bool m_bActive =
false;
// BatchPerfSet constructor — creates the two ranking buffers and records the
// selection parameters.
// NOTE(review): fused line 1730 (and 1735-1737) were dropped by the extraction;
// 1730 presumably assigned the 'rand' parameter to m_random (m_random is used by
// Select below) — TODO confirm against the original source.
// rand                - random number generator used later by Select.
// dfPctTopSelectionPct- fraction of worst cases eligible for selection.
// nMax                - capacity passed to each BatchPerf buffer.
// nSelectFrequency    - act on every n-th Select call.
1729 public BatchPerfSet(Random rand,
double dfPctTopSelectionPct,
int nMax,
int nSelectFrequency)
1731 m_rgBatchPerf[0] =
new BatchPerf(nMax, dfPctTopSelectionPct);
1732 m_rgBatchPerf[1] =
new BatchPerf(nMax, dfPctTopSelectionPct);
1733 m_nSelectFrequency = nSelectFrequency;
1734 m_dfPctTopSelectionPct = dfPctTopSelectionPct;
// Adds the per-item loss values (e.Data) with their [item,time] indexes (rg) into
// the buffer currently loading (m_nLoadIdx). When that buffer reports full (its
// Add returns true), the other buffer is cleared — apparently swapping load/select
// roles.
// NOTE(review): fused lines 1741, 1743, 1745-1749 and 1751+ were dropped by the
// extraction; they presumably flip m_nLoadIdx/m_nSelectIdx, set m_bActive, and
// supply this method's bool return — TODO confirm against the original source.
1738 public bool Add(
LossArgs e,
int[,] rg)
1740 if (m_rgBatchPerf[m_nLoadIdx].Add(e, rg))
1742 if (m_nLoadIdx == 0)
1744 m_rgBatchPerf[1].Clear();
// Dropped 'else' branch context: buffer 0 is cleared when m_nLoadIdx != 0.
1750 m_rgBatchPerf[0].Clear();
// Returns whether the performance-based (worst-case) selection is active
// (m_bActive is set once enough loss data has been collected — see Add).
1765 public bool IsActive
1767 get {
return m_bActive; }
// Selects a worst-case [item,time] index pair from the currently-selectable buffer,
// but only on every m_nSelectFrequency-th call (throttled by m_nSelectCount).
// NOTE(review): fused lines 1771-1772 and 1775+ were dropped by the extraction;
// they presumably increment m_nSelectCount and return false on non-selecting
// calls — TODO confirm against the original source.
1770 public bool Select(ref
int? nIdx1, ref
int? nIdx2)
1773 if (m_nSelectCount % m_nSelectFrequency == 0)
1774 return m_rgBatchPerf[m_nSelectIdx].Select(m_random, m_dfPctTopSelectionPct, ref nIdx1, ref nIdx2);
// Fields of BatchPerf (class header dropped by the extraction).
// Countdown of selections remaining before the item list must be re-sorted.
1783 int m_nLastSortCount;
// Fraction of the (sorted, worst-first) items eligible for selection.
1784 double m_dfTopSelectionPct;
// (loss, item-index, time-index) tuples accumulated from Add; ordered worst-first
// after Sort.
1785 List<Tuple<float, int, int>> m_rgPerformanceItems =
new List<Tuple<float, int, int>>();
// BatchPerf constructor — pre-sizes the performance list to nMax+1 (Add trims back
// to nMax after each insert) and seeds the re-sort countdown.
// NOTE(review): fused line 1791 was dropped by the extraction; it presumably
// assigned nMax to m_nMax (m_nMax is read by Add and Select) — TODO confirm
// against the original source.
1787 public BatchPerf(
int nMax,
double dfPctTopSelectionPct)
1789 m_rgPerformanceItems =
new List<Tuple<float, int, int>>(nMax + 1);
1790 m_dfTopSelectionPct = dfPctTopSelectionPct;
1792 m_nLastSortCount = (int)(nMax * dfPctTopSelectionPct);
// Appends one (loss, item-index, time-index) tuple per batch entry in e.Data,
// skipping entries whose index pair contains -1 (unset), and trims the list back
// to m_nMax by dropping the oldest entry.
// NOTE(review): fused lines 1796, 1798, 1800, 1802-1803, 1805-1806, 1808 and
// 1810+ were dropped by the extraction; 1802 is presumably a 'continue;' for the
// -1 guard, and the tail presumably sets bAtMax when trimming and returns it —
// TODO confirm against the original source.
1795 public bool Add(
LossArgs e,
int[,] rg)
1797 bool bAtMax =
false;
1799 for (
int i = 0; i < e.
Data.Length; i++)
// Skip entries whose [item,time] indexes were never filled in.
1801 if (rg[i,0] == -1 || rg[i, 1] == -1)
1804 m_rgPerformanceItems.Add(
new Tuple<float, int, int>(e.
Data[i], rg[i,0], rg[i,1]));
// Keep at most m_nMax items; oldest entry is evicted first (FIFO).
1807 if (m_rgPerformanceItems.Count > m_nMax)
1809 m_rgPerformanceItems.RemoveAt(0);
// Body of Clear() (method header dropped by the extraction) — discards all
// accumulated performance items.
1819 m_rgPerformanceItems.Clear();
// Body of the sort routine (method header dropped by the extraction) — orders the
// items worst-first (descending loss, Item1) and resets the countdown of
// selections allowed before the next re-sort.
1824 m_rgPerformanceItems = m_rgPerformanceItems.OrderByDescending(p => p.Item1).ToList();
1825 m_nLastSortCount = (int)(m_nMax * m_dfTopSelectionPct);
// Picks a random entry from the worst dfPct fraction of the (sorted) items and
// returns its item/time indexes via nIdx1/nIdx2. No selection occurs until the
// list is full (Count >= m_nMax); the list is re-sorted when m_nLastSortCount
// runs out.
// NOTE(review): fused lines 1829, 1831-1832, 1834-1835, 1838 and 1841+ were
// dropped by the extraction; 1831-1832 presumably 'return false;', 1834-1835
// presumably call the sort routine, and the tail presumably decrements
// m_nLastSortCount and returns true — TODO confirm against the original source.
1828 public bool Select(Random rand,
double dfPct, ref
int? nIdx1, ref
int? nIdx2)
1830 if (m_rgPerformanceItems.Count < m_nMax)
1833 if (m_nLastSortCount <= 0)
// Restrict the draw to the worst dfPct fraction of the sorted list.
1836 int nCount = (int)(m_rgPerformanceItems.Count * dfPct);
1837 int nIdx = rand.Next(nCount);
1839 nIdx1 = m_rgPerformanceItems[nIdx].Item2;
1840 nIdx2 = m_rgPerformanceItems[nIdx].Item3;
1845#pragma warning restore 1591
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manual cancel event.
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
CancelEvent()
The CancelEvent constructor.
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Log(string strSrc)
The Log constructor.
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
The LossArgs contains the loss values for a given batch.
float[] Data
Specifies the loss values for a given batch.
Specifies a key-value pair of properties.
void SetProperty(string strName, string strVal)
Sets a property in the property set to a value if it exists, otherwise it adds the new property.
The SettingsCaffe defines the settings used by the MyCaffe CaffeControl.
int DbLoadLimit
Get/set the image database load limit.
DB_LOAD_METHOD DbLoadMethod
Get/set the image database loading method.
The SimpleTemporalDatumCollection manages a collection of SimpleTemporalDatum objects.
The SimpleTemporalDatum is used to return temporal data
float[] Data
Get/set the data which is of length (Channels * Width * Height).
The Utility class provides general utility functions.
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of generics.
int ID
Get/set the database ID of the item.
The DatasetDescriptor class describes a dataset which contains both a training data source and testin...
SourceDescriptor TrainingSource
Get/set the training data source.
SourceDescriptor TestingSource
Get/set the testing data source.
The SourceDescriptor class contains all information describing a data source.
TemporalDescriptor TemporalDescriptor
Get/set the temporal descriptor (if any).
List< ValueStreamDescriptor > ValueStreamDescriptors
Returns the value stream descriptor.
The value stream descriptor describes a single value stream within a value item.
STREAM_CLASS_TYPE ClassType
Returns the value stream class type.
STREAM_VALUE_TYPE ValueType
Returns the value stream value type.
STREAM_CLASS_TYPE
Defines the stream class type.
STREAM_VALUE_TYPE
Defines the stream value type.
The BlobCollection contains a list of Blobs.
int Count
Returns the number of items in the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
The NumpyFile reads data from a numpy file in the base type specified.
void OpenRead(string strFile)
Open the numpy file for reading, and read in the header information.
int Rows
Returns the number of rows.
int Fields
Returns the number of fields per column item.
[Temporal Database] The MyCaffeTemporalDatabase provides an enhanced in-memory temporal database used...
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
Phase m_phase
Specifies the Phase under which the Layer is run.
LayerParameter.LayerType m_type
Specifies the Layer type.
LayerParameter layer_param
Returns the LayerParameter for this Layer.
The LossLayer provides an interface for Layer's that take two blobs as input – usually (1) prediction...
EventHandler< LossArgs > OnLoss
Specifies the loss event called on each learning cycle.
The DataTemporalLayer implements the data layer used to load the temporal data into the model.
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the top (output) blobs.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Not implemented - data Layers do not perform backward.
DataTemporalLayer(CudaDnn< T > cuda, Log log, LayerParameter p, CancelEvent evtCancel, IXDatabaseBase db)
The constructor.
override void dispose()
Releases all GPU and host resources used by the Layer.
override int MinTopBlobs
Returns the exact number of required top (output) Blobs: static_numeric, static_categorical,...
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
override int? MaxTopBlobs
Returns the exact number of required top (output) Blobs: static_numeric, static_categorical,...
override void ConnectLoss(LossLayer< T > layer)
Connect the loss layer to the data layer so that we can rank the data values.
override int ExactNumBottomBlobs
The data layer has no bottom blobs.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Forward computation
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
The RawData class is the base class for all raw data types.
RawData(uint? nSeed, bool bOutputTargetHistorical)
The constructor.
int m_nBatchSize
Specifies the batch size.
Data< T > m_data
Specifies the base data object used to store data blocks loaded from disk or database.
bool m_bOutputTargetHistorical
Specifies to output the target historical data.
virtual int[,] LoadBatch(Phase phase, int nBatchSize, BlobCollection< T > col, bool bEnableDebug=false, string strDebugPath=null)
Loads a batch of data items into the BlobCollection.
virtual int GetTotalSize()
Returns the total size of the data.
Random m_random
Specifies the random number generator used to shuffle the data.
virtual bool LoadData(Phase phase, string strPath, bool bShuffleData, int nBatchSize, int nHistoricalSteps, int nFutureSteps, double dfPctMaxLoad, int nDripRefreshRateInSec, uint nChunkCount, Log log, CancelEvent evtCancel)
Loads all data values for the phase specified.
Random Random
Specifies the random number generator used.
virtual void loadDataFunction(object obj)
The virtual load data function override by the derived class to load the data in the background.
virtual void Add(LossArgs e, int[,] rgIdx)
Adds the selected indexes along with the loss data (used by the BatchPerfSet to select worst cases).
virtual int[] GetShape(DataNpy< T >.OUTPUT_TYPE ot)
Returns the shape of a given output type.
The RawFileData object is used to load raw NPY file data.
override bool LoadData(Phase phase, string strPath, bool bShuffleData, int nBatchSize, int nHistoricalSteps, int nFutureSteps, double dfPctMaxLoad, int nDripRefreshRateInSec, uint nChunkCount, Log log, CancelEvent evtCancel)
Loads all data values for the phase specified.
RawFileData(uint? nSeed, bool bOutputTargetHistorical)
The constructor.
override void loadDataFunction(object obj)
The virtual load data function override by the derived class to load the data in the background.
void VerifyFiles(Phase phase, string strPath)
Verify that the data files exist.
The RawSqlData class loads data from a database.
override int[] GetShape(DataNpy< T >.OUTPUT_TYPE ot)
Return the shape of the OUTPUT_TYPE.
RawSqlData(uint? nSeed, bool bOutputTargetHistorical, IXTemporalDatabaseBase db, Log log)
The constructor.
override int GetTotalSize()
Return the total number of blocks available in the current phase.
override bool LoadData(Phase phase, string strDataset, bool bShuffleData, int nBatchSize, int nHistoricalSteps, int nFutureSteps, double dfPctMaxLoad, int nDripRefreshRateInSec, uint nChunkCount, Log log, CancelEvent evtCancel)
Loads all data values for the phase specified.
override int[,] LoadBatch(Phase phase, int nBatchSize, BlobCollection< T > col, bool bEnableDebug=false, string strDebugPath=null)
Load a batch of data to feed into the network.
override void Add(LossArgs e, int[,] rgIdx)
Add the loss data for the batch into the performance data later used to select the worst cases.
Specifies the base parameter for all layers.
DataTemporalParameter data_temporal_param
Returns the parameter set when initialized with LayerType.DATA_TEMPORAL
LayerType
Specifies the layer type.
Specifies the parameters for the DataTemporalLayer (used in TFT models).
bool shuffle_data
Specifies to randomly select from the data (default = true).
SOURCE_TYPE
Defines the type of source data.
bool enable_debug_output
Optionally, specifies to output debug information (slower) on each pass.
string debug_output_path
Specifies the debug output path where debug images are placed when enable_debug_output = true.
virtual uint batch_size
Specifies the batch size of the data.
int drip_refresh_rate_in_sec
Specifies rate the drip refresh occurs in seconds (default = 0, disabled).
uint num_historical_steps
Specifies the number of historical steps
uint num_future_steps
Specifies the number of future steps
double max_load_percent
Specifies the maximum percent of data rows to load (default = 1.0 = 100%).
Phase? forced_phase
Optionally, specifies the phase to use when loading data.
string source
Specifies the data source.
uint chunk_count
Specifies the number of items to load per cycle when background loading (default = 1024).
bool output_target_historical
Optionally, specifies to output a top containing the target historical data.
uint? seed
Specifies the random seed used to shuffle the data. When not specified, the default seed is used.
SOURCE_TYPE source_type
Specifies the type of source data.
The IXDatabaseBase interface defines the general interface to the in-memory database.
The IXTemporalDatabaseBase interface defines the general interface to the in-memory temporal database...
The descriptors namespace contains all descriptor used to describe various items stored within the da...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
DB_ITEM_SELECTION_METHOD
Defines the item (e.g., image or temporal item) selection method.
DB_LOAD_METHOD
Defines how to load the items into the in-memory database.
Phase
Defines the Phase under which to run a Net.
DB_LABEL_SELECTION_METHOD
Defines the label selection method.
DATA_TYPE
Defines the gym data type.
The MyCaffe.common namespace contains common MyCaffe classes.
BLOB_TYPE
Defines the type of data held by a given Blob.
@ TARGET
The blob contains target data.
The MyCaffe.db.temporal namespace contains all classes used to create the MyCaffeTemporalDatabase in-...
The MyCaffe.layers.tft namespace contains all TFT related layers.
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...