3using System.Collections.Generic;
8using System.Threading.Tasks;
28 public Bucket(
double fMin,
double fMax)
56 public int Add(
double fVal,
bool bForce =
false)
76 get {
return m_nCount; }
84 get {
return m_fSum / m_nCount; }
92 get {
return m_fMin; }
100 get {
return m_fMax; }
108 get {
return m_fMin + (m_fMax - m_fMin) / 2.0; }
116 get {
return m_tag; }
117 set { m_tag = value; }
126 return "[" + m_fMin.ToString(
"N10") +
"," + m_fMax.ToString(
"N10") +
"]-> " + m_nCount.ToString(
"N0");
133 public void Save(BinaryWriter bw)
148 int nCount = br.ReadInt32();
149 double dfSum = br.ReadDouble();
150 double dfMin = br.ReadDouble();
151 double dfMax = br.ReadDouble();
166 List<Bucket> m_rgBuckets =
new List<Bucket>();
167 bool m_bIsDataReal =
false;
196 double fRange = fMax - fMin;
197 double fStep = fRange / (double)nCount;
200 for (
int i = 0; i < nCount; i++)
202 double dfMax = (i == nCount - 1) ? fMax : Math.Round(fVal + fStep, 9);
204 m_rgBuckets.Add(
new Bucket(fVal, dfMax));
208 m_bIsDataReal =
true;
219 for (
int i = 0; i < rgVocab.Count; i++)
221 int nVal = rgVocab[i];
227 m_bIsDataReal =
false;
236 m_bIsDataReal = bIsReal;
244 get {
return m_bIsDataReal; }
245 set { m_bIsDataReal = value; }
253 get {
return m_rgBuckets.Count; }
263 get {
return m_rgBuckets[nIdx]; }
275 for (
int i = 0; i < m_rgBuckets.Count; i++)
277 if (nMax < m_rgBuckets[i].
Count)
279 nMax = m_rgBuckets[i].
Count;
287 return m_rgBuckets[nMaxIdx];
296 if (m_rgBuckets.Count == 0)
297 return new Tuple<double, double>(0, 0);
299 return new Tuple<double, double>(m_rgBuckets[0].Minimum, m_rgBuckets[m_rgBuckets.Count - 1].Maximum);
307 public int Add(
double fVal)
309 for (
int i = 0; i < m_rgBuckets.Count; i++)
311 int nVal = m_rgBuckets[i].Add(fVal);
315 if (nVal < 0 && i == 0)
317 m_rgBuckets[i].Add(fVal,
true);
321 if (nVal == 1 && i == m_rgBuckets.Count - 1)
323 m_rgBuckets[i].Add(fVal,
true);
328 throw new Exception(
"Failed to find a bucket!");
338 int nThreshold = (int)(b.
Count * (1.0 - dfPct));
340 List<Bucket> rgBuckets =
new List<Bucket>();
341 foreach (
Bucket b1
in m_rgBuckets)
343 if (b1.
Count > nThreshold)
347 m_rgBuckets = rgBuckets;
357 for (
int i = 0; i < m_rgBuckets.Count; i++)
359 if (m_rgBuckets[i].Contains(fVal) == 0)
360 return m_rgBuckets[i].Average;
363 return m_rgBuckets[m_rgBuckets.Count - 1].Average;
373 for (
int i = 0; i < m_rgBuckets.Count; i++)
375 if (m_rgBuckets[i].Contains(dfVal) == 0)
379 return m_rgBuckets.Count - 1;
388 public double GetValueAt(
int nIdx,
bool bUseMidPoint =
false)
391 return m_rgBuckets[nIdx].MidPoint;
393 return m_rgBuckets[nIdx].Average;
414 Stopwatch sw =
new Stopwatch();
419 if (!dfMin.HasValue || !dfMax.HasValue)
421 dfMin =
double.MaxValue;
422 dfMax = -
double.MaxValue;
424 for (
int i = 0; i < nChannels; i++)
426 for (
int j = 0; j < nCount; j++)
429 dfMin = Math.Min(dfMin.Value, dfVal);
430 dfMax = Math.Max(dfMax.Value, dfVal);
433 if (sw.Elapsed.TotalMilliseconds > 1000)
435 if (evtCancel !=
null && evtCancel.
WaitOne(0))
438 double dfPct = (double)nIdx / (
double)nItemCount;
439 log.
WriteLine(
"Calculating min/max at " + dfPct.ToString(
"P") +
"...");
452 for (
int i = 0; i < nChannels; i++)
454 for (
int j = 0; j < nCount; j++)
460 if (sw.Elapsed.TotalMilliseconds > 1000)
462 if (evtCancel !=
null && evtCancel.
WaitOne(0))
465 double dfPct = (double)nIdx / (
double)nItemCount;
466 log.
WriteLine(strName +
" at " + dfPct.ToString(
"P") +
"...");
488 int nItemCount = rgrgData.Count * rgrgData[0].Length;
489 Stopwatch sw =
new Stopwatch();
493 for (
int i = 0; i < rgrgData.Count; i++)
495 for (
int j = 0; j < rgrgData[i].Length; j++)
497 double dfVal = rgrgData[i][j];
499 rgrgData[i][j] = dfNewVal;
501 if (evtCancel !=
null && evtCancel.
WaitOne(0))
504 if (sw.Elapsed.TotalMilliseconds > 1000)
506 double dfPct = (double)nIdx / (
double)nItemCount;
507 log.
WriteLine(strName +
" at " + dfPct.ToString(
"P") +
"...");
524 using (MemoryStream ms =
new MemoryStream())
525 using (BinaryWriter bw =
new BinaryWriter(ms))
527 bw.Write(m_bIsDataReal);
528 bw.Write(m_rgBuckets.Count);
530 for (
int i = 0; i < m_rgBuckets.Count; i++)
532 m_rgBuckets[i].Save(bw);
546 using (MemoryStream ms =
new MemoryStream())
547 using (BinaryReader br =
new BinaryReader(ms))
549 bool bIsReal = br.ReadBoolean();
551 int nCount = br.ReadInt32();
553 for (
int i = 0; i < nCount; i++)
556 col.m_rgBuckets.Add(b);
572 foreach (
Bucket b
in m_rgBuckets)
597 str +=
"MINIMUM, MAXIMUM, COUNT" + Environment.NewLine;
599 foreach (
Bucket b
in m_rgBuckets)
601 double dfPct = (dfTotalCount == 0) ? 0 : (
double)b.
Count / dfTotalCount;
606 strDots = strDots.PadRight((
int)(nMaxDots * dfPct),
'*');
607 str +=
"[" + b.
Minimum.ToString(strFmt) +
", " + b.
Maximum.ToString(strFmt) +
"] " + strDots +
" (" + b.
Count.ToString(
"N0") +
")";
609 if (bIncludePercents)
610 str +=
" " + (dfPct * 100).ToString(
"N4") +
"%";
612 str += Environment.NewLine;
616 str += b.
Minimum.ToString() +
"," + b.
Maximum.ToString() +
"," + b.
Count.ToString() + Environment.NewLine;
620 str += dfPct.ToString(
"P");
627 str = str.TrimEnd(
',');
640 return m_rgBuckets.GetEnumerator();
647 IEnumerator IEnumerable.GetEnumerator()
649 return m_rgBuckets.GetEnumerator();
The BucketCollection contains a set of Buckets.
int FindIndex(double dfVal)
Finds the index of the Bucket containing the value.
BucketCollection(List< int > rgVocab)
The constructor.
double Translate(double fVal)
Finds the Bucket associated with the value and returns the Bucket's average value.
BucketCollection(bool bIsReal)
The constructor.
Tuple< double, double > GetRange()
Returns the numeric range that all buckets fall into.
static BucketCollection Bucketize(string strName, int nBucketCount, SimpleDatum sd, Log log, CancelEvent evtCancel, double? dfMin=null, double? dfMax=null)
The Bucketize method adds all values within a SimpleDatum to a new BucketCollection.
int TotalCount
Returns the total count across all buckets.
int Count
Returns the number of Buckets.
static BucketCollection FromByteStream(byte[] rg)
Converts a byte stream into a BucketCollection.
bool UnBucketize(string strName, List< double[]> rgrgData, Log log, CancelEvent evtCancel)
The UnBucketize method converts all Data received into their respective Bucket average values.
OUTPUT_FMT
Specifies the output format used when creating a distribution string.
byte[] ToByteStream()
Converts the BucketCollection into a byte stream.
int Add(double fVal)
Finds the correct Bucket and adds the value to it.
bool IsDataReal
Get/set whether or not the Buckets hold Real values.
void Reduce(double dfPct)
Reduces the buckets to only include those whose count is within 1.0 - dfPct of the maximum bucket count.
BucketCollection(double fMin, double fMax, int nCount)
The constructor.
string ToDistributionString(OUTPUT_FMT fmt=OUTPUT_FMT.NONE, int nMaxDots=30, string strFmt="0.00000", bool bIncludePercents=false)
Returns the distribution of buckets as a percentage for each time a bucket was hit.
Bucket GetBucketWithMaxCount()
Returns the bucket with the highest count.
IEnumerator< Bucket > GetEnumerator()
Returns the enumerator used in foreach loops.
double GetValueAt(int nIdx, bool bUseMidPoint=false)
Returns the average of the Bucket at a given index, or the Bucket's midpoint when bUseMidPoint is true.
The Bucket class contains the information describing a single range of values within a BucketCollection.
static Bucket Load(BinaryReader br)
Load a Bucket from a BinaryReader.
object Tag
Get/set a user specified tag.
Bucket(double fMin, double fMax)
The constructor.
int Contains(double fVal)
Tests to see if the Bucket range contains the value.
int Count
Returns the number of items added to the Bucket.
override string ToString()
Returns a string representation of the Bucket.
int Add(double fVal, bool bForce=false)
Attempts to add a new value to the Bucket.
double Maximum
Returns the bucket maximum value.
double Average
Returns the average value of all values added to the Bucket.
void Save(BinaryWriter bw)
Save the Bucket to a BinaryWriter.
double Minimum
Returns the bucket minimum value.
double MidPoint
Returns the bucket midpoint.
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manual cancel event.
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
The SimpleDatum class holds a data input within host memory.
int ItemCount
Returns the number of data items.
int Channels
Return the number of channels of the data.
double GetDataAtD(int nIdx)
Returns the item at a specified index in the double type.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
@ NONE
No training category specified.