6using System.Collections.Generic;
22 bool m_bEnableTrace =
false;
24 string m_strTestImagesBin;
25 string m_strTestLabelsBin;
26 string m_strTrainImagesBin;
27 string m_strTrainLabelsBin;
39 public event EventHandler<ProgressArgs>
OnError;
55 m_bEnableTrace = bEnableTrace;
56 m_strDataPath = strDataPath;
64 get {
return m_strTestImagesBin; }
72 get {
return m_strTestLabelsBin; }
80 get {
return m_strTrainImagesBin; }
88 get {
return m_strTrainLabelsBin; }
96 get {
return m_nChannels; }
104 get {
return m_nHeight; }
112 get {
return m_nWidth; }
121 Trace.WriteLine(
"Unpacking the files");
122 m_strTestImagesBin = expandFile(m_strDataPath.TrimEnd(
'\\') +
"\\t10k-images-idx3-ubyte.gz");
123 m_strTestLabelsBin = expandFile(m_strDataPath.TrimEnd(
'\\') +
"\\t10k-labels-idx1-ubyte.gz");
124 m_strTrainImagesBin = expandFile(m_strDataPath.TrimEnd(
'\\') +
"\\train-images-idx3-ubyte.gz");
125 m_strTrainLabelsBin = expandFile(m_strDataPath.TrimEnd(
'\\') +
"\\train-labels-idx1-ubyte.gz");
128 private string expandFile(
string strFile)
130 string strDstFile = strFile +
".bin";
131 if (File.Exists(strDstFile))
134 FileInfo fi =
new FileInfo(strFile);
136 using (FileStream fs = fi.OpenRead())
138 using (FileStream fsBin = File.Create(strDstFile))
140 using (GZipStream decompStrm =
new GZipStream(fs, CompressionMode.Decompress))
142 decompStrm.CopyTo(fsBin);
155 public void ExtractImages(out List<Tuple<
byte[],
int>> rgTrainingData, out List<Tuple<
byte[],
int>> rgTestingData)
164 reportProgress(nIdx, nTotal,
"Creating MNIST images...");
166 rgTrainingData = loadFile(m_strTrainImagesBin, m_strTrainLabelsBin, m_strDataPath.TrimEnd(
'\\') +
"\\images_training");
167 rgTestingData = loadFile(m_strTestImagesBin, m_strTestLabelsBin, m_strDataPath.TrimEnd(
'\\') +
"\\images_testing");
169 catch (Exception excpt)
171 reportError(0, 0, excpt);
176 private List<Tuple<byte[], int>> loadFile(
string strImagesFile,
string strLabelsFile,
string strExportPath)
178 if (!Directory.Exists(strExportPath))
179 Directory.CreateDirectory(strExportPath);
181 Stopwatch sw =
new Stopwatch();
183 reportProgress(0, 0,
" loading " + strImagesFile +
"...");
187 List<Tuple<byte[], int>> rgData =
new List<Tuple<byte[], int>>();
195 if (magicImg != 2051)
196 throw new Exception(
"Incorrect image file magic.");
198 if (magicLbl != 2049)
199 throw new Exception(
"Incorrect label file magic.");
204 if (num_items != num_labels)
205 throw new Exception(
"The number of items must be equal to the number of labels!");
212 m_nHeight = (int)rows;
213 m_nWidth = (int)cols;
219 string strAction =
"loading";
221 reportProgress(0, (
int)num_items,
" " + strAction +
" a total of " + num_items.ToString() +
" items.");
222 reportProgress(0, (
int)num_items,
" (with rows: " + rows.ToString() +
", cols: " + cols.ToString() +
")");
226 for (
int i = 0; i < num_items; i++)
228 rgPixels = image_file.
ReadBytes((
int)(rows * cols));
231 rgData.Add(
new Tuple<
byte[],
int>(rgPixels, rgLabel[0]));
233 if (sw.Elapsed.TotalMilliseconds > 1000)
235 reportProgress(i, (
int)num_items,
" " + strAction +
" data...");
240 reportProgress((
int)num_items, (
int)num_items,
" " + strAction +
" completed.");
251 private void reportProgress(
int nIdx,
int nTotal,
string strMsg)
255 double dfPct = (nTotal == 0) ? 0 : (
double)nIdx / (double)nTotal;
256 Trace.WriteLine(
"(" + dfPct.ToString(
"P") +
") " + strMsg);
260 OnProgress(
this,
new ProgressArgs(
new ProgressInfo(nIdx, nTotal, strMsg)));
263 private void reportError(
int nIdx,
int nTotal, Exception err)
267 double dfPct = (nTotal == 0) ? 0 : (
double)nIdx / (double)nTotal;
268 Trace.WriteLine(
"(" + dfPct.ToString(
"P") +
") ERROR: " + err.Message);
272 OnError(
this,
new ProgressArgs(
new ProgressInfo(nIdx, nTotal,
"ERROR", err)));
The BinaryFile class is used to manage binary files used by the MNIST dataset creator.
UInt32 ReadUInt32()
Reads in a UINT32 and performs an endian swap.
byte[] ReadBytes(int nCount)
Reads bytes from the file.
virtual void Dispose(bool disposing)
Dispose if disposing, or ignore if already disposed.
The MnistDataLoader is used to extract the MNIST dataset to disk and load the data into the training p...
int Channels
Return the image channel count (should be 1 for black-and-white images).
void ExtractImages(out List< Tuple< byte[], int > > rgTrainingData, out List< Tuple< byte[], int > > rgTestingData)
Extract the images from the .bin files and save to disk.
void ExtractFiles(string strDstPath)
Extract the .gz files, expanding them to .bin files.
int Width
Return the image width.
MnistDataLoaderLite(string strDataPath, bool bEnableTrace=false)
The constructor.
string TrainLabelsBinFileName
Returns the train labels bin filename.
EventHandler< ProgressArgs > OnProgress
The OnProgress event fires during the creation process to show the progress.
string TrainImagesBinFileName
Returns the train images bin filename.
string TestLabelsBinFileName
Returns the test labels bin filename.
EventHandler< ProgressArgs > OnError
The OnError event fires when an error occurs.
int Height
Return the image height.
string TestImagesBinFileName
Returns the test images bin filename.
The MyCaffe.data namespace contains dataset creators used to create common testing datasets such as M...