3using System.Collections.Generic;
7using System.Runtime.InteropServices;
9using System.Threading.Tasks;
19 FileStream m_fs =
null;
20 BinaryReader m_br =
null;
28 int m_nFieldCount = 1;
29 Tuple<int, int> m_count;
30 Stopwatch m_sw =
new Stopwatch();
76 get {
return m_dataType; }
84 get {
return m_rgShape; }
92 get {
return m_nRows; }
100 get {
return m_nColumns; }
108 get {
return m_nFieldCount; }
119 for (
int i=0; i<m_rgShape.Length; i++)
121 nCount *= m_rgShape[i];
135 m_fs = File.OpenRead(strFile);
136 m_br =
new BinaryReader(m_fs);
138 BinaryReader br = m_br;
140 byte[] rgMagic =
new byte[6];
141 for (
int i = 0; i < rgMagic.Length; i++)
143 rgMagic[i] = br.ReadByte();
146 if (rgMagic[0] != 0x93 || rgMagic[1] != 0x4E || rgMagic[2] != 0x55 || rgMagic[3] != 0x4D || rgMagic[4] != 0x50 || rgMagic[5] != 0x59)
147 throw new Exception(
"The file is not a valid Numpy file!");
149 byte bMajor = br.ReadByte();
150 byte bMinor = br.ReadByte();
152 if (bMajor != 1 || bMinor != 0)
153 throw new Exception(
"The file is not a valid Numpy file!");
155 byte bHeaderLen1 = br.ReadByte();
156 byte bHeaderLen2 = br.ReadByte();
157 int nHeaderLen = bHeaderLen2 << 8 | bHeaderLen1;
159 byte[] rgHeader =
new byte[nHeaderLen];
160 for (
int i = 0; i < rgHeader.Length; i++)
162 rgHeader[i] = br.ReadByte();
164 string strHeader = Encoding.ASCII.GetString(rgHeader);
167 m_count = parseHeaderEx(strHeader, out bFortranOrder, out m_rgShape, out m_dataType, out m_nDataTypeSize);
170 throw new Exception(
"Currently the fortran ordering is not supported");
174 m_nRows = m_rgShape[0];
175 m_nColumns = (m_rgShape.Length == 1) ? 1 : m_rgShape[1];
177 for (
int i=0; i<m_rgShape.Length; i++)
179 m_nCount *= m_rgShape[i];
182 m_nFieldCount *= m_rgShape[i];
185 m_nHeaderSize = m_fs.Position;
188 private static Tuple<int, int> parseHeaderEx(
string str, out
bool bFortranOrder, out
int[] rgShape, out Type dataType, out
int nDataTypeSize,
int nMax =
int.MaxValue)
192 List<int> rgShape1 =
new List<int>();
193 str = str.Trim(
'{',
'}',
' ',
'\n',
',');
195 dataType = typeof(
object);
198 string strShape =
null;
199 string strTarget =
"'shape':";
200 int nPos = str.IndexOf(strTarget);
203 strShape = str.Substring(nPos + strTarget.Length);
204 str = str.Substring(0, nPos);
206 nPos = strShape.IndexOf(
')');
207 str += strShape.Substring(nPos + 1);
208 str = str.Trim(
',',
' ');
210 strShape = strShape.Substring(0, nPos);
211 strShape = strShape.Trim(
' ',
'(',
')');
212 string[] rgShapeStr = strShape.Split(
',');
214 for (
int i = 0; i < rgShapeStr.Count(); i++)
216 string strShape1 = rgShapeStr[i];
217 if (!
string.IsNullOrEmpty(strShape1))
219 int nShape =
int.Parse(strShape1);
221 if (i == 0 && nShape > nMax)
224 rgShape1.Add(nShape);
227 nNum = rgShape1[rgShape1.Count - 1];
229 nCount *= rgShape1[rgShape1.Count - 1];
234 rgShape = rgShape1.ToArray();
235 bFortranOrder =
false;
237 string[] rgstr = str.Split(
',');
238 foreach (
string str1
in rgstr)
240 string[] rgstrKeyVal = str1.Split(
':');
241 if (rgstrKeyVal.Length != 2)
242 throw new Exception(
"Invalid header key value, '" + str1 +
"'!");
244 string strKey = rgstrKeyVal[0].Trim(
'\'',
' ');
245 string strVal = rgstrKeyVal[1].Trim(
'\'',
' ');
251 dataType = typeof(
float);
252 else if (strVal ==
"<f8")
253 dataType = typeof(
double);
254 else if (strVal ==
"<i4")
255 dataType = typeof(
int);
256 else if (strVal ==
"<i8")
257 dataType = typeof(
long);
258 else if (strVal ==
"|b1")
259 dataType = typeof(
bool);
260 else if (strVal.StartsWith(
"<U"))
262 strVal = strVal.Substring(2);
263 nDataTypeSize =
int.Parse(strVal);
264 dataType = typeof(
string);
267 throw new Exception(
"Unsupported data type '" + strVal +
"', currenly only support '<f4'");
270 case "fortran_order":
271 bFortranOrder =
bool.Parse(strVal);
276 nDataTypeSize = Marshal.SizeOf(dataType);
278 return new Tuple<int, int>(nNum, nCount);
290 public T[]
LoadRow(T[] rgVal,
int nRowIdx,
int nStartIdx = 0,
int nColumnCount =
int.MaxValue)
293 throw new Exception(
"The file is not open!");
295 if (nRowIdx >= m_nRows)
296 throw new Exception(
"The row index '" + nRowIdx.ToString() +
"' is out of range!");
298 if (nStartIdx >= m_nColumns)
299 throw new Exception(
"The start index '" + nStartIdx.ToString() +
"' is out of range!");
301 if (nColumnCount ==
int.MaxValue)
302 nColumnCount = m_nColumns - nStartIdx;
303 else if (nStartIdx + nColumnCount > m_nColumns)
306 int nSize = nColumnCount * m_nFieldCount * m_nDataTypeSize;
310 long nOffset = m_nHeaderSize + (nRowIdx * m_nColumns + nStartIdx) * m_nFieldCount * m_nDataTypeSize;
311 m_fs.Seek(nOffset, SeekOrigin.Begin);
314 byte[] rgData = m_br.ReadBytes(nSize);
315 int nItemCount = nColumnCount * m_nFieldCount;
317 if (rgVal ==
null || rgVal.Length != nItemCount)
318 rgVal =
new T[nItemCount];
320 Buffer.BlockCopy(rgData, 0, rgVal, 0, rgData.Length);
332 public List<T[]>
Load(
int nStartIdx = 0,
int nCount =
int.MaxValue)
335 throw new Exception(
"The file is not open!");
337 if (nStartIdx >= m_rgShape[0])
338 throw new Exception(
"The start index '" + nStartIdx.ToString() +
"' is out of range!");
340 if (m_dataType == typeof(
string))
341 throw new Exception(
"String data types not supported.");
343 if (nStartIdx + nCount > m_rgShape[0])
344 nCount = m_rgShape[1] - nStartIdx;
346 List<T[]> rgVal =
new List<T[]>();
355 for (
int i = 1; i < m_rgShape.Length; i++)
357 nItems *= m_rgShape[i];
360 long lSeekPos = m_nHeaderSize + nStartIdx * nItems * m_nDataTypeSize;
361 m_fs.Seek(lSeekPos, SeekOrigin.Begin);
364 for (
int i = nStartIdx; i < nStartIdx + nCount; i++)
366 T[] rgItemT =
new T[m_count.Item2 * m_nDataTypeSize];
367 byte[] rgItem = m_br.ReadBytes(m_count.Item2 * m_nDataTypeSize);
368 Buffer.BlockCopy(rgItem, 0, rgItemT, 0, rgItem.Length);
372 if (m_sw.Elapsed.TotalMilliseconds > 1000)
374 double dfPct = (double)i / (nCount - nStartIdx);
375 string strOut =
"Loading '" + m_strFile +
"' at " + dfPct.ToString(
"P5") +
"...";
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
The NumpyFile reads data from a numpy file in the base type specified.
NumpyFile(Log log)
The constructor.
List< T[]> Load(int nStartIdx=0, int nCount=int.MaxValue)
Load the data from the numpy file, optionally specifying the starting row index and number of rows to...
T[] LoadRow(T[] rgVal, int nRowIdx, int nStartIdx=0, int nColumnCount=int.MaxValue)
Load a single row (or portion of a row) from the numpy file.
int Columns
Returns the number of items per row.
void OpenRead(string strFile)
Open the numpy file for reading, and read in the header information.
int Rows
Returns the number of rows.
void Dispose()
Dispose all resources and close the file.
int[] Shape
Return the data shape of the data in the Numpy file.
int TotalCount
Returns the total number of items * fields in the data.
int Fields
Returns the number of fields per column item.
void Close()
Close the file if open.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
DataType
Specifies the base datatype corresponding to the template type 'T'. Currently, only
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...