2using System.Collections.Generic;
25 T[] m_rgCpuData =
null;
26 bool m_bOwnData =
true;
27 bool m_bHalfSize =
false;
40 if (bUseHalfSize && typeof(T) != typeof(
float))
45 log.
WriteLine(
"Half size disabled for non 'float' basetypes!");
48 m_bHalfSize = bUseHalfSize;
57 m_lCapacity = lCapacity;
92 public void Allocate(
long lCount,
bool bUseHalfSize =
false)
94 if (bUseHalfSize && typeof(T) != typeof(
float))
99 m_log.WriteLine(
"Half size disabled for non 'float' basetypes!");
104 m_bHalfSize = bUseHalfSize;
105 m_hGpuData = m_cuda.
AllocMemory(lCount, m_bHalfSize);
106 m_lCapacity = lCount;
117 public void Allocate(T[] rg,
bool bUseHalfSize =
false)
119 if (bUseHalfSize && typeof(T) != typeof(
float))
121 bUseHalfSize =
false;
124 m_log.WriteLine(
"Half size disabled for non 'float' basetypes!");
129 m_bHalfSize = bUseHalfSize;
130 m_hGpuData = m_cuda.
AllocMemory(rg, 0, m_bHalfSize);
131 m_lCapacity = rg.Length;
132 m_lCount = rg.Length;
147 m_cuda.
set((
int)m_lCount, m_hGpuData, 0.0);
159 m_cuda.
set((
int)m_lCapacity, m_hGpuData, 0.0);
167 public void Set(
double dfVal)
172 m_cuda.
set((
int)m_lCount, m_hGpuData, dfVal);
184 m_cuda.
set((
int)m_lCount, m_hGpuData, fVal, nIdx);
195 T[] rg = m_cuda.
get((
int)m_lCount, m_hGpuData, nIdx);
210 return hDstHostBuffer;
213 if (m_lCapacity < src.m_lCount)
216 m_lCount = src.m_lCount;
223 m_cuda.
copy((
int)m_lCount, src.m_hGpuData, m_hGpuData);
227 if (hDstHostBuffer == 0)
234 if (lCount < m_lCount)
241 src.m_cuda.KernelCopy((
int)m_lCount, src.m_hGpuData, 0, m_cuda.
KernelHandle, m_hGpuData, 0, hDstHostBuffer, m_cuda.
KernelHandle);
245 return hDstHostBuffer;
267 get {
return m_bHalfSize; }
275 get {
return m_tag; }
276 set { m_tag = value; }
284 get {
return m_nDeviceID; }
292 get {
return m_lCapacity; }
300 get {
return m_lCount; }
301 set { m_lCount= value; }
309 get {
return m_hGpuData; }
323 m_lCapacity = lCount;
350 get {
return m_rgCpuData; }
359 public void SetData(T[] rgData,
int nCount,
bool bSetCount =
true)
362 nCount = rgData.Length;
364 if (nCount > m_lCapacity || m_hGpuData == 0)
370 m_cuda.
SetMemory(m_hGpuData, rgData, 0, nCount);
391 if (value.Length > m_lCapacity || m_hGpuData == 0)
398 m_lCount = value.Length;
413 if (lCount > m_lCapacity)
414 throw new ArgumentOutOfRangeException();
422 m_rgCpuData =
new List<T>().ToArray();
424 m_rgCpuData = m_cuda.
GetMemory(m_hGpuData, m_lCount);
452 m_lCapacity = rg.Length;
456 m_cuda.
SetMemory(m_hGpuData, rg, hStream);
459 m_lCount = rg.Length;
463 private void check_device()
469 m_log.CHECK_EQ(nDeviceId, m_nDeviceID,
"The current device DOES'NT match the device for which the memory was allocated!");
The Log class provides general output in text form.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
T[] GetMemory(long hMem, long lCount=-1)
Retrieves the GPU memory as an array of type 'T'
int GetDeviceID()
Returns the current device id set within Cuda.
void copy(int nCount, long hSrc, long hDst, int nSrcOffset=0, int nDstOffset=0, long hStream=-1, bool? bSrcHalfSizeOverride=null, bool? bDstHalfSizeOverride=null)
Copy data from one block of GPU memory to another.
void FreeMemory(long hMem)
Free previously allocated GPU memory.
long CreateMemoryPointer(long hData, long lOffset, long lCount)
Creates a memory pointer into an already existing block of GPU memory.
void SetMemory(long hMem, List< double > rg)
Copies a list of doubles into a block of already allocated GPU memory.
long AllocMemory(List< double > rg)
Allocate a block of GPU memory and copy a list of doubles to it.
long AllocHostBuffer(long lCapacity)
Allocate a block of host memory with a specified capacity.
long GetHostBufferCapacity(long hMem)
Returns the host memory capacity.
void FreeHostBuffer(long hMem)
Free previously allocated host memory.
void set(int nCount, long hHandle, double fVal, int nIdx=-1)
Set the values of GPU memory to a specified value of type double.
long KernelHandle
Returns the Low-Level kernel handle used for this instance. Each Low-Level kernel maintains its own s...
void FreeMemoryPointer(long hData)
Frees a memory pointer.
T[] get(int nCount, long hHandle, int nIdx=-1)
Queries the GPU memory by copying it into an array of type 'T'.
The SyncedMemory manages the low-level connection between the GPU and host memory.
bool HalfSize
Returns whether or not the sync memory is half-sized memory.
void Allocate(T[] rg, bool bUseHalfSize=false)
Allocate a number of items and copy the given array into the memory on the GPU.
void Zero()
Set all items in the GPU memory up to the Count, to zero.
void Set(double dfVal)
Set all items up to Count to a given value.
void async_gpu_push(long hStream, T[] rg)
Pushes the host data, previously set with set_cpu_data_locally(), to the GPU.
long gpu_data
Returns the handle to the GPU memory.
object Tag
Get/set data associated with the synced memory.
void set_gpu_data(long hData, long lCount, long lOffset)
Copies a new Memory Pointer within the low-level CudaDnnDLL where a Memory Pointer uses another alrea...
T[] mutable_cpu_data
Get/set the mutable host data.
long Copy(SyncedMemory< T > src, long hDstHostBuffer=0)
Copy another SyncedMemory into this one.
void Dispose()
Releases all GPU and host resources used.
SyncedMemory(CudaDnn< T > cuda, Log log, long lCapacity=0, object tag=null, bool bUseHalfSize=false)
The SyncedMemory constructor.
void SetData(T[] rgData, int nCount, bool bSetCount=true)
Sets the array of host data on the GPU and re-allocates the GPU memory if needed.
void Allocate(long lCount, bool bUseHalfSize=false)
Allocate a number of items in GPU memory and save the handle.
long Capacity
Returns the total amount of GPU memory held by this SyncedMemory.
SyncedMemory< T > Clone()
Copy this SyncedMemory.
T GetAt(int nIdx)
Return a value at a given index.
long mutable_gpu_data
Returns the mutable handle to GPU data.
T[] update_cpu_data(long lCount=-1)
Updates the host data by copying the GPU data to the host data.
T[] cpu_data
Returns the data on the CPU that has already been transferred from GPU to CPU.
void SetAt(int nIdx, T fVal)
Set a specific item at a given index to a value.
void ZeroAll()
Set all items in the GPU memory up to the Capacity, to zero.
void set_cpu_data_locally(T[] rg)
This does not place the data on the GPU - call async_gpu_push() to move it to the GPU.
int DeviceID
Returns the Device ID on which the GPU memory of this SyncedMemory was allocated.
long Count
Returns the current count of items in this SyncedMemory. Note, the Count may be less than the Capacit...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...