2 using System.Collections.Generic;
7 using System.Threading.Tasks;
49 m_lCount = total_size(root_solver.net.learnable_parameters);
85 for (int i = 0; i < rgParam.Count; i++)
87 nSize += (long)rgParam[i].count();
153 m_cuda.SetDeviceID(nDeviceID);
225 for (int i = 0; i < rgBlobs.Count; i++)
227 int nCount = rgBlobs[i].count();
233 m_cuda.copy(nCount, rgBlobs[i].data.gpu_data, hBuffer, 0, (int)lOffset);
237 rgBlobs[i].data.set_gpu_data(hBuffer, nCount, lOffset);
240 case Op.replace_gpu_diff:
241 if (rgBlobs[i].DiffExists)
242 rgBlobs[i].diff.set_gpu_data(hBuffer, nCount, lOffset);
251 m_log.CHECK_EQ(lTotalSize - m_lExtra, (lOffset == 0) ? 1 : lOffset, "The total memory doesn't match.");
270 ManualResetEvent m_evtGradientsReady = new ManualResetEvent(false);
271 List<ManualResetEvent> m_rgGradientReady = new List<ManualResetEvent>();
283 : base(cuda, log, root_solver, nDeviceID)
285 m_rgGradientReady = rgGradientReadyEvents;
286 if (rgGradientReadyEvents != null && rgGradientReadyEvents.Count > 0)
287 m_evtGradientsReady = rgGradientReadyEvents[root_solver.solver_rank];
289 m_solver = root_solver;
323 m_cuda.SynchronizeStream();
324 m_evtGradientsReady.Set();
326 while (!WaitHandle.WaitAll(m_rgGradientReady.ToArray(), 250))
338 m_evtGradientsReady.Reset();
351 public void Run(List<int> rgGpus, int nIterationOverride = -1)
353 List<long> rghNccl = new List<long>();
354 Guid guid = Guid.NewGuid();
356 m_rgGradientReady = new List<ManualResetEvent>();
358 for (int i = 0; i < rgGpus.Count; i++)
362 m_rgGradientReady.Add(new ManualResetEvent(false));
365 m_cuda.NcclInitializeSingleProcess(rghNccl.ToArray());
366 m_hNccl = rghNccl[0];
367 m_evtGradientsReady = m_rgGradientReady[0];
369 List<WaitHandle> rgWaitAllInit = new List<WaitHandle>();
370 List<Worker<T>> rgWorkers = new List<common.Worker<T>>();
371 ManualResetEvent evtAllCreated = new ManualResetEvent(false);
373 for (int i = 1; i < rghNccl.Count; i++)
380 List<WaitHandle> rgWait = new List<WaitHandle>();
381 rgWait.AddRange(m_solver.CancelEvent.Handles);
385 int nWait = WaitHandle.WaitAny(rgWait.ToArray());
386 if (nWait < rgWait.Count - 2)
389 if (nWait == rgWait.Count - 2)
391 if (info.Error != null)
394 throw new Exception("Error starting the solver.");
398 rgWorkers.Add(worker);
402 while (!WaitHandle.WaitAll(rgWaitAllInit.ToArray(), 250))
408 m_cuda.SynchronizeDevice();
414 m_solver.Solve(nIterationOverride);
417 for (int i = 0; i < rgWorkers.Count; i++)
419 rgWorkers[i].StopInternalThread();
437 this.DoWork += Worker_DoWork;
450 Log log = new Log("Worker solver for DeviceID = " + e.DeviceID.ToString());
469 log.CHECK_EQ((int)solver.type, (int)rank0.type, "The solver types should be the same.");
480 m_cuda.SynchronizeDevice();
482 List<WaitHandle> rgWait = new List<WaitHandle>();
486 int nWait = WaitHandle.WaitAny(rgWait.ToArray());
487 if (nWait < rgWait.Count - 1)
496 solver.Step(nIterations);
499 catch (Exception excpt)
521 string m_strCudaPath;
526 int m_nIterationOverride;
527 ManualResetEvent m_evtInitialized = new ManualResetEvent(false);
528 ManualResetEvent m_evtStarted = new ManualResetEvent(false);
529 ManualResetEvent m_evtAllCreated = new ManualResetEvent(false);
530 AutoResetEvent m_evtError = new AutoResetEvent(false);
531 List<ManualResetEvent> m_rgGradientReadyEvents = null;
532 Exception m_error = null;
545 public SolverInfo(Solver<T> rank0, long hSrcKernel, long hSrcNccl, int nSolverRank, int nIterationOverride, string strCudaPath, List<ManualResetEvent> rgGradientReadyEvents, ManualResetEvent evtAllCreated)
547 m_strCudaPath = strCudaPath;
549 m_hSrcKernel = hSrcKernel;
550 m_hSrcNccl = hSrcNccl;
551 m_nSolverRank = nSolverRank;
552 m_nIterationOverride = nIterationOverride;
553 m_rgGradientReadyEvents = rgGradientReadyEvents;
554 m_evtAllCreated = evtAllCreated;
562 get { return m_rank0; }
570 get { return m_strCudaPath; }
578 get { return m_nIterationOverride; }
586 get { return m_hSrcKernel; }
594 get { return m_hSrcNccl; }
602 get { return m_nSolverRank; }
610 get { return m_evtInitialized; }
618 get { return m_evtStarted; }
626 get { return m_evtAllCreated; }
634 get { return m_rgGradientReadyEvents; }
642 get { return m_error; }
643 set { m_error = value; }
651 get { return m_evtError; }
WaitHandle[] Handles
Returns the internal wait handles of the CancelEvent.
bool WaitOne(int nMs=int.MaxValue)
Waits for the signal state to occur.
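As a sketch of how these handles are used, the Run() and Worker_DoWork fragments above combine CancelEvent.Handles with the worker's error and started events and then WaitAny over the lot, so a cancel request interrupts the wait. The helper below is a minimal, illustrative reconstruction of that pattern; the wrapper class, method name, and the assumption that the events were created elsewhere are not part of the library.

    using System;
    using System.Collections.Generic;
    using System.Threading;
    using MyCaffe.basecode;
    using MyCaffe.solvers;

    static class CancelExample
    {
        // Illustrative helper: wait for a cancel request, an error, or the started signal.
        public static bool WaitForWorkerStart<T>(Solver<T> solver, AutoResetEvent evtError, ManualResetEvent evtStarted)
        {
            List<WaitHandle> rgWait = new List<WaitHandle>();
            rgWait.AddRange(solver.CancelEvent.Handles); // cancel handles occupy the low indexes
            rgWait.Add(evtError);
            rgWait.Add(evtStarted);

            int nWait = WaitHandle.WaitAny(rgWait.ToArray());
            if (nWait < rgWait.Count - 2)     // one of the cancel handles fired
                return false;
            if (nWait == rgWait.Count - 2)    // the error event fired
                throw new Exception("Error starting the solver.");

            return true;                      // the started event fired
        }
    }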
The Log class provides general output in text form.
bool Enable
Enables/disables the Log. When disabled, the Log does not output any data.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
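For instance, Worker_DoWork above (line 469) uses CHECK_EQ to confirm that the worker's solver type matches the rank 0 solver's type. A minimal sketch of that usage, assuming both solvers already exist; the wrapper class and method name are illustrative only.

    using MyCaffe.basecode;
    using MyCaffe.solvers;

    static class LogExample
    {
        // Illustrative check that two solvers are of the same type, reported through a Log.
        public static void VerifySolverTypes<T>(Solver<T> solver, Solver<T> rank0)
        {
            Log log = new Log("Worker solver for DeviceID = " + solver.parameter.device_id.ToString());
            log.Enable = true;  // when disabled, the Log outputs nothing
            log.CHECK_EQ((int)solver.type, (int)rank0.type, "The solver types should be the same.");
        }
    }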
The ActionStateArgs are sent to the DoWork event when fired from the InternalThreadEntry.
object Arg
Returns the user supplied argument.
int DeviceID
Returns the Device ID of the device to use in the thread.
The BlobCollection contains a list of Blobs.
int Count
Returns the number of items in the collection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
The GPUParams contains the connection to the low-level Cuda, and the stream associated with this inst...
long m_hStream
The handle to the Cuda stream used for synchronization.
void Configure(Solver< T > solver)
Configure the GPU Params by copying the Solver training Net parameters into the data and diff buffers...
GPUParams(CudaDnn< T > cuda, Log log, Solver< T > root_solver, int nDeviceID)
The GPUParams constructor.
void apply_buffers(BlobCollection< T > rgBlobs, long hBuffer, long lTotalSize, Op op)
Transfer between the data/diff buffers and a collection of Blobs (e.g. the learnable parameters).
Log m_log
The Log used for output.
void SynchronizeStream()
Synchronize with the Cuda stream.
void Dispose()
Release all GPU and Host resources used.
CudaDnn< T > m_cuda
The instance of CudaDnn that provides the connection to Cuda.
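Tying the members above together: a GPUParams instance allocates the flat data/diff buffers sized from the root Solver's learnable parameters (lines 49 and 85-87), and Configure then calls apply_buffers to point a Solver's blobs at those shared buffers (lines 225-242). A minimal sketch of that flow, assuming cuda, log and the solvers are constructed elsewhere; the wrapper class and method name are illustrative.

    using MyCaffe.basecode;
    using MyCaffe.common;
    using MyCaffe.solvers;

    static class GpuParamsExample
    {
        public static GPUParams<T> ShareParameters<T>(CudaDnn<T> cuda, Log log,
            Solver<T> rootSolver, Solver<T> workerSolver, int nDeviceID)
        {
            // Allocate the shared data/diff buffers from the root solver's learnable parameters.
            GPUParams<T> p = new GPUParams<T>(cuda, log, rootSolver, nDeviceID);

            // Copy the worker solver's parameters into the buffers and re-point its blobs at them
            // (the replace_gpu_data / replace_gpu_diff ops of apply_buffers).
            p.Configure(workerSolver);

            p.SynchronizeStream();  // make sure the copies complete before training starts
            return p;
        }
    }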
The GradientsReadyArgs is sent to the Solver::OnGradientsReady event which fires at the end of each S...
The InternalThread manages an internal thread used for Parallel and data collection operations.
void StartInternalThread(CudaDnn< T > cuda, Log log, int nDeviceID=0, object arg=null, int nInitialDelay=0)
Starts running the internal thread function which then calls the DoWork event.
EventHandler< ActionStateArgs< T > > DoWork
The DoWork event is the working thread function.
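The Worker above is built on exactly this pattern: its constructor hooks DoWork (line 437) and Run() later starts each worker thread with a device ID and a SolverInfo as the user argument, stopping it with StopInternalThread (line 419). A minimal, illustrative subclass showing the same wiring; everything except the documented API is hypothetical.

    using MyCaffe.basecode;
    using MyCaffe.common;

    public class MyWorker<T> : InternalThread<T>
    {
        public MyWorker()
        {
            DoWork += MyWorker_DoWork;  // the DoWork event is the thread function
        }

        private void MyWorker_DoWork(object sender, ActionStateArgs<T> e)
        {
            // e.DeviceID selects the GPU for this thread; e.Arg carries the user supplied argument.
            Log log = new Log("Worker thread for DeviceID = " + e.DeviceID.ToString());
            object arg = e.Arg;  // in the multi-GPU case this is a SolverInfo<T>
            // ... per-device work goes here ...
        }
    }

Starting it then looks like worker.StartInternalThread(cuda, log, nDeviceID, arg), and StopInternalThread() shuts the thread down once training completes.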
The NCCL class manages the multi-GPU operations using the low-level NCCL functionality provided by th...
new void Dispose()
Release all GPU and Host resources used.
NCCL(CudaDnn< T > cuda, Log log, Solver< T > root_solver, int nDeviceID, long hNccl, List< ManualResetEvent > rgGradientReadyEvents)
The NCCL constructor.
void Run(List< int > rgGpus, int nIterationOverride=-1)
Run the root Solver and coordinate with all other Solver's participating in the multi-GPU training.
void Broadcast()
Broadcast the data to all other solvers participating in the multi-GPU session.
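In the fragments above, the root NCCL instance is created on the main GPU, Run() initializes the per-GPU NCCL handles, spawns one Worker per extra GPU and then calls Solve on the root Solver, while Broadcast pushes the initial weights out to the other solvers. A minimal sketch of driving training through an already constructed NCCL<T> instance; the wrapper class and method name are illustrative, and building the instance itself requires the cuda/log/solver plumbing shown in the constructor above.

    using System.Collections.Generic;
    using MyCaffe.common;

    static class NcclExample
    {
        public static void TrainMultiGpu<T>(NCCL<T> nccl, List<int> rgGpus, int nIterationOverride = -1)
        {
            try
            {
                // Run the root Solver and coordinate the other Solvers, one per GPU in rgGpus.
                nccl.Run(rgGpus, nIterationOverride);
            }
            finally
            {
                nccl.Dispose();  // release the GPU and host resources
            }
        }
    }

For example, TrainMultiGpu(nccl, new List<int> { 0, 1 }) would train across GPUs 0 and 1, with the default of -1 presumably leaving the iteration count to the SolverParameter.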
The Params contains the base parameters used in multi-GPU training.
long data
Returns the handle to the GPU memory containing the Net parameters.
Params(Solver< T > root_solver)
The Params constructor.
long m_lExtra
Size of the padding added to the memory buffers.
long count
Returns the size of the buffers (in items).
long m_lCount
Size of the buffers (in items).
long diff
Returns the handle to the GPU memory containing the Net gradients.
long m_hDiff
Handle to GPU memory containing the Net gradient.
long m_hData
Handle to GPU memory containing the Net parameters.
int m_nDeviceID
The Device ID.
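Lines 49 and 85-87 above show how m_lCount is computed: the counts of all learnable parameter blobs are summed, and m_lExtra pads the final allocation. A small, illustrative reconstruction of that accumulation over a BlobCollection; the wrapper class and method name are not part of the library.

    using MyCaffe.common;

    static class ParamsExample
    {
        // Sum the item counts of every blob in the collection (the basis of m_lCount).
        public static long TotalSize<T>(BlobCollection<T> rgParam)
        {
            long nSize = 0;

            for (int i = 0; i < rgParam.Count; i++)
                nSize += (long)rgParam[i].count();

            return nSize;
        }
    }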
The SolverInfo defines the user supplied arguments passed to each Worker.
int IterationOverride
Returns the training iteration override to use.
AutoResetEvent ErrorEvent
Returns the event that is set when an error occurs.
Exception Error
Returns the error (if any) that occurred when running the solver thread.
SolverInfo(Solver< T > rank0, long hSrcKernel, long hSrcNccl, int nSolverRank, int nIterationOverride, string strCudaPath, List< ManualResetEvent > rgGradientReadyEvents, ManualResetEvent evtAllCreated)
The SolverInfo constructor.
string CudaPath
Returns the file path to the low-level CudaDnnDll.DLL file to use. Note, when null or empty,...
ManualResetEvent StartedEvent
Returns the event that is set after the Worker has started running.
long KernelHandle
Returns a handle to the kernel where the NCCL for this Solver was created (typically this is the kern...
Solver< T > Rank0
Returns the rank 0 Solver that will run in the Worker.
List< ManualResetEvent > GradientReadyEvents
Returns the list of events, one per Solver, that are set after that Solver's gradients are ready.
long NcclHandle
Returns the handle to the NCCL instance for this Solver (typically this is created on the kernel that...
ManualResetEvent AllCreatedEvent
Returns the event that is set after all Workers have been created.
ManualResetEvent InitializedEvent
Returns the event that is set after the Worker has completed initializing.
int SolverRank
Returns the rank of this Solver.
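In Run() above, one SolverInfo is built per additional GPU and handed to a Worker as the user argument of StartInternalThread; the root then waits on the Initialized/Started/Error events before continuing. A minimal sketch of that hand-off, assuming the kernel and NCCL handles and the event list were created as in the fragments; the wrapper class and method name are illustrative.

    using System.Collections.Generic;
    using System.Threading;
    using MyCaffe.basecode;
    using MyCaffe.common;
    using MyCaffe.solvers;

    static class SolverInfoExample
    {
        public static void StartWorker<T>(Worker<T> worker, CudaDnn<T> cuda, Log log, Solver<T> rank0,
            int nDeviceID, long hSrcKernel, long hSrcNccl, int nSolverRank, int nIterationOverride,
            string strCudaPath, List<ManualResetEvent> rgGradientReady, ManualResetEvent evtAllCreated)
        {
            SolverInfo<T> info = new SolverInfo<T>(rank0, hSrcKernel, hSrcNccl, nSolverRank,
                nIterationOverride, strCudaPath, rgGradientReady, evtAllCreated);

            // The SolverInfo travels to Worker_DoWork as e.Arg.
            worker.StartInternalThread(cuda, log, nDeviceID, info);

            info.StartedEvent.WaitOne();  // block until the worker reports that it has started
        }
    }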
The Worker manages a non-root Solver, where each Worker operates on a different GPU.
Worker()
The Worker constructor.
The SolverParameter is a parameter for the solver, specifying the train and test networks.
int max_iter
The maximum number of iterations.
SolverParameter Clone()
Creates a new copy of the SolverParameter.
int device_id
The device id that will be used when run on the GPU.
SolverType type
Specifies the solver type.
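A worker typically clones the rank 0 SolverParameter and retargets it at its own GPU before creating its Solver, which is roughly what Worker_DoWork does. A minimal sketch using only the documented members; the wrapper class and method name are illustrative.

    using MyCaffe.param;
    using MyCaffe.solvers;

    static class SolverParameterExample
    {
        public static SolverParameter CloneForDevice<T>(Solver<T> rank0, int nDeviceID)
        {
            SolverParameter p = rank0.parameter.Clone();  // copy the train/test net settings
            p.device_id = nDeviceID;                      // run this copy on the worker's GPU
            return p;
        }
    }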
An interface for classes that perform optimization on Nets - this class serves as the base class for ...
void Dispose()
Discards the resources (GPU and Host) used by this Solver.
static SGDSolver< T > Create(CudaDnn< T > cuda, Log log, ProjectEx p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXDatabaseBase db, IXPersist< T > persist, int nSolverCount=1, int nSolverRank=0, Net< T > shareNet=null, onGetWorkspace getws=null, onSetWorkspace setws=null)
Create a new Solver based on the project containing the SolverParameter.
int iter
Returns the current training iteration.
SolverParameter.SolverType type
Returns the type of solver.
Net< T > net
Returns the main training Net.
int solver_count
Returns the solver count in a multi-GPU session.
CancelEvent CancelEvent
Returns the cancel event which when set cancels the current operation run by the Solver.
SolverParameter parameter
Returns the SolverParameter used.
bool Step(int nIters, TRAIN_STEP step=TRAIN_STEP.NONE, bool bZeroDiffs=true, bool bApplyUpdates=true, bool bDisableOutput=false, bool bDisableProgress=false, double? dfLossOverride=null, bool? bAllowSnapshot=null)
Steps a set of iterations through a training cycle.
EventHandler< GradientsReadyArgs > OnGradientsReady
The OnGradientsReady event fires after the gradients of a Solver are ready for distribution to other ...
int solver_rank
Returns this Solver's rank in a multi-GPU session.
IXDatabaseBase Database
Returns the in-memory MyCaffeDatabase used.
virtual void Solve(int nIterationOverride=-1, byte[] rgWeights=null, byte[] rgState=null, TRAIN_STEP step=TRAIN_STEP.NONE)
The main entry of the solver function. By default, iter will be zero. Pass in a non-zero iter number ...
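The multi-GPU exchange hangs off OnGradientsReady: each Step computes the local gradients and fires the event, and the GPUParams/NCCL code then signals its gradient-ready event and waits for the other solvers (lines 323-338) before the update is applied. A minimal sketch of wiring a handler and stepping, assuming the Solver was created elsewhere (for example via SGDSolver<T>.Create); the wrapper class, method name, and handler body are illustrative.

    using MyCaffe.common;
    using MyCaffe.solvers;

    static class SolverExample
    {
        public static void RunWithGradientHook<T>(Solver<T> solver, int nIterations)
        {
            solver.OnGradientsReady += (sender, args) =>
            {
                // Fires at the end of each step, once the local gradients are ready;
                // this is where the multi-GPU code synchronizes and reduces gradients.
            };

            solver.Step(nIterations);  // run a fixed number of training iterations
            // solver.Solve() would instead run the full schedule up to SolverParameter.max_iter.
        }
    }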
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
DEVINIT
Specifies the initialization flags used when initializing CUDA.
NCCL_REDUCTION_OP
Specifies the reduction operation to use with 'Nickel' NCCL.
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe.solvers namespace contains all solver classes, including the base Solver.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...