using System.Collections.Generic;
using System.Threading.Tasks;

List<double> m_rgRhoHistory = new List<double>();
public LBFGSSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabaseBase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0, Net<T> shareNet = null, onGetWorkspace getws = null, onSetWorkspace setws = null)
    : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank, shareNet, getws, setws)
{
    // Cache typed 0, 1 and -1 constants used by the GPU math calls below.
    m_tZero = (T)Convert.ChangeType(0, typeof(T));
    m_tOne = (T)Convert.ChangeType(1, typeof(T));
    m_tMinusOne = (T)Convert.ChangeType(-1, typeof(T));
    // ...
}
// dispose(): release the GPU-backed working blobs and the history collections.
if (m_blobGradients != null)
{
    m_blobGradients.Dispose();
    m_blobGradients = null;
}

if (m_blobGradientsPrev != null)
{
    m_blobGradientsPrev.Dispose();
    m_blobGradientsPrev = null;
}

if (m_blobDirection != null)
{
    m_blobDirection.Dispose();
    m_blobDirection = null;
}

if (m_colBlobHistoryY != null)
{
    m_colBlobHistoryY.Dispose();
    m_colBlobHistoryY = null;
}

if (m_colBlobHistoryS != null)
{
    m_colBlobHistoryS.Dispose();
    m_colBlobHistoryS = null;
}
for (int i = 0; i < net_params.Count; i++)
{
    if (m_net.params_lr[i] != 0)
        m_nN += net_params[i].count();
}
List<int> rgShape = new List<int>() { m_nN };

m_colBlobHistoryS.Clear(true);
m_colBlobHistoryY.Clear(true);
m_rgRhoHistory.Clear();

// ...
m_blobGradients.Name = "gradients";
// ...
m_blobGradientsPrev.Name = "gradients prev";
// ...
m_blobDirection.Name = "direction";

// ...
m_rgRhoHistory.Add(0);
catch (Exception excpt)
{
    // Reset all working state before re-throwing.
    m_colBlobHistoryS.Clear(true);
    m_colBlobHistoryY.Clear(true);
    m_rgRhoHistory.Clear();

    if (m_blobGradients != null)
    {
        m_blobGradients.Dispose();
        m_blobGradients = null;
    }

    if (m_blobGradientsPrev != null)
    {
        m_blobGradientsPrev.Dispose();
        m_blobGradientsPrev = null;
    }

    if (m_blobDirection != null)
    {
        m_blobDirection.Dispose();
        m_blobDirection = null;
    }

    // ...
}
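The fragments above, apparently from PreSolve, size and allocate the flat L-BFGS working vectors. Only parameters with a non-zero learning-rate multiplier take part, so a reading of the counting loop gives

    m_nN = \sum_{i \,:\, lr_i \neq 0} \mathrm{count}(net\_params_i)

and the gradients, previous-gradients and direction blobs (and, it appears, each history blob) are allocated with shape { m_nN }.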
for (int i = 0; i < m_net.learnable_parameters.Count; i++)
{
    m_net.learnable_parameters[i].SetDiff(0);
}
// Preserve the previous flat gradient in m_blobGradientsPrev, then pack the
// current per-parameter diffs into m_blobGradients at a running offset.
m_cuda.copy(m_nN, m_blobGradients.gpu_data, m_blobGradientsPrev.mutable_gpu_data);

for (int i = 0; i < net_params.Count; i++)
{
    if (m_net.params_lr[i] != 0)
    {
        m_cuda.copy(net_params[i].count(), net_params[i].gpu_diff, m_blobGradients.mutable_gpu_data, 0, nDstOffset);
        nDstOffset += net_params[i].count();
    }
}
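CollectGradients packs the per-blob diffs into one contiguous vector by copying each participating blob's diff at a running destination offset. A minimal CPU-side sketch of the same packing idea, using plain arrays instead of the CudaDnn copy calls (hypothetical helper, for illustration only):

using System;
using System.Collections.Generic;

static class FlattenExample
{
    // Pack per-parameter gradient arrays into one contiguous vector,
    // skipping parameters whose learning-rate multiplier is zero.
    public static double[] Flatten(IList<double[]> rgGrads, IList<double> rgLr)
    {
        int nTotal = 0;
        for (int i = 0; i < rgGrads.Count; i++)
        {
            if (rgLr[i] != 0)
                nTotal += rgGrads[i].Length;
        }

        double[] rgFlat = new double[nTotal];
        int nDstOffset = 0;

        for (int i = 0; i < rgGrads.Count; i++)
        {
            if (rgLr[i] == 0)
                continue;

            Array.Copy(rgGrads[i], 0, rgFlat, nDstOffset, rgGrads[i].Length);
            nDstOffset += rgGrads[i].Length;
        }

        return rgFlat;
    }
}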
m_cuda.axpby(m_nN, m_tOne, m_blobGradients.gpu_data, m_tMinusOne, m_blobGradientsPrev.mutable_gpu_data);
T fYs = m_cuda.dot(m_nN, m_blobDirection.gpu_data, m_blobGradientsPrev.gpu_data);
double dfYs = Utility.ConvertVal<T>(fYs);

// ...
m_cuda.copy(m_nN, m_blobDirection.gpu_data, m_colBlobHistoryS[m_nEnd].mutable_gpu_data);
m_cuda.copy(m_nN, m_blobGradientsPrev.gpu_data, m_colBlobHistoryY[m_nEnd].mutable_gpu_data);
m_rgRhoHistory[m_nEnd] = 1.0 / dfYs;
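Read against this fragment, UpdateHistory forms the standard L-BFGS curvature pair: assuming the usual axpby convention Y = a·X + b·Y, the call with coefficients +1 and -1 leaves the gradient difference in m_blobGradientsPrev, the stored s-vector is the search direction just applied, and rho is the reciprocal of their dot product:

    y_k = g_{k+1} - g_k
    s_k = d_k            (the stored step direction)
    \rho_k = 1 / (y_k^\top s_k)

which is exactly m_rgRhoHistory[m_nEnd] = 1.0 / dfYs above.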
T fh0 = m_cuda.dot(m_nN, m_colBlobHistoryY[m_nEnd].gpu_data, m_colBlobHistoryY[m_nEnd].gpu_data);
double dfH0 = Utility.ConvertVal<T>(fh0);

m_dfH0 = 1.0 / m_rgRhoHistory[m_nEnd] / dfH0;
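With dfYs = s_k^\top y_k from the previous step (so \rho_k = 1 / (s_k^\top y_k)) and dfH0 = y_k^\top y_k, the assignment above is the common scalar initial inverse-Hessian approximation:

    H_k^0 = (s_k^\top y_k) / (y_k^\top y_k) = 1 / (\rho_k \, y_k^\top y_k)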
private List<int> lbfgs_history_indices(int nStart, int nEnd, int nMax)
{
    List<int> rgIndices = Utility.Create<int>((nStart == 0) ? nEnd + 1 : nMax, 0);

    for (int i = nStart; i <= nEnd; i++)
    {
        // ...
    }

    for (int i = nStart; i < rgIndices.Count; i++)
    {
        // ...
    }

    for (int i = 0; i <= nEnd; i++)
    {
        // ...
    }
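lbfgs_history_indices appears to linearize the circular history buffer into oldest-to-newest order, with the non-wrapped case (nStart == 0) reducing to the plain range 0..nEnd. A hypothetical sketch of that linearization (illustrative only; the actual loop bodies are elided in the listing above):

using System.Collections.Generic;

static class HistoryIndexExample
{
    // List the occupied slots of a circular buffer from oldest to newest.
    // nStart/nEnd are the oldest and newest slot indices; nMax is the capacity.
    public static List<int> Linearize(int nStart, int nEnd, int nMax)
    {
        List<int> rgIndices = new List<int>();

        if (nStart <= nEnd)
        {
            for (int i = nStart; i <= nEnd; i++)
                rgIndices.Add(i);            // no wrap: one contiguous range
        }
        else
        {
            for (int i = nStart; i < nMax; i++)
                rgIndices.Add(i);            // older entries up to the end of the buffer
            for (int i = 0; i <= nEnd; i++)
                rgIndices.Add(i);            // newer entries wrapped to the front
        }

        return rgIndices;
    }
}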
List<double> rgAlpha = Utility.Create<double>(rgIndices.Count, 0);

for (int i = rgIndices.Count - 1; i >= 0; i--)
{
    int nIdx = rgIndices[i];

    T fAlpha = m_cuda.dot(m_nN, m_colBlobHistoryS[nIdx].gpu_data, m_blobDirection.gpu_data);
    rgAlpha[nIdx] = (double)Utility.ConvertVal<T>(fAlpha);
    rgAlpha[nIdx] *= m_rgRhoHistory[nIdx];
    // ...
}

// ...
for (int i = 0; i < rgIndices.Count; i++)
{
    int nIdx = rgIndices[i];

    T fBeta = m_cuda.dot(m_nN, m_colBlobHistoryY[nIdx].gpu_data, m_blobDirection.gpu_data);
    dfBeta = (double)Utility.ConvertVal<T>(fBeta);
    dfBeta *= m_rgRhoHistory[nIdx];

    m_cuda.axpy(m_nN, rgAlpha[nIdx] - dfBeta, m_colBlobHistoryS[nIdx].gpu_data, m_blobDirection.mutable_gpu_data);
}
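These two passes have the shape of the textbook L-BFGS two-loop recursion (Nocedal & Wright), which, starting from q = \nabla f(\theta_k), computes

    first loop (newest to oldest):  \alpha_i = \rho_i s_i^\top q,   q \leftarrow q - \alpha_i y_i
    scaling:                        r \leftarrow H_k^0 q
    second loop (oldest to newest): \beta_i = \rho_i y_i^\top r,   r \leftarrow r + (\alpha_i - \beta_i) s_i

The final axpy with coefficient rgAlpha[nIdx] - dfBeta matches the r update in the second loop; the q update and the H_k^0 scaling presumably sit in lines elided from this listing.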
for (int i = 0; i < net_params.Count; i++)
{
    int nCount = net_params[i].count();

    if (m_net.params_lr[i] != 0)
    {
        // ...
        T fLr = (T)Convert.ChangeType(m_net.params_lr[i], typeof(T));
        m_cuda.scale(nCount, fLr, m_blobDirection.gpu_data, net_params[i].mutable_gpu_diff, nOffset, 0);
        // ...
    }
}
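This fragment (apparently from UpdateNet) writes each participating parameter's slice of the flat direction vector back into that parameter's diff, scaled by the parameter's learning-rate multiplier, so the subsequent network update applies roughly

    \Delta\theta_i \propto lr_i \cdot d_i

per parameter slice d_i; the sign convention and any base_lr factor are handled in parts of the solver not shown here.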
m_nStart = state.start;

// ...
for (int i = 0; i < rgIndices.Count; i++)
{
    int nIdx = rgIndices[i];

    m_colBlobHistoryS[i].FromProto(state.history[nIdx]);
    m_colBlobHistoryY[i].FromProto(state.s_history[nIdx]);
}
state.start = m_nStart;

// ...
for (int i = 0; i < rgIndices.Count; i++)
{
    int nIdx = rgIndices[i];

    state.s_history.Add(m_colBlobHistoryS[nIdx].ToProto());
    state.history.Add(m_colBlobHistoryY[nIdx].ToProto());
}
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manual cancel event.
The Log class provides general output in text form.
void CHECK(bool b, string str)
Test a flag for true.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
The Utility class provides general utility functions.
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
The BlobCollection contains a list of Blobs.
void Dispose()
Release all resource used by the collection and its Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void SetDiff(double df)
Set all blob diff to the value specified.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
The Blob is the main holder of data that moves through the Layers of the Net.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
void FromProto(BlobProto bp, bool bReshape=true)
Create a new Blob from a given BlobProto.
BlobProto ToProto(bool bWriteDiff=false)
Writes the Blob to a new BlobProto.
string Name
Get/set the name of the Blob.
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Connects Layers together into a directed acyclic graph (DAG) specified by a NetParameter.
The SolverParameter is a parameter for the solver, specifying the train and test networks.
int lbgfs_corrections
Specifies the number of L-BFGS corrections used with the L-BFGS solver.
double base_lr
The base learning rate (default = 0.01).
SolverType type
Specifies the solver type.
The SolverState specifies the state of a given solver.
int end
Specifies the end used by L-BFGS.
BlobProto gradients
Gradients used with L-BFGS state.
List< double > rho_history
rho history used with L-BFGS state.
int iter
The current iteration.
List< BlobProto > history
The history for SGD solvers.
int start
Specifies the start used by L-BFGS.
int current_step
The current step for learning rate.
List< BlobProto > s_history
S history used with L-BFGS state.
BlobProto direction
Direction used with L-BFGS state.
Optimizes the parameters of a Net using L-BFGS. This implementation is based on minFunc,...
virtual void CollectGradients()
Collect the gradients from the network learnable parameters.
override void dispose()
Releases all resources (GPU and Host) used by the Solver.
virtual void UpdateNet()
Update the network.
virtual void ComputeStep()
Compute the step.
override double ApplyUpdate(int nIterationOverride=-1)
Apply the gradients to the network.
void PreSolve()
Runs the pre-solve which prepares the Solver to start solving.
virtual void UpdateHistory()
Update the history values with the gradients and direction.
virtual void ComputeInitialHessianApprox()
Compute the initial Hessian approximation.
LBFGSSolver(CudaDnn< T > cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabaseBase imgDb, IXPersist< T > persist, int nSolverCount=1, int nSolverRank=0, Net< T > shareNet=null, onGetWorkspace getws=null, onSetWorkspace setws=null)
The LBFGSSolver constructor.
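For example, a hedged construction sketch, assuming cuda, log, evtCancel, evtForceSnapshot, evtForceTest, imgDb and persist have already been created elsewhere (the SolverType.LBFGS member name and the values below are assumptions, not taken from this page, apart from the 0.01 base_lr default noted above):

// Sketch only: configure a SolverParameter for L-BFGS and build the solver.
SolverParameter solverParam = new SolverParameter();
solverParam.type = SolverParameter.SolverType.LBFGS;   // assumed enum member name
solverParam.lbgfs_corrections = 100;                    // number of (s, y) pairs to keep (assumed value)
solverParam.base_lr = 0.01;                             // base learning rate (page default)

LBFGSSolver<float> solver = new LBFGSSolver<float>(cuda, log, solverParam, evtCancel,
    evtForceSnapshot, evtForceTest, imgDb, persist);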
virtual void ComputeDirection()
Compute the direction.
override byte[] SnapshotSolverState()
Save the solver state.
override void RestoreSolverState(byte[] rgState)
Restore a previously saved solver state.
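Taken together these members implement one L-BFGS iteration in the usual form: the flattened gradient and the stored (s, y) pairs yield a search direction through the two-loop recursion, and the network parameters are then moved along it,

    d_k = -H_k \nabla f(\theta_k),    \theta_{k+1} = \theta_k + \alpha_k d_k,

where H_k is the implicit limited-memory inverse-Hessian approximation built from the most recent correction pairs and \alpha_k is derived from the learning-rate settings.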
An interface for classes that perform optimization on Nets - this class serves as the base class for ...
SolverParameter m_param
Specifies the SolverParameter that defines how the Solver operates.
CudaDnn< T > m_cuda
Specifies the instance of CudaDnn used by the Solver that provides a connection to Cuda.
bool? is_root_solver
Returns whether or not this is the root solver.
int m_nIter
Specifies the current iteration.
IXPersist< T > m_persist
Specifies the persistence object used to save weight and solver states.
Net< T > m_net
Specifies the training Net.
int m_nCurrentStep
Specifies the current step.
Log m_log
Specifies the Log for output.
The IXImageDatabaseBase interface defines the general interface to the in-memory image database.
The IXPersist interface is used by the CaffeControl to load and save weights.
The MyCaffe.basecode namespace contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
The MyCaffe.db.image namespace contains all image database related classes.
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe.solvers namespace contains all solver classes, including the base Solver.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-source project.