using System.Collections.Generic;
using System.Threading.Tasks;

List<double> m_rgRhoHistory = new List<double>();
public LBFGSSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabaseBase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0, Net<T> shareNet = null, onGetWorkspace getws = null, onSetWorkspace setws = null)
    : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank, shareNet, getws, setws)
{
    // Cache typed 0, 1 and -1 constants used by the GPU math calls below.
    m_tZero = (T)Convert.ChangeType(0, typeof(T));
    m_tOne = (T)Convert.ChangeType(1, typeof(T));
    m_tMinusOne = (T)Convert.ChangeType(-1, typeof(T));
    // ...
}
// dispose(): release the GPU-backed working blobs and the history collections.
if (m_blobGradients != null)
{
    m_blobGradients.Dispose();
    m_blobGradients = null;
}

if (m_blobGradientsPrev != null)
{
    m_blobGradientsPrev.Dispose();
    m_blobGradientsPrev = null;
}

if (m_blobDirection != null)
{
    m_blobDirection.Dispose();
    m_blobDirection = null;
}

if (m_colBlobHistoryY != null)
{
    m_colBlobHistoryY.Dispose();
    m_colBlobHistoryY = null;
}

if (m_colBlobHistoryS != null)
{
    m_colBlobHistoryS.Dispose();
    m_colBlobHistoryS = null;
}
for (int i = 0; i < net_params.Count; i++)
{
    if (m_net.params_lr[i] != 0)
        m_nN += net_params[i].count();
}
List<int> rgShape = new List<int>() { m_nN };

m_colBlobHistoryS.Clear(true);
m_colBlobHistoryY.Clear(true);
m_rgRhoHistory.Clear();

// ...
m_blobGradients.Name = "gradients";
// ...
m_blobGradientsPrev.Name = "gradients prev";
// ...
m_blobDirection.Name = "direction";

// ...
m_rgRhoHistory.Add(0);
catch (Exception excpt)
{
    // Reset all working state before re-throwing.
    m_colBlobHistoryS.Clear(true);
    m_colBlobHistoryY.Clear(true);
    m_rgRhoHistory.Clear();

    if (m_blobGradients != null)
    {
        m_blobGradients.Dispose();
        m_blobGradients = null;
    }

    if (m_blobGradientsPrev != null)
    {
        m_blobGradientsPrev.Dispose();
        m_blobGradientsPrev = null;
    }

    if (m_blobDirection != null)
    {
        m_blobDirection.Dispose();
        m_blobDirection = null;
    }

    // ...
}
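The fragments above, apparently from PreSolve, size and allocate the flat L-BFGS working vectors. Only parameters with a non-zero learning-rate multiplier take part, so a reading of the counting loop gives

    m_nN = \sum_{i \,:\, lr_i \neq 0} \mathrm{count}(net\_params_i)

and the gradients, previous-gradients and direction blobs (and, it appears, each history blob) are allocated with shape { m_nN }.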
for (int i = 0; i < m_net.learnable_parameters.Count; i++)
{
    m_net.learnable_parameters[i].SetDiff(0);
}
// Preserve the previous flat gradient in m_blobGradientsPrev, then pack the
// current per-parameter diffs into m_blobGradients at a running offset.
m_cuda.copy(m_nN, m_blobGradients.gpu_data, m_blobGradientsPrev.mutable_gpu_data);

for (int i = 0; i < net_params.Count; i++)
{
    if (m_net.params_lr[i] != 0)
    {
        m_cuda.copy(net_params[i].count(), net_params[i].gpu_diff, m_blobGradients.mutable_gpu_data, 0, nDstOffset);
        nDstOffset += net_params[i].count();
    }
}
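CollectGradients packs the per-blob diffs into one contiguous vector by copying each participating blob's diff at a running destination offset. A minimal CPU-side sketch of the same packing idea, using plain arrays instead of the CudaDnn copy calls (hypothetical helper, for illustration only):

using System;
using System.Collections.Generic;

static class FlattenExample
{
    // Pack per-parameter gradient arrays into one contiguous vector,
    // skipping parameters whose learning-rate multiplier is zero.
    public static double[] Flatten(IList<double[]> rgGrads, IList<double> rgLr)
    {
        int nTotal = 0;
        for (int i = 0; i < rgGrads.Count; i++)
        {
            if (rgLr[i] != 0)
                nTotal += rgGrads[i].Length;
        }

        double[] rgFlat = new double[nTotal];
        int nDstOffset = 0;

        for (int i = 0; i < rgGrads.Count; i++)
        {
            if (rgLr[i] == 0)
                continue;

            Array.Copy(rgGrads[i], 0, rgFlat, nDstOffset, rgGrads[i].Length);
            nDstOffset += rgGrads[i].Length;
        }

        return rgFlat;
    }
}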
m_cuda.axpby(m_nN, m_tOne, m_blobGradients.gpu_data, m_tMinusOne, m_blobGradientsPrev.mutable_gpu_data);
T fYs = m_cuda.dot(m_nN, m_blobDirection.gpu_data, m_blobGradientsPrev.gpu_data);
double dfYs = Utility.ConvertVal<T>(fYs);

// ...
m_cuda.copy(m_nN, m_blobDirection.gpu_data, m_colBlobHistoryS[m_nEnd].mutable_gpu_data);
m_cuda.copy(m_nN, m_blobGradientsPrev.gpu_data, m_colBlobHistoryY[m_nEnd].mutable_gpu_data);
m_rgRhoHistory[m_nEnd] = 1.0 / dfYs;
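Read against this fragment, UpdateHistory forms the standard L-BFGS curvature pair: assuming the usual axpby convention Y = a·X + b·Y, the call with coefficients +1 and -1 leaves the gradient difference in m_blobGradientsPrev, the stored s-vector is the search direction just applied, and rho is the reciprocal of their dot product:

    y_k = g_{k+1} - g_k
    s_k = d_k            (the stored step direction)
    \rho_k = 1 / (y_k^\top s_k)

which is exactly m_rgRhoHistory[m_nEnd] = 1.0 / dfYs above.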
T fh0 = m_cuda.dot(m_nN, m_colBlobHistoryY[m_nEnd].gpu_data, m_colBlobHistoryY[m_nEnd].gpu_data);
double dfH0 = Utility.ConvertVal<T>(fh0);

m_dfH0 = 1.0 / m_rgRhoHistory[m_nEnd] / dfH0;
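With dfYs = s_k^\top y_k from the previous step (so \rho_k = 1 / (s_k^\top y_k)) and dfH0 = y_k^\top y_k, the assignment above is the common scalar initial inverse-Hessian approximation:

    H_k^0 = (s_k^\top y_k) / (y_k^\top y_k) = 1 / (\rho_k \, y_k^\top y_k)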
private List<int> lbfgs_history_indices(int nStart, int nEnd, int nMax)
{
    List<int> rgIndices = Utility.Create<int>((nStart == 0) ? nEnd + 1 : nMax, 0);

    for (int i = nStart; i <= nEnd; i++)
    {
        // ...
    }

    for (int i = nStart; i < rgIndices.Count; i++)
    {
        // ...
    }

    for (int i = 0; i <= nEnd; i++)
    {
        // ...
    }
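lbfgs_history_indices appears to linearize the circular history buffer into oldest-to-newest order, with the non-wrapped case (nStart == 0) reducing to the plain range 0..nEnd. A hypothetical sketch of that linearization (illustrative only; the actual loop bodies are elided in the listing above):

using System.Collections.Generic;

static class HistoryIndexExample
{
    // List the occupied slots of a circular buffer from oldest to newest.
    // nStart/nEnd are the oldest and newest slot indices; nMax is the capacity.
    public static List<int> Linearize(int nStart, int nEnd, int nMax)
    {
        List<int> rgIndices = new List<int>();

        if (nStart <= nEnd)
        {
            for (int i = nStart; i <= nEnd; i++)
                rgIndices.Add(i);            // no wrap: one contiguous range
        }
        else
        {
            for (int i = nStart; i < nMax; i++)
                rgIndices.Add(i);            // older entries up to the end of the buffer
            for (int i = 0; i <= nEnd; i++)
                rgIndices.Add(i);            // newer entries wrapped to the front
        }

        return rgIndices;
    }
}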
List<double> rgAlpha = Utility.Create<double>(rgIndices.Count, 0);

for (int i = rgIndices.Count - 1; i >= 0; i--)
{
    int nIdx = rgIndices[i];

    T fAlpha = m_cuda.dot(m_nN, m_colBlobHistoryS[nIdx].gpu_data, m_blobDirection.gpu_data);
    rgAlpha[nIdx] = (double)Utility.ConvertVal<T>(fAlpha);
    rgAlpha[nIdx] *= m_rgRhoHistory[nIdx];
    // ...
}

// ...
for (int i = 0; i < rgIndices.Count; i++)
{
    int nIdx = rgIndices[i];

    T fBeta = m_cuda.dot(m_nN, m_colBlobHistoryY[nIdx].gpu_data, m_blobDirection.gpu_data);
    dfBeta = (double)Utility.ConvertVal<T>(fBeta);
    dfBeta *= m_rgRhoHistory[nIdx];

    m_cuda.axpy(m_nN, rgAlpha[nIdx] - dfBeta, m_colBlobHistoryS[nIdx].gpu_data, m_blobDirection.mutable_gpu_data);
}
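These two passes have the shape of the textbook L-BFGS two-loop recursion (Nocedal & Wright), which, starting from q = \nabla f(\theta_k), computes

    first loop (newest to oldest):  \alpha_i = \rho_i s_i^\top q,   q \leftarrow q - \alpha_i y_i
    scaling:                        r \leftarrow H_k^0 q
    second loop (oldest to newest): \beta_i = \rho_i y_i^\top r,   r \leftarrow r + (\alpha_i - \beta_i) s_i

The final axpy with coefficient rgAlpha[nIdx] - dfBeta matches the r update in the second loop; the q update and the H_k^0 scaling presumably sit in lines elided from this listing.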
for (int i = 0; i < net_params.Count; i++)
{
    int nCount = net_params[i].count();

    if (m_net.params_lr[i] != 0)
    {
        // ...
        T fLr = (T)Convert.ChangeType(m_net.params_lr[i], typeof(T));
        m_cuda.scale(nCount, fLr, m_blobDirection.gpu_data, net_params[i].mutable_gpu_diff, nOffset, 0);
        // ...
    }
}
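This fragment (apparently from UpdateNet) writes each participating parameter's slice of the flat direction vector back into that parameter's diff, scaled by the parameter's learning-rate multiplier, so the subsequent network update applies roughly

    \Delta\theta_i \propto lr_i \cdot d_i

per parameter slice d_i; the sign convention and any base_lr factor are handled in parts of the solver not shown here.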
m_nStart = state.start;

// ...
for (int i = 0; i < rgIndices.Count; i++)
{
    int nIdx = rgIndices[i];

    m_colBlobHistoryS[i].FromProto(state.history[nIdx]);
    m_colBlobHistoryY[i].FromProto(state.s_history[nIdx]);
}
state.start = m_nStart;

// ...
for (int i = 0; i < rgIndices.Count; i++)
{
    int nIdx = rgIndices[i];

    state.s_history.Add(m_colBlobHistoryS[nIdx].ToProto());
    state.history.Add(m_colBlobHistoryY[nIdx].ToProto());
}
The CancelEvent provides an extension to the manual cancel event that allows for overriding the manual cancel event.
The Log class provides general output in text form.
void CHECK(bool b, string str)
Test a flag for true.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
The Utility class provides general utility functions.
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
The BlobCollection contains a list of Blobs.
void Dispose()
Release all resource used by the collection and its Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void SetDiff(double df)
Set all blob diff to the value specified.
int Count
Returns the number of items in the collection.
void Clear(bool bDispose=false)
Remove all items from the collection.
The Blob is the main holder of data that moves through the Layers of the Net.
long mutable_gpu_data
Returns the data GPU handle used by the CudaDnn connection.
void FromProto(BlobProto bp, bool bReshape=true)
Create a new Blob from a given BlobProto.
BlobProto ToProto(bool bWriteDiff=false)
Writes the Blob to a new BlobProto.
string Name
Get/set the name of the Blob.
virtual void Dispose(bool bDisposing)
Releases all resources used by the Blob (including both GPU and Host).
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
Connects Layers together into a directed acyclic graph (DAG) specified by a NetParameter.
The SolverParameter is a parameter for the solver, specifying the train and test networks.
int lbgfs_corrections
Specifies the number of L-BFGS corrections used with the L-BFGS solver.
double base_lr
The base learning rate (default = 0.01).
SolverType type
Specifies the solver type.
The SolverState specifies the state of a given solver.
int end
Specifies the end used by L-BFGS.
BlobProto gradients
Gradients used with L-BFGS state.
List< double > rho_history
rho history used with L-BFGS state.
int iter
The current iteration.
List< BlobProto > history
The history for SGD solvers.
int start
Specifies the start used by L-BFGS.
int current_step
The current step for learning rate.
List< BlobProto > s_history
S history used with L-BFGS state.
BlobProto direction
Direction used with L-BFGS state.
Optimizes the parameters of a Net using L-BFGS. This implementation is based on minFunc,...
virtual void CollectGradients()
Collect the gradients from the network learnable parameters.
override void dispose()
Releases all resources (GPU and Host) used by the Solver.
virtual void UpdateNet()
Update the network.
virtual void ComputeStep()
Compute the step.
override double ApplyUpdate(int nIterationOverride=-1)
Apply the gradients to the network.
void PreSolve()
Runs the pre-solve which prepares the Solver to start solving.
virtual void UpdateHistory()
Update the history values with the gradients and direction.
virtual void ComputeInitialHessianApprox()
Compute the initial Hessian approximation.
LBFGSSolver(CudaDnn< T > cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabaseBase imgDb, IXPersist< T > persist, int nSolverCount=1, int nSolverRank=0, Net< T > shareNet=null, onGetWorkspace getws=null, onSetWorkspace setws=null)
The LBFGSSolver constructor.
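For example, a hedged construction sketch, assuming cuda, log, evtCancel, evtForceSnapshot, evtForceTest, imgDb and persist have already been created elsewhere (the SolverType.LBFGS member name and the values below are assumptions, not taken from this page, apart from the 0.01 base_lr default noted above):

// Sketch only: configure a SolverParameter for L-BFGS and build the solver.
SolverParameter solverParam = new SolverParameter();
solverParam.type = SolverParameter.SolverType.LBFGS;   // assumed enum member name
solverParam.lbgfs_corrections = 100;                    // number of (s, y) pairs to keep (assumed value)
solverParam.base_lr = 0.01;                             // base learning rate (page default)

LBFGSSolver<float> solver = new LBFGSSolver<float>(cuda, log, solverParam, evtCancel,
    evtForceSnapshot, evtForceTest, imgDb, persist);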
virtual void ComputeDirection()
Compute the direction.
override byte[] SnapshotSolverState()
Save the solver state.
override void RestoreSolverState(byte[] rgState)
Restore a previously saved solver state.
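Taken together these members implement one L-BFGS iteration in the usual form: the flattened gradient and the stored (s, y) pairs yield a search direction through the two-loop recursion, and the network parameters are then moved along it,

    d_k = -H_k \nabla f(\theta_k),    \theta_{k+1} = \theta_k + \alpha_k d_k,

where H_k is the implicit limited-memory inverse-Hessian approximation built from the most recent correction pairs and \alpha_k is derived from the learning-rate settings.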
An interface for classes that perform optimization on Nets - this class serves as the base class for ...
SolverParameter m_param
Specifies the SolverParameter that defines how the Solver operates.
CudaDnn< T > m_cuda
Specifies the instance of CudaDnn used by the Solver that provides a connection to Cuda.
bool? is_root_solver
Returns whether or not this is the root solver.
int m_nIter
Specifies the current iteration.
IXPersist< T > m_persist
Specifies the persistence object used to save weight and solver states.
Net< T > m_net
Specifies the training Net.
int m_nCurrentStep
Specifies the current step.
Log m_log
Specifies the Log for output.
The IXImageDatabaseBase interface defines the general interface to the in-memory image database.
The IXPersist interface is used by the CaffeControl to load and save weights.
The MyCaffe.basecode namespace contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
The MyCaffe.db.image namespace contains all image database related classes.
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe.solvers namespace contains all solver classes, including the base Solver.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-source project.