From a966db766cb7a61420909abd0d613af0c336ed5b Mon Sep 17 00:00:00 2001
From: Heaton
Date: Thu, 31 Aug 2017 11:02:48 -0500
Subject: [PATCH] Added SGD

---
 .../Training/Propagation/SGD/BatchDataSet.cs  |  15 +
 .../SGD/StochasticGradientDescent.cs          | 288 +++++++++++++++---
 2 files changed, 263 insertions(+), 40 deletions(-)

diff --git a/encog-core-cs/Neural/Networks/Training/Propagation/SGD/BatchDataSet.cs b/encog-core-cs/Neural/Networks/Training/Propagation/SGD/BatchDataSet.cs
index 2cf43d33..6049f4d5 100644
--- a/encog-core-cs/Neural/Networks/Training/Propagation/SGD/BatchDataSet.cs
+++ b/encog-core-cs/Neural/Networks/Training/Propagation/SGD/BatchDataSet.cs
@@ -253,5 +253,20 @@ public IMLDataSet OpenAdditional()
             result.BatchSize = BatchSize;
             return result;
         }
+
+        /// <summary>
+        /// Advance to the next batch. Should be called at the end of each training iteration.
+        /// </summary>
+        public void Advance()
+        {
+            if (RandomBatches)
+            {
+                GeneraterandomSample();
+            }
+            else
+            {
+                _currentIndex = (_currentIndex + BatchSize) % _dataset.Count;
+            }
+        }
     }
 }
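Note: the modulo arithmetic in Advance() wraps the sequential batch window back to the start of the underlying data set once it runs past the end. A minimal standalone sketch of that indexing (plain C#, independent of the Encog types; the sizes are hypothetical):

    // A 100-item data set with BatchSize = 25 yields windows starting at
    // 0, 25, 50, 75, then wraps back to 0 on the fifth advance.
    int count = 100;        // stands in for _dataset.Count
    int batchSize = 25;     // stands in for BatchSize
    int currentIndex = 0;   // stands in for _currentIndex

    for (int iteration = 0; iteration < 6; iteration++)
    {
        System.Console.WriteLine($"Iteration {iteration}: batch starts at {currentIndex}");
        currentIndex = (currentIndex + batchSize) % count;
    }

If Count is not a multiple of BatchSize, the window drifts to different offsets on each pass rather than repeating the same starting points.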
diff --git a/encog-core-cs/Neural/Networks/Training/Propagation/SGD/StochasticGradientDescent.cs b/encog-core-cs/Neural/Networks/Training/Propagation/SGD/StochasticGradientDescent.cs
index fe6e355d..1732f7fc 100644
--- a/encog-core-cs/Neural/Networks/Training/Propagation/SGD/StochasticGradientDescent.cs
+++ b/encog-core-cs/Neural/Networks/Training/Propagation/SGD/StochasticGradientDescent.cs
@@ -6,11 +6,13 @@
 using Encog.Neural.Error;
 using Encog.MathUtil.Error;
 using Encog.MathUtil.Randomize.Generate;
-using Encog.Neural.Networks.Training.Propagation;
+using Encog.ML.Data;
+using Encog.Util;
+using Encog.Engine.Network.Activation;
 
 namespace Encog.Neural.Networks.Training.Propagation.SGD
 {
-    
+
     public class StochasticGradientDescent : BasicTraining, IMomentum,
         ILearningRate
@@ -25,77 +27,260 @@ public class StochasticGradientDescent : BasicTraining, IMomentum,
         /// </summary>
         public double Momentum { get; set; }
 
-        /**
-         * The gradients.
-         */
-        private double[] gradients;
+        /// <summary>
+        /// The gradients.
+        /// </summary>
+        private double[] _gradients;
 
-        /**
-         * The deltas for each layer.
-         */
-        private double[] layerDelta;
+        /// <summary>
+        /// The deltas for each layer.
+        /// </summary>
+        private double[] _layerDelta;
 
-        /**
-         * L1 regularization.
-         */
+        /// <summary>
+        /// L1 regularization.
+        /// </summary>
         public double L1 { get; set; }
 
-        /**
-         * L2 regularization.
-         */
+        /// <summary>
+        /// L2 regularization.
+        /// </summary>
         public double L2 { get; set; }
 
-        /**
-         * The update rule to use.
-         */
-        private IUpdateRule updateRule = new AdamUpdate();
+        /// <summary>
+        /// The update rule to use.
+        /// </summary>
+        public IUpdateRule UpdateRule { get; set; }
 
-        /**
-         * The last delta values.
-         */
+        /// <summary>
+        /// The last delta values.
+        /// </summary>
         private double[] _lastDelta;
 
-        /**
-         * A flat neural network.
-         */
+        /// <summary>
+        /// A flat neural network.
+        /// </summary>
         private FlatNetwork _flat;
 
-        /**
-         * The error function to use.
-         */
+        /// <summary>
+        /// The error function to use.
+        /// </summary>
         private IErrorFunction _errorFunction = new CrossEntropyErrorFunction();
 
-        /**
-         * The error calculation.
-         */
-        private ErrorCalculation errorCalculation;
+        /// <summary>
+        /// The error calculation.
+        /// </summary>
+        private ErrorCalculation _errorCalculation;
 
-        private IGenerateRandom rnd;
+        private IGenerateRandom _rnd;
 
         private IMLMethod _method;
 
-        public StochasticGradientDescent(TrainingImplementationType implementationType) : base(implementationType)
+
+        public StochasticGradientDescent(IContainsFlat network,
+            IMLDataSet training) :
+            this(network, training, new MersenneTwisterGenerateRandom())
         {
+        }
 
-        public override bool CanContinue
+
+        public StochasticGradientDescent(IContainsFlat network,
+            IMLDataSet training, IGenerateRandom theRandom) :
+            base(TrainingImplementationType.Iterative)
         {
-            get { return false; }
+            Training = training;
+            UpdateRule = new AdamUpdate();
+
+            if (!(training is BatchDataSet))
+            {
+                BatchSize = 25;
+            }
+
+            _method = network;
+            _flat = network.Flat;
+            _layerDelta = new double[_flat.LayerOutput.Length];
+            _gradients = new double[_flat.Weights.Length];
+            _errorCalculation = new ErrorCalculation();
+            _rnd = theRandom;
+            LearningRate = 0.001;
+            Momentum = 0.9;
         }
 
-        public override IMLMethod Method
+        public int BatchSize
         {
             get
             {
-                return _method;
+                if (Training is BatchDataSet)
+                {
+                    return ((BatchDataSet)Training).BatchSize;
+                }
+                else
+                {
+                    return 0;
+                }
+            }
+            set
+            {
+                if (Training is BatchDataSet)
+                {
+                    ((BatchDataSet)Training).BatchSize = value;
+                }
+                else
+                {
+                    BatchDataSet batchSet = new BatchDataSet(Training, _rnd);
+                    batchSet.BatchSize = value;
+                    Training = batchSet;
+                }
+            }
+        }
+
+        public void Process(IMLDataPair pair)
+        {
+            // Error accumulates across the batch; Update() resets the calculation.
+            double[] actual = new double[_flat.OutputCount];
+
+            _flat.Compute(pair.Input, actual);
+
+            _errorCalculation.UpdateError(actual, pair.Ideal, pair.Significance);
+
+            // Calculate error for the output layer.
+            _errorFunction.CalculateError(
+                _flat.ActivationFunctions[0], _flat.LayerSums, _flat.LayerOutput,
+                pair.Ideal, actual, _layerDelta, 0,
+                pair.Significance);
+
+            // Apply regularization, if requested.
+            if (L1 > EncogFramework.DefaultDoubleEqual
+                || L2 > EncogFramework.DefaultDoubleEqual)
+            {
+                double[] lp = new double[2];
+                CalculateRegularizationPenalty(lp);
+                for (int i = 0; i < actual.Length; i++)
+                {
+                    double p = (lp[0] * L1) + (lp[1] * L2);
+                    _layerDelta[i] += p;
+                }
+            }
+
+            // Propagate backwards (chain rule from calculus).
+            for (int i = _flat.BeginTraining; i < _flat.EndTraining; i++)
+            {
+                ProcessLevel(i);
+            }
+        }
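Note: the regularization block in Process() folds a single scalar penalty, p = lp[0] * L1 + lp[1] * L2, into every output delta, where lp[0] accumulates the sum of |w| and lp[1] the sum of w * w over all weights (see CalculateRegularizationPenalty later in this patch). A minimal sketch of that accumulation over a bare weight array (hypothetical values, outside the Encog API):

    // Accumulate the L1 (sum of |w|) and L2 (sum of w^2) terms the same
    // way CalculateRegularizationPenalty fills its two-element array.
    double[] weights = { 0.5, -1.25, 2.0 };   // hypothetical weights
    double[] lp = new double[2];

    foreach (double w in weights)
    {
        lp[0] += System.Math.Abs(w);   // L1 term: 3.75
        lp[1] += w * w;                // L2 term: 5.8125
    }

    // With hypothetical coefficients L1 = L2 = 0.0001:
    double penalty = (lp[0] * 0.0001) + (lp[1] * 0.0001);   // 0.00095625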
+
+        public void Update()
+        {
+            if (IterationNumber == 0)
+            {
+                UpdateRule.Init(this);
+            }
+
+            PreIteration();
+
+            UpdateRule.Update(_gradients, _flat.Weights);
+            Error = _errorCalculation.Calculate();
+
+            PostIteration();
+
+            EngineArray.Fill(_gradients, 0);
+            _errorCalculation.Reset();
+
+            if (Training is BatchDataSet)
+            {
+                ((BatchDataSet)Training).Advance();
+            }
+        }
+
+        public void ResetError()
+        {
+            _errorCalculation.Reset();
+        }
+
+        private void ProcessLevel(int currentLevel)
+        {
+            int fromLayerIndex = _flat.LayerIndex[currentLevel + 1];
+            int toLayerIndex = _flat.LayerIndex[currentLevel];
+            int fromLayerSize = _flat.LayerCounts[currentLevel + 1];
+            int toLayerSize = _flat.LayerFeedCounts[currentLevel];
+
+            int index = _flat.WeightIndex[currentLevel];
+            IActivationFunction activation = _flat.ActivationFunctions[currentLevel];
+
+            // Handle weights.
+            // Array references are made method local to avoid one indirection.
+            double[] layerDelta = _layerDelta;
+            double[] weights = _flat.Weights;
+            double[] gradients = _gradients;
+            double[] layerOutput = _flat.LayerOutput;
+            double[] layerSums = _flat.LayerSums;
+            int yi = fromLayerIndex;
+            for (int y = 0; y < fromLayerSize; y++)
+            {
+                double output = layerOutput[yi];
+                double sum = 0;
+
+                int wi = index + y;
+                int loopEnd = toLayerIndex + toLayerSize;
+
+                for (int xi = toLayerIndex; xi < loopEnd; xi++, wi += fromLayerSize)
+                {
+                    gradients[wi] += output * layerDelta[xi];
+                    sum += weights[wi] * layerDelta[xi];
+                }
+                layerDelta[yi] = sum
+                    * (activation.DerivativeFunction(layerSums[yi], layerOutput[yi]));
+
+                yi++;
+            }
+        }
 
         public override void Iteration()
         {
+            for (int i = 0; i < Training.Count; i++)
+            {
+                Process(Training[i]);
+            }
+
+            // Update() runs PreIteration/PostIteration, applies the update rule,
+            // clears the gradients, and advances the batch, so none of that is
+            // repeated here.
+            Update();
         }
 
+        public override bool CanContinue
+        {
+            get { return false; }
+        }
+
+        public override IMLMethod Method
+        {
+            get
+            {
+                return _method;
+            }
+        }
 
         public override TrainingContinuation Pause()
         {
             throw new NotImplementedException();
         }
@@ -113,6 +298,29 @@ public FlatNetwork Flat
                 return _flat;
             }
         }
-
+
+        public void CalculateRegularizationPenalty(double[] l)
+        {
+            for (int i = 0; i < _flat.LayerCounts.Length - 1; i++)
+            {
+                LayerRegularizationPenalty(i, l);
+            }
+        }
+
+        public void LayerRegularizationPenalty(int fromLayer, double[] l)
+        {
+            int fromCount = _flat.GetLayerTotalNeuronCount(fromLayer);
+            int toCount = _flat.GetLayerNeuronCount(fromLayer + 1);
+
+            for (int fromNeuron = 0; fromNeuron < fromCount; fromNeuron++)
+            {
+                for (int toNeuron = 0; toNeuron < toCount; toNeuron++)
+                {
+                    double w = _flat.GetWeight(fromLayer, fromNeuron, toNeuron);
+                    l[0] += Math.Abs(w);
+                    l[1] += w * w;
+                }
+            }
+        }
     }
 }
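Note: a usage sketch for the new trainer. It assumes network (a BasicNetwork or any other IContainsFlat) and trainingSet (an IMLDataSet) have already been built with the usual Encog helpers; the batch size, learning rate, and stopping threshold below are illustrative choices, not values taken from this patch. The constructor itself defaults to BatchSize = 25, LearningRate = 0.001, Momentum = 0.9, and the Adam update rule.

    // Train with mini-batch SGD until the error is low enough.
    var trainer = new StochasticGradientDescent(network, trainingSet);
    trainer.BatchSize = 32;        // resizes the BatchDataSet created by the constructor
    trainer.LearningRate = 0.01;
    trainer.L2 = 0.0001;           // optional regularization

    int epoch = 1;
    do
    {
        trainer.Iteration();       // one batch: Process() each pair, then Update()
        System.Console.WriteLine($"Epoch {epoch}, error: {trainer.Error}");
        epoch++;
    } while (trainer.Error > 0.01 && epoch <= 5000);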