Commit a966db7: Added SGD
Heaton committed Aug 31, 2017
1 parent c56ef25
Showing 2 changed files with 263 additions and 40 deletions.
@@ -253,5 +253,20 @@ public IMLDataSet OpenAdditional()
result.BatchSize = BatchSize;
return result;
}

/// <summary>
/// Advance to the next batch. Should be called at the end of each training iteration.
/// </summary>
public void Advance()
{
if (RandomBatches)
{
GeneraterandomSample();
}
else
{
_currentIndex = (_currentIndex + BatchSize) % _dataset.Count;
}
}
}
}
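For context, a minimal sketch (not part of the commit) of how the new Advance() call is meant to be driven from a training loop. It assumes an existing IMLDataSet named fullSet, uses the two-argument BatchDataSet constructor that the trainer below relies on, and the loop counts are placeholders only.

// Wrap any IMLDataSet so that Count and the indexer expose one mini-batch at a time.
var batches = new BatchDataSet(fullSet, new MersenneTwisterGenerateRandom());
batches.BatchSize = 25;

for (int iteration = 0; iteration < 100; iteration++)
{
    for (int i = 0; i < batches.Count; i++)
    {
        IMLDataPair pair = batches[i]; // a pair from the current mini-batch
        // ... accumulate gradients for this pair ...
    }
    batches.Advance(); // move to the next sequential (or random) batch
}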
@@ -6,11 +6,13 @@
using Encog.Neural.Error;
using Encog.MathUtil.Error;
using Encog.MathUtil.Randomize.Generate;
using Encog.Neural.Networks.Training.Propagation;
using Encog.ML.Data;
using Encog.Util;
using Encog.Engine.Network.Activation;

namespace Encog.Neural.Networks.Training.Propagation.SGD
{


public class StochasticGradientDescent : BasicTraining, IMomentum,
ILearningRate
@@ -25,77 +27,260 @@ public class StochasticGradientDescent : BasicTraining, IMomentum,
/// </summary>
public double Momentum { get; set; }

/// <summary>
/// The gradients.
/// </summary>
private double[] _gradients;

/// <summary>
/// The deltas for each layer.
/// </summary>
private double[] _layerDelta;

/// <summary>
/// L1 regularization.
/// </summary>
public double L1 { get; set; }

/// <summary>
/// L2 regularization.
/// </summary>
public double L2 { get; set; }

/// <summary>
/// The update rule to use.
/// </summary>
public IUpdateRule UpdateRule { get; set; }

/// <summary>
/// The last delta values.
/// </summary>
private double[] _lastDelta;

/// <summary>
/// A flat neural network.
/// </summary>
private FlatNetwork _flat;

/// <summary>
/// The error function to use.
/// </summary>
private IErrorFunction _errorFunction = new CrossEntropyErrorFunction();

/// <summary>
/// The error calculation.
/// </summary>
private ErrorCalculation _errorCalculation;

private IGenerateRandom _rnd;

private IMLMethod _method;

public StochasticGradientDescent(IContainsFlat network,
IMLDataSet training) :
this(network, training, new MersenneTwisterGenerateRandom())
{
}

public StochasticGradientDescent(IContainsFlat network,
IMLDataSet training, IGenerateRandom theRandom) :
base(TrainingImplementationType.Iterative)
{
Training = training;
UpdateRule = new AdamUpdate();

if (!(training is BatchDataSet))
{
BatchSize = 25;
}

_method = network;
_flat = network.Flat;
_layerDelta = new double[_flat.LayerOutput.Length];
_gradients = new double[_flat.Weights.Length];
_errorCalculation = new ErrorCalculation();
_rnd = theRandom;
LearningRate = 0.001;
Momentum = 0.9;
}

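/// <summary>
/// The mini-batch size. Reading returns the batch size of the underlying
/// BatchDataSet, or 0 if the training set is not batched; setting wraps a
/// non-batched training set in a BatchDataSet.
/// </summary>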
public int BatchSize
{
get
{
if (Training is BatchDataSet)
{
return ((BatchDataSet)Training).BatchSize;
}
else
{
return 0;
}
}
set
{
if (Training is BatchDataSet)
{
((BatchDataSet)Training).BatchSize = value;
}
else
{
BatchDataSet batchSet = new BatchDataSet(Training, _rnd);
batchSet.BatchSize = value;
Training = batchSet;
}
}
}

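/// <summary>
/// Compute the network output for a single training pair and accumulate its
/// gradients by propagating the error backwards through the network.
/// </summary>
/// <param name="pair">The training pair to process.</param>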
public void Process(IMLDataPair pair)
{
_errorCalculation = new ErrorCalculation();

double[] actual = new double[_flat.OutputCount];

_flat.Compute(pair.Input, actual);

_errorCalculation.UpdateError(actual, pair.Ideal, pair.Significance);

// Calculate error for the output layer.
_errorFunction.CalculateError(
_flat.ActivationFunctions[0], _flat.LayerSums, _flat.LayerOutput,
pair.Ideal, actual, _layerDelta, 0,
pair.Significance);

// Apply regularization, if requested.
if (L1 > EncogFramework.DefaultDoubleEqual
|| L2 > EncogFramework.DefaultDoubleEqual)
{
double[] lp = new double[2];
CalculateRegularizationPenalty(lp);
for (int i = 0; i < actual.Length; i++)
{
double p = (lp[0] * L1) + (lp[1] * L2);
_layerDelta[i] += p;
}
}

// Propagate backwards (chain rule from calculus).
for (int i = _flat.BeginTraining; i < _flat
.EndTraining; i++)
{
ProcessLevel(i);
}
}

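/// <summary>
/// Apply the accumulated gradients to the weights using the update rule,
/// record the error, clear the gradients, and advance to the next batch.
/// </summary>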
public void Update()
{
if (IterationNumber == 0)
{
UpdateRule.Init(this);
}

PreIteration();

UpdateRule.Update(_gradients, _flat.Weights);
Error = _errorCalculation.Calculate();

PostIteration();

EngineArray.Fill(_gradients, 0);
_errorCalculation.Reset();

if (Training is BatchDataSet)
{
((BatchDataSet)Training).Advance();
}
}

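/// <summary>
/// Reset the error calculation.
/// </summary>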
public void ResetError()
{
_errorCalculation.Reset();
}

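/// <summary>
/// Backpropagate the deltas across one weight level, accumulating the
/// gradients and computing the deltas for the previous layer.
/// </summary>
/// <param name="currentLevel">The weight level to process.</param>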
private void ProcessLevel(int currentLevel)
{
int fromLayerIndex = _flat.LayerIndex[currentLevel + 1];
int toLayerIndex = _flat.LayerIndex[currentLevel];
int fromLayerSize = _flat.LayerCounts[currentLevel + 1];
int toLayerSize = _flat.LayerFeedCounts[currentLevel];
double dropoutRate = 0;

int index = _flat.WeightIndex[currentLevel];
IActivationFunction activation = _flat
.ActivationFunctions[currentLevel];

// handle weights
// array references are made method local to avoid one indirection
double[] layerDelta = _layerDelta;
double[] weights = _flat.Weights;
double[] gradients = _gradients;
double[] layerOutput = _flat.LayerOutput;
double[] layerSums = _flat.LayerSums;
int yi = fromLayerIndex;
for (int y = 0; y < fromLayerSize; y++)
{
double output = layerOutput[yi];
double sum = 0;

int wi = index + y;
int loopEnd = toLayerIndex + toLayerSize;

for (int xi = toLayerIndex; xi < loopEnd; xi++, wi += fromLayerSize)
{
gradients[wi] += output * layerDelta[xi];
sum += weights[wi] * layerDelta[xi];
}
layerDelta[yi] = sum
* (activation.DerivativeFunction(layerSums[yi], layerOutput[yi]));

yi++;
}
}

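/// <summary>
/// Perform one training iteration over the current batch of the training set.
/// </summary>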
public override void Iteration()
{

for (int i = 0; i < Training.Count; i++)
{
Process(Training[i]);
}

if (IterationNumber == 0)
{
UpdateRule.Init(this);
}

PreIteration();

Update();
PostIteration();

if (Training is BatchDataSet)
{
((BatchDataSet)Training).Advance();
}
}



public override bool CanContinue
{
get { return false; }
}

public override IMLMethod Method
{
get
{
return _method;
}
}


public override TrainingContinuation Pause()
{
throw new NotImplementedException();
@@ -113,6 +298,29 @@ public FlatNetwork Flat
return _flat;
}
}


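/// <summary>
/// Calculate the L1 and L2 regularization penalties over all weight layers.
/// </summary>
/// <param name="l">A two-element array receiving the L1 sum (index 0) and the L2 sum (index 1).</param>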
public void CalculateRegularizationPenalty(double[] l)
{
for (int i = 0; i < _flat.LayerCounts.Length - 1; i++)
{
LayerRegularizationPenalty(i, l);
}
}

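/// <summary>
/// Add the L1 and L2 penalties for a single layer's weights to the running totals.
/// </summary>
/// <param name="fromLayer">The layer whose outgoing weights are summed.</param>
/// <param name="l">A two-element array accumulating the L1 and L2 sums.</param>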
public void LayerRegularizationPenalty(int fromLayer, double[] l)
{
int fromCount = _flat.GetLayerTotalNeuronCount(fromLayer);
int toCount = _flat.GetLayerNeuronCount(fromLayer + 1);

for (int fromNeuron = 0; fromNeuron < fromCount; fromNeuron++)
{
for (int toNeuron = 0; toNeuron < toCount; toNeuron++)
{
double w = _flat.GetWeight(fromLayer, fromNeuron, toNeuron);
l[0] += Math.Abs(w);
l[1] += w * w;
}
}
}
}
}
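Taken together, the new trainer plugs in like any other BasicTraining implementation. A minimal usage sketch, not part of the commit: it assumes a network that implements IContainsFlat (such as a BasicNetwork) and an IMLDataSet named trainingSet built elsewhere, and the hyperparameter values shown are illustrative only.

var train = new StochasticGradientDescent(network, trainingSet);
train.BatchSize = 32;       // wraps trainingSet in a BatchDataSet when it is not one already
train.LearningRate = 0.001; // the constructor defaults are 0.001 and 0.9
train.Momentum = 0.9;
train.L2 = 1e-5;            // optional regularization

for (int epoch = 0; epoch < 500; epoch++)
{
    train.Iteration();
    Console.WriteLine("Epoch " + epoch + ", error " + train.Error);
}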
