From 729393c4c785ad1066acfedf8af407db21928a73 Mon Sep 17 00:00:00 2001 From: Kiran Randhawa Date: Wed, 30 Dec 2015 02:11:23 +0000 Subject: [PATCH 1/3] Issue: #83 - Added basic support for StopTrainingStrategy tested with my own application. --- encog-core-cs/ML/EA/Train/BasicEA.cs | 12 ++++++- encog-core-cs/ML/EA/Train/TrainEA.cs | 53 ++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/encog-core-cs/ML/EA/Train/BasicEA.cs b/encog-core-cs/ML/EA/Train/BasicEA.cs index bde0eebd..7e2ecba3 100644 --- a/encog-core-cs/ML/EA/Train/BasicEA.cs +++ b/encog-core-cs/ML/EA/Train/BasicEA.cs @@ -376,8 +376,18 @@ public void Iteration() // purge invalid genomes Population.PurgeInvalidGenomes(); - } + PostIteration(); + } + + /// + /// post iteration added to support strategies. + /// + public virtual void PostIteration() + { + + } + /// /// The operators. /// diff --git a/encog-core-cs/ML/EA/Train/TrainEA.cs b/encog-core-cs/ML/EA/Train/TrainEA.cs index 824b0f4e..90afae6a 100644 --- a/encog-core-cs/ML/EA/Train/TrainEA.cs +++ b/encog-core-cs/ML/EA/Train/TrainEA.cs @@ -27,14 +27,22 @@ using Encog.ML.Train.Strategy; using Encog.Neural.Networks.Training; using Encog.Neural.Networks.Training.Propagation; - +using Encog.ML.Train.Strategy.End; +using System.Linq; + namespace Encog.ML.EA.Train { /// /// Provides a MLTrain compatible class that can be used to train genomes. /// public class TrainEA : BasicEA, IMLTrain - { + { + /// + /// The training strategies to use. + /// + /// + private readonly IList _strategies; + /// /// Create a trainer for a score function. /// @@ -43,6 +51,7 @@ public class TrainEA : BasicEA, IMLTrain public TrainEA(IPopulation thePopulation, ICalculateScore theScoreFunction) : base(thePopulation, theScoreFunction) { + _strategies = new List(); } /// @@ -65,17 +74,27 @@ public TrainEA(IPopulation thePopulation, IMLDataSet trainingData) { // not needed } + } + + /// + /// Call the strategies after an iteration. + /// + /// + public override void PostIteration() + { + foreach (IStrategy strategy in _strategies) + { + strategy.PostIteration(); + } } - - - /// - /// True if training can progress no further. - /// - public bool TrainingDone - { - get { return false; } + + /// True if training can progress no further. + public virtual bool TrainingDone + { + get { return _strategies.OfType().Any(end => end.ShouldStop()); } } + /// public TrainingImplementationType ImplementationType { @@ -95,7 +114,9 @@ public void Iteration(int count) { Iteration(); } - } + } + + /// public TrainingContinuation Pause() @@ -113,9 +134,11 @@ public void Resume(TrainingContinuation state) /// /// Not used. public void AddStrategy(IStrategy strategy) - { - throw new TrainingError( - "Strategies are not supported by this training method."); + { + strategy.Init(this); + _strategies.Add(strategy); + //throw new TrainingError( + // "Strategies are not supported by this training method."); } /// @@ -152,7 +175,7 @@ public IMLDataSet Training /// public IList Strategies { - get { return new List(); } + get { return _strategies; } } } } From e5d746a96ae46045df4374c1a109adc149349ac0 Mon Sep 17 00:00:00 2001 From: Kiran Randhawa Date: Wed, 30 Dec 2015 02:13:33 +0000 Subject: [PATCH 2/3] Added a new EndTimeSpan strategy. This gives you more specificity over duration of network running. This also works with the latest NEAT network changes I have been making. --- .../Train/Strategy/End/EndTimeSpanStrategy.cs | 50 +++++++++++++++++++ encog-core-cs/encog-core-cs.csproj | 1 + 2 files changed, 51 insertions(+) create mode 100644 encog-core-cs/ML/Train/Strategy/End/EndTimeSpanStrategy.cs diff --git a/encog-core-cs/ML/Train/Strategy/End/EndTimeSpanStrategy.cs b/encog-core-cs/ML/Train/Strategy/End/EndTimeSpanStrategy.cs new file mode 100644 index 00000000..cd3f1eb9 --- /dev/null +++ b/encog-core-cs/ML/Train/Strategy/End/EndTimeSpanStrategy.cs @@ -0,0 +1,50 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Encog.ML.Train.Strategy.End +{ + /// + /// This End time span strategy gives greater specificity than EndMinutesStrategy. + /// You can specify Days, Hours, Minutes, Seconds etc... + /// + public class EndTimeSpanStrategy : IEndTrainingStrategy + { + private TimeSpan _duration; + private bool _started; + private DateTime _startedTime; + + /// + public EndTimeSpanStrategy(TimeSpan duration) + { + _duration = duration; + } + public void Init(IMLTrain train) + { + _started = true; + _startedTime = DateTime.Now; + } + + /// + public void PostIteration() + { + + } + + /// + public void PreIteration() + { + + } + + /// + public virtual bool ShouldStop() + { + lock (this) + { + return (DateTime.Now.Subtract(_startedTime) > _duration); + } + } + } +} diff --git a/encog-core-cs/encog-core-cs.csproj b/encog-core-cs/encog-core-cs.csproj index bfc3558a..42e20ec3 100644 --- a/encog-core-cs/encog-core-cs.csproj +++ b/encog-core-cs/encog-core-cs.csproj @@ -575,6 +575,7 @@ + From 7fecf6d69154adf617c61037fec714a80489cc7b Mon Sep 17 00:00:00 2001 From: Kiran Randhawa Date: Thu, 21 Apr 2016 21:52:03 +0100 Subject: [PATCH 3/3] So I've added an extension library to Encog which enables one to work with datasets. This is still work in progress and there are some improvements that can still be made (such as the ability to work with multiple data tables, speed enhancements and the implementation of time series data). The code will also need to be baked directly into the encog framework and the existing csv logic will need to be re-worked to use this new logic as a base. --- .gitignore | 54 ++--- .../Normalization/AnalystNormalizeDataSet.cs | 129 ++++++++++++ .../Normalization/AnalystWizardExtensions.cs | 41 ++++ .../Normalization/DatasetExtensions.cs | 78 ++++++++ .../Normalization/EncogAnalystExtensions.cs | 20 ++ .../Normalization/ObjectExtensions.cs | 76 +++++++ .../Normalization/PerformAnalysis.cs | 187 ++++++++++++++++++ EncogExtensions/Properties/AssemblyInfo.cs | 36 ++++ EncogExtensions/encog-extensions.csproj | 69 +++++++ EncogExtensions/readme.txt | 22 +++ encog-core-cs.sln | 40 +++- .../Properties/AssemblyInfo.cs | 36 ++++ .../TestDataSetNormalization.cs | 66 +++++++ .../encog-extensions-test.csproj | 94 +++++++++ 14 files changed, 918 insertions(+), 30 deletions(-) create mode 100644 EncogExtensions/Normalization/AnalystNormalizeDataSet.cs create mode 100644 EncogExtensions/Normalization/AnalystWizardExtensions.cs create mode 100644 EncogExtensions/Normalization/DatasetExtensions.cs create mode 100644 EncogExtensions/Normalization/EncogAnalystExtensions.cs create mode 100644 EncogExtensions/Normalization/ObjectExtensions.cs create mode 100644 EncogExtensions/Normalization/PerformAnalysis.cs create mode 100644 EncogExtensions/Properties/AssemblyInfo.cs create mode 100644 EncogExtensions/encog-extensions.csproj create mode 100644 EncogExtensions/readme.txt create mode 100644 encog-extensions-test/Properties/AssemblyInfo.cs create mode 100644 encog-extensions-test/TestDataSetNormalization.cs create mode 100644 encog-extensions-test/encog-extensions-test.csproj diff --git a/.gitignore b/.gitignore index 8ccd396f..dba909b3 100644 --- a/.gitignore +++ b/.gitignore @@ -14,28 +14,32 @@ encog-core-cs/obj/ encog-core-test/bin/ encog-core-test/obj/ -encog-core-cs.5.1.ReSharper.user -EncogCmd/VSdoc/EncogCmd.hhc -EncogCmd/VSdoc/encog-core-cs_dyn_help.xml -EncogCmd/VSdoc/msdn2/CFW.gif -EncogCmd/VSdoc/msdn2/collall.gif -SOMColors/SOMColors.csproj.vs10x -ConsoleExamples/ConsoleExamples.csproj.vs10x -OCR/OCR.csproj.vs10x -encog-core-cs/encog-core-cs.csproj.vs10x -EncogCmd/VSdoc/unregister_encog-core-cs.bat -EncogCmd/VSdoc/HelixoftHelpReg.exe -EncogCmd/VSdoc/FINAL_CHM_DOC/encog-core-cs.chm -EncogCmd/VSdoc/encog-core-cs.chm -encog-core-cs.5.1.ReSharper.user -encog-core-cs.5.1.ReSharper.user -encog-core-cs.5.1.ReSharper.user -ConsoleExamples/ConsoleExamples.vsdoc -encog-core-cs.5.1.ReSharper.user -encog-core-cs.5.1.ReSharper.user -encog-core-cs.5.1.ReSharper.user -QuickConsoleTests/Program.cs -encog-core-cs.sln.vsdoc -EncogCmd/EncogCmd.csproj.vs10x -encog-core-cs/encog-core-cs.vsdoc -encog-core-cs.5.1.ReSharper.user.orig \ No newline at end of file +encog-core-cs.5.1.ReSharper.user +EncogCmd/VSdoc/EncogCmd.hhc +EncogCmd/VSdoc/encog-core-cs_dyn_help.xml +EncogCmd/VSdoc/msdn2/CFW.gif +EncogCmd/VSdoc/msdn2/collall.gif +SOMColors/SOMColors.csproj.vs10x +ConsoleExamples/ConsoleExamples.csproj.vs10x +OCR/OCR.csproj.vs10x +encog-core-cs/encog-core-cs.csproj.vs10x +EncogCmd/VSdoc/unregister_encog-core-cs.bat +EncogCmd/VSdoc/HelixoftHelpReg.exe +EncogCmd/VSdoc/FINAL_CHM_DOC/encog-core-cs.chm +EncogCmd/VSdoc/encog-core-cs.chm +encog-core-cs.5.1.ReSharper.user +encog-core-cs.5.1.ReSharper.user +encog-core-cs.5.1.ReSharper.user +ConsoleExamples/ConsoleExamples.vsdoc +encog-core-cs.5.1.ReSharper.user +encog-core-cs.5.1.ReSharper.user +encog-core-cs.5.1.ReSharper.user +QuickConsoleTests/Program.cs +encog-core-cs.sln.vsdoc +EncogCmd/EncogCmd.csproj.vs10x +encog-core-cs/encog-core-cs.vsdoc +encog-core-cs.5.1.ReSharper.user.orig +encog-extensions-test/bin +encog-extensions-test/obj +EncogExtensions/bin +EncogExtensions/obj diff --git a/EncogExtensions/Normalization/AnalystNormalizeDataSet.cs b/EncogExtensions/Normalization/AnalystNormalizeDataSet.cs new file mode 100644 index 00000000..615bbbd4 --- /dev/null +++ b/EncogExtensions/Normalization/AnalystNormalizeDataSet.cs @@ -0,0 +1,129 @@ +using Encog.App.Analyst; +using Encog.App.Analyst.Missing; +using Encog.App.Analyst.Script.Normalize; +using Encog.Util.Arrayutil; +using System; +using System.Data; + +namespace EncogExtensions.Normalization +{ + public class AnalystNormalizeDataSet + { + private EncogAnalyst _analyst; + + public AnalystNormalizeDataSet(EncogAnalyst analyst) + { + this._analyst = analyst; + } + + public double[,] Normalize(DataSet dataSet) + { + int outputLength = _analyst.DetermineTotalColumns(); + double[,] result = new double[dataSet.Tables[0].Rows.Count, outputLength]; + + for (var irow = 0; irow < dataSet.Tables[0].Rows.Count; irow++) + { + var normalizedRowFields = ExtractFields(_analyst, dataSet.Tables[0].Rows[irow], outputLength, false); + if (normalizedRowFields != null) + { + for (var iNormColumn = 0; iNormColumn < outputLength; iNormColumn++) + { + result[irow, iNormColumn] = normalizedRowFields[iNormColumn]; + } + } + } + + return result; + } + + private double[] ExtractFields(EncogAnalyst analyst, DataRow rowData, int outputLength, bool v) + { + var output = new double[outputLength]; + int outputIndex = 0; + + foreach (AnalystField stat in analyst.Script.Normalize.NormalizedFields) + { + stat.Init(); + if (stat.Action == NormalizationAction.Ignore) + { + continue; + } + + var index = Array.FindIndex(analyst.Script.Fields, x => x.Name == stat.Name); + var str = rowData[index].ToString(); + // is this an unknown value? + if (str.Equals("?") || str.Length == 0) + { + IHandleMissingValues handler = analyst.Script.Normalize.MissingValues; + double[] d = handler.HandleMissing(analyst, stat); + + // should we skip the entire row + if (d == null) + { + return null; + } + + // copy the returned values in place of the missing values + for (int i = 0; i < d.Length; i++) + { + output[outputIndex++] = d[i]; + } + } + else + { + // known value + + if (stat.Action == NormalizationAction.Normalize) + { + double d = double.Parse(str.Trim()); + d = stat.Normalize(d); + output[outputIndex++] = d; + } + else if (stat.Action == NormalizationAction.PassThrough) + { + double d = double.Parse(str); + output[outputIndex++] = d; + } + else + { + double[] d = stat.Encode(str.Trim()); + + foreach (double element in d) + { + output[outputIndex++] = element; + } + } + } + } + + return output; + } + + /* + private double[] ExtractFields(EncogAnalyst _analyst, DataSet dataSet, bool skipOutput) + { + var output = new List(); + int outputIndex = 0; + + foreach (AnalystField stat in _analyst.Script.Normalize.NormalizedFields) + { + stat.Init(); + if (stat.Action == NormalizationAction.Ignore) + { + continue; + } + + if (stat.Output && skipOutput) + { + continue; + } + + if(stat.Action == NormalizationAction.Normalize) + { + output[outputIndex] = + } + } + } + */ + } +} \ No newline at end of file diff --git a/EncogExtensions/Normalization/AnalystWizardExtensions.cs b/EncogExtensions/Normalization/AnalystWizardExtensions.cs new file mode 100644 index 00000000..980acfa7 --- /dev/null +++ b/EncogExtensions/Normalization/AnalystWizardExtensions.cs @@ -0,0 +1,41 @@ +using Encog.App.Analyst; +using Encog.App.Analyst.Wizard; +using System; +using System.Collections.Generic; +using System.Data; + +namespace EncogExtensions.Normalization +{ + public static class AnalystWizardExtensions + { + + + public static void Wizard(this AnalystWizard wizard, DataSet data) + { + EncogAnalyst analyst = wizard.GetPrivateField("_analyst"); + int lagWindowSize = wizard.GetPrivateField("_lagWindowSize"); + int leadWindowSize = wizard.GetPrivateField("_leadWindowSize"); + + wizard.SetPrivateField("_timeSeries", (lagWindowSize > 0 || leadWindowSize > 0)); + + wizard.CallPrivateMethod("DetermineClassification"); + wizard.CallPrivateMethod("GenerateSettings"); + + + analyst.Analyze(data); + + + wizard.CallPrivateMethod("GenerateNormalizedFields"); + wizard.CallPrivateMethod("GenerateSegregate"); + wizard.CallPrivateMethod("GenerateGenerate"); + wizard.CallPrivateMethod("GenerateTasks"); + + if (wizard.GetPrivateField("_timeSeries") + && (lagWindowSize > 0) + && (leadWindowSize > 0)) + { + wizard.CallPrivateMethod("ExpandTimeSlices"); + } + } + } +} diff --git a/EncogExtensions/Normalization/DatasetExtensions.cs b/EncogExtensions/Normalization/DatasetExtensions.cs new file mode 100644 index 00000000..157db967 --- /dev/null +++ b/EncogExtensions/Normalization/DatasetExtensions.cs @@ -0,0 +1,78 @@ +using Encog.ML.Data.Market; +using Encog.ML.Data.Market.Loader; +using System; +using System.Collections.Generic; +using System.Data; +using System.Linq; + +namespace EncogExtensions.Normalization +{ + public static class DatasetExtensions + { + public static DataSet Convert(this DataSet dataSet, List data, string dataSetName = "dataset") + { + if (data.Count < 1) + { + throw new ArgumentOutOfRangeException("Loaded Market Data passed to DataSet.Convert() method appears to be empty (Contains 0 Rows)."); + } + + var resultDataSet = new DataSet("dataset"); + DataTable table = new DataTable("Market Data Table"); + + var dataRowCount = data.Count(); + var initialDataColumns = new List(); + + AddInitialColumn(initialDataColumns, "StockSymbol", typeof(String)); + AddInitialColumn(initialDataColumns, "Day", typeof(int)); + AddInitialColumn(initialDataColumns, "Month", typeof(int)); + AddInitialColumn(initialDataColumns, "Year", typeof(int)); + CopyInitialColumnsToTable(table, initialDataColumns); + + var dataColumnInitialIndex = initialDataColumns.Count; + + foreach (KeyValuePair column in data[0].Data) + { + DataColumn dataColumn = new DataColumn(column.Key.ToString()); + dataColumn.DataType = column.Value.GetType(); + table.Columns.Add(dataColumn); + dataColumnInitialIndex++; + } + + for (var dataRowIndex = 0; dataRowIndex < dataRowCount; dataRowIndex++) + { + var row = table.NewRow(); + row[0] = data[dataRowIndex].Ticker.Symbol; // stock symbol + row[1] = data[dataRowIndex].When.Day; + row[2] = data[dataRowIndex].When.Month; + row[3] = data[dataRowIndex].When.Year; + + var dataColumnIndex = initialDataColumns.Count; + + foreach (KeyValuePair entry in data[dataRowIndex].Data) + { + row[dataColumnIndex] = entry.Value; + dataColumnIndex++; + } + + table.Rows.Add(row); + } + + resultDataSet.Tables.Add(table); + + return resultDataSet; + } + + private static void CopyInitialColumnsToTable(DataTable table, List initialDataColumns) + { + table.Columns.AddRange(initialDataColumns.ToArray()); + } + + private static void AddInitialColumn(List initialDataColumns, string name, Type dataType) + { + DataColumn InitialColumn = null; + InitialColumn = new DataColumn(name); + InitialColumn.DataType = dataType; + initialDataColumns.Add(InitialColumn); + } + } +} \ No newline at end of file diff --git a/EncogExtensions/Normalization/EncogAnalystExtensions.cs b/EncogExtensions/Normalization/EncogAnalystExtensions.cs new file mode 100644 index 00000000..e9ad0cbb --- /dev/null +++ b/EncogExtensions/Normalization/EncogAnalystExtensions.cs @@ -0,0 +1,20 @@ +using Encog.App.Analyst; +using Encog.App.Analyst.Analyze; +using System; +using System.Collections.Generic; +using System.Data; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace EncogExtensions.Normalization +{ + public static class EncogAnalystExtensions + { + public static void Analyze(this EncogAnalyst analyst, DataSet data) + { + var a = new PerformAnalysis(analyst.Script); + a.Process(analyst, data);// Kiran:2 + } + } +} diff --git a/EncogExtensions/Normalization/ObjectExtensions.cs b/EncogExtensions/Normalization/ObjectExtensions.cs new file mode 100644 index 00000000..54280121 --- /dev/null +++ b/EncogExtensions/Normalization/ObjectExtensions.cs @@ -0,0 +1,76 @@ +using System; +using System.Reflection; + +namespace EncogExtensions.Normalization +{ + // Source("http://www.codeproject.com/Articles/80343/Accessing-private-members.aspx") + + public static class ObjectExtensions + { + public static T GetPrivateField(this object obj, string name) + { + BindingFlags flags = BindingFlags.Instance | BindingFlags.NonPublic; + Type type = obj.GetType(); + FieldInfo field = type.GetField(name, flags); + return (T)field.GetValue(obj); + } + + public static T GetPrivateProperty(this object obj, string name) + { + BindingFlags flags = BindingFlags.Instance | BindingFlags.NonPublic; + Type type = obj.GetType(); + PropertyInfo field = type.GetProperty(name, flags); + return (T)field.GetValue(obj, null); + } + + public static void SetPrivateField(this object obj, string name, object value) + { + BindingFlags flags = BindingFlags.Instance | BindingFlags.NonPublic; + Type type = obj.GetType(); + FieldInfo field = type.GetField(name, flags); + field.SetValue(obj, value); + } + + public static void SetPrivateProperty(this object obj, string name, object value) + { + BindingFlags flags = BindingFlags.Instance | BindingFlags.NonPublic; + Type type = obj.GetType(); + PropertyInfo field = type.GetProperty(name, flags); + field.SetValue(obj, value, null); + } + + //Void No Parameters Implementation + public static void CallPrivateMethod(this object obj, string name) + { + BindingFlags flags = BindingFlags.Instance | BindingFlags.NonPublic; + Type type = obj.GetType(); + MethodInfo method = type.GetMethod(name, flags); + method.Invoke(obj,null); + } + + //Void with Parameters Implementation + public static void CallPrivateMethod(this object obj, string name, params object[] param) + { + BindingFlags flags = BindingFlags.Instance | BindingFlags.NonPublic; + Type type = obj.GetType(); + MethodInfo method = type.GetMethod(name, flags); + method.Invoke(obj, param); + } + + + //No Parameters Implementation + public static T CallPrivateMethod(this object obj, string name) + { + return CallPrivateMethod(obj, name, null); + } + + //Parameters and Return Implementation + public static T CallPrivateMethod(this object obj, string name, params object[] param) + { + BindingFlags flags = BindingFlags.Instance | BindingFlags.NonPublic; + Type type = obj.GetType(); + MethodInfo method = type.GetMethod(name, flags); + return (T)method.Invoke(obj, param); + } + } +} \ No newline at end of file diff --git a/EncogExtensions/Normalization/PerformAnalysis.cs b/EncogExtensions/Normalization/PerformAnalysis.cs new file mode 100644 index 00000000..baf1e871 --- /dev/null +++ b/EncogExtensions/Normalization/PerformAnalysis.cs @@ -0,0 +1,187 @@ +using Encog.App.Analyst; +using Encog.App.Analyst.Analyze; +using Encog.App.Analyst.Script; +using Encog.App.Analyst.Script.Prop; +using System; +using System.Collections.Generic; +using System.Data; + +namespace EncogExtensions.Normalization +{ + public class PerformAnalysis + { + private Encog.App.Analyst.Analyze.PerformAnalysis _performAnalysis; + private AnalystScript _script; + private AnalyzedField[] _fields; + + public PerformAnalysis(AnalystScript script) + { + _script = script; + } + + public void Process(EncogAnalyst target, DataSet data) + { + var count = 0; + + // pass one, calculate the min/max + count = PerformCalculateMinMax(data, count); + + // pass two, standard deviation + count = PerformStandardDeviation(data, count); + + String str = _script.Properties.GetPropertyString(ScriptProperties.SetupConfigAllowedClasses) ?? ""; + + bool allowInt = str.Contains("int"); + bool allowReal = str.Contains("real") || str.Contains("double"); + bool allowString = str.Contains("string"); + + + // remove any classes that did not qualify + RemoveUnqualifiedClasses(allowInt, allowReal, allowString); + + // merge with existing + MergeWithExisting(target); + + // now copy the fields + CopyDataFields(target); + } + + private void CopyDataFields(EncogAnalyst target) + { + var df = new DataField[_fields.Length]; + + for (int i_4 = 0; i_4 < df.Length; i_4++) + { + df[i_4] = _fields[i_4].FinalizeField(); + } + + target.Script.Fields = df; + } + + private void MergeWithExisting(EncogAnalyst target) + { + if ((target.Script.Fields != null) + && (_fields.Length == target.Script.Fields.Length)) + { + for (int i = 0; i < _fields.Length; i++) + { + // copy the old field name + _fields[i].Name = target.Script.Fields[i].Name; + + if (_fields[i].Class) + { + IList t = _fields[i].AnalyzedClassMembers; + IList s = target.Script.Fields[i].ClassMembers; + + if (s.Count == t.Count) + { + for (int j = 0; j < s.Count; j++) + { + if (t[j].Code.Equals(s[j].Code)) + { + t[j].Name = s[j].Name; + } + } + } + } + } + } + } + + private void RemoveUnqualifiedClasses(bool allowInt, bool allowReal, bool allowString) + { + foreach (AnalyzedField field in _fields) + { + if (field.Class) + { + if (!allowInt && field.Integer) + { + field.Class = false; + } + + if (!allowString && (!field.Integer && !field.Real)) + { + field.Class = false; + } + + if (!allowReal && field.Real && !field.Integer) + { + field.Class = false; + } + } + } + } + + private int PerformCalculateMinMax(DataSet data, int count) + { + for (int counter = 0; counter < data.Tables[0].Rows.Count; counter++) + { + if (_fields == null) + { + GenerateFields(data.Tables[0].Columns); + } + + var columnCount = data.Tables[0].Columns.Count; + for (int i = 0; i < columnCount; i++) + { + if (_fields != null) + { + var str = Convert.ToString(data.Tables[0].Rows[counter][i]); + _fields[i].Analyze1(str); + } + } + count++; + } + + if (count == 0) + { + throw new AnalystError("Can't analyse data, it is empty."); + } + + if (_fields != null) + { + foreach (AnalyzedField field in _fields) + { + field.CompletePass1(); + } + } + + return count; + } + + private int PerformStandardDeviation(DataSet data, int count) + { + for (int counter = 0; counter < data.Tables[0].Rows.Count; counter++) + { + var columnCount = data.Tables[0].Columns.Count; + for (int i = 0; i < columnCount; i++) + { + if (_fields != null) + { + _fields[i].Analyze2(Convert.ToString(data.Tables[0].Rows[0][i])); + } + } + count++; + } + + if (_fields != null) + { + foreach (AnalyzedField field in _fields) + { + field.CompletePass2(); + } + } + + return count; + } + + private void GenerateFields(DataColumnCollection columns) + { + _fields = new AnalyzedField[columns.Count]; + for (int i = 0; i < _fields.Length; i++) + { + _fields[i] = new AnalyzedField(_script, "field:" + (i + 1)); + } + } + } +} \ No newline at end of file diff --git a/EncogExtensions/Properties/AssemblyInfo.cs b/EncogExtensions/Properties/AssemblyInfo.cs new file mode 100644 index 00000000..1c157265 --- /dev/null +++ b/EncogExtensions/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("EncogExtensions")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("EncogExtensions")] +[assembly: AssemblyCopyright("Copyright © 2016")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("2c10eb93-d43a-4ce8-b42e-d4eaaacd2e67")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/EncogExtensions/encog-extensions.csproj b/EncogExtensions/encog-extensions.csproj new file mode 100644 index 00000000..9863a79f --- /dev/null +++ b/EncogExtensions/encog-extensions.csproj @@ -0,0 +1,69 @@ + + + + + Debug + AnyCPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67} + Library + Properties + EncogExtensions + encog-extensions + v4.5 + 512 + + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + + + + + + + + + + + + + + + + + + + + + + + {ac6fadf9-0904-4ebd-b22c-1c787c7e7a95} + encog-core-cs + + + + + \ No newline at end of file diff --git a/EncogExtensions/readme.txt b/EncogExtensions/readme.txt new file mode 100644 index 00000000..e4196b36 --- /dev/null +++ b/EncogExtensions/readme.txt @@ -0,0 +1,22 @@ +Encog Extensions Library + +Personally I found myself in a situation where I needed to use normalization with randomized +in memory data. The data could not be saved to a CSV as this would conflict with another frameworks +persistence mechanism. + +The encog analyst is fantastic for normalizing data. It can take information stored in a CSV file +and automatically determine the normalized fields and their type of encoding +(including 1 of N equilateral encoding). + +The only downside of this is that the logic is tightly coupled with the ReadCSV class. +Favouring extension as opposed to modification I decided to go about creating extension methods and +alternative classes to create an analyst that would normalize a generic .NET dataset. + +At the moment this logic is a partial re-write of the existing encog analyst. For this reason +much of the code is duplicated (which I appreciate is a bad thing). My intention is to eventually +bake the logic directly into encog-core-cs by modifying the existing implementation. + + + + + diff --git a/encog-core-cs.sln b/encog-core-cs.sln index e3a192d1..b22b96d2 100644 --- a/encog-core-cs.sln +++ b/encog-core-cs.sln @@ -1,6 +1,8 @@  -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual Studio 2010 +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "encog-core-cs", "encog-core-cs\encog-core-cs.csproj", "{AC6FADF9-0904-4EBD-B22C-1C787C7E7A95}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleExamples", "ConsoleExamples\ConsoleExamples.csproj", "{A65A5878-6336-4ACF-9C40-540F8FAF2CC7}" @@ -14,10 +16,11 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution TraceAndTestImpact.testsettings = TraceAndTestImpact.testsettings EndProjectSection EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "encog-extensions", "EncogExtensions\encog-extensions.csproj", "{2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "encog-extensions-test", "encog-extensions-test\encog-extensions-test.csproj", "{01E14205-D596-4580-ACD1-C23716D63516}" +EndProject Global - GlobalSection(TestCaseManagementSettings) = postSolution - CategoryFile = encog-core-cs.vsmdi - EndGlobalSection GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU Debug|Mixed Platforms = Debug|Mixed Platforms @@ -57,8 +60,35 @@ Global {7E02C68C-3412-4C39-BF1E-ECA40CEBA6B8}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU {7E02C68C-3412-4C39-BF1E-ECA40CEBA6B8}.Release|Mixed Platforms.Build.0 = Release|Any CPU {7E02C68C-3412-4C39-BF1E-ECA40CEBA6B8}.Release|x86.ActiveCfg = Release|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Debug|Any CPU.Build.0 = Debug|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Debug|x86.ActiveCfg = Debug|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Debug|x86.Build.0 = Debug|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Release|Any CPU.ActiveCfg = Release|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Release|Any CPU.Build.0 = Release|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Release|x86.ActiveCfg = Release|Any CPU + {2C10EB93-D43A-4CE8-B42E-D4EAAACD2E67}.Release|x86.Build.0 = Release|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Debug|Any CPU.Build.0 = Debug|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Debug|x86.ActiveCfg = Debug|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Debug|x86.Build.0 = Debug|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Release|Any CPU.ActiveCfg = Release|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Release|Any CPU.Build.0 = Release|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Release|x86.ActiveCfg = Release|Any CPU + {01E14205-D596-4580-ACD1-C23716D63516}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(TestCaseManagementSettings) = postSolution + CategoryFile = encog-core-cs.vsmdi + EndGlobalSection EndGlobal diff --git a/encog-extensions-test/Properties/AssemblyInfo.cs b/encog-extensions-test/Properties/AssemblyInfo.cs new file mode 100644 index 00000000..b63d2e1b --- /dev/null +++ b/encog-extensions-test/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("encog-extensions-test")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("encog-extensions-test")] +[assembly: AssemblyCopyright("Copyright © 2016")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("01e14205-d596-4580-acd1-c23716d63516")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/encog-extensions-test/TestDataSetNormalization.cs b/encog-extensions-test/TestDataSetNormalization.cs new file mode 100644 index 00000000..c80b3365 --- /dev/null +++ b/encog-extensions-test/TestDataSetNormalization.cs @@ -0,0 +1,66 @@ +using System; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System.Collections.Generic; +using Encog.ML.Data.Market; +using Encog.ML.Data.Market.Loader; +using Encog.App.Analyst; +using Encog.App.Analyst.Wizard; +using EncogExtensions.Normalization; +using System.Data; +using System.Linq; +using Encog.Util.Arrayutil; + +namespace encog_extensions_test +{ + [TestClass] + public class TestDataSetNormalization + { + public object ArrayUtil { get; private set; } + + [TestMethod] + public void Normalize_Some_In_Memory_Data() + { + List MarketData = new List(); + MarketData.AddRange(DownloadStockData("MSFT",TimeSpan.FromDays(10))); + MarketData.AddRange(DownloadStockData("AAPL", TimeSpan.FromDays(10))); + MarketData.AddRange(DownloadStockData("YHOO", TimeSpan.FromDays(10))); + + // Convert stock data to dataset using encog-extensions + DataSet dataSet = new DataSet().Convert(MarketData, "Market DataSet"); + + // use encog-extensions to normalize the dataset + var analyst = new EncogAnalyst(); + var wizard = new AnalystWizard(analyst); + wizard.Wizard(dataSet); + + // DataSet Goes In... 2D Double Array Comes Out... + var normalizer = new AnalystNormalizeDataSet(analyst); + var normalizedData = normalizer.Normalize(dataSet); + + // Assert data is not null and differs from original + Assert.IsNotNull(normalizedData); + Assert.AreNotEqual(normalizedData[0, 0], dataSet.Tables[0].Rows[0][0]); + + } + + private static List DownloadStockData(string stockTickerSymbol,TimeSpan timeSpan) + { + IList dataNeeded = new List(); + dataNeeded.Add(MarketDataType.AdjustedClose); + dataNeeded.Add(MarketDataType.Close); + dataNeeded.Add(MarketDataType.Open); + dataNeeded.Add(MarketDataType.High); + dataNeeded.Add(MarketDataType.Low); + dataNeeded.Add(MarketDataType.Volume); + + List MarketData = + new YahooFinanceLoader().Load( + new TickerSymbol(stockTickerSymbol), + dataNeeded, + DateTime.Now.Subtract(timeSpan), + DateTime.Now).ToList(); + + return MarketData; + } + } +} diff --git a/encog-extensions-test/encog-extensions-test.csproj b/encog-extensions-test/encog-extensions-test.csproj new file mode 100644 index 00000000..72ab661a --- /dev/null +++ b/encog-extensions-test/encog-extensions-test.csproj @@ -0,0 +1,94 @@ + + + + Debug + AnyCPU + {01E14205-D596-4580-ACD1-C23716D63516} + Library + Properties + encog_extensions_test + encog-extensions-test + v4.5.2 + 512 + {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + 10.0 + $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) + $(ProgramFiles)\Common Files\microsoft shared\VSTT\$(VisualStudioVersion)\UITestExtensionPackages + False + UnitTest + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + + + + + + + + + + + + + + + + + + + + + {ac6fadf9-0904-4ebd-b22c-1c787c7e7a95} + encog-core-cs + + + {2c10eb93-d43a-4ce8-b42e-d4eaaacd2e67} + encog-extensions + + + + + + + False + + + False + + + False + + + False + + + + + + + + \ No newline at end of file