Merge pull request #200 from NLeSC/keras-to-tensorflow

Keras to tensorflow
NLeSC committed on Nov 5, 2019
2 parents: 8518886 + c25fae6
commit bd1cde6
Show file tree

Hide file tree

Showing 13 changed files with 300 additions and 172 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Change Log
 
+## v2.0.0
+- Using Tensorflow.keras instead of Keras
+- Using keyword 'accuracy' instead of 'acc' in logs like latest keras versions do
+
 ## v1.0.5
 - Requirements change (keras<2.3.0)
 

diff --git a/environment.yml b/environment.yml
@@ -7,7 +7,4 @@ dependencies:
 - numpy
 - scikit-learn>=0.15.0
 - scipy>=0.11
-- six==1.10.0
-- Keras>=2.0.0
-- pandas>=0.17.1
-- tensorflow>=0.12.1
+- tensorflow>=1.0.0
diff --git a/html/js/plots.js b/html/js/plots.js
@@ -14,7 +14,8 @@ var isModelValid = function(model){
 	/// Returns true when a model is valid, and false otherwise. Checks can be added
 	/// later. They include at least checkin for the presence of NaN or null values 
 	/// in loss or accuracy arrays.
-    var floatArrayKeys = ["train_metric", "train_loss", "train_acc", "val_metric", "val_loss", "val_acc"];
+    var floatArrayKeys = ["train_metric", "train_loss", "train_acc", "train_accuracy",
+                          "val_metric", "val_loss", "val_acc", "val_accuracy"];
     for (var key of floatArrayKeys){
         if(key in model){
             var floatArray = model[key];

diff --git a/mcfly/_version.py b/mcfly/_version.py
@@ -15,4 +15,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = '1.0.5'
+__version__ = '2.0.0'
diff --git a/mcfly/find_architecture.py b/mcfly/find_architecture.py
@@ -27,14 +27,16 @@
  Example function calls can be found in the tutorial notebook
  (https://github.com/NLeSC/mcfly-tutorial)
 """
-import numpy as np
-from . import modelgen
-from sklearn import neighbors, metrics as sklearnmetrics
-import warnings
 import json
 import os
-from keras.callbacks import EarlyStopping
-from keras import metrics
+import warnings
+
+import numpy as np
+from sklearn import neighbors, metrics as sklearnmetrics
+from tensorflow.keras import metrics
+from tensorflow.keras.callbacks import EarlyStopping
+
+from . import modelgen
 
 
 def train_models_on_samples(X_train, y_train, X_val, y_val, models,
@@ -88,18 +90,18 @@ def train_models_on_samples(X_train, y_train, X_val, y_val, models,
     X_train_sub = X_train[:subset_size, :, :]
     y_train_sub = y_train[:subset_size, :]
 
-    metric_name = get_metric_name(metric)
+    metric_name = _get_metric_name(metric)
 
     histories = []
     val_metrics = []
     val_losses = []
     for i, (model, params, model_types) in enumerate(models):
         if verbose:
             print('Training model %d' % i, model_types)
-        model_metrics = [get_metric_name(name) for name in model.metrics]
+        model_metrics = [_get_metric_name(metric.name) for metric in model.metrics]
         if metric_name not in model_metrics:
-            raise ValueError(
-                'Invalid metric. The model was not compiled with {} as metric'.format(metric_name))
+            raise ValueError('Invalid metric: "{}" is not among the metrics the models was compiled with ({}).'
+                             .format(metric_name, model_metrics))
         if early_stopping:
             callbacks = [
                 EarlyStopping(monitor='val_loss', patience=0, verbose=verbose, mode='auto')]
@@ -113,18 +115,38 @@ def train_models_on_samples(X_train, y_train, X_val, y_val, models,
                             callbacks=callbacks)
         histories.append(history)
 
-        val_metrics.append(history.history['val_' + metric_name][-1])
-        val_losses.append(history.history['val_loss'][-1])
+        val_metrics.append(_get_from_history('val_' + metric_name, history.history)[-1])
+        val_losses.append(_get_from_history('val_loss', history.history)[-1])
         if outputfile is not None:
-            store_train_hist_as_json(params, model_types,
-                                     history.history, outputfile)
+            store_train_hist_as_json(params, model_types, history.history, outputfile)
         if model_path is not None:
-                model.save(os.path.join(model_path, 'model_{}.h5'.format(i)))
+            model.save(os.path.join(model_path, 'model_{}.h5'.format(i)))
 
     return histories, val_metrics, val_losses
 
 
-def store_train_hist_as_json(params, model_type, history, outputfile, metric_name='acc'):
+def _get_from_history(metric_name, history_history):
+    """Gets the metric from the history object. Tries to solve inconsistencies in abbreviation of accuracy between
+    Tensorflow/Keras versions. """
+    if metric_name == 'val_accuracy':
+        return _get_either_from_history('val_accuracy', 'val_acc', history_history)
+    elif metric_name == 'accuracy':
+        return _get_either_from_history('accuracy', 'acc', history_history)
+    else:
+        return history_history[metric_name]
+
+
+def _get_either_from_history(option1, option2, history_history):
+    try:
+        return history_history[option1]
+    except KeyError:
+        try:
+            return history_history[option2]
+        except KeyError:
+            raise KeyError('No {} or {} in history.'.format(option1, option2))
+
+
+def store_train_hist_as_json(params, model_type, history, outputfile, metric_name='accuracy'):
     """
     This function stores the model parameters, the loss and accuracy history
     of one model in a JSON file. It appends the model information to the
@@ -144,15 +166,15 @@ def store_train_hist_as_json(params, model_type, history, outputfile, metric_nam
         name of metric from history to store
     """
     jsondata = params.copy()
-    for k in jsondata.keys():
-        if isinstance(jsondata[k], np.ndarray):
-            jsondata[k] = jsondata[k].tolist()
-    jsondata['train_metric'] = history[metric_name]
-    jsondata['train_loss'] = history['loss']
-    jsondata['val_metric'] = history['val_' + metric_name]
-    jsondata['val_loss'] = history['val_loss']
+    jsondata['train_metric'] = _get_from_history(metric_name, history)
+    jsondata['train_loss'] = _get_from_history('loss', history)
+    jsondata['val_metric'] = _get_from_history('val_' + metric_name, history)
+    jsondata['val_loss'] = _get_from_history('val_loss', history)
     jsondata['modeltype'] = model_type
     jsondata['metric'] = metric_name
+    for k in jsondata.keys():
+        if isinstance(jsondata[k], np.ndarray) or isinstance(jsondata[k], list):
+            jsondata[k] = [_cast_to_primitive_type(element) for element in jsondata[k]]
     if os.path.isfile(outputfile):
         with open(outputfile, 'r') as outfile:
             previousdata = json.load(outfile)
@@ -164,6 +186,15 @@ def store_train_hist_as_json(params, model_type, history, outputfile, metric_nam
                   indent=4, ensure_ascii=False)
 
 
+def _cast_to_primitive_type(obj):
+    if isinstance(obj, np.floating):
+        return float(obj)
+    elif isinstance(obj, np.integer):
+        return int(obj)
+    else:
+        return obj
+
+
 def find_best_architecture(X_train, y_train, X_val, y_val, verbose=True,
                            number_of_models=5, nr_epochs=5, subset_size=100,
                            outputpath=None, model_path=None, metric='accuracy',
@@ -252,7 +283,7 @@ def find_best_architecture(X_train, y_train, X_val, y_val, verbose=True,
     return best_model, best_params, best_model_type, knn_acc
 
 
-def get_metric_name(name):
+def _get_metric_name(name):
     """
     Gives the keras name for a metric
 
@@ -265,7 +296,7 @@ def get_metric_name(name):
 
     """
     if name == 'acc' or name == 'accuracy':
-        return 'acc'
+        return 'accuracy'
     try:
         metric_fn = metrics.get(name)
         return metric_fn.__name__

diff --git a/mcfly/modelgen.py b/mcfly/modelgen.py
@@ -16,12 +16,12 @@
 # limitations under the License.
 #
 
-from keras.models import Sequential
-from keras.layers import Dense, Activation, Convolution1D, Lambda, \
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Activation, Convolution1D, Lambda, \
     Convolution2D, Flatten, \
     Reshape, LSTM, Dropout, TimeDistributed, BatchNormalization
-from keras.regularizers import l2
-from keras.optimizers import Adam
+from tensorflow.keras.regularizers import l2
+from tensorflow.keras.optimizers import Adam
 import numpy as np
 
 

diff --git a/mcfly/storage.py b/mcfly/storage.py
@@ -23,7 +23,7 @@
  implemented our own functions until the keras functionality has matured.
  Example function calls in 'Tutorial mcfly on PAMAP2.ipynb'
 """
-from keras.models import model_from_json
+from tensorflow.keras.models import model_from_json
 import json
 import numpy as np
 import os
@@ -48,16 +48,22 @@ def savemodel(model, filepath, modelname):
     numpy_path : str
         Path to npy file with weights
     """
-    json_string = model.to_json()  # save architecture to json string
+    json_path = _save_architecture_and_return_path(filepath, model, modelname)
+    numpy_path = _save_weights_and_return_path(filepath, model, modelname)
+    return json_path, numpy_path
+
+
+def _save_weights_and_return_path(filepath, model, modelname):
+    numpy_path = os.path.join(filepath, modelname + '_weights')
+    np.save(numpy_path, model.get_weights())
+    return numpy_path
+
+
+def _save_architecture_and_return_path(filepath, model, modelname):
     json_path = os.path.join(filepath, modelname + '_architecture.json')
     with open(json_path, 'w') as outfile:
-        json.dump(json_string, outfile, sort_keys=True, indent=4,
-                  ensure_ascii=False)
-    wweights = model.get_weights()  # get weight from model
-    numpy_path = os.path.join(filepath, modelname + '_weights')
-    np.save(numpy_path,
-            wweights)  # save weights in npy file
-    return json_path, numpy_path
+        json.dump(model.to_json(), outfile, sort_keys=True, indent=4, ensure_ascii=False)
+    return json_path
 
 
 def loadmodel(filepath, modelname):
@@ -72,18 +78,15 @@ def loadmodel(filepath, modelname):
 
     Returns
     ----------
-    model_repro : Keras object
+    model : Keras object
         reproduced model
     """
     with open(os.path.join(filepath, modelname + '_architecture.json'), 'r') as outfile:
-        json_string_loaded = json.load(outfile)
-    model_repro = model_from_json(json_string_loaded)
-    # wweights2 = model_repro.get_weights()
-    #  extracting the weights would give us the untrained/default weights
-    wweights_recovered = np.load(
-        os.path.join(filepath, modelname + '_weights.npy'))  # load the original weights
-    model_repro.set_weights(wweights_recovered)  # now set the weights
-    return model_repro
+        loaded_json = json.load(outfile)
+    model = model_from_json(loaded_json)
+    weights_path = os.path.join(filepath, modelname + '_weights.npy')
+    model.set_weights(np.load(weights_path, allow_pickle=True))
+    return model
 
 # If we would use standard Keras function, which stores model and weights
 # in HDF5 format it would look like code below. However, we did not use this

diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,5 @@
 numpy
 scikit-learn>=0.15.0
 scipy>=0.11
-six>=1.10.0
-Keras>=2.0.0,<2.3.0
-tensorflow>=0.12.1
+tensorflow>=1.0.0
 h5py