Commit

Merge branch 'master' of https://github.com/fchollet/keras
fchollet committed Oct 31, 2016
2 parents 4401120 + fee03bd commit cdab739
Showing 13 changed files with 120 additions and 37 deletions.
21 changes: 20 additions & 1 deletion docs/autogen.py
@@ -85,7 +85,10 @@
from keras import constraints
from keras import activations
from keras import regularizers
from keras.utils import data_utils
from keras.utils import io_utils
from keras.utils import layer_utils
from keras.utils import np_utils


EXCLUDE = {
@@ -244,11 +247,27 @@
'all_module_functions': [backend],
},
{
'page': 'io_utils.md',
'page': 'utils/data_utils.md',
'functions': [
data_utils.get_file,
]
},
{
'page': 'utils/io_utils.md',
'classes': [
io_utils.HDF5Matrix
],
},
{
'page': 'utils/layer_utils.md',
'functions': [
layer_utils.layer_from_config,
]
},
{
'page': 'utils/np_utils.md',
'all_module_functions': [np_utils]
},
]

ROOT = 'http://keras.io/'
6 changes: 4 additions & 2 deletions docs/mkdocs.yml
@@ -51,8 +51,10 @@ pages:
- Visualization: visualization.md
- Scikit-learn API: scikit-learn-api.md
- Utils:
- I/O Utils: io_utils.md

- Data Utils: utils/data_utils.md
- I/O Utils: utils/io_utils.md
- Layer Utils: utils/layer_utils.md
- Numpy Utils: utils/np_utils.md



19 changes: 17 additions & 2 deletions keras/backend/theano_backend.py
@@ -437,10 +437,25 @@ def batch_normalization(x, mean, var, beta, gamma, epsilon=0.0001):
use_cudnn = ndim < 5 and (dev.startswith('cuda') or dev.startswith('gpu'))
if use_cudnn:
try:
return theano.sandbox.cuda.dnn.dnn_batch_normalization_test(x, gamma, beta, mean, var,
'spatial', epsilon)
axis = mean.broadcastable.index(False)
if axis != 1:
shuffle_pattern = list(range(ndim))
shuffle_pattern[1] = shuffle_pattern[axis]
shuffle_pattern[axis] = 1
x = x.dimshuffle(shuffle_pattern)
mean = mean.dimshuffle(shuffle_pattern)
var = var.dimshuffle(shuffle_pattern)
beta = beta.dimshuffle(shuffle_pattern)
gamma = gamma.dimshuffle(shuffle_pattern)
normed = theano.sandbox.cuda.dnn.dnn_batch_normalization_test(x, gamma, beta, mean, var,
'spatial', epsilon)
if axis != 1:
normed = normed.dimshuffle(shuffle_pattern)
return normed
except AttributeError:
pass
except ValueError:
pass
return T.nnet.bn.batch_normalization(x, gamma, beta, mean, sqrt(var + epsilon),
mode='high_mem')

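Note on the change above: cuDNN's 'spatial' mode normalizes over axis 1, so the new code swaps the statistics' broadcast axis with axis 1 before the call. The swap pattern is its own inverse, which is why re-applying the same dimshuffle to normed restores the original layout. A minimal sketch of the pattern construction (ndim and axis values assumed for illustration):

    # Normalizing a 4D tensor over its last axis (axis=3):
    ndim, axis = 4, 3
    shuffle_pattern = list(range(ndim))         # [0, 1, 2, 3]
    shuffle_pattern[1] = shuffle_pattern[axis]  # [0, 3, 2, 3]
    shuffle_pattern[axis] = 1                   # [0, 3, 2, 1]
    # Swapping axes 1 and 3 twice is the identity, so the same
    # pattern maps the normalized output back to the input layout.
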
14 changes: 12 additions & 2 deletions keras/callbacks.py
@@ -314,6 +314,10 @@ class EarlyStopping(Callback):
# Arguments
monitor: quantity to be monitored.
min_delta: minimum change in the monitored quantity
to qualify as an improvement, i.e. an absolute
change of less than min_delta will count as no
improvement.
patience: number of epochs with no improvement
after which training will be stopped.
verbose: verbosity mode.
@@ -325,12 +329,13 @@ class EarlyStopping(Callback):
mode, the direction is automatically inferred
from the name of the monitored quantity.
'''
def __init__(self, monitor='val_loss', patience=0, verbose=0, mode='auto'):
def __init__(self, monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto'):
super(EarlyStopping, self).__init__()

self.monitor = monitor
self.patience = patience
self.verbose = verbose
self.min_delta = min_delta
self.wait = 0

if mode not in ['auto', 'min', 'max']:
@@ -349,6 +354,11 @@ def __init__(self, monitor='val_loss', patience=0, verbose=0, mode='auto'):
else:
self.monitor_op = np.less

if self.monitor_op == np.greater:
self.min_delta *= 1
else:
self.min_delta *= -1

def on_train_begin(self, logs={}):
self.wait = 0 # Allow instances to be re-used
self.best = np.Inf if self.monitor_op == np.less else -np.Inf
@@ -359,7 +369,7 @@ def on_epoch_end(self, epoch, logs={}):
warnings.warn('Early stopping requires %s available!' %
(self.monitor), RuntimeWarning)

if self.monitor_op(current, self.best):
if self.monitor_op(current - self.min_delta, self.best):
self.best = current
self.wait = 0
else:
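A usage sketch for the new min_delta argument (the model and data names are hypothetical): with the settings below, a val_loss decrease smaller than 0.001 counts as no improvement.

    from keras.callbacks import EarlyStopping

    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.001,
                                   patience=2, verbose=1, mode='auto')
    model.fit(x_train, y_train,
              validation_data=(x_val, y_val),
              nb_epoch=50, callbacks=[early_stopping])

Internally, min_delta is negated when the monitored quantity should decrease, so the check monitor_op(current - min_delta, best) only resets patience when current beats best by more than min_delta.
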
2 changes: 1 addition & 1 deletion keras/engine/training.py
@@ -1025,7 +1025,7 @@ def fit(self, x, y, batch_size=32, nb_epoch=10, verbose=1, callbacks=[],
on this data at the end of each epoch.
validation_data: data on which to evaluate the loss and any model metrics
at the end of each epoch. The model will not be trained on this data.
This could be a tuple (x_val, y_val) or a tuple (val_x, val_y, val_sample_weights).
This could be a tuple (x_val, y_val) or a tuple (x_val, y_val, val_sample_weights).
shuffle: boolean, whether to shuffle the training data before each epoch.
class_weight: optional dictionary mapping class indices (integers) to
a weight (float) to apply to the model's loss for the samples
9 changes: 7 additions & 2 deletions keras/models.py
@@ -8,7 +8,7 @@
from . import backend as K
from .utils.io_utils import ask_to_proceed_with_overwrite
from .engine.training import Model
from .engine.topology import get_source_inputs, Node
from .engine.topology import get_source_inputs, Node, Layer
from .optimizers import optimizer_from_config
from .legacy.models import Graph

@@ -260,6 +260,10 @@ def add(self, layer):
# Arguments
layer: layer instance.
'''
if not isinstance(layer, Layer):
raise ValueError('The added layer must be '
'an instance of class Layer. '
'Found: ' + str(layer))
if not self.outputs:
# first layer in model: check that it is an input layer
if len(layer.inbound_nodes) == 0:
@@ -573,7 +577,8 @@ def fit(self, x, y, batch_size=32, nb_epoch=10, verbose=1, callbacks=[],
See [callbacks](/callbacks).
validation_split: float (0. < x < 1).
Fraction of the data to use as held-out validation data.
validation_data: tuple (X, y) to be used as held-out
validation_data: tuple (x_val, y_val) or tuple
(x_val, y_val, val_sample_weights) to be used as held-out
validation data. Will override validation_split.
shuffle: boolean or str (for 'batch').
Whether to shuffle the samples at each epoch.
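With the new isinstance check, Sequential.add fails fast on a common mistake: passing the layer class instead of an instance (a hypothetical snippet):

    from keras.models import Sequential
    from keras.layers import Dense

    model = Sequential()
    model.add(Dense(32, input_dim=10))  # ok: a Layer instance
    model.add(Dense)                    # raises ValueError: 'The added layer
                                        # must be an instance of class Layer.'
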
2 changes: 1 addition & 1 deletion keras/preprocessing/image.py
@@ -411,7 +411,7 @@ def fit(self, X,

if self.zca_whitening:
flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3]))
sigma = np.dot(flatX.T, flatX) / flatX.shape[1]
sigma = np.dot(flatX.T, flatX) / flatX.shape[0]
U, S, V = linalg.svd(sigma)
self.principal_components = np.dot(np.dot(U, np.diag(1. / np.sqrt(S + 10e-7))), U.T)

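The one-character fix matters: with flatX of shape (n_samples, n_features), the sample covariance is np.dot(flatX.T, flatX) divided by the number of samples, i.e. flatX.shape[0], not flatX.shape[1]. A hedged numpy check (shapes assumed for illustration):

    import numpy as np

    flatX = np.random.randn(100, 3072)  # 100 samples of 32*32*3 features
    flatX -= flatX.mean(axis=0)         # ZCA assumes centered data
    sigma = np.dot(flatX.T, flatX) / flatX.shape[0]
    # Equals the biased sample covariance:
    # np.allclose(sigma, np.cov(flatX, rowvar=False, bias=True)) -> True
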
23 changes: 23 additions & 0 deletions keras/utils/data_utils.py
@@ -40,6 +40,20 @@ def chunk_read(response, chunk_size=8192, reporthook=None):

def get_file(fname, origin, untar=False,
md5_hash=None, cache_subdir='datasets'):
'''Downloads a file from a URL if it is not already in the cache.
Passing the MD5 hash will verify the file after download as well as if it is already present in the cache.
# Arguments
fname: name of the file
origin: original URL of the file
untar: boolean, whether the file should be decompressed
md5_hash: MD5 hash of the file for verification
cache_subdir: directory being used as the cache
# Returns
Path to the downloaded file
'''
datadir_base = os.path.expanduser(os.path.join('~', '.keras'))
if not os.access(datadir_base, os.W_OK):
datadir_base = os.path.join('/tmp', '.keras')
@@ -110,6 +124,15 @@ def dl_progress(count, block_size, total_size):


def validate_file(fpath, md5_hash):
'''Validates a file against an MD5 hash.
# Arguments
fpath: path to the file being validated
md5_hash: the MD5 hash being validated against
# Returns
Whether the file is valid
'''
hasher = hashlib.md5()
with open(fpath, 'rb') as f:
buf = f.read()
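A usage sketch of the documented signature (the URL and hash below are placeholders, not real values):

    from keras.utils.data_utils import get_file

    path = get_file('mnist.npz',
                    origin='https://example.com/mnist.npz',       # placeholder URL
                    md5_hash='d41d8cd98f00b204e9800998ecf8427e',  # placeholder hash
                    cache_subdir='datasets')
    # path resolves under ~/.keras/datasets, or /tmp/.keras/datasets
    # if ~/.keras is not writable (see datadir_base above).
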
10 changes: 8 additions & 2 deletions keras/utils/layer_utils.py
@@ -37,8 +37,14 @@ def layer_from_config(config, custom_objects={}):


def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33, .55, .67, 1.]):
# line_length: total length of printed lines
# positions: relative or absolute positions of log elements in each line
'''Prints a summary of a list of layers.
# Arguments
layers: list of layers to print summaries of
relevant_nodes: list of relevant nodes
line_length: total length of printed lines
positions: relative or absolute positions of log elements in each line
'''
if positions[-1] <= 1:
positions = [int(line_length * p) for p in positions]
# header names for the different log elements
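layer_from_config, now documented on its own page, rebuilds a layer from a {'class_name': ..., 'config': ...} dict. A minimal sketch; the config keys shown are assumptions for illustration:

    from keras.utils.layer_utils import layer_from_config

    config = {'class_name': 'Dense',
              'config': {'output_dim': 32, 'input_dim': 10, 'name': 'dense_1'}}
    dense = layer_from_config(config)
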
10 changes: 8 additions & 2 deletions keras/utils/np_utils.py
@@ -7,8 +7,14 @@


def to_categorical(y, nb_classes=None):
'''Convert class vector (integers from 0 to nb_classes)
to binary class matrix, for use with categorical_crossentropy.
'''Convert class vector (integers from 0 to nb_classes) to binary class matrix, for use with categorical_crossentropy.
# Arguments
y: class vector to be converted into a matrix
nb_classes: total number of classes
# Returns
A binary matrix representation of the input.
'''
if not nb_classes:
nb_classes = np.max(y)+1
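A worked example of the documented behavior (output shown as a comment):

    from keras.utils.np_utils import to_categorical

    to_categorical([0, 2, 1], nb_classes=3)
    # array([[ 1.,  0.,  0.],
    #        [ 0.,  0.,  1.],
    #        [ 0.,  1.,  0.]])
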
4 changes: 2 additions & 2 deletions keras/utils/test_utils.py
@@ -1,7 +1,7 @@
import numpy as np
from numpy.testing import assert_allclose
import inspect
import functools
import six

from ..engine import Model, Input
from ..models import Sequential, model_from_json
@@ -112,7 +112,7 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
def keras_test(func):
'''Clean up after tensorflow tests.
'''
@functools.wraps(func)
@six.wraps(func)
def wrapper(*args, **kwargs):
output = func(*args, **kwargs)
if K._BACKEND == 'tensorflow':
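Background on the six.wraps swap, as a hedged note: on Python 2, functools.wraps does not expose the wrapped function's signature, which breaks pytest's parametrization; six.wraps additionally records the original function (via a __wrapped__ attribute) so the signature can be recovered on both Python versions. This is what lets the version-specific workaround in tests/keras/layers/test_recurrent.py below be deleted.
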
23 changes: 16 additions & 7 deletions keras/utils/visualize_util.py
@@ -1,5 +1,7 @@
import os

from ..layers.wrappers import Wrapper

try:
# pydot-ng is a fork of pydot that is better maintained
import pydot_ng as pydot
@@ -23,17 +25,25 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
model = model.model
layers = model.layers

# first, populate the nodes of the graph
# Create graph nodes.
for layer in layers:
layer_id = str(id(layer))

# Append a wrapped layer's label to node's label, if it exists.
layer_name = layer.name
class_name = layer.__class__.__name__
if isinstance(layer, Wrapper):
layer_name = '{}({})'.format(layer_name, layer.layer.name)
class_name = '{}({})'.format(class_name, layer.layer.__class__.__name__)

# Create node's label.
if show_layer_names:
label = str(layer.name) + ' (' + layer.__class__.__name__ + ')'
label = '{}: {}'.format(layer_name, class_name)
else:
label = layer.__class__.__name__
label = class_name

# Rebuild the label as a table including input/output shapes.
if show_shapes:
# Build the label that will actually contain a table with the
# input/output
try:
outputlabels = str(layer.output_shape)
except:
@@ -50,13 +60,12 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True):
node = pydot.Node(layer_id, label=label)
dot.add_node(node)

# second, add the edges
# Connect nodes with edges.
for layer in layers:
layer_id = str(id(layer))
for i, node in enumerate(layer.inbound_nodes):
node_key = layer.name + '_ib-' + str(i)
if node_key in model.container_nodes:
# add edges
for inbound_layer in node.inbound_layers:
inbound_layer_id = str(id(inbound_layer))
layer_id = str(id(layer))
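Effect of the Wrapper handling above, with assumed default layer names: a TimeDistributed(Dense(...)) layer that previously rendered as 'timedistributed_1 (TimeDistributed)' now renders as

    timedistributed_1(dense_1): TimeDistributed(Dense)

so the wrapped layer is visible in the plot.
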
14 changes: 1 addition & 13 deletions tests/keras/layers/test_recurrent.py
@@ -1,5 +1,4 @@
import pytest
import sys
import numpy as np
from numpy.testing import assert_allclose

@@ -21,18 +20,7 @@ def rnn_test(f):
All the recurrent layers share the same interface,
so we can run through them with a single function.
"""
kf = keras_test(f)

def wrapped(layer_class):
return kf(layer_class)

# functools doesn't propagate arguments info for pytest correctly in 2.7
# and wrapped doesn't work with pytest in 3.4
if sys.version_info >= (3, 0):
f = kf
else:
f = wrapped

f = keras_test(f)
return pytest.mark.parametrize("layer_class", [
recurrent.SimpleRNN,
recurrent.GRU,
