Fix OOM when adversarial testing
nitbix committed Feb 17, 2021
1 parent 35b1545 commit 48aec85
Showing 6 changed files with 50 additions and 62 deletions.
7 changes: 5 additions & 2 deletions bin/base_model.py
@@ -15,7 +15,7 @@
 import logging
 import toupee as tp
 
-def main(args=None):
+def main(args=None, params=None):
     """ Train a base model as specified """
     parser = argparse.ArgumentParser(description='Train a single Base Model')
     parser.add_argument('params_file', help='the parameters file')
@@ -32,7 +32,10 @@ def main(args=None):
     args = parser.parse_args(args)
     logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
     logging.info(("using toupee version {0}".format(tp.version)))
-    params = tp.config.load_parameters(args.params_file)
+    if not params:
+        params = tp.config.load_parameters(args.params_file)
+    if args.epochs:
+        params.epochs = args.epochs
     if args.wandb:
         import wandb
         wandb_project = args.wandb_project or f"toupee-{params.dataset}-base_model"
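
The new params keyword mirrors the one bin/ensemble.py already accepts: a driver can load the parameters once and pass the object in directly, skipping the file read, while an explicit epoch count from the command line (parsed into args.epochs above this hunk) overrides the configured one. A minimal sketch of programmatic use, assuming bin/ is on the import path and an experiment.yaml params file (both illustrative, not from the commit):

import toupee as tp
import base_model  # bin/base_model.py, assuming bin/ is on sys.path

# Load once, then reuse across invocations without re-reading the file.
params = tp.config.load_parameters("experiment.yaml")
base_model.main(args=["experiment.yaml"], params=params)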
2 changes: 2 additions & 0 deletions bin/ensemble.py
@@ -37,6 +37,8 @@ def main(args=None, params=None):
     logging.info(("using toupee version {0}".format(tp.version)))
     if not params:
         params = tp.config.load_parameters(args.params_file)
+    if args.epochs:
+        params.epochs = args.epochs
     data = tp.data.Dataset(src_dir=params.dataset, **params.__dict__)
     wandb_params = None
     if args.wandb:
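
ensemble.py gets the same precedence rule: a command-line epoch count beats the value in the params file, which allows quick smoke-test runs without editing the config. With made-up numbers:

# params file says epochs: 100, the CLI passed 5
params = tp.config.load_parameters("experiment.yaml")  # params.epochs == 100
if args.epochs:                                        # args.epochs == 5
    params.epochs = args.epochs                        # the run trains for 5 epochs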
5 changes: 3 additions & 2 deletions toupee/adversarial.py
@@ -16,11 +16,12 @@
 
 
 def FGSM(model: toupee.model.Model, X: np.ndarray, Y: np.ndarray) -> np.ndarray:
+    # WARNING: this treats everything as a single batch!
     with tf.GradientTape() as tape:
-        X = tf.cast(X, tf.float32)
+        X = tf.convert_to_tensor(X)
         tape.watch(X)
         prediction = model._model(X)
         loss = model._loss(Y, prediction)
     gradient = tape.gradient(loss, X)
     signed_grad = tf.sign(gradient)
-    return signed_grad.numpy()
+    return signed_grad.numpy()
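
The new WARNING is the heart of this commit: FGSM records a gradient tape over everything it is handed, so calling it once on a concatenated test set keeps the whole set, its activations, and its gradients live on the accelerator at once; that is what ran out of memory. The refactored evaluation loop (toupee/metrics.py below) calls it per batch and concatenates NumPy results on the host. A rough sketch of the two access patterns, assuming test_data yields (x, y) batches:

import numpy as np

# Before: one tape over the full test set -> OOM on large datasets
# perturbation = FGSM(model, x_full, y_full)

# After: one small tape per batch; only host (NumPy) memory grows
perturbation = np.concatenate([FGSM(model, x, y) for (x, y) in test_data])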
30 changes: 2 additions & 28 deletions toupee/ensembles/methods.py
@@ -153,34 +153,8 @@ def evaluate(self, test_data=None):
         """ Evaluate model on some test data """
         #TODO: update for different data formats
         test_data = test_data or self.data.data['test']
-        all_y_pred = []
-        all_y_true = []
-        all_y_pred_onehot = []
-        all_y_true_onehot = []
-        all_x = []
-        for (x, y_true) in test_data:
-            all_x.append(x)
-            all_y_pred.append(self.predict_classes(x))
-            all_y_true.append(np.argmax(y_true.numpy(), axis=1))
-            all_y_pred_onehot.append(self.predict_proba(x))
-            all_y_true_onehot.append(y_true.numpy())
-        x = np.concatenate(all_x)
-        y_pred = np.concatenate(all_y_pred)
-        y_true = np.concatenate(all_y_true)
-        y_pred_onehot = np.concatenate(all_y_pred_onehot)
-        y_true_onehot = np.concatenate(all_y_true_onehot)
-        scores = tp.utils.eval_scores(y_true=y_true, y_pred=y_pred, y_true_onehot=y_true_onehot, y_pred_onehot=y_pred_onehot)
-        if self.adversarial_testing:
-            adversarial_scores = {}
-            # convention: we take the adversarial perturbations from the first member
-            adversarial_perturbation = tp.adversarial.FGSM(self.members[0], x, y_true_onehot)
-            for epsilon in tp.ADVERSARIAL_EPSILONS:
-                adversarial_x = x + epsilon * adversarial_perturbation
-                y_adv = self.predict_classes(adversarial_x)
-                y_adv_onehot = self.predict_proba(adversarial_x)
-                adversarial_scores[epsilon] = tp.utils.eval_scores(y_true, y_adv, y_true_onehot, y_adv_onehot)
-            scores['adversarial'] = adversarial_scores
-        return scores
+        return tp.metrics.evaluate(self, test_data, self.members[0] if self.adversarial_testing else None)
 
 
 ### Ensemble Types / Class Templates ###
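
The ensemble's private copy of the evaluation loop is gone; it delegates to the shared tp.metrics.evaluate, preserving the convention from the deleted comment that adversarial perturbations come from the first member, self.members[0]. A hypothetical call on a trained ensemble; the 'accuracy' key is an assumption about what tp.utils.eval_scores produces:

scores = ensemble.evaluate()        # falls back to self.data.data['test']
print(scores['accuracy'])           # assumed key from tp.utils.eval_scores
if ensemble.adversarial_testing:
    print(scores['adversarial'])    # {epsilon: scores_dict, ...}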
39 changes: 37 additions & 2 deletions toupee/metrics.py
@@ -12,7 +12,7 @@
 
 import numpy as np # type: ignore
 from sklearn.calibration import calibration_curve # type: ignore
-
+import toupee as tp
 
 def ece_binary(prob_true, prob_pred, bin_sizes):
     ece = 0
@@ -48,4 +48,39 @@ def calibration(y_true, y_pred, n_bins=10):
     return {'ece': np.mean(ece_bin),
             'mce': np.mean(mce_bin),
             'rmsce': np.mean(rmsce_bin)
-           }
+           }
+
+def evaluate(model, test_data, adversarial_gradient_source=None):
+    """ Evaluate model on some test data handle """
+    #TODO: update for different data formats
+    all_y_pred = []
+    all_y_true = []
+    all_y_pred_onehot = []
+    all_y_true_onehot = []
+    all_x = []
+    all_adversarial = []
+    for (x, y_true) in test_data:
+        all_x.append(x)
+        all_y_pred.append(model.predict_classes(x))
+        all_y_pred_onehot.append(model.predict_proba(x))
+        all_y_true.append(np.argmax(y_true.numpy(), axis=1))
+        all_y_true_onehot.append(y_true.numpy())
+        if adversarial_gradient_source:
+            all_adversarial.append(tp.adversarial.FGSM(adversarial_gradient_source, x, y_true))
+    x = np.concatenate(all_x)
+    if adversarial_gradient_source:
+        adversarial_perturbation = np.concatenate(all_adversarial)
+    y_pred = np.concatenate(all_y_pred)
+    y_true = np.concatenate(all_y_true)
+    y_pred_onehot = np.concatenate(all_y_pred_onehot)
+    y_true_onehot = np.concatenate(all_y_true_onehot)
+    scores = tp.utils.eval_scores(y_true=y_true, y_pred=y_pred, y_true_onehot=y_true_onehot, y_pred_onehot=y_pred_onehot)
+    if adversarial_gradient_source:
+        adversarial_scores = {}
+        for epsilon in tp.ADVERSARIAL_EPSILONS:
+            adversarial_x = x + epsilon * adversarial_perturbation
+            y_adv = model.predict_classes(adversarial_x)
+            y_adv_onehot = model.predict_proba(adversarial_x)
+            adversarial_scores[epsilon] = tp.utils.eval_scores(y_true, y_adv, y_true_onehot, y_adv_onehot)
+        scores['adversarial'] = adversarial_scores
+    return scores
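
When adversarial_gradient_source is set, the returned dict gains an 'adversarial' entry mapping each epsilon in tp.ADVERSARIAL_EPSILONS to a full scores dict computed on x + epsilon * perturbation. A consumption sketch, with model and test_data assumed to exist and 'accuracy' again an assumed score key:

import toupee as tp

scores = tp.metrics.evaluate(model, test_data, adversarial_gradient_source=model)
print("clean accuracy:", scores['accuracy'])
for epsilon, adv in sorted(scores['adversarial'].items()):
    print(f"epsilon={epsilon}: adversarial accuracy {adv['accuracy']}")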
29 changes: 1 addition & 28 deletions toupee/model.py
@@ -182,34 +182,7 @@ def fit(self, data: tp.data.Dataset, epochs=None, verbose=None, log_wandb:bool=F
 
     def evaluate(self, test_data, adversarial:bool=False):
         """ Evaluate model on some test data handle """
-        #TODO: update for different data formats
-        all_y_pred = []
-        all_y_true = []
-        all_y_pred_onehot = []
-        all_y_true_onehot = []
-        all_x = []
-        for (x, y_true) in test_data:
-            all_x.append(x)
-            all_y_pred.append(self.predict_classes(x))
-            all_y_pred_onehot.append(self.predict_proba(x))
-            all_y_true.append(np.argmax(y_true.numpy(), axis=1))
-            all_y_true_onehot.append(y_true.numpy())
-        x = np.concatenate(all_x)
-        y_pred = np.concatenate(all_y_pred)
-        y_true = np.concatenate(all_y_true)
-        y_pred_onehot = np.concatenate(all_y_pred_onehot)
-        y_true_onehot = np.concatenate(all_y_true_onehot)
-        scores = tp.utils.eval_scores(y_true=y_true, y_pred=y_pred, y_true_onehot=y_true_onehot, y_pred_onehot=y_pred_onehot)
-        if adversarial:
-            adversarial_scores = {}
-            adversarial_perturbation = tp.adversarial.FGSM(self, x, y_true_onehot)
-            for epsilon in tp.ADVERSARIAL_EPSILONS:
-                adversarial_x = x + epsilon * adversarial_perturbation
-                y_adv = self.predict_classes(adversarial_x)
-                y_adv_onehot = self.predict_proba(adversarial_x)
-                adversarial_scores[epsilon] = tp.utils.eval_scores(y_true, y_adv, y_true_onehot, y_adv_onehot)
-            scores['adversarial'] = adversarial_scores
-        return scores
+        return tp.metrics.evaluate(self, test_data, adversarial_gradient_source=self if adversarial else None)
 
     def predict_proba(self, X):
         """ Output logits """
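
Model.evaluate keeps its public signature: adversarial=True simply names the model itself as the gradient source, so single models and ensembles now share one batched, OOM-safe evaluation path. Callers are unaffected:

scores = model.evaluate(test_data)                    # clean metrics only
scores = model.evaluate(test_data, adversarial=True)  # adds scores['adversarial']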
