Merge pull request Trusted-AI#990 from shashankkotyan/master

beat-buesser · web-flow · commit 910448647b0a · 2021-06-11T13:58:32.000+01:00
Fixes Bug in Pixel Attack and Threshold Attack.
diff --git a/art/attacks/evasion/pixel_threshold.py b/art/attacks/evasion/pixel_threshold.py
@@ -30,7 +30,7 @@
 
 import logging
 from itertools import product
-from typing import List, Optional, Tuple, Union, TYPE_CHECKING
+from typing import List, Optional, Tuple, TYPE_CHECKING
 
 import numpy as np
 
@@ -39,6 +39,7 @@
 # from scipy.optimize import differential_evolution
 # In the meantime, the modified implementation is used which is defined in the
 # lines `453-1457`.
+# Otherwise may use Tensorflow's implementation of DE.
 
 from six import string_types
 from scipy._lib._util import check_random_state
@@ -60,31 +61,32 @@
 class PixelThreshold(EvasionAttack):
     """
     These attacks were originally implemented by Vargas et al. (2019) & Su et al.(2019).
-
     | One Pixel Attack Paper link:
         https://ieeexplore.ieee.org/abstract/document/8601309/citations#citations
         (arXiv link: https://arxiv.org/pdf/1710.08864.pdf)
     | Pixel and Threshold Attack Paper link:
         https://arxiv.org/abs/1906.06026
     """
 
-    attack_params = EvasionAttack.attack_params + ["th", "es", "targeted", "verbose"]
+    attack_params = EvasionAttack.attack_params + ["th", "es", "max_iter", "targeted", "verbose", "verbose_es"]
     _estimator_requirements = (BaseEstimator, NeuralNetworkMixin, ClassifierMixin)
 
     def __init__(
         self,
         classifier: "CLASSIFIER_NEURALNETWORK_TYPE",
-        th: Optional[int],
-        es: int,
-        targeted: bool,
+        th: Optional[int] = None,
+        es: int = 0,
+        max_iter: int = 100,
+        targeted: bool = False,
         verbose: bool = True,
+        verbose_es: bool = False,
     ) -> None:
         """
         Create a :class:`.PixelThreshold` instance.
-
         :param classifier: A trained classifier.
         :param th: threshold value of the Pixel/ Threshold attack. th=None indicates finding a minimum threshold.
         :param es: Indicates whether the attack uses CMAES (0) or DE (1) as Evolutionary Strategy.
+        :param max_iter: Sets the Maximum iterations to run the Evolutionary Strategies for optimisation.
         :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
         :param verbose: Print verbose messages of ES and show progress bars.
         """
@@ -94,8 +96,10 @@ def __init__(
         self.type_attack = -1
         self.th = th  # pylint: disable=C0103
         self.es = es  # pylint: disable=C0103
+        self.max_iter = max_iter
         self._targeted = targeted
         self.verbose = verbose
+        self.verbose_es = verbose_es
         PixelThreshold._check_params(self)
 
         if self.estimator.channels_first:
@@ -121,15 +125,20 @@ def _check_params(self) -> None:
         if not isinstance(self.verbose, bool):
             raise ValueError("The flag `verbose` has to be of type bool.")
 
-        if not isinstance(self.verbose, bool):
+        if not isinstance(self.verbose_es, bool):
             raise ValueError("The argument `verbose` has to be of type bool.")
+        if self.estimator.clip_values is None:
+            raise ValueError("This attack requires estimator clip values to be defined.")
 
-    def generate(  # pylint: disable=W0221
-        self, x: np.ndarray, y: Optional[np.ndarray] = None, max_iter: int = 100, **kwargs
-    ) -> np.ndarray:
+    def rescale_input(self, x):
+        """Rescale inputs"""
+        x = x.astype(np.float32) / 255.0
+        x = (x * (self.estimator.clip_values[1] - self.estimator.clip_values[0])) + self.estimator.clip_values[0]
+        return x
+
+    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
         """
         Generate adversarial samples and return them in an array.
-
         :param x: An array with the original inputs.
         :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                   (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
@@ -149,47 +158,70 @@ def generate(  # pylint: disable=W0221
                 y = np.argmax(y, axis=1)
 
         if self.th is None:
-            logger.info("Performing minimal perturbation Attack.")
+            logger.info(
+                "Performing minimal perturbation Attack. \
+                This takes substainally long time to process. \
+                For sanity check, pass th=10 to the Attack instance."
+            )
 
-        scale_input = bool(np.max(x) <= 1)
+        # NOTE: Pixel and Threshold Attacks are well defined for unprocessed images where the pixel values are,
+        #       8-Bit color i.e., the pixel values are np.uint8 in range [0, 255].
 
-        if scale_input:
+        # TO-DO: Better checking of input image.
+        #        All other cases not tested needs the images to be rescaled to [0, 255].
+        if self.estimator.clip_values[1] != 255.0:
+            self.rescale = True
+            x = (x - self.estimator.clip_values[0]) / (self.estimator.clip_values[1] - self.estimator.clip_values[0])
             x = x * 255.0
 
+        x = x.astype(np.uint8)
+
         adv_x_best = []
+        self.adv_th = []
         for image, target_class in tqdm(zip(x, y), desc="Pixel threshold", disable=not self.verbose):
+
             if self.th is None:
-                self.min_th = 127
+
+                min_th = -1
                 start, end = 1, 127
+
+                image_result = image
+
                 while True:
-                    image_result: Union[List[np.ndarray], np.ndarray] = []
+
                     threshold = (start + end) // 2
-                    success, trial_image_result = self._attack(image, target_class, threshold, max_iter)
-                    if image_result or success:
-                        image_result = trial_image_result
+                    success, trial_image_result = self._attack(image, target_class, threshold)
+
                     if success:
+                        image_result = trial_image_result
                         end = threshold - 1
+                        min_th = threshold
                     else:
                         start = threshold + 1
-                    if success:
-                        self.min_th = threshold
+
                     if end < start:
-                        if isinstance(image_result, list) and not image_result:
-                            # success = False
-                            image_result = image
                         break
+
+                self.adv_th = [min_th]
+
             else:
-                success, image_result = self._attack(image, target_class, self.th, max_iter)
+
+                success, image_result = self._attack(image, target_class, self.th)
+
+                if not success:
+                    image_result = image
+
             adv_x_best += [image_result]
 
         adv_x_best_array = np.array(adv_x_best)
 
-        if scale_input:
-            adv_x_best_array = adv_x_best_array / 255.0
-
         if y is not None:
             y = to_categorical(y, self.estimator.nb_classes)
 
+        if self.rescale:
+            x = self.rescale_input(x)
+            adv_x_best_array = self.rescale_input(adv_x_best_array)
+
         logger.info(
             "Success rate of Attack: %.2f%%",
             100 * compute_success(self.estimator, x, y, adv_x_best_array, self.targeted, 1),
@@ -230,37 +262,45 @@ def _attack_success(self, adv_x, x, target_class):
         """
         Checks whether the given perturbation `adv_x` for the image `img` is successful.
         """
-        predicted_class = np.argmax(self.estimator.predict(self._perturb_image(adv_x, x))[0])
+        adv = self._perturb_image(adv_x, x)
+
+        if self.rescale:
+            adv = self.rescale_input(adv)
+
+        predicted_class = np.argmax(self.estimator.predict(adv)[0])
         return bool(
             (self.targeted and predicted_class == target_class)
             or (not self.targeted and predicted_class != target_class)
         )
 
-    def _attack(
-        self, image: np.ndarray, target_class: np.ndarray, limit: int, max_iter: int
-    ) -> Tuple[bool, np.ndarray]:
+    def _attack(self, image: np.ndarray, target_class: np.ndarray, limit: int) -> Tuple[bool, np.ndarray]:
         """
         Attack the given image `image` with the threshold `limit` for the `target_class` which is true label for
         untargeted attack and targeted label for targeted attack.
         """
         bounds, initial = self._get_bounds(image, limit)
 
         def predict_fn(x):
-            predictions = self.estimator.predict(self._perturb_image(x, image))[:, target_class]
+            adv = self._perturb_image(x, image)
+
+            if self.rescale:
+                adv = self.rescale_input(adv)
+
+            predictions = self.estimator.predict(adv)[:, target_class]
             return predictions if not self.targeted else 1 - predictions
 
         def callback_fn(x, convergence=None):  # pylint: disable=R1710,W0613
             if self.es == 0:
                 if self._attack_success(x.result[0], image, target_class):
-                    raise Exception("Attack Completed :) Earlier than expected")
+                    raise CMAEarlyStoppingException("Attack Completed :) Earlier than expected")
             else:
                 return self._attack_success(x, image, target_class)
 
         if self.es == 0:
             from cma import CMAOptions
 
             opts = CMAOptions()
-            if not self.verbose:
+            if not self.verbose_es:
                 opts.set("verbose", -9)
                 opts.set("verb_disp", 40000)
                 opts.set("verb_log", 40000)
@@ -282,18 +322,19 @@ def callback_fn(x, convergence=None):  # pylint: disable=R1710,W0613
                     predict_fn,
                     maxfun=max(1, 400 // len(bounds)) * len(bounds) * 100,
                     callback=callback_fn,
-                    iterations=max_iter,
+                    iterations=self.max_iter,
                 )
-            except Exception as exception:  # pylint: disable=W0703
-                logger.info(exception)
+            except CMAEarlyStoppingException as err:
+                if self.verbose_es:
+                    logger.info(err)
 
             adv_x = strategy.result[0]
         else:
             strategy = differential_evolution(
                 predict_fn,
                 bounds,
-                disp=self.verbose,
-                maxiter=max_iter,
+                disp=self.verbose_es,
+                maxiter=self.max_iter,
                 popsize=max(1, 400 // len(bounds)),
                 recombination=1,
                 atol=-1,
@@ -312,7 +353,6 @@ class PixelAttack(PixelThreshold):
     """
     This attack was originally implemented by Vargas et al. (2019). It is generalisation of One Pixel Attack originally
     implemented by Su et al. (2019).
-
     | One Pixel Attack Paper link:
         https://ieeexplore.ieee.org/abstract/document/8601309/citations#citations
         (arXiv link: https://arxiv.org/pdf/1710.08864.pdf)
@@ -324,20 +364,21 @@ def __init__(
         self,
         classifier: "CLASSIFIER_NEURALNETWORK_TYPE",
         th: Optional[int] = None,
-        es: int = 0,
+        es: int = 1,
+        max_iter: int = 100,
         targeted: bool = False,
         verbose: bool = False,
     ) -> None:
         """
         Create a :class:`.PixelAttack` instance.
-
         :param classifier: A trained classifier.
         :param th: threshold value of the Pixel/ Threshold attack. th=None indicates finding a minimum threshold.
         :param es: Indicates whether the attack uses CMAES (0) or DE (1) as Evolutionary Strategy.
+        :param max_iter: Sets the Maximum iterations to run the Evolutionary Strategies for optimisation.
         :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
         :param verbose: Indicates whether to print verbose messages of ES used.
         """
-        super().__init__(classifier, th, es, targeted, verbose)
+        super().__init__(classifier, th, es, max_iter, targeted, verbose)
         self.type_attack = 0
 
     def _perturb_image(self, x: np.ndarray, img: np.ndarray) -> np.ndarray:
@@ -395,7 +436,6 @@ def _get_bounds(self, img: np.ndarray, limit) -> Tuple[List[list], list]:
 class ThresholdAttack(PixelThreshold):
     """
     This attack was originally implemented by Vargas et al. (2019).
-
     | Paper link:
         https://arxiv.org/abs/1906.06026
     """
@@ -405,19 +445,20 @@ def __init__(
         classifier: "CLASSIFIER_NEURALNETWORK_TYPE",
         th: Optional[int] = None,
         es: int = 0,
+        max_iter: int = 100,
         targeted: bool = False,
         verbose: bool = False,
     ) -> None:
         """
         Create a :class:`.PixelThreshold` instance.
-
         :param classifier: A trained classifier.
         :param th: threshold value of the Pixel/ Threshold attack. th=None indicates finding a minimum threshold.
         :param es: Indicates whether the attack uses CMAES (0) or DE (1) as Evolutionary Strategy.
+        :param max_iter: Sets the Maximum iterations to run the Evolutionary Strategies for optimisation.
         :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
         :param verbose: Indicates whether to print verbose messages of ES used.
         """
-        super().__init__(classifier, th, es, targeted, verbose)
+        super().__init__(classifier, th, es, max_iter, targeted, verbose)
         self.type_attack = 1
 
     def _perturb_image(self, x: np.ndarray, img: np.ndarray) -> np.ndarray:
@@ -440,6 +481,12 @@ def _perturb_image(self, x: np.ndarray, img: np.ndarray) -> np.ndarray:
         return imgs
 
 
+class CMAEarlyStoppingException(Exception):
+    """Raised when CMA is stopping early after successful optimisation."""
+
+    pass
+
+
 # TODO: Make the attack compatible with current version of SciPy Optimize
 # Differential Evolution
 # pylint: disable=W0105
@@ -448,9 +495,7 @@ def _perturb_image(self, x: np.ndarray, img: np.ndarray) -> np.ndarray:
 To speed up predictions, the entire parameters array is passed to `self.func`,
 where a neural network model can batch its computations and execute in parallel
 Search for `CHANGES` to find all code changes.
-
 Dan Kondratyuk 2018
-
 Original code adapted from
 https://github.com/scipy/scipy/blob/70e61dee181de23fdd8d893eaa9491100e2218d7/scipy/optimize/_differentialevolution.py
 ----------
diff --git a/tests/attacks/test_pixel_attack.py b/tests/attacks/test_pixel_attack.py
@@ -136,8 +136,8 @@ def _test_attack(self, classifier, x_test, y_test, targeted):
             targets = y_test
 
         for es in [1]:  # Option 0 is not easy to reproduce reliably, we should consider it at a later time
-            df = PixelAttack(classifier, th=64, es=es, targeted=targeted, verbose=False)
-            x_test_adv = df.generate(x_test_original, targets, max_iter=10)
+            df = PixelAttack(classifier, th=64, es=es, max_iter=10, targeted=targeted, verbose=False)
+            x_test_adv = df.generate(x_test_original, targets)
 
             np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, x_test, x_test_adv)
             self.assertFalse((0.0 == x_test_adv).all())
diff --git a/tests/attacks/test_threshold_attack.py b/tests/attacks/test_threshold_attack.py
@@ -127,8 +127,8 @@ def _test_attack(self, classifier, x_test, y_test, targeted):
             targets = y_test
 
         for es in [1]:  # Option 0 is not easy to reproduce reliably, we should consider it at a later time
-            df = ThresholdAttack(classifier, th=128, es=es, targeted=targeted, verbose=False)
-            x_test_adv = df.generate(x_test_original, targets, max_iter=10)
+            df = ThresholdAttack(classifier, th=128, es=es, max_iter=10, targeted=targeted, verbose=False)
+            x_test_adv = df.generate(x_test_original, targets)
 
             np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, x_test, x_test_adv)
             self.assertFalse((0.0 == x_test_adv).all())