diff --git a/.github/workflows/ci-tensorflow-v1.yml b/.github/workflows/ci-tensorflow-v1.yml index a66a082b14..ad287653a7 100644 --- a/.github/workflows/ci-tensorflow-v1.yml +++ b/.github/workflows/ci-tensorflow-v1.yml @@ -48,7 +48,7 @@ jobs: sudo apt-get update sudo apt-get -y -q install ffmpeg libavcodec-extra python -m pip install --upgrade pip setuptools wheel - pip install -q -r <(sed '/^pandas/d;/^scipy/d;/^matplotlib/d;/^xgboost/d;/^tensorflow/d;/^keras/d;/^jax/d;/^torch/d;/^Pillow/d;/^h5py/d;/^kornia/d;/^scikit-learn/d;/^pytest-mock/d;/^GPy/d;/^lief/d;/^statsmodels/d;/^ultralytics/d;/^ipython/d;/^numba/d;/^pytest/d;/^pylint/d;/^mypy/d;/^pycodestyle/d;/^black/d;/^types-PyYAML/d;/^types-setuptools/d' requirements_test.txt) + pip install -q -r <(sed '/^pandas/d;/^scipy/d;/^matplotlib/d;/^xgboost/d;/^tensorflow/d;/^keras/d;/^jax/d;/^torch/d;/^Pillow/d;/^h5py/d;/^kornia/d;/^scikit-learn/d;/^pytest-mock/d;/^GPy/d;/^lief/d;/^statsmodels/d;/^ultralytics/d;/^ipython/d;/^numba/d;/^pytest/d;/^pylint/d;/^mypy/d;/^pycodestyle/d;/^black/d;/^types-PyYAML/d;/^types-setuptools/d;/^requests/d' requirements_test.txt) pip install pandas==1.3.5 pip install scipy==1.7.2 pip install matplotlib==3.5.3 @@ -71,6 +71,7 @@ jobs: pip install numba==0.56.4 pip install pytest==7.4.4 pip install pytest-cov + pip install requests==2.31.0 pip list - name: Run Tests run: ./run_tests.sh ${{ matrix.framework }} diff --git a/.github/workflows/dockerhub.yml b/.github/workflows/dockerhub.yml index bcd676b6b0..91337e7c18 100644 --- a/.github/workflows/dockerhub.yml +++ b/.github/workflows/dockerhub.yml @@ -30,7 +30,7 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 + uses: docker/metadata-action@369eb591f429131d6889c46b94e711f089e6ca96 with: images: adversarialrobustnesstoolbox/releases tags: | @@ -38,7 +38,7 @@ jobs: type=semver,pattern={{version}} - name: Build and push Docker image - uses: 
docker/build-push-action@4f58ea79222b3b9dc2c8bbdd6debcef730109a75 + uses: docker/build-push-action@48aba3b46d1b1fec4febb7c5d0c644b249a11355 with: context: . push: true diff --git a/art/attacks/evasion/__init__.py b/art/attacks/evasion/__init__.py index dbee974ab0..d39c645285 100644 --- a/art/attacks/evasion/__init__.py +++ b/art/attacks/evasion/__init__.py @@ -14,6 +14,7 @@ from art.attacks.evasion.auto_attack import AutoAttack from art.attacks.evasion.auto_projected_gradient_descent import AutoProjectedGradientDescent from art.attacks.evasion.auto_conjugate_gradient import AutoConjugateGradient +from art.attacks.evasion.rescaling_auto_conjugate_gradient import RescalingAutoConjugateGradient if importlib.util.find_spec("numba") is not None: from art.attacks.evasion.brendel_bethge import BrendelBethgeAttack @@ -62,6 +63,7 @@ from art.attacks.evasion.shapeshifter import ShapeShifter from art.attacks.evasion.simba import SimBA from art.attacks.evasion.spatial_transformation import SpatialTransformation +from art.attacks.evasion.steal_now_attack_later.steal_now_attack_later import SNAL from art.attacks.evasion.square_attack import SquareAttack from art.attacks.evasion.pixel_threshold import ThresholdAttack from art.attacks.evasion.universal_perturbation import UniversalPerturbation diff --git a/art/attacks/evasion/auto_attack.py b/art/attacks/evasion/auto_attack.py index 01a4046ec7..a148e43b20 100644 --- a/art/attacks/evasion/auto_attack.py +++ b/art/attacks/evasion/auto_attack.py @@ -78,7 +78,7 @@ def __init__( batch_size: int = 32, estimator_orig: "CLASSIFIER_TYPE" | None = None, targeted: bool = False, - parallel: bool = False, + parallel_pool_size: int = 0, ): """ Create a :class:`.AutoAttack` instance. @@ -93,7 +93,8 @@ def __init__( :param estimator_orig: Original estimator to be attacked by adversarial examples. :param targeted: If False run only untargeted attacks, if True also run targeted attacks against each possible target. 
- :param parallel: If True run attacks in parallel. + :param parallel_pool_size: Number of parallel threads / pool size in multiprocessing. If parallel_pool_size=0 + computation runs without multiprocessing. """ super().__init__(estimator=estimator) @@ -151,7 +152,7 @@ def __init__( self.estimator_orig = estimator self._targeted = targeted - self.parallel = parallel + self.parallel_pool_size = parallel_pool_size self.best_attacks: np.ndarray = np.array([]) self._check_params() @@ -199,7 +200,7 @@ def generate(self, x: np.ndarray, y: np.ndarray | None = None, **kwargs) -> np.n if attack.targeted: attack.set_params(targeted=False) - if self.parallel: + if self.parallel_pool_size > 0: args.append( ( deepcopy(x_adv), @@ -253,7 +254,7 @@ def generate(self, x: np.ndarray, y: np.ndarray | None = None, **kwargs) -> np.n targeted_labels[:, i], nb_classes=self.estimator.nb_classes ) - if self.parallel: + if self.parallel_pool_size > 0: args.append( ( deepcopy(x_adv), @@ -287,8 +288,8 @@ def generate(self, x: np.ndarray, y: np.ndarray | None = None, **kwargs) -> np.n except ValueError as error: logger.warning("Error completing attack: %s}", str(error)) - if self.parallel: - with multiprocess.get_context("spawn").Pool() as pool: + if self.parallel_pool_size > 0: + with multiprocess.get_context("spawn").Pool(processes=self.parallel_pool_size) as pool: # Results come back in the order that they were issued results = pool.starmap(run_attack, args) perturbations = [] @@ -320,7 +321,7 @@ def __repr__(self) -> str: This method returns a summary of the best performing (lowest perturbation in the parallel case) attacks per image passed to the AutoAttack class. 
""" - if self.parallel: + if self.parallel_pool_size > 0: best_attack_meta = "\n".join( [ f"image {i+1}: {str(self.args[idx][3])}" if idx != 0 else f"image {i+1}: n/a" @@ -328,7 +329,8 @@ def __repr__(self) -> str: ] ) auto_attack_meta = ( - f"AutoAttack(targeted={self.targeted}, parallel={self.parallel}, num_attacks={len(self.args)})" + f"AutoAttack(targeted={self.targeted}, parallel_pool_size={self.parallel_pool_size}, " + + f"num_attacks={len(self.args)})" ) return f"{auto_attack_meta}\nBestAttacks:\n{best_attack_meta}" @@ -339,7 +341,8 @@ def __repr__(self) -> str: ] ) auto_attack_meta = ( - f"AutoAttack(targeted={self.targeted}, parallel={self.parallel}, num_attacks={len(self.attacks)})" + f"AutoAttack(targeted={self.targeted}, parallel_pool_size={self.parallel_pool_size}, " + + f"num_attacks={len(self.attacks)})" ) return f"{auto_attack_meta}\nBestAttacks:\n{best_attack_meta}" diff --git a/art/attacks/evasion/rescaling_auto_conjugate_gradient.py b/art/attacks/evasion/rescaling_auto_conjugate_gradient.py new file mode 100644 index 0000000000..2b2e53a595 --- /dev/null +++ b/art/attacks/evasion/rescaling_auto_conjugate_gradient.py @@ -0,0 +1,664 @@ +# MIT License + +# Copyright (c) 2024 Keiichiro Yamamura + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# MIT License +# +# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2024 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +This module implements the 'Rescaling-ACG' attack. 
+ +| Paper link: https://arxiv.org/abs/2408.03972 +""" +import abc +import logging +import math +from typing import Optional, Union, TYPE_CHECKING + +import numpy as np +from tqdm.auto import trange + +from art.config import ART_NUMPY_DTYPE +from art.attacks.attack import EvasionAttack +from art.estimators.estimator import BaseEstimator, LossGradientsMixin +from art.estimators.classification.classifier import ClassifierMixin +from art.utils import check_and_transform_label_format, projection, random_sphere, is_probability, get_labels_np_array + +if TYPE_CHECKING: + from art.utils import CLASSIFIER_LOSS_GRADIENTS_TYPE + +logger = logging.getLogger(__name__) + + +class RescalingAutoConjugateGradient(EvasionAttack): + """ + Implementation of the 'Rescaling-ACG' attack. + The original implementation is https://github.com/yamamura-k/ReACG. + + | Paper link: https://arxiv.org/abs/2408.03972 + """ + + attack_params = EvasionAttack.attack_params + [ + "norm", + "eps", + "eps_step", + "max_iter", + "targeted", + "nb_random_init", + "batch_size", + "loss_type", + "verbose", + ] + _estimator_requirements = (BaseEstimator, LossGradientsMixin, ClassifierMixin) + _predefined_losses = [None, "cross_entropy", "difference_logits_ratio"] + + def __init__( + self, + estimator: "CLASSIFIER_LOSS_GRADIENTS_TYPE", + norm: Union[int, float, str] = np.inf, + eps: float = 0.3, + eps_step: float = 0.1, + max_iter: int = 100, + targeted: bool = False, + nb_random_init: int = 5, + batch_size: int = 32, + loss_type: Optional[str] = None, + verbose: bool = True, + ): + """ + Create a :class:`.RescalingAutoConjugateGradient` instance. + + :param estimator: A trained estimator. + :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2. + :param eps: Maximum perturbation that the attacker can introduce. + :param eps_step: Attack step size (input variation) at each iteration. + :param max_iter: The maximum number of iterations. 
+ :param targeted: Indicates whether the attack is targeted (True) or untargeted (False). + :param nb_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0 + starting at the original input. + :param batch_size: Size of the batch on which adversarial samples are generated. + :param loss_type: Defines the loss to attack. Available options: None (Use loss defined by estimator), + "cross_entropy", or "difference_logits_ratio" + :param verbose: Show progress bars. + """ + from art.estimators.classification import TensorFlowClassifier, TensorFlowV2Classifier, PyTorchClassifier + + if isinstance(estimator, TensorFlowClassifier): + raise ValueError("This attack does not support TensorFlow v1.") + + if loss_type not in self._predefined_losses: + raise ValueError( + f"The argument loss_type has an invalid value. The following options for `loss_type` are currently " + f"supported: {self._predefined_losses}" + ) + + if loss_type is None: + if hasattr(estimator, "predict") and is_probability( + estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=np.float32)) + ): + raise ValueError( # pragma: no cover + "AutoProjectedGradientDescent is expecting logits as estimator output, the provided " + "estimator seems to predict probabilities." 
+ ) + + estimator_reacg = estimator + else: + if isinstance(estimator, TensorFlowV2Classifier): + import tensorflow as tf + + class TensorFlowV2Loss: + """abstract class of loss function of tensorflow v2""" + + @abc.abstractmethod + def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor, *args, **kwargs) -> tf.Tensor: + raise NotImplementedError + + if loss_type == "cross_entropy": + + class CrossEntropyLossV2(TensorFlowV2Loss): + """Class defining cross entropy loss with reduction options.""" + + def __init__(self, from_logits, reduction="sum"): + self.ce_loss = tf.keras.losses.CategoricalCrossentropy( + from_logits=from_logits, + reduction=tf.keras.losses.Reduction.NONE, + ) + self.reduction = reduction + + def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor, *args, **kwargs) -> tf.Tensor: + if self.reduction == "mean": + return tf.reduce_mean(self.ce_loss(y_true, y_pred)) + if self.reduction == "sum": + return tf.reduce_sum(self.ce_loss(y_true, y_pred)) + if self.reduction == "none": + return self.ce_loss(y_true, y_pred) + raise NotImplementedError() + + if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))): + _loss_object_tf: TensorFlowV2Loss = CrossEntropyLossV2(from_logits=False) + else: + _loss_object_tf = CrossEntropyLossV2(from_logits=True) + elif loss_type == "difference_logits_ratio": + if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))): + raise ValueError( # pragma: no cover + "The provided estimator seems to predict probabilities. " + "If loss_type='difference_logits_ratio' the estimator has to to predict logits." + ) + + class DifferenceLogitsRatioTensorFlowV2(TensorFlowV2Loss): + """ + Callable class for Difference Logits Ratio loss in TensorFlow v2. 
+ """ + + def __init__(self): + self.reduction = "sum" + + def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor, *args, **kwargs) -> tf.Tensor: + i_y_true = tf.cast(tf.math.argmax(tf.cast(y_true, tf.int32), axis=1), tf.int32) + i_y_pred_arg = tf.argsort(y_pred, axis=1) + i_z_i_list = [] + + for i in range(y_true.shape[0]): + if i_y_pred_arg[i, -1] != i_y_true[i]: + i_z_i_list.append(i_y_pred_arg[i, -1]) + else: + i_z_i_list.append(i_y_pred_arg[i, -2]) + + i_z_i = tf.stack(i_z_i_list) + + z_1 = tf.gather(y_pred, i_y_pred_arg[:, -1], axis=1, batch_dims=0) + z_3 = tf.gather(y_pred, i_y_pred_arg[:, -3], axis=1, batch_dims=0) + z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0) + z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0) + + z_1 = tf.linalg.diag_part(z_1) + z_3 = tf.linalg.diag_part(z_3) + z_i = tf.linalg.diag_part(z_i) + z_y = tf.linalg.diag_part(z_y) + + dlr = -(z_y - z_i) / (z_1 - z_3) + if self.reduction == "mean": + return tf.reduce_mean(dlr) + if self.reduction == "sum": + return tf.reduce_sum(dlr) + if self.reduction == "none": + return dlr + raise NotImplementedError() + + _loss_object_tf = DifferenceLogitsRatioTensorFlowV2() + else: + raise NotImplementedError() + + estimator_reacg = TensorFlowV2Classifier( + model=estimator.model, + nb_classes=estimator.nb_classes, + input_shape=estimator.input_shape, + loss_object=_loss_object_tf, + optimizer=estimator.optimizer, + train_step=estimator.train_step, + channels_first=estimator.channels_first, + clip_values=estimator.clip_values, + preprocessing_defences=estimator.preprocessing_defences, + postprocessing_defences=estimator.postprocessing_defences, + preprocessing=estimator.preprocessing, + ) + elif isinstance(estimator, PyTorchClassifier): + import torch + + if loss_type == "cross_entropy": + if is_probability( + estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=np.float32)) + ): + raise ValueError( # pragma: no cover + "The provided estimator seems to predict probabilities. 
If loss_type='cross_entropy' " + "the estimator has to to predict logits." + ) + + class CrossEntropyLossTorch(torch.nn.modules.loss._Loss): # pylint: disable=W0212 + """Class defining cross entropy loss with reduction options.""" + + def __init__(self, reduction="sum"): + super().__init__() + self.ce_loss = torch.nn.CrossEntropyLoss(reduction="none") + self.reduction = reduction + + def __call__(self, y_true: torch.Tensor, y_pred: torch.Tensor, *args, **kwargs) -> torch.Tensor: + if self.reduction == "mean": + return self.ce_loss(y_true, y_pred).mean() + if self.reduction == "sum": + return self.ce_loss(y_true, y_pred).sum() + if self.reduction == "none": + return self.ce_loss(y_true, y_pred) + raise NotImplementedError() + + def forward( + self, input: torch.Tensor, target: torch.Tensor # pylint: disable=W0622 + ) -> torch.Tensor: + """ + Forward method. + :param input: Predicted labels of shape (nb_samples, nb_classes). + :param target: Target labels of shape (nb_samples, nb_classes). + :return: Cross entropy loss. + """ + return self.__call__(y_true=target, y_pred=input) + + _loss_object_pt: torch.nn.modules.loss._Loss = CrossEntropyLossTorch(reduction="mean") + + elif loss_type == "difference_logits_ratio": + if is_probability( + estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=ART_NUMPY_DTYPE)) + ): + raise ValueError( # pragma: no cover + "The provided estimator seems to predict probabilities. " + "If loss_type='difference_logits_ratio' the estimator has to to predict logits." + ) + + class DifferenceLogitsRatioPyTorch(torch.nn.modules.loss._Loss): # pylint: disable=W0212 + """ + Callable class for Difference Logits Ratio loss in PyTorch. 
+ """ + + def __init__(self): + super().__init__() + self.reduction = "sum" + + def __call__(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor: + if isinstance(y_true, np.ndarray): + y_true = torch.from_numpy(y_true) + if isinstance(y_pred, np.ndarray): + y_pred = torch.from_numpy(y_pred) + + y_true = y_true.float() + + i_y_true = torch.argmax(y_true, dim=1) + i_y_pred_arg = torch.argsort(y_pred, dim=1) + i_z_i_list = [] + + for i in range(y_true.shape[0]): + if i_y_pred_arg[i, -1] != i_y_true[i]: + i_z_i_list.append(i_y_pred_arg[i, -1]) + else: + i_z_i_list.append(i_y_pred_arg[i, -2]) + + i_z_i = torch.stack(i_z_i_list) + + z_1 = y_pred[:, i_y_pred_arg[:, -1]] + z_3 = y_pred[:, i_y_pred_arg[:, -3]] + z_i = y_pred[:, i_z_i] + z_y = y_pred[:, i_y_true] + + z_1 = torch.diagonal(z_1) + z_3 = torch.diagonal(z_3) + z_i = torch.diagonal(z_i) + z_y = torch.diagonal(z_y) + + dlr = (-(z_y - z_i) / (z_1 - z_3)).float() + if self.reduction == "mean": + return dlr.mean() + if self.reduction == "sum": + return dlr.sum() + if self.reduction == "none": + return dlr + raise NotImplementedError() + + def forward( + self, input: torch.Tensor, target: torch.Tensor # pylint: disable=W0622 + ) -> torch.Tensor: + """ + Forward method. + :param input: Predicted labels of shape (nb_samples, nb_classes). + :param target: Target labels of shape (nb_samples, nb_classes). + :return: Difference Logits Ratio Loss. 
+ """ + return self.__call__(y_true=target, y_pred=input) + + _loss_object_pt = DifferenceLogitsRatioPyTorch() + else: + raise NotImplementedError() + + estimator_reacg = PyTorchClassifier( + model=estimator.model, + loss=_loss_object_pt, + input_shape=estimator.input_shape, + nb_classes=estimator.nb_classes, + optimizer=None, + channels_first=estimator.channels_first, + clip_values=estimator.clip_values, + preprocessing_defences=estimator.preprocessing_defences, + postprocessing_defences=estimator.postprocessing_defences, + preprocessing=estimator.preprocessing, + device_type=str(estimator._device), + ) + + else: # pragma: no cover + raise ValueError(f"The loss type {loss_type} is not supported for the provided estimator.") + + super().__init__(estimator=estimator_reacg) + self.norm = norm + self.eps = eps + self.eps_step = eps_step + self.max_iter = max_iter + self.targeted = targeted + self.nb_random_init = nb_random_init + self.batch_size = batch_size + self.loss_type = loss_type + self.verbose = verbose + self._check_params() + + def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray: + """ + Generate adversarial samples and return them in an array. + + :param x: An array with the original inputs. + :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape + (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial + samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect + (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`. + :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations. + Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any + features for which the mask is zero will not be adversarially perturbed. 
+ :type mask: `np.ndarray` + :return: An array holding the adversarial examples. + """ + mask = kwargs.get("mask") + + if y is not None: + y = check_and_transform_label_format(y, nb_classes=self.estimator.nb_classes) + + if y is None: + if self.targeted: + raise ValueError("Target labels `y` need to be provided for a targeted attack.") + y = get_labels_np_array(self.estimator.predict(x, batch_size=self.batch_size)).astype(int) + + if self.estimator.nb_classes == 2 and y.shape[1] == 1: + raise ValueError( + "This attack has not yet been tested for binary classification with a single output classifier." + ) + + x_adv = x.astype(ART_NUMPY_DTYPE) + + for _ in trange(max(1, self.nb_random_init), desc="ReACG - restart", disable=not self.verbose): + # Determine correctly predicted samples + y_pred = self.estimator.predict(x_adv) + if self.targeted: + sample_is_robust = np.argmax(y_pred, axis=1) != np.argmax(y, axis=1) + elif not self.targeted: + sample_is_robust = np.argmax(y_pred, axis=1) == np.argmax(y, axis=1) + + if np.sum(sample_is_robust) == 0: + break + + x_robust = x_adv[sample_is_robust] + y_robust = y[sample_is_robust] + x_init = x[sample_is_robust] + + n = x_robust.shape[0] + m = np.prod(x_robust.shape[1:]).item() + random_perturbation = ( + random_sphere(n, m, self.eps, self.norm).reshape(x_robust.shape).astype(ART_NUMPY_DTYPE) + ) + + x_robust = x_robust + random_perturbation + + if self.estimator.clip_values is not None: + clip_min, clip_max = self.estimator.clip_values + x_robust = np.clip(x_robust, clip_min, clip_max) + + perturbation = projection(x_robust - x_init, self.eps, self.norm) + x_robust = x_init + perturbation + + # Compute perturbation with implicit batching + for batch_id in trange( + int(np.ceil(x_robust.shape[0] / float(self.batch_size))), + desc="ReACG - batch", + leave=False, + disable=not self.verbose, + ): + batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size + x_k = 
x_robust[batch_index_1:batch_index_2].astype(ART_NUMPY_DTYPE) + x_init_batch = x_init[batch_index_1:batch_index_2].astype(ART_NUMPY_DTYPE) + y_batch = y_robust[batch_index_1:batch_index_2] + + p_0 = 0 + p_1 = 0.43 + var_w = [p_0, p_1] + + while True: + p_j_p_1 = var_w[-1] + max(var_w[-1] - var_w[-2] - 0.24, 0.08) + if p_j_p_1 > 1: + break + var_w.append(p_j_p_1) + + var_w = [math.ceil(p * self.max_iter) for p in var_w] + + # self.eta = np.full((self.batch_size, 1, 1, 1), 2 * self.eps_step).astype(ART_NUMPY_DTYPE) + _batch_size = x_k.shape[0] + eta = np.full((_batch_size,) + (1,) * len(self.estimator.input_shape), self.eps_step).astype( + ART_NUMPY_DTYPE + ) + self.count_condition_1 = np.zeros(shape=(_batch_size,)) + gradk_1 = np.zeros_like(x_k) + cgradk_1 = np.zeros_like(x_k) + cgradk = np.zeros_like(x_k) + gradk_1_best = np.zeros_like(x_k) + cgradk_1_best = np.zeros_like(x_k) + gradk_1_tmp = np.zeros_like(x_k) + cgradk_1_tmp = np.zeros_like(x_k) + + for k_iter in trange(self.max_iter, desc="ReACG - iteration", leave=False, disable=not self.verbose): + + # Get perturbation, use small scalar to avoid division by 0 + tol = 10e-8 + + # Get gradient wrt loss; invert it if attack is targeted + grad = self.estimator.loss_gradient(x_k, y_batch) * (1 - 2 * int(self.targeted)) + if k_iter == 0: + gradk_1 = grad.copy() + cgradk_1 = grad.copy() + cgradk = grad.copy() + else: + beta = get_beta(grad, gradk_1, cgradk_1) + # Modify the coefficient beta when |beta| >> avg.(|grad / cgradk_1|) + _beta_normalized = get_beta( + grad / np.linalg.norm(grad), gradk_1 / np.linalg.norm(gradk_1), cgradk_1 + ) + grad_ratio_value = np.abs(grad / cgradk_1).reshape((_batch_size, -1)) + grad_ratio = grad_ratio_value.mean(1) + normalize_inds = np.abs(beta).reshape((_batch_size,)) > grad_ratio + smaller_beta_inds = (np.abs(beta) > np.abs(_beta_normalized)).reshape((_batch_size,)) + normalize_inds = np.logical_and(normalize_inds, smaller_beta_inds) + beta[normalize_inds] = 
_beta_normalized[normalize_inds].copy() + cgradk = grad + beta * cgradk_1 + + # Apply norm bound + if self.norm in [np.inf, "inf"]: + grad = np.sign(cgradk) + elif self.norm == 1: + ind = tuple(range(1, len(x_k.shape))) + cgradk = cgradk / (np.sum(np.abs(cgradk), axis=ind, keepdims=True) + tol) + elif self.norm == 2: + ind = tuple(range(1, len(x_k.shape))) + cgradk = cgradk / (np.sqrt(np.sum(np.square(cgradk), axis=ind, keepdims=True)) + tol) + assert x_k.shape == cgradk.shape + + perturbation = cgradk + + if mask is not None: + perturbation = perturbation * (mask.astype(ART_NUMPY_DTYPE)) + + # Apply perturbation and clip + x_k_p_1 = x_k + eta * perturbation + + if self.estimator.clip_values is not None: + clip_min, clip_max = self.estimator.clip_values + x_k_p_1 = np.clip(x_k_p_1, clip_min, clip_max) + + if k_iter == 0: + x_1 = x_k_p_1 + perturbation = projection(x_1 - x_init_batch, self.eps, self.norm) + x_1 = x_init_batch + perturbation + + f_0 = self.estimator.compute_loss(x=x_k, y=y_batch, reduction="none") + f_1 = self.estimator.compute_loss(x=x_1, y=y_batch, reduction="none") + + self.eta_w_j_m_1 = eta.copy() + self.f_max_w_j_m_1 = f_0.copy() + + self.f_max = f_0.copy() + self.x_max = x_k.copy() + + f1_ge_f0 = f_1 >= f_0 + f_1_tmp = f_1[f1_ge_f0].copy() + self.f_max[f1_ge_f0] = f_1_tmp.copy() + x_1_tmp = x_1[f1_ge_f0].copy() + self.x_max[f1_ge_f0] = x_1_tmp.copy() + self.count_condition_1[f1_ge_f0] += 1 + + # Settings for next iteration k + x_k = x_1 + gradk_1_best = gradk_1.copy() + cgradk_1_best = cgradk_1.copy() + + else: + perturbation = projection(x_k_p_1 - x_init_batch, self.eps, self.norm) + x_k_p_1 = x_init_batch + perturbation + + if self.estimator.clip_values is not None: + clip_min, clip_max = self.estimator.clip_values + x_k_p_1 = np.clip(x_k_p_1, clip_min, clip_max) + + perturbation = projection(x_k_p_1 - x_init_batch, self.eps, self.norm) + x_k_p_1 = x_init_batch + perturbation + + f_k_p_1 = self.estimator.compute_loss(x=x_k_p_1, y=y_batch, 
reduction="none") + + if (f_k_p_1 == 0.0).all(): + x_k = x_k_p_1.copy() + break + + if self.targeted: + fk_ge_fm = f_k_p_1 < self.f_max # assume the loss function is cross-entropy + else: + fk_ge_fm = f_k_p_1 > self.f_max + + self.count_condition_1[fk_ge_fm] += 1 + # update the best points + x_k_p_1_tmp = x_k_p_1[fk_ge_fm].copy() + self.x_max[fk_ge_fm] = x_k_p_1_tmp.copy() + f_k_p_1_tmp = f_k_p_1[fk_ge_fm].copy() + self.f_max[fk_ge_fm] = f_k_p_1_tmp.copy() + gradk_1_tmp = gradk_1[fk_ge_fm].copy() + gradk_1_best[fk_ge_fm] = gradk_1_tmp.copy() + cgradk_1_tmp = cgradk_1[fk_ge_fm].copy() + cgradk_1_best[fk_ge_fm] = cgradk_1_tmp.copy() + + # update the search points + x_k = x_k_p_1.copy() + gradk_1 = grad.copy() + cgradk_1 = cgradk.copy() + + if k_iter in var_w: + + rho = 0.75 + + condition_1 = self.count_condition_1 < rho * (k_iter - var_w[var_w.index(k_iter) - 1]) + condition_2 = np.logical_and( + (self.eta_w_j_m_1 == eta).squeeze(), self.f_max_w_j_m_1 == self.f_max + ) + condition = np.logical_or(condition_1, condition_2) + + # halve the stepsize if the condition is satisfied + eta[condition] /= 2 + # move to the best point + x_max_tmp = self.x_max[condition].copy() + x_k[condition] = x_max_tmp.copy() + gradk_1_tmp = gradk_1_best[condition].copy() + gradk_1[condition] = gradk_1_tmp.copy() + cgradk_1_tmp = cgradk_1_best[condition].copy() + cgradk_1[condition] = cgradk_1_tmp.copy() + + self.count_condition_1[:] = 0 + self.eta_w_j_m_1 = eta.copy() + self.f_max_w_j_m_1 = self.f_max.copy() + + y_pred_adv_k = self.estimator.predict(x_k) + if self.targeted: + sample_is_not_robust_k = np.invert(np.argmax(y_pred_adv_k, axis=1) != np.argmax(y_batch, axis=1)) + elif not self.targeted: + sample_is_not_robust_k = np.invert(np.argmax(y_pred_adv_k, axis=1) == np.argmax(y_batch, axis=1)) + + x_robust[batch_index_1:batch_index_2][sample_is_not_robust_k] = x_k[sample_is_not_robust_k] + + x_adv[sample_is_robust] = x_robust + + return x_adv + + def _check_params(self) -> None: + if 
self.norm not in [1, 2, np.inf, "inf"]: + raise ValueError('The argument norm has to be either 1, 2, np.inf, or "inf".') + + if not isinstance(self.eps, (int, float)) or self.eps <= 0.0: + raise ValueError("The argument eps has to be either of type int or float and larger than zero.") + + if not isinstance(self.eps_step, (int, float)) or self.eps_step <= 0.0: + raise ValueError("The argument eps_step has to be either of type int or float and larger than zero.") + + if not isinstance(self.max_iter, int) or self.max_iter <= 0: + raise ValueError("The argument max_iter has to be of type int and larger than zero.") + + if not isinstance(self.targeted, bool): + raise ValueError("The argument targeted has to be of bool.") + + if not isinstance(self.nb_random_init, int) or self.nb_random_init <= 0: + raise ValueError("The argument nb_random_init has to be of type int and larger than zero.") + + if not isinstance(self.batch_size, int) or self.batch_size <= 0: + raise ValueError("The argument batch_size has to be of type int and larger than zero.") + + if not isinstance(self.verbose, bool): + raise ValueError("The argument `verbose` has to be of type bool.") + + +def get_beta(gradk, gradk_1, cgradk_1): + """compute the coefficient beta required to update CG direction""" + _batch_size = gradk.shape[0] + _cgradk_1 = cgradk_1.reshape(_batch_size, -1) + _gradk = -gradk.reshape(_batch_size, -1) + _gradk_1 = -gradk_1.reshape(_batch_size, -1) + delta_gradk = _gradk - _gradk_1 + betak = -(_gradk * delta_gradk).sum(axis=1) / ( + (_cgradk_1 * delta_gradk).sum(axis=1) + np.finfo(ART_NUMPY_DTYPE).eps + ) + return betak.reshape((_batch_size,) + (1,) * (len(gradk.shape) - 1)) diff --git a/art/attacks/evasion/steal_now_attack_later/__init__.py b/art/attacks/evasion/steal_now_attack_later/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/art/attacks/evasion/steal_now_attack_later/bbox_ioa.py b/art/attacks/evasion/steal_now_attack_later/bbox_ioa.py new file mode 
100644 index 0000000000..bb50c57cea --- /dev/null +++ b/art/attacks/evasion/steal_now_attack_later/bbox_ioa.py @@ -0,0 +1,698 @@ +# pylint: disable=C0114 +# GNU AFFERO GENERAL PUBLIC LICENSE +# Version 3, 19 November 2007 +# +# Copyright (C) 2007 Free Software Foundation, Inc. +# Everyone is permitted to copy and distribute verbatim copies +# of this license document, but changing it is not allowed. +# +# Preamble +# +# The GNU Affero General Public License is a free, copyleft license for +# software and other kinds of works, specifically designed to ensure +# cooperation with the community in the case of network server software. +# +# The licenses for most software and other practical works are designed +# to take away your freedom to share and change the works. By contrast, +# our General Public Licenses are intended to guarantee your freedom to +# share and change all versions of a program--to make sure it remains free +# software for all its users. +# +# When we speak of free software, we are referring to freedom, not +# price. Our General Public Licenses are designed to make sure that you +# have the freedom to distribute copies of free software (and charge for +# them if you wish), that you receive source code or can get it if you +# want it, that you can change the software or use pieces of it in new +# free programs, and that you know you can do these things. +# +# Developers that use our General Public Licenses protect your rights +# with two steps: (1) assert copyright on the software, and (2) offer +# you this License which gives you legal permission to copy, distribute +# and/or modify the software. +# +# A secondary benefit of defending all users' freedom is that +# improvements made in alternate versions of the program, if they +# receive widespread use, become available for other developers to +# incorporate. Many developers of free software are heartened and +# encouraged by the resulting cooperation. 
However, in the case of +# software used on network servers, this result may fail to come about. +# The GNU General Public License permits making a modified version and +# letting the public access it on a server without ever releasing its +# source code to the public. +# +# The GNU Affero General Public License is designed specifically to +# ensure that, in such cases, the modified source code becomes available +# to the community. It requires the operator of a network server to +# provide the source code of the modified version running there to the +# users of that server. Therefore, public use of a modified version, on +# a publicly accessible server, gives the public access to the source +# code of the modified version. +# +# An older license, called the Affero General Public License and +# published by Affero, was designed to accomplish similar goals. This is +# a different license, not a version of the Affero GPL, but Affero has +# released a new version of the Affero GPL which permits relicensing under +# this license. +# +# The precise terms and conditions for copying, distribution and +# modification follow. +# +# TERMS AND CONDITIONS +# +# 0. Definitions. +# +# "This License" refers to version 3 of the GNU Affero General Public License. +# +# "Copyright" also means copyright-like laws that apply to other kinds of +# works, such as semiconductor masks. +# +# "The Program" refers to any copyrightable work licensed under this +# License. Each licensee is addressed as "you". "Licensees" and +# "recipients" may be individuals or organizations. +# +# To "modify" a work means to copy from or adapt all or part of the work +# in a fashion requiring copyright permission, other than the making of an +# exact copy. The resulting work is called a "modified version" of the +# earlier work or a work "based on" the earlier work. +# +# A "covered work" means either the unmodified Program or a work based +# on the Program. 
+# +# To "propagate" a work means to do anything with it that, without +# permission, would make you directly or secondarily liable for +# infringement under applicable copyright law, except executing it on a +# computer or modifying a private copy. Propagation includes copying, +# distribution (with or without modification), making available to the +# public, and in some countries other activities as well. +# +# To "convey" a work means any kind of propagation that enables other +# parties to make or receive copies. Mere interaction with a user through +# a computer network, with no transfer of a copy, is not conveying. +# +# An interactive user interface displays "Appropriate Legal Notices" +# to the extent that it includes a convenient and prominently visible +# feature that (1) displays an appropriate copyright notice, and (2) +# tells the user that there is no warranty for the work (except to the +# extent that warranties are provided), that licensees may convey the +# work under this License, and how to view a copy of this License. If +# the interface presents a list of user commands or options, such as a +# menu, a prominent item in the list meets this criterion. +# +# 1. Source Code. +# +# The "source code" for a work means the preferred form of the work +# for making modifications to it. "Object code" means any non-source +# form of a work. +# +# A "Standard Interface" means an interface that either is an official +# standard defined by a recognized standards body, or, in the case of +# interfaces specified for a particular programming language, one that +# is widely used among developers working in that language. 
+# +# The "System Libraries" of an executable work include anything, other +# than the work as a whole, that (a) is included in the normal form of +# packaging a Major Component, but which is not part of that Major +# Component, and (b) serves only to enable use of the work with that +# Major Component, or to implement a Standard Interface for which an +# implementation is available to the public in source code form. A +# "Major Component", in this context, means a major essential component +# (kernel, window system, and so on) of the specific operating system +# (if any) on which the executable work runs, or a compiler used to +# produce the work, or an object code interpreter used to run it. +# +# The "Corresponding Source" for a work in object code form means all +# the source code needed to generate, install, and (for an executable +# work) run the object code and to modify the work, including scripts to +# control those activities. However, it does not include the work's +# System Libraries, or general-purpose tools or generally available free +# programs which are used unmodified in performing those activities but +# which are not part of the work. For example, Corresponding Source +# includes interface definition files associated with source files for +# the work, and the source code for shared libraries and dynamically +# linked subprograms that the work is specifically designed to require, +# such as by intimate data communication or control flow between those +# subprograms and other parts of the work. +# +# The Corresponding Source need not include anything that users +# can regenerate automatically from other parts of the Corresponding +# Source. +# +# The Corresponding Source for a work in source code form is that +# same work. +# +# 2. Basic Permissions. +# +# All rights granted under this License are granted for the term of +# copyright on the Program, and are irrevocable provided the stated +# conditions are met. 
This License explicitly affirms your unlimited +# permission to run the unmodified Program. The output from running a +# covered work is covered by this License only if the output, given its +# content, constitutes a covered work. This License acknowledges your +# rights of fair use or other equivalent, as provided by copyright law. +# +# You may make, run and propagate covered works that you do not +# convey, without conditions so long as your license otherwise remains +# in force. You may convey covered works to others for the sole purpose +# of having them make modifications exclusively for you, or provide you +# with facilities for running those works, provided that you comply with +# the terms of this License in conveying all material for which you do +# not control copyright. Those thus making or running the covered works +# for you must do so exclusively on your behalf, under your direction +# and control, on terms that prohibit them from making any copies of +# your copyrighted material outside their relationship with you. +# +# Conveying under any other circumstances is permitted solely under +# the conditions stated below. Sublicensing is not allowed; section 10 +# makes it unnecessary. +# +# 3. Protecting Users' Legal Rights From Anti-Circumvention Law. +# +# No covered work shall be deemed part of an effective technological +# measure under any applicable law fulfilling obligations under article +# 11 of the WIPO copyright treaty adopted on 20 December 1996, or +# similar laws prohibiting or restricting circumvention of such +# measures. 
+# +# When you convey a covered work, you waive any legal power to forbid +# circumvention of technological measures to the extent such circumvention +# is effected by exercising rights under this License with respect to +# the covered work, and you disclaim any intention to limit operation or +# modification of the work as a means of enforcing, against the work's +# users, your or third parties' legal rights to forbid circumvention of +# technological measures. +# +# 4. Conveying Verbatim Copies. +# +# You may convey verbatim copies of the Program's source code as you +# receive it, in any medium, provided that you conspicuously and +# appropriately publish on each copy an appropriate copyright notice; +# keep intact all notices stating that this License and any +# non-permissive terms added in accord with section 7 apply to the code; +# keep intact all notices of the absence of any warranty; and give all +# recipients a copy of this License along with the Program. +# +# You may charge any price or no price for each copy that you convey, +# and you may offer support or warranty protection for a fee. +# +# 5. Conveying Modified Source Versions. +# +# You may convey a work based on the Program, or the modifications to +# produce it from the Program, in the form of source code under the +# terms of section 4, provided that you also meet all of these conditions: +# +# a) The work must carry prominent notices stating that you modified +# it, and giving a relevant date. +# +# b) The work must carry prominent notices stating that it is +# released under this License and any conditions added under section +# 7. This requirement modifies the requirement in section 4 to +# "keep intact all notices". +# +# c) You must license the entire work, as a whole, under this +# License to anyone who comes into possession of a copy. 
This +# License will therefore apply, along with any applicable section 7 +# additional terms, to the whole of the work, and all its parts, +# regardless of how they are packaged. This License gives no +# permission to license the work in any other way, but it does not +# invalidate such permission if you have separately received it. +# +# d) If the work has interactive user interfaces, each must display +# Appropriate Legal Notices; however, if the Program has interactive +# interfaces that do not display Appropriate Legal Notices, your +# work need not make them do so. +# +# A compilation of a covered work with other separate and independent +# works, which are not by their nature extensions of the covered work, +# and which are not combined with it such as to form a larger program, +# in or on a volume of a storage or distribution medium, is called an +# "aggregate" if the compilation and its resulting copyright are not +# used to limit the access or legal rights of the compilation's users +# beyond what the individual works permit. Inclusion of a covered work +# in an aggregate does not cause this License to apply to the other +# parts of the aggregate. +# +# 6. Conveying Non-Source Forms. +# +# You may convey a covered work in object code form under the terms +# of sections 4 and 5, provided that you also convey the +# machine-readable Corresponding Source under the terms of this License, +# in one of these ways: +# +# a) Convey the object code in, or embodied in, a physical product +# (including a physical distribution medium), accompanied by the +# Corresponding Source fixed on a durable physical medium +# customarily used for software interchange. 
+# +# b) Convey the object code in, or embodied in, a physical product +# (including a physical distribution medium), accompanied by a +# written offer, valid for at least three years and valid for as +# long as you offer spare parts or customer support for that product +# model, to give anyone who possesses the object code either (1) a +# copy of the Corresponding Source for all the software in the +# product that is covered by this License, on a durable physical +# medium customarily used for software interchange, for a price no +# more than your reasonable cost of physically performing this +# conveying of source, or (2) access to copy the +# Corresponding Source from a network server at no charge. +# +# c) Convey individual copies of the object code with a copy of the +# written offer to provide the Corresponding Source. This +# alternative is allowed only occasionally and noncommercially, and +# only if you received the object code with such an offer, in accord +# with subsection 6b. +# +# d) Convey the object code by offering access from a designated +# place (gratis or for a charge), and offer equivalent access to the +# Corresponding Source in the same way through the same place at no +# further charge. You need not require recipients to copy the +# Corresponding Source along with the object code. If the place to +# copy the object code is a network server, the Corresponding Source +# may be on a different server (operated by you or a third party) +# that supports equivalent copying facilities, provided you maintain +# clear directions next to the object code saying where to find the +# Corresponding Source. Regardless of what server hosts the +# Corresponding Source, you remain obligated to ensure that it is +# available for as long as needed to satisfy these requirements. 
+# +# e) Convey the object code using peer-to-peer transmission, provided +# you inform other peers where the object code and Corresponding +# Source of the work are being offered to the general public at no +# charge under subsection 6d. +# +# A separable portion of the object code, whose source code is excluded +# from the Corresponding Source as a System Library, need not be +# included in conveying the object code work. +# +# A "User Product" is either (1) a "consumer product", which means any +# tangible personal property which is normally used for personal, family, +# or household purposes, or (2) anything designed or sold for incorporation +# into a dwelling. In determining whether a product is a consumer product, +# doubtful cases shall be resolved in favor of coverage. For a particular +# product received by a particular user, "normally used" refers to a +# typical or common use of that class of product, regardless of the status +# of the particular user or of the way in which the particular user +# actually uses, or expects or is expected to use, the product. A product +# is a consumer product regardless of whether the product has substantial +# commercial, industrial or non-consumer uses, unless such uses represent +# the only significant mode of use of the product. +# +# "Installation Information" for a User Product means any methods, +# procedures, authorization keys, or other information required to install +# and execute modified versions of a covered work in that User Product from +# a modified version of its Corresponding Source. The information must +# suffice to ensure that the continued functioning of the modified object +# code is in no case prevented or interfered with solely because +# modification has been made. 
+# +# If you convey an object code work under this section in, or with, or +# specifically for use in, a User Product, and the conveying occurs as +# part of a transaction in which the right of possession and use of the +# User Product is transferred to the recipient in perpetuity or for a +# fixed term (regardless of how the transaction is characterized), the +# Corresponding Source conveyed under this section must be accompanied +# by the Installation Information. But this requirement does not apply +# if neither you nor any third party retains the ability to install +# modified object code on the User Product (for example, the work has +# been installed in ROM). +# +# The requirement to provide Installation Information does not include a +# requirement to continue to provide support service, warranty, or updates +# for a work that has been modified or installed by the recipient, or for +# the User Product in which it has been modified or installed. Access to a +# network may be denied when the modification itself materially and +# adversely affects the operation of the network or violates the rules and +# protocols for communication across the network. +# +# Corresponding Source conveyed, and Installation Information provided, +# in accord with this section must be in a format that is publicly +# documented (and with an implementation available to the public in +# source code form), and must require no special password or key for +# unpacking, reading or copying. +# +# 7. Additional Terms. +# +# "Additional permissions" are terms that supplement the terms of this +# License by making exceptions from one or more of its conditions. +# Additional permissions that are applicable to the entire Program shall +# be treated as though they were included in this License, to the extent +# that they are valid under applicable law. 
If additional permissions +# apply only to part of the Program, that part may be used separately +# under those permissions, but the entire Program remains governed by +# this License without regard to the additional permissions. +# +# When you convey a copy of a covered work, you may at your option +# remove any additional permissions from that copy, or from any part of +# it. (Additional permissions may be written to require their own +# removal in certain cases when you modify the work.) You may place +# additional permissions on material, added by you to a covered work, +# for which you have or can give appropriate copyright permission. +# +# Notwithstanding any other provision of this License, for material you +# add to a covered work, you may (if authorized by the copyright holders of +# that material) supplement the terms of this License with terms: +# +# a) Disclaiming warranty or limiting liability differently from the +# terms of sections 15 and 16 of this License; or +# +# b) Requiring preservation of specified reasonable legal notices or +# author attributions in that material or in the Appropriate Legal +# Notices displayed by works containing it; or +# +# c) Prohibiting misrepresentation of the origin of that material, or +# requiring that modified versions of such material be marked in +# reasonable ways as different from the original version; or +# +# d) Limiting the use for publicity purposes of names of licensors or +# authors of the material; or +# +# e) Declining to grant rights under trademark law for use of some +# trade names, trademarks, or service marks; or +# +# f) Requiring indemnification of licensors and authors of that +# material by anyone who conveys the material (or modified versions of +# it) with contractual assumptions of liability to the recipient, for +# any liability that these contractual assumptions directly impose on +# those licensors and authors. 
+# +# All other non-permissive additional terms are considered "further +# restrictions" within the meaning of section 10. If the Program as you +# received it, or any part of it, contains a notice stating that it is +# governed by this License along with a term that is a further +# restriction, you may remove that term. If a license document contains +# a further restriction but permits relicensing or conveying under this +# License, you may add to a covered work material governed by the terms +# of that license document, provided that the further restriction does +# not survive such relicensing or conveying. +# +# If you add terms to a covered work in accord with this section, you +# must place, in the relevant source files, a statement of the +# additional terms that apply to those files, or a notice indicating +# where to find the applicable terms. +# +# Additional terms, permissive or non-permissive, may be stated in the +# form of a separately written license, or stated as exceptions; +# the above requirements apply either way. +# +# 8. Termination. +# +# You may not propagate or modify a covered work except as expressly +# provided under this License. Any attempt otherwise to propagate or +# modify it is void, and will automatically terminate your rights under +# this License (including any patent licenses granted under the third +# paragraph of section 11). +# +# However, if you cease all violation of this License, then your +# license from a particular copyright holder is reinstated (a) +# provisionally, unless and until the copyright holder explicitly and +# finally terminates your license, and (b) permanently, if the copyright +# holder fails to notify you of the violation by some reasonable means +# prior to 60 days after the cessation. 
+# +# Moreover, your license from a particular copyright holder is +# reinstated permanently if the copyright holder notifies you of the +# violation by some reasonable means, this is the first time you have +# received notice of violation of this License (for any work) from that +# copyright holder, and you cure the violation prior to 30 days after +# your receipt of the notice. +# +# Termination of your rights under this section does not terminate the +# licenses of parties who have received copies or rights from you under +# this License. If your rights have been terminated and not permanently +# reinstated, you do not qualify to receive new licenses for the same +# material under section 10. +# +# 9. Acceptance Not Required for Having Copies. +# +# You are not required to accept this License in order to receive or +# run a copy of the Program. Ancillary propagation of a covered work +# occurring solely as a consequence of using peer-to-peer transmission +# to receive a copy likewise does not require acceptance. However, +# nothing other than this License grants you permission to propagate or +# modify any covered work. These actions infringe copyright if you do +# not accept this License. Therefore, by modifying or propagating a +# covered work, you indicate your acceptance of this License to do so. +# +# 10. Automatic Licensing of Downstream Recipients. +# +# Each time you convey a covered work, the recipient automatically +# receives a license from the original licensors, to run, modify and +# propagate that work, subject to this License. You are not responsible +# for enforcing compliance by third parties with this License. +# +# An "entity transaction" is a transaction transferring control of an +# organization, or substantially all assets of one, or subdividing an +# organization, or merging organizations. 
If propagation of a covered +# work results from an entity transaction, each party to that +# transaction who receives a copy of the work also receives whatever +# licenses to the work the party's predecessor in interest had or could +# give under the previous paragraph, plus a right to possession of the +# Corresponding Source of the work from the predecessor in interest, if +# the predecessor has it or can get it with reasonable efforts. +# +# You may not impose any further restrictions on the exercise of the +# rights granted or affirmed under this License. For example, you may +# not impose a license fee, royalty, or other charge for exercise of +# rights granted under this License, and you may not initiate litigation +# (including a cross-claim or counterclaim in a lawsuit) alleging that +# any patent claim is infringed by making, using, selling, offering for +# sale, or importing the Program or any portion of it. +# +# 11. Patents. +# +# A "contributor" is a copyright holder who authorizes use under this +# License of the Program or a work on which the Program is based. The +# work thus licensed is called the contributor's "contributor version". +# +# A contributor's "essential patent claims" are all patent claims +# owned or controlled by the contributor, whether already acquired or +# hereafter acquired, that would be infringed by some manner, permitted +# by this License, of making, using, or selling its contributor version, +# but do not include claims that would be infringed only as a +# consequence of further modification of the contributor version. For +# purposes of this definition, "control" includes the right to grant +# patent sublicenses in a manner consistent with the requirements of +# this License. 
+# +# Each contributor grants you a non-exclusive, worldwide, royalty-free +# patent license under the contributor's essential patent claims, to +# make, use, sell, offer for sale, import and otherwise run, modify and +# propagate the contents of its contributor version. +# +# In the following three paragraphs, a "patent license" is any express +# agreement or commitment, however denominated, not to enforce a patent +# (such as an express permission to practice a patent or covenant not to +# sue for patent infringement). To "grant" such a patent license to a +# party means to make such an agreement or commitment not to enforce a +# patent against the party. +# +# If you convey a covered work, knowingly relying on a patent license, +# and the Corresponding Source of the work is not available for anyone +# to copy, free of charge and under the terms of this License, through a +# publicly available network server or other readily accessible means, +# then you must either (1) cause the Corresponding Source to be so +# available, or (2) arrange to deprive yourself of the benefit of the +# patent license for this particular work, or (3) arrange, in a manner +# consistent with the requirements of this License, to extend the patent +# license to downstream recipients. "Knowingly relying" means you have +# actual knowledge that, but for the patent license, your conveying the +# covered work in a country, or your recipient's use of the covered work +# in a country, would infringe one or more identifiable patents in that +# country that you have reason to believe are valid. 
+# +# If, pursuant to or in connection with a single transaction or +# arrangement, you convey, or propagate by procuring conveyance of, a +# covered work, and grant a patent license to some of the parties +# receiving the covered work authorizing them to use, propagate, modify +# or convey a specific copy of the covered work, then the patent license +# you grant is automatically extended to all recipients of the covered +# work and works based on it. +# +# A patent license is "discriminatory" if it does not include within +# the scope of its coverage, prohibits the exercise of, or is +# conditioned on the non-exercise of one or more of the rights that are +# specifically granted under this License. You may not convey a covered +# work if you are a party to an arrangement with a third party that is +# in the business of distributing software, under which you make payment +# to the third party based on the extent of your activity of conveying +# the work, and under which the third party grants, to any of the +# parties who would receive the covered work from you, a discriminatory +# patent license (a) in connection with copies of the covered work +# conveyed by you (or copies made from those copies), or (b) primarily +# for and in connection with specific products or compilations that +# contain the covered work, unless you entered into that arrangement, +# or that patent license was granted, prior to 28 March 2007. +# +# Nothing in this License shall be construed as excluding or limiting +# any implied license or other defenses to infringement that may +# otherwise be available to you under applicable patent law. +# +# 12. No Surrender of Others' Freedom. +# +# If conditions are imposed on you (whether by court order, agreement or +# otherwise) that contradict the conditions of this License, they do not +# excuse you from the conditions of this License. 
If you cannot convey a +# covered work so as to satisfy simultaneously your obligations under this +# License and any other pertinent obligations, then as a consequence you may +# not convey it at all. For example, if you agree to terms that obligate you +# to collect a royalty for further conveying from those to whom you convey +# the Program, the only way you could satisfy both those terms and this +# License would be to refrain entirely from conveying the Program. +# +# 13. Remote Network Interaction; Use with the GNU General Public License. +# +# Notwithstanding any other provision of this License, if you modify the +# Program, your modified version must prominently offer all users +# interacting with it remotely through a computer network (if your version +# supports such interaction) an opportunity to receive the Corresponding +# Source of your version by providing access to the Corresponding Source +# from a network server at no charge, through some standard or customary +# means of facilitating copying of software. This Corresponding Source +# shall include the Corresponding Source for any work covered by version 3 +# of the GNU General Public License that is incorporated pursuant to the +# following paragraph. +# +# Notwithstanding any other provision of this License, you have +# permission to link or combine any covered work with a work licensed +# under version 3 of the GNU General Public License into a single +# combined work, and to convey the resulting work. The terms of this +# License will continue to apply to the part which is the covered work, +# but the work with which it is combined will remain governed by version +# 3 of the GNU General Public License. +# +# 14. Revised Versions of this License. +# +# The Free Software Foundation may publish revised and/or new versions of +# the GNU Affero General Public License from time to time. 
Such new versions +# will be similar in spirit to the present version, but may differ in detail to +# address new problems or concerns. +# +# Each version is given a distinguishing version number. If the +# Program specifies that a certain numbered version of the GNU Affero General +# Public License "or any later version" applies to it, you have the +# option of following the terms and conditions either of that numbered +# version or of any later version published by the Free Software +# Foundation. If the Program does not specify a version number of the +# GNU Affero General Public License, you may choose any version ever published +# by the Free Software Foundation. +# +# If the Program specifies that a proxy can decide which future +# versions of the GNU Affero General Public License can be used, that proxy's +# public statement of acceptance of a version permanently authorizes you +# to choose that version for the Program. +# +# Later license versions may give you additional or different +# permissions. However, no additional obligations are imposed on any +# author or copyright holder as a result of your choosing to follow a +# later version. +# +# 15. Disclaimer of Warranty. +# +# THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +# APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +# HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +# OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +# IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +# ALL NECESSARY SERVICING, REPAIR OR CORRECTION. +# +# 16. Limitation of Liability. 
+# +# IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +# WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +# THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +# GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +# USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +# DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +# PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +# EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGES. +# +# 17. Interpretation of Sections 15 and 16. +# +# If the disclaimer of warranty and limitation of liability provided +# above cannot be given local legal effect according to their terms, +# reviewing courts shall apply local law that most closely approximates +# an absolute waiver of all civil liability in connection with the +# Program, unless a warranty or assumption of liability accompanies a +# copy of the Program in return for a fee. +# +# END OF TERMS AND CONDITIONS +# +# How to Apply These Terms to Your New Programs +# +# If you develop a new program, and you want it to be of the greatest +# possible use to the public, the best way to achieve this is to make it +# free software which everyone can redistribute and change under these terms. +# +# To do so, attach the following notices to the program. It is safest +# to attach them to the start of each source file to most effectively +# state the exclusion of warranty; and each file should have at least +# the "copyright" line and a pointer to where the full notice is found. +# +# +# Copyright (C) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
def bbox_ioa(box1: "torch.Tensor", box2: "torch.Tensor", eps: float = 1e-7) -> "torch.Tensor":
    """
    === NOTE ===
    This function is copied from YOLOv5 repository (yolov5/utils/metrics.py)
    === ==== ===
    Compute, for every box in `box2`, the share of its area that is covered by `box1`. Boxes use the x1y1x2y2 format.

    :param box1: A single box that unpacks into the four values (x1, y1, x2, y2).
    :param box2: The boxes to compare against; `box2.T` has to unpack into the four coordinate rows.
    :param eps: Small constant added to the area of `box2` so that the division never hits zero.

    :return: Intersection area divided by the area of each box in `box2`.
    """
    ref_left, ref_top, ref_right, ref_bottom = box1
    left, top, right, bottom = box2.T

    # Extent of the overlap along each axis; negative extents (no overlap) are clipped to zero.
    overlap_w = (np.minimum(ref_right, right) - np.maximum(ref_left, left)).clip(0)
    overlap_h = (np.minimum(ref_bottom, bottom) - np.maximum(ref_top, top)).clip(0)

    # Area of the boxes in `box2`, stabilised with `eps`.
    area = (right - left) * (bottom - top) + eps

    return overlap_w * overlap_h / area
def drop_block2d(x: "torch.Tensor", prob: float, block_size: int):
    """
    === NOTE ===
    This function is modified from torchvision (torchvision/ops/drop_block.py)
    BSD 3-Clause License
    === ==== ===
    Sample a DropBlock mask for a batch; only the size, dtype and device of `x` are used.

    :param x (Tensor[N, C, H, W]): The input tensor or 4-dimensions with the first one
                    being its batch i.e. a batch with ``N`` rows.
    :param prob (float): Probability of an element to be dropped.
    :param block_size (int): Size of the block to drop. It is capped at the spatial size of `x`.

    :return: A single-channel mask that is 0 inside the dropped blocks and 1 everywhere else.
    """
    import torch

    functional = torch.nn.functional

    if prob < 0.0 or prob > 1.0:
        raise ValueError(f"drop probability has to be between 0 and 1, but got {prob}.")
    if x.ndim != 4:
        raise ValueError(f"input should be 4 dimensional. Got {x.ndim} dimensions.")

    batch, _, height, width = x.shape
    block_size = min(block_size, width, height)

    # Number of positions at which a block of this size fits entirely inside the map
    span_h = height - block_size + 1
    span_w = width - block_size + 1

    # Rate of the Bernoulli distribution used to draw the block seeds
    gamma = (prob * height * width) / ((block_size**2) * (span_h * span_w))
    seeds = torch.empty((batch, 1, span_h, span_w), dtype=x.dtype, device=x.device).bernoulli_(gamma)

    # Grow every seed into a block: zero-pad the seed map, then take a sliding maximum
    half = block_size // 2
    seeds = functional.pad(seeds, [half] * 4, value=0)
    dropped = functional.max_pool2d(seeds, stride=(1, 1), kernel_size=(block_size, block_size), padding=half)

    return 1 - dropped
def _generate_tile_kernel(patch: list, mask: list, tile_size: int) -> Tuple["torch.Tensor", "torch.Tensor"]:
    """
    Generate a perturbed tile of a specific size from one randomly selected patch.

    One patch is drawn at random, oriented so that its longer side lies on the second to last dimension, and then
    resized or repeated (with random flips and random blank cells) until it fills the tile. The result is
    centre-cropped to a side length of `max(tile_size - 2, 1) + 2`.

    :param patch: Candidate patches.
    :param mask: Masks for each patch. A `None` entry stands for a mask of ones.
    :param tile_size: The size of each tile.
    :return: Perturbed tile and corresponding mask.
    """
    import torch
    import torchvision

    # Draw a single candidate and its mask
    idx_seq = list(range(len(patch)))
    target = random.sample(idx_seq, k=1)[0]
    t_patch = patch[target]
    t_mask = mask[target]
    if t_mask is None:
        t_mask = torch.ones_like(t_patch)
    width, height = t_patch.shape[-2], t_patch.shape[-1]
    # The content is built for an area that is `boundary` pixels smaller on each side; the final CenterCrop
    # brings the side length back to the reduced size plus 2 * boundary
    boundary = 1
    tile_size = max(tile_size - 2 * boundary, 1)

    # Work with the longer side on dimension -2; the swap is undone at the end of the function
    if height > width:
        flip = True
        FlipOp = torchvision.transforms.RandomVerticalFlip(0.2)  # pylint: disable=C0103
        max_len = height
        min_len = width
        t_patch = torch.permute(t_patch, (0, 2, 1))
        t_mask = torch.permute(t_mask, (0, 2, 1))
    else:
        flip = False
        FlipOp = torchvision.transforms.RandomHorizontalFlip(0.2)  # pylint: disable=C0103
        max_len = width
        min_len = height

    if max_len > tile_size:
        # The patch is longer than the tile: shrink it to the full tile length and repeat it along the last dimension
        new_len = round(min_len * tile_size / max_len)
        p_1 = torchvision.transforms.Resize((tile_size, new_len))(t_patch)
        # fix for the case that (strides - new_len) > new_len
        p_list = []

        for _ in range(tile_size // new_len):
            p_list.append(FlipOp(p_1))

        # Fill the remaining columns with a random crop of the resized patch
        p_2 = torchvision.transforms.RandomCrop((tile_size, tile_size % new_len))(p_1)
        p_list.append(FlipOp(p_2))

        n_patch = torch.cat(p_list, dim=-1)
        n_patch = torchvision.transforms.CenterCrop((tile_size + 2 * boundary, tile_size + 2 * boundary))(n_patch)
        # The mask is active wherever the assembled tile is non-zero
        n_mask = torch.where(n_patch == 0, torch.zeros_like(n_patch), torch.ones_like(n_patch))

    elif max_len >= tile_size / 2.0:
        # The patch covers at least half of the tile: resize it to half the tile length and stack two cells per column
        new_len = round(min_len * (tile_size / 2.0) / max_len)

        p_list = []
        for _ in range(tile_size // new_len):
            repeat = 2
            p1_list = []
            for _ in range(repeat):
                p_1 = torchvision.transforms.Resize((tile_size // 2, new_len))(t_patch)
                # Each cell holds the patch in 60% of the draws and stays blank otherwise
                if torch.rand([]) < 0.6:
                    p1_list.append(FlipOp(p_1))
                else:
                    p1_list.append(torch.zeros_like(p_1))
            p_1 = torch.cat(p1_list, dim=-2)
            p_list.append(p_1)

        # `p_1` is the last stacked column built by the loop above
        p_2 = torchvision.transforms.RandomCrop((tile_size, tile_size % new_len))(p_1)
        p_list.append(FlipOp(p_2))

        n_patch = torch.cat(p_list, dim=-1)
        n_patch = torchvision.transforms.CenterCrop((tile_size + 2 * boundary, tile_size + 2 * boundary))(n_patch)
        n_mask = torch.where(n_patch == 0, torch.zeros_like(n_patch), torch.ones_like(n_patch))

    else:
        # The patch is smaller than half of the tile: lay it out on a grid at its original size.
        # Patch and mask are stacked so that a single flip is applied to both of them.
        t_1 = torch.cat([t_patch[None, :], t_mask[None, :]], dim=0)
        p_list = []
        n_list = []
        for _ in range(tile_size // min_len):
            p1_list = []
            m1_list = []
            for _ in range(tile_size // max_len):
                # Each cell holds the patch in 40% of the draws and stays blank otherwise
                if torch.rand([]) < 0.4:
                    t_1 = FlipOp(t_1)
                    p1_list.append(t_1[0, :])
                    m1_list.append(t_1[1, :])
                else:
                    p1_list.append(torch.zeros_like(t_patch))
                    m1_list.append(torch.zeros_like(t_mask))
            p_1 = torch.cat(p1_list, dim=-2)
            m_1 = torch.cat(m1_list, dim=-2)
            p_list.append(p_1)
            n_list.append(m_1)
        n_patch = torch.cat(p_list, dim=-1)
        n_mask = torch.cat(n_list, dim=-1)
        n_patch = torchvision.transforms.CenterCrop((tile_size + 2 * boundary, tile_size + 2 * boundary))(n_patch)
        n_mask = torchvision.transforms.CenterCrop((tile_size + 2 * boundary, tile_size + 2 * boundary))(n_mask)

    # Restore the original orientation
    if flip:
        n_patch = torch.permute(n_patch, (0, 2, 1))
        n_mask = torch.permute(n_mask, (0, 2, 1))

    return n_patch, n_mask
class TileObj:
    """
    Internally used object that stores information about each tile.
    """

    def __init__(self, tile_size: int, device: "torch.device") -> None:
        """
        Create a tile instance.

        :param tile_size: Side length of the square tile.
        :param device: Device on which the tensors of the tile are allocated.
        """
        import torch

        # Perturbed pixels of the tile; a fresh tile starts out all zero
        self.patch = torch.zeros((3, tile_size, tile_size), device=device)
        # Score of the tile (lower is better); initialised to the number of elements of the patch
        self.diff = torch.ones([], device=device) * self.patch.shape.numel()
        # Number of boxes counted for this tile
        self.bcount = 0
        # Whether the tile was marked as eligible by the caller
        self.eligible = False

    def update(self, eligible=None, bcount=None, diff=None, patch=None) -> None:
        """
        Update the properties of the object. Arguments left at `None` keep their current value.

        :param eligible: New eligibility flag.
        :param bcount: New box count.
        :param diff: New score.
        :param patch: New patch tensor.
        """
        if eligible is not None:
            self.eligible = eligible

        if bcount is not None:
            self.bcount = bcount

        if diff is not None:
            self.diff = diff

        if patch is not None:
            self.patch = patch

    def compare(self, target: "TileObj") -> bool:
        """
        Comparison operation.

        Tiles are ranked by eligibility first, then by a larger box count and finally by a smaller score.

        :param target: The tile to compare against.
        :return: True if this tile ranks strictly better than `target`.
        """

        if self.eligible is True and target.eligible is False:
            return True

        if self.eligible is False and target.eligible is True:
            return False

        if self.bcount > target.bcount:
            return True
        if self.bcount < target.bcount:
            return False

        return bool(self.diff < target.diff)


class TileArray:
    """
    Internally used object that stores the list of tiles.
    """

    def __init__(self, xyxy: list, threshold: int, tile_size: int, k: int, device: "torch.device") -> None:
        """
        Initialization operation.

        :param xyxy: Coordinates (x1, y1, x2, y2) of the grid cell covered by this array.
        :param threshold: Minimum box count a tile needs in order to be inserted.
        :param tile_size: Side length of the tiles.
        :param k: Number of tiles kept in the buffer.
        :param device: Device on which the tensors are allocated.
        """
        import torch

        self.threshold = threshold
        self.tile_size = tile_size
        self.device = device
        self.xyxy = torch.Tensor(xyxy).to(device)
        self.k = k
        # Build one placeholder per slot. `[TileObj(...)] * k` would store k references to a single object, so
        # an in-place `update` of one slot would silently change all of them.
        self.patch_list = [TileObj(tile_size=tile_size, device=device) for _ in range(self.k)]

    def insert(self, target: TileObj) -> None:
        """
        Insertion operation.

        The tile is placed in front of the first stored tile it ranks better than; the buffer is then cut back to
        `k` entries. Tiles with a box count below the threshold are ignored.

        :param target: The tile to insert.
        """
        if target.bcount < self.threshold:
            return

        prev = self.patch_list
        out = []
        for k_it in range(self.k):
            if target.compare(prev[k_it]):
                out.append(target)
                out = out + prev[k_it:]
                break

            out.append(prev[k_it])

        self.patch_list = out[: self.k]

    def pop(self) -> None:
        """
        Pop operation. Drops the best tile and appends a fresh placeholder so that the buffer keeps its length.
        """
        out = self.patch_list[1:] + [TileObj(tile_size=self.tile_size, device=self.device)]
        self.patch_list = out
+ :param eps: Maximum perturbation that the attacker can introduce. + :param max_iter: The maximum number of iterations. + :param num_grid: The number of grids for width and high dimension. + """ + super().__init__(estimator=estimator) + self.eps = eps + self.max_iter = max_iter + self.num_grid = num_grid + self.batch_size = 1 + self.candidates = candidates + self.threshold_objs = 1 # the expect number of objects + self.collector = collector + self._check_params() + + def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray: + """ + Generate adversarial samples and return them in an array. + + :param x: An array with the original inputs to be attacked. + :param y: Not used. + :return: An array holding the adversarial examples. + """ + + # Compute adversarial examples with implicit batching + x_adv = x.copy() + for batch_id in range(int(np.ceil(x_adv.shape[0] / float(self.batch_size)))): + batch_index_1 = batch_id * self.batch_size + batch_index_2 = min((batch_id + 1) * self.batch_size, x_adv.shape[0]) + x_batch = x_adv[batch_index_1:batch_index_2] + x_adv[batch_index_1:batch_index_2] = self._generate_batch(x_batch) + + return x_adv + + def _generate_batch( + self, x_batch: np.ndarray, y_batch: Optional[np.ndarray] = None # pylint: disable=W0613 + ) -> np.ndarray: + """ + Run the attack on a batch of images. + + :param x_batch: A batch of original examples. + :param y_batch: Not Used. + :return: A batch of adversarial examples. + """ + import torch + + x_org = torch.from_numpy(x_batch).to(self.estimator.device) + x_adv = x_org.clone() + + cond = torch.logical_or(x_org < 0.0, x_org > 1.0) + if torch.any(cond): + raise ValueError("The value of each pixel must be normalized in the range [0, 1].") + + x_adv = self._attack(x_adv, x_org) + + return x_adv.cpu().detach().numpy() + + def _attack(self, x_adv: "torch.Tensor", x: "torch.Tensor") -> "torch.Tensor": + """ + Run attack. + + :param x_batch: A batch of original examples. 
+ :param y_batch: Not Used. + :return: A batch of adversarial examples. + """ + import torch + + if self.candidates is None: + raise ValueError("A set of patches should be collected before executing the attack.") + + if x.shape[-1] % self.num_grid != 0 or x.shape[-2] % self.num_grid != 0: + raise ValueError("The size of the image must be divided by the number of grids") + tile_size = x.shape[-1] // self.num_grid + + # Prapare a 2D array to store the results of each grid + buffer_depth = 5 + tile_mat = {} + for idx_i in range(self.num_grid): + for idx_j in range(self.num_grid): + x_1 = idx_i * tile_size + y_1 = idx_j * tile_size + x_2 = x_1 + tile_size + y_2 = y_1 + tile_size + tile_mat[(idx_i, idx_j)] = TileArray( + list([x_1, y_1, x_2, y_2]), self.threshold_objs, tile_size, buffer_depth, self.estimator.device + ) + + # init guess + n_samples = 10 + x_adv, tile_mat = self._init_guess(tile_mat, x_adv, x, tile_size, n_samples=n_samples) + + batch_idx = 0 + candidates_patch = self.candidates + candidates_mask = [None] * len(candidates_patch) + + r_tile = torch.zeros((0, 3, tile_size, tile_size), device=self.estimator.device) + r_mask = torch.zeros((0, 3, tile_size, tile_size), device=self.estimator.device) + while r_tile.shape[0] < n_samples: + t_tile, t_mask = generate_tile(candidates_patch, candidates_mask, tile_size, [1, 2]) + r_tile = torch.cat([r_tile, t_tile], dim=0) + r_mask = torch.cat([r_mask, t_mask], dim=0) + + for _ in range(self.max_iter): + adv_patch, adv_position = self.collector(self.estimator, x_adv) + adv_position = adv_position[0] + candidates_patch = candidates_patch + adv_patch[0] + candidates_mask = candidates_mask + [None] * len(adv_patch[0]) + + for key, obj in tile_mat.items(): + idx_i, idx_j = key + box_1 = obj.xyxy + obj_threshold = obj.threshold + [x_1, y_1, x_2, y_2] = box_1.type(torch.IntTensor) # type: ignore + overlay = bbox_ioa(box_1.type(torch.FloatTensor), adv_position.type(torch.FloatTensor)) # type: ignore + bcount = 
torch.sum(overlay > 0.0).item() + + pert = x_adv[batch_idx, :, y_1:y_2, x_1:x_2] - x[batch_idx, :, y_1:y_2, x_1:x_2] + loss = self._get_loss(pert, self.eps) + eligible = torch.max(torch.abs(pert)) < self.eps and bcount >= obj_threshold + tpatch_cur = TileObj(tile_size=tile_size, device=self.estimator.device) + tpatch_cur.update(eligible, bcount, torch.sum(loss), x_adv[batch_idx, :, y_1:y_2, x_1:x_2].clone()) + + # insert op + prev = tile_mat[(idx_i, idx_j)] + prev.insert(tpatch_cur) + tile_mat[(idx_i, idx_j)] = prev + + sorted_patch = tile_mat[(idx_i, idx_j)].patch_list + bcount_list = [] + for cur_sp in sorted_patch: + if cur_sp.bcount >= obj_threshold: + bcount_list.append(cur_sp) + + if len(bcount_list) == buffer_depth and bcount_list[-1].bcount > obj_threshold: + tile_mat[(idx_i, idx_j)].threshold = obj_threshold + 1 + + if len(bcount_list) < buffer_depth: + + while r_tile.shape[0] < int(1.5 * n_samples): + t_tile, t_mask = generate_tile(candidates_patch, candidates_mask, tile_size, [1, 2]) + r_tile = torch.cat([r_tile, t_tile], dim=0) + r_mask = torch.cat([r_mask, t_mask], dim=0) + + # select n_sample candidates + c_tile = r_tile + idx_perm = torch.randperm(c_tile.shape[0]) + idx_perm = idx_perm[:n_samples] + c_tile = r_tile[idx_perm, :] + c_mask = r_mask[idx_perm, :] + x_ref = x[:, :, y_1:y_2, x_1:x_2] + + updated = ((1.0 - c_mask) * x_ref) + c_mask * (0.0 * x_ref + 1.0 * c_tile) + + n_mask = drop_block2d(c_mask, 0.05, 1) + updated = (1.0 - n_mask) * x_ref + n_mask * updated + pert = updated - x_ref + + loss = torch.sum(self._get_loss(pert, self.eps), dim=(1, 2, 3)) + min_idx = torch.min(loss, dim=0).indices.item() + updated = updated[min_idx, :] + updated = updated[None, :] + + else: + target = bcount_list[0].patch[None, :] + x_ref = x[batch_idx, :, y_1:y_2, x_1:x_2] + updated = self._color_projection(target, x_ref, self.eps) + + x_adv[batch_idx, :, y_1:y_2, x_1:x_2] = updated + x_adv = torch.round(x_adv * 255.0) / 255.0 + x_adv = torch.clamp(x_adv, x - 2.5 
* self.eps, x + 2.5 * self.eps) + x_adv = torch.clamp(x_adv, 0.0, 1.0) + + x_out = self._assemble(tile_mat, x) + mask = torch.zeros_like(x_out) + _, adv_position = self.collector(self.estimator, x_out) + for pos in adv_position[0]: + mask[:, :, pos[1] : pos[3], pos[0] : pos[2]] = mask[:, :, pos[1] : pos[3], pos[0] : pos[2]] + 1 + mask = torch.where(mask > 0, torch.ones_like(mask), torch.zeros_like(mask)) + x_adv = mask * x_out + (1.0 - mask) * x + x_adv = torch.clamp(x_adv, x - self.eps, x + self.eps) + x_adv = torch.clamp(x_adv, 0.0, 1.0) + + return x_adv + + def _get_loss(self, pert: "torch.Tensor", epsilon: float) -> "torch.Tensor": # pylint: disable=R0201 + """ + Calculate accumulated distance of the perturbations outside the epslion ball. + + :param pert: Perturbations in the pixel space. + :param epsilon: The radius of the eplion bass. + :return: loss. + """ + import torch + + count = torch.where(pert == 0, torch.zeros_like(pert), torch.ones_like(pert)) + pert = torch.where(torch.abs(pert) <= epsilon, torch.zeros_like(pert), pert) + pert = torch.abs(pert) + loss = torch.sqrt(pert) / torch.sum(count) + + return loss + + def _color_projection( # pylint: disable=R0201 + self, tile: "torch.Tensor", x_ref: "torch.Tensor", epsilon: float + ) -> "torch.Tensor": + """ + Convert statistics information from target to source. + + :param tile: The target to convert. + :param x_ref: The source data. + :param epsilon: The radius of the eplion bass. + :return: The converted tile. 
+ """ + import torch + + if len(tile.shape) == 3: + tile = tile[None, :] + if len(x_ref.shape) == 3: + x_ref = x_ref[None, :] + + pert = tile - x_ref + cond = torch.abs(pert) > epsilon + sign = (torch.rand_like(pert) - 0.5) * 2 + + u_bound = torch.max(pert, torch.ones_like(pert) * epsilon) + l_bound = torch.min(pert, torch.ones_like(pert) * -epsilon) + set1 = torch.where(sign > 0, 0.5 * pert, pert - torch.sign(pert) * epsilon) + set1 = torch.clamp(set1, l_bound, u_bound) + set1 = set1 + x_ref + + set2 = tile + mean_s = torch.mean(x_ref, dim=(-2, -1), keepdim=True) + mean_t = torch.mean(x_ref, dim=(-2, -1), keepdim=True) + std_s = torch.std(set2, dim=(-2, -1), keepdim=True) + std_t = torch.std(set2, dim=(-2, -1), keepdim=True) + scale = std_s / std_t + set2 = (set2 - mean_t) * scale + mean_s + set2 = torch.clamp(set2, 0.0, 1.0) + + set2 = set2 + sign * epsilon * scale + set2 = torch.clamp(set2, 0, 1) + + updated = torch.where(cond, set1, set2) + + return updated + + def _assemble(self, tile_mat: dict, x_org: "torch.Tensor") -> "torch.Tensor": # pylint: disable=R0201 + """ + Combine the best patches from each grid into a single image. + + :param tile_mat: Internal structure used to store patches for each mesh. + :param x_org: The original images. + :return: Perturbed images. + """ + import torch + + ans = x_org.clone() + for obj in tile_mat.values(): + [x_1, y_1, x_2, y_2] = obj.xyxy.type(torch.IntTensor) + tile = obj.patch_list[0].patch[None, :] + mask = torch.where(tile != 0, torch.ones_like(tile), torch.zeros_like(tile)) + ans[0, :, y_1:y_2, x_1:x_2] = mask * tile + (1.0 - mask) * ans[0, :, y_1:y_2, x_1:x_2] + return ans + + def _init_guess( + self, tile_mat: dict, x_init: "torch.Tensor", x_org: "torch.Tensor", tile_size: int, n_samples: int + ) -> Tuple["torch.Tensor", dict]: + """ + Generate an initial perturbation for each grid. + + :param tile_mat: Internal structure used to store patches for each mesh. + :param x_init: Perturbed images from previous runs. 
+ :param x_org: The original images. + :param tile_size: The size of each tile. + :return: Guessed images and internal structure. + """ + import torch + + TRIAL = 10 # pylint: disable=C0103 + patches = self.candidates + masks = [None] * len(self.candidates) + for _ in range(TRIAL): + x_cand = torch.zeros( + (n_samples, 3, x_init.shape[-2], x_init.shape[-1]), dtype=x_init.dtype, device=self.estimator.device + ) + + # generate tiles + # To save the computing time, we generate some tiles in advance. + # partial tiles are updated on-the-fly + r_tile = torch.zeros((0, 3, tile_size, tile_size), device=self.estimator.device) + r_mask = torch.zeros((0, 3, tile_size, tile_size), device=self.estimator.device) + while r_tile.shape[0] < n_samples: + t_tile, t_mask = generate_tile(patches, masks, tile_size, [1, 2]) + r_tile = torch.cat([r_tile, t_tile], dim=0) + r_mask = torch.cat([r_mask, t_mask], dim=0) + + for _, obj in tile_mat.items(): + # select n_samples + while r_tile.shape[0] < int(1.5 * n_samples): + t_tile, t_mask = generate_tile(patches, masks, tile_size, [1, 2]) + r_tile = torch.cat([r_tile, t_tile], dim=0) + r_mask = torch.cat([r_mask, t_mask], dim=0) + + idx_perm = torch.randperm(r_tile.shape[0]) + idx_perm = idx_perm[:n_samples] + tile_perm = r_tile[idx_perm, :] + mask_perm = r_mask[idx_perm, :] + + # merge tiles + box_1 = obj.xyxy + [x_1, y_1, x_2, y_2] = box_1.type(torch.IntTensor) + x_ref = x_init[:, :, y_1:y_2, x_1:x_2] + x_new = ((1.0 - mask_perm) * x_ref) + mask_perm * (0.0 * x_ref + 1.0 * tile_perm) + + # randomly roll-back + rand_rb = torch.rand([n_samples, 1, 1, 1], device=self.estimator.device) + x_new = torch.where(rand_rb < 0.8, x_new, x_ref) + x_cand[:, :, y_1:y_2, x_1:x_2] = x_new + + # spatial drop + n_mask = drop_block2d(x_cand, 0.05, 3) + x_cand = (1.0 - n_mask) * x_org + n_mask * x_cand + # x_cand = smooth_image(x_cand, x_org, epsilon, 10) + x_cand = torch.round(x_cand * 255.0) / 255.0 + x_cand = torch.clamp(x_cand, x_org - 2.5 * self.eps, 
x_org + 2.5 * self.eps) + x_cand = torch.clamp(x_cand, 0.0, 1.0) + + # update results + _, adv_position = self.collector(self.estimator, x_cand) + for idx in range(n_samples): + cur_position = adv_position[idx] + + for key, obj in tile_mat.items(): + + idx_i, idx_j = key + box_1 = obj.xyxy + obj_threshold = obj.threshold + [x_1, y_1, x_2, y_2] = box_1.type(torch.IntTensor) + overlay = bbox_ioa(box_1.type(torch.FloatTensor), cur_position.type(torch.FloatTensor)) + bcount = torch.sum(overlay > 0.0).item() + + x_ref = x_org[:, :, y_1:y_2, x_1:x_2] + x_cur = x_cand[idx, :, y_1:y_2, x_1:x_2].clone() + + pert = x_cur - x_ref + loss = self._get_loss(pert, self.eps) + eligible = torch.max(torch.abs(pert)) < self.eps and bcount >= obj_threshold + tpatch_cur = TileObj(tile_size=tile_size, device=self.estimator.device) + tpatch_cur.update(eligible, bcount, torch.sum(loss), x_cur) + # insert op + prev = tile_mat[(idx_i, idx_j)] + prev.insert(tpatch_cur) + tile_mat[(idx_i, idx_j)] = prev + + # clean non-active regions + x_out = x_init.clone() + x_eval = self._assemble(tile_mat, x_org) + _, adv_position = self.collector(self.estimator, x_eval) + cur_position = adv_position[0] + for key, obj in tile_mat.items(): + idx_i, idx_j = key + box_1 = obj.xyxy + [x_1, y_1, x_2, y_2] = box_1.type(torch.IntTensor) + overlay = bbox_ioa(box_1.type(torch.FloatTensor), cur_position.type(torch.FloatTensor)) + bcount = torch.sum(overlay > 0.0).item() + + x_ref = x_init[:, :, y_1:y_2, x_1:x_2] + x_tag = x_eval[:, :, y_1:y_2, x_1:x_2] + cur_mask = torch.zeros_like(x_ref) + if bcount > 1: + bbox = cur_position[overlay > 0.0] + for box in bbox: + bx1 = torch.clamp_min(box[0] - x_1, 0) + by1 = torch.clamp_min(box[1] - y_1, 0) + bx2 = torch.clamp_max(box[2] - x_1, (x_2 - x_1 - 1).to(self.estimator.device)) + by2 = torch.clamp_max(box[3] - y_1, (y_2 - y_1 - 1).to(self.estimator.device)) + cur_mask[:, :, by1:by2, bx1:bx2] = 1.0 + else: + prev = tile_mat[(idx_i, idx_j)] + prev.pop() + tile_mat[(idx_i, 
idx_j)] = prev + + a_mask = drop_block2d(x_ref, 0.05, 1) + cur_mask = cur_mask * a_mask + updated = ((1.0 - cur_mask) * x_ref) + cur_mask * (0.0 * x_ref + 1.0 * x_tag) + updated = ((1.0 - cur_mask) * x_ref) + cur_mask * (0.0 * x_ref + 1.0 * updated) + + x_out[:, :, y_1:y_2, x_1:x_2] = updated + + return x_out, tile_mat + + def _check_params(self) -> None: + + if not isinstance(self.eps, float): + raise TypeError("The eps has to be of type float.") + + if self.eps < 0 or self.eps > 1: + raise ValueError("The eps must be in the range [0, 1].") + + if not isinstance(self.max_iter, int): + raise TypeError("The max_iter has to be of type int.") + + if self.max_iter < 1: + raise ValueError("The number of iterations must be a positive integer.") + + if not isinstance(self.num_grid, int): + raise TypeError("The num_grid has to be of type int.") + + if self.num_grid < 1: + raise ValueError("The number of grid must be a positive integer.") + + if not isinstance(self.candidates, list): + raise TypeError("Candidates must be stored in list.") + + if len(self.candidates) < 1: + raise ValueError("The list of candidates is empty.") diff --git a/art/defences/detector/evasion/__init__.py b/art/defences/detector/evasion/__init__.py index 26112a2afe..2a08f29e7a 100644 --- a/art/defences/detector/evasion/__init__.py +++ b/art/defences/detector/evasion/__init__.py @@ -6,3 +6,4 @@ from art.defences.detector.evasion.binary_input_detector import BinaryInputDetector from art.defences.detector.evasion.binary_activation_detector import BinaryActivationDetector from art.defences.detector.evasion.subsetscanning.detector import SubsetScanningDetector +from art.defences.detector.evasion.beyond_detector import BeyondDetectorPyTorch diff --git a/art/defences/detector/evasion/beyond_detector.py b/art/defences/detector/evasion/beyond_detector.py new file mode 100644 index 0000000000..e48c056ea6 --- /dev/null +++ b/art/defences/detector/evasion/beyond_detector.py @@ -0,0 +1,185 @@ +# MIT License +# +# 
class BeyondDetectorPyTorch(EvasionDetector):
    """
    BEYOND detector for adversarial samples detection.

    Each input is compared with `aug_num` augmented copies of itself. For every copy, the agreement between the
    self-supervised (SSL) model's label for the input and its prediction for the copy is blended (weight `alpha`)
    with the cosine similarity of their projected representations. The `var_K`-th largest blended similarity is the
    sample's score; scores below a threshold calibrated on clean data flag the sample as adversarial.

    | Paper link: https://openreview.net/pdf?id=S4LqI6CcJ3
    """

    defence_params = ["target_model", "ssl_model", "augmentations", "aug_num", "alpha", "var_K", "percentile"]

    def __init__(
        self,
        target_classifier: "CLASSIFIER_NEURALNETWORK_TYPE",
        ssl_classifier: "CLASSIFIER_NEURALNETWORK_TYPE",
        augmentations: Callable,
        aug_num: int = 50,
        alpha: float = 0.8,
        var_K: int = 20,
        percentile: int = 5,
    ) -> None:
        """
        Initialize the BEYOND detector.

        :param target_classifier: The target model to be protected
        :param ssl_classifier: The self-supervised learning model used for feature extraction. Its `model` must
                               expose `backbone`, `classifier` and `projector` sub-modules.
        :param augmentations: Callable applied to a batch of images to generate one augmented neighbour batch
        :param aug_num: Number of augmentations to apply to each sample (default: 50)
        :param alpha: Weight factor for combining label and representation similarities (default: 0.8)
        :param var_K: Rank (1-based) of the sorted similarity used as the sample's score; must not exceed `aug_num`
                      (default: 20)
        :param percentile: Percentile of the clean scores used as detection threshold (default: 5)
        :raises ValueError: If `aug_num`, `var_K` or `percentile` is out of range.
        :raises TypeError: If `augmentations` is not callable.
        """
        import torch

        super().__init__()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.target_model = target_classifier.model.to(self.device)
        self.ssl_model = ssl_classifier.model.to(self.device)
        self.aug_num = aug_num
        self.alpha = alpha
        self.var_K = var_K

        self.backbone = self.ssl_model.backbone
        self.model_classifier = self.ssl_model.classifier
        self.projector = self.ssl_model.projector

        # Stored under the name advertised in `defence_params`, so that access by that name resolves.
        # NOTE(review): presumably the base class reads/writes `defence_params` entries via getattr/setattr - confirm.
        self.augmentations = augmentations

        self.percentile = percentile  # determine the threshold
        self.threshold: float | None = None

        self._check_params()

    @property
    def img_augmentations(self) -> Callable:
        """
        Backward-compatible alias of `augmentations`.
        """
        return self.augmentations

    @img_augmentations.setter
    def img_augmentations(self, value: Callable) -> None:
        self.augmentations = value

    def _check_params(self) -> None:
        """
        Validate the parameters that would otherwise fail late (or silently misbehave) in `fit` / `detect`.

        :raises ValueError: If `aug_num` < 1, `var_K` is outside [1, aug_num] or `percentile` is outside [0, 100].
        :raises TypeError: If `augmentations` is not callable.
        """
        if not isinstance(self.aug_num, (int, np.integer)) or self.aug_num < 1:
            raise ValueError("The number of augmentations `aug_num` must be a positive integer.")

        # The score is read at column `var_K - 1` of an array with `aug_num` columns: 0 would wrap around to the
        # last column and anything above `aug_num` would be out of bounds.
        if not isinstance(self.var_K, (int, np.integer)) or not 1 <= self.var_K <= self.aug_num:
            raise ValueError("`var_K` must be an integer in the range [1, aug_num].")

        if not 0 <= self.percentile <= 100:
            raise ValueError("`percentile` must be in the range [0, 100].")

        if not callable(self.augmentations):
            raise TypeError("`augmentations` must be callable.")

    def _multi_transform(self, img: "torch.Tensor") -> "torch.Tensor":
        """
        Apply the augmentation callable `aug_num` times and stack the results along a new dimension 1.

        :param img: Batch of images
        :return: Tensor holding `aug_num` augmented versions of every image in the batch
        """
        import torch

        return torch.stack([self.augmentations(img) for _ in range(self.aug_num)], dim=1)

    def _get_metrics(self, x: np.ndarray, batch_size: int = 128) -> np.ndarray:
        """
        Calculate, for every sample, the similarities to its augmented neighbours, combining label consistency and
        representation similarity.

        :param x: Input samples
        :param batch_size: Batch size for processing
        :return: Array of shape `(nb_samples, aug_num)` with each row sorted in descending order
        :raises ValueError: If `batch_size` is smaller than 1.
        """
        import torch
        import torch.nn.functional as F

        if batch_size < 1:
            raise ValueError("The batch size must be a positive integer.")

        nb_samples = len(x)
        if nb_samples == 0:
            # Nothing to score; np.concatenate would raise on an empty list of batches.
            return np.empty((0, self.aug_num), dtype=np.float32)

        self.target_model.eval()
        self.backbone.eval()
        self.model_classifier.eval()
        self.projector.eval()

        similarities_list = []

        with torch.no_grad():
            for start in range(0, nb_samples, batch_size):
                # Only the current batch is moved to the device, so large datasets do not have to fit on it at once.
                batch_samples = torch.from_numpy(x[start : start + batch_size]).to(self.device)
                b, c, h, w = batch_samples.shape

                trans_images = self._multi_transform(batch_samples).to(self.device)
                ssl_backbone_out = self.backbone(batch_samples)

                ssl_repre = self.projector(ssl_backbone_out)
                ssl_pred = self.model_classifier(ssl_backbone_out)
                ssl_label = torch.max(ssl_pred, -1)[1]

                # Fold the augmentation dimension into the batch dimension for a single forward pass.
                aug_backbone_out = self.backbone(trans_images.reshape(-1, c, h, w))
                aug_repre = self.projector(aug_backbone_out)
                aug_pred = self.model_classifier(aug_backbone_out)
                aug_pred = aug_pred.reshape(b, self.aug_num, -1)

                sim_repre = F.cosine_similarity(
                    ssl_repre.unsqueeze(dim=1), aug_repre.reshape(b, self.aug_num, -1), dim=2
                )

                # one_hot yields an integer tensor; cast it to the prediction dtype before mixing it with float logits.
                ssl_one_hot = F.one_hot(ssl_label, num_classes=ssl_pred.shape[-1]).to(aug_pred.dtype)
                sim_preds = F.cosine_similarity(ssl_one_hot.unsqueeze(dim=1), aug_pred, dim=2)

                combined = self.alpha * sim_preds + (1 - self.alpha) * sim_repre
                similarities_list.append(combined.sort(descending=True)[0].cpu().numpy())

        similarities = np.concatenate(similarities_list, axis=0)

        return similarities

    def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: int = 20, **kwargs) -> None:
        """
        Determine the detection threshold as the `percentile`-th percentile of the clean samples' scores. With the
        default percentile of 5, about 95% of the clean samples score at or above the threshold.

        :param x: Clean sample data
        :param y: Clean sample labels (not used in this method)
        :param batch_size: Batch size for processing
        :param nb_epochs: Number of training epochs (not used in this method)
        :raises ValueError: If `x` is empty.
        """
        if len(x) == 0:
            raise ValueError("Cannot determine the detection threshold from an empty set of samples.")

        clean_metrics = self._get_metrics(x=x, batch_size=batch_size)
        k_minus_one_metrics = clean_metrics[:, self.var_K - 1]
        self.threshold = np.percentile(k_minus_one_metrics, q=self.percentile)

    def detect(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> tuple[np.ndarray, np.ndarray]:  # type: ignore
        """
        Detect whether given samples are adversarial

        :param x: Input samples
        :param batch_size: Batch size for processing
        :return: (report, is_adversarial):
                where report contains the score of every sample (its `var_K`-th largest similarity)
                where is_adversarial is a boolean array that is True for samples scoring below the threshold
        :raises ValueError: If the detector has not been fitted.
        """
        if self.threshold is None:
            raise ValueError("Detector has not been fitted. Call fit() before detect().")

        similarities = self._get_metrics(x, batch_size)

        report = similarities[:, self.var_K - 1]
        is_adversarial = report < self.threshold

        return report, is_adversarial
""" - from scipy.stats import binom_test + from scipy.stats import binomtest is_abstain = kwargs.get("is_abstain") if is_abstain is not None and not isinstance(is_abstain, bool): # pragma: no cover @@ -100,12 +100,15 @@ def predict(self, x: np.ndarray, batch_size: int = 128, verbose: bool = False, * # get class counts counts_pred = self._prediction_counts(x_i, batch_size=batch_size) top = counts_pred.argsort()[::-1] - count1 = np.max(counts_pred) - count2 = counts_pred[top[1]] + # conversion to int + count1 = int(np.max(counts_pred)) + count2 = int(counts_pred[top[1]]) # predict or abstain smooth_prediction = np.zeros(counts_pred.shape) - if (not is_abstain) or (binom_test(count1, count1 + count2, p=0.5) <= self.alpha): + # get p value from BinomTestResult object + p_value = binomtest(count1, count1 + count2, p=0.5).pvalue + if (not is_abstain) or (p_value <= self.alpha): smooth_prediction[np.argmax(counts_pred)] = 1 elif is_abstain: n_abstained += 1 diff --git a/art/estimators/classification/classifier.py b/art/estimators/classification/classifier.py index 33e52202ff..33db5bb926 100644 --- a/art/estimators/classification/classifier.py +++ b/art/estimators/classification/classifier.py @@ -116,7 +116,7 @@ def nb_classes(self, nb_classes: int): """ Set the number of output classes. """ - if nb_classes is None or nb_classes < 2: + if nb_classes is None or (isinstance(nb_classes, (int, np.integer)) and nb_classes < 2): raise ValueError("nb_classes must be greater than or equal to 2.") self._nb_classes = nb_classes diff --git a/art/estimators/object_detection/pytorch_object_detector.py b/art/estimators/object_detection/pytorch_object_detector.py index 3d4cc92e29..ff0fbeaa9d 100644 --- a/art/estimators/object_detection/pytorch_object_detector.py +++ b/art/estimators/object_detection/pytorch_object_detector.py @@ -66,6 +66,7 @@ def __init__( "loss_rpn_box_reg", ), device_type: str = "gpu", + is_yolov8: bool = False, ): """ Initialization. 
@@ -93,6 +94,7 @@ def __init__( 'loss_objectness', and 'loss_rpn_box_reg'. :param device_type: Type of device to be used for model and tensors, if `cpu` run on CPU, if `gpu` run on GPU if available otherwise run on CPU. + :param is_yolov8: The flag to be used for marking the YOLOv8 model. """ import torch import torchvision @@ -137,7 +139,11 @@ def __init__( self._model: torch.nn.Module self._model.to(self._device) - self._model.eval() + self.is_yolov8 = is_yolov8 + if self.is_yolov8: + self._model.model.eval() + else: + self._model.eval() @property def native_label_is_pytorch_format(self) -> bool: @@ -403,7 +409,10 @@ def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> list[dict[s from torch.utils.data import TensorDataset, DataLoader # Set model to evaluation mode - self._model.eval() + if self.is_yolov8: + self._model.model.eval() + else: + self._model.eval() # Apply preprocessing and convert to tensors x_preprocessed, _ = self._preprocess_and_convert_inputs(x=x, y=None, fit=False, no_grad=True) diff --git a/art/estimators/object_detection/pytorch_yolo.py b/art/estimators/object_detection/pytorch_yolo.py index cd7fc69b55..cfe251e8bc 100644 --- a/art/estimators/object_detection/pytorch_yolo.py +++ b/art/estimators/object_detection/pytorch_yolo.py @@ -64,6 +64,7 @@ def __init__( "loss_rpn_box_reg", ), device_type: str = "gpu", + is_yolov8: bool = False, ): """ Initialization. @@ -92,6 +93,7 @@ def __init__( 'loss_objectness', and 'loss_rpn_box_reg'. :param device_type: Type of device to be used for model and tensors, if `cpu` run on CPU, if `gpu` run on GPU if available otherwise run on CPU. + :param is_yolov8: The flag to be used for marking the YOLOv8 model. 
""" super().__init__( model=model, @@ -104,6 +106,7 @@ def __init__( preprocessing=preprocessing, attack_losses=attack_losses, device_type=device_type, + is_yolov8=is_yolov8, ) def _translate_labels(self, labels: list[dict[str, "torch.Tensor"]]) -> "torch.Tensor": diff --git a/art/utils.py b/art/utils.py index 102720505d..de211a830a 100644 --- a/art/utils.py +++ b/art/utils.py @@ -799,15 +799,18 @@ def check_and_transform_label_format( labels: np.ndarray, nb_classes: int | None, return_one_hot: bool = True ) -> np.ndarray: """ - Check label format and transform to one-hot-encoded labels if necessary + Check label format and transform to one-hot-encoded labels if necessary. Only supports single-output classification. :param labels: An array of integer labels of shape `(nb_samples,)`, `(nb_samples, 1)` or `(nb_samples, nb_classes)`. - :param nb_classes: The number of classes. If None the number of classes is determined automatically. + :param nb_classes: The number of classes, as an integer. If None the number of classes is determined automatically. :param return_one_hot: True if returning one-hot encoded labels, False if returning index labels. :return: Labels with shape `(nb_samples, nb_classes)` (one-hot) or `(nb_samples,)` (index). """ labels_return = labels + if nb_classes is not None and not isinstance(nb_classes, (int, np.integer)): + raise TypeError("nb_classes that is not an integer is not supported") + if len(labels.shape) == 2 and labels.shape[1] > 1: # multi-class, one-hot encoded if not return_one_hot: labels_return = np.argmax(labels, axis=1) diff --git a/docs/modules/attacks/evasion.rst b/docs/modules/attacks/evasion.rst index f6f41ad95c..39531bcea3 100644 --- a/docs/modules/attacks/evasion.rst +++ b/docs/modules/attacks/evasion.rst @@ -50,6 +50,12 @@ Auto Conjugate Gradient (Auto-CG) :members: :special-members: +Rescaling-Auto Conjugate Gradient (ReACG) +----------------------------------------- +.. 
autoclass:: RescalingAutoConjugateGradient + :members: + :special-members: + Boundary Attack / Decision-Based Attack --------------------------------------- .. autoclass:: BoundaryAttack diff --git a/notebooks/snal.ipynb b/notebooks/snal.ipynb new file mode 100644 index 0000000000..22a663e9cc --- /dev/null +++ b/notebooks/snal.ipynb @@ -0,0 +1,871 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Steal Now and Attack Later\n", + "\n", + "This notebook provides a demonstration showing how to use ART to launch the SNAL attack [1].\n", + "\n", + "The core concept of this attack is to first collect objects from any model and then in a second step append valid patches to the target image and weaken the impact of unimportant pixels.\n", + "\n", + "\n", + "[1] Steal Now and Attack Later: Evaluating Robustness of Object Detection against Black-box Adversarial Attacks (https://arxiv.org/abs/2404.15881)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import sys\n", + "\n", + "import numpy as np\n", + "import torch\n", + "\n", + "logger = logging.getLogger(__name__)\n", + "logger.setLevel(level=logging.INFO)\n", + "logger.addHandler(logging.StreamHandler(sys.stdout))" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#%% Download a target image from MS COCO dataset\n", + "from io import BytesIO\n", + "from PIL import Image\n", + "\n", + "import requests\n", + "TARGET = 'https://farm2.staticflickr.com/1065/705706084_39a7f28fc9_z.jpg' # val2017/000000552842.jpg\n", + "response = requests.get(TARGET)\n", + "org_img = np.asarray(Image.open(BytesIO(response.content)).resize((640, 640)))\n", + "org_x = np.stack([org_img.transpose((2, 0, 1))], axis=0).astype(np.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "#%% Download YOLOv8 model\n", + "# If ultralytics is not found, please run the command: `pip install ultralytics`\n", + "from ultralytics import YOLO\n", + "from art.estimators.object_detection import PyTorchYolo\n", + "\n", + "model = YOLO('yolov8m')\n", + "py_model = PyTorchYolo(model=model,\n", + " input_shape=(3, 640, 640),\n", + " channels_first=True,\n", + " is_yolov8=True)\n", + "\n", + "# Define a custom function to collect patches from images\n", + "def collect_patches_from_images(model: \"torch.nn.Module\",\n", + " imgs: \"torch.Tensor\"):\n", + " \"\"\"\n", + " Collect patches and corresponding spatial information by the model from images.\n", + "\n", + " :param model: Object detection model.\n", + " :param imgs: Target images.\n", + "\n", + " :return: Detected objects and corresponding spatial information.\n", + " \"\"\"\n", + " import torch\n", + "\n", + " bs = imgs.shape[0]\n", + " with torch.no_grad():\n", + " pred = model.model(imgs)\n", + " y = []\n", + " for obj in pred:\n", + " y.append(obj.boxes.xyxy)\n", + "\n", + " candidates_patch = []\n", + " candidates_position = []\n", + " for i in range(bs):\n", + " patch = []\n", + " if y[i].shape[0] == 0:\n", + " candidates_patch.append(patch)\n", + " candidates_position.append(torch.zeros((0, 4), device=model.device))\n", + " continue\n", + "\n", + " pos_matrix = y[i][:, :4].clone().int()\n", + " pos_matrix[:, 0] = torch.clamp_min(pos_matrix[:, 0], 0)\n", + " pos_matrix[:, 1] = torch.clamp_min(pos_matrix[:, 1], 0)\n", + " pos_matrix[:, 2] = torch.clamp_max(pos_matrix[:, 2], imgs.shape[3])\n", + " pos_matrix[:, 3] = torch.clamp_max(pos_matrix[:, 3], imgs.shape[2])\n", + " for e in 
pos_matrix:\n", + " p = imgs[i, :, e[1]:e[3], e[0]:e[2]]\n", + " patch.append(p.to(model.device))\n", + "\n", + " candidates_patch.append(patch)\n", + " candidates_position.append(pos_matrix)\n", + "\n", + " return candidates_patch, candidates_position" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#%% Prepare dataset\n", + "import os\n", + "import time\n", + "\n", + "# Select images randomly from COCO dataset\n", + "list_url = ['http://farm4.staticflickr.com/3572/5744200926_082c11c43c_z.jpg', #000000460229\n", + " 'http://farm4.staticflickr.com/3010/2749181045_ed450e5d36_z.jpg', #000000057760\n", + " 'http://farm4.staticflickr.com/3826/9451771633_f14cef3a8b_z.jpg', #000000468332\n", + " 'http://farm7.staticflickr.com/6194/6106161903_e505cbc192_z.jpg', #000000190841\n", + " 'http://farm1.staticflickr.com/48/140268688_947e2bcc96_z.jpg', #000000078420\n", + " 'http://farm6.staticflickr.com/5011/5389083366_fdf13f2ee6_z.jpg', #000000309655\n", + " 'http://farm4.staticflickr.com/3552/5812461870_eb24c8eac5_z.jpg', #000000293324\n", + " 'http://farm4.staticflickr.com/3610/3361019695_1005dd49fd_z.jpg', #000000473821\n", + " 'http://farm8.staticflickr.com/7323/9725958435_3359641442_z.jpg', #000000025386\n", + " 'http://farm4.staticflickr.com/3317/3427794620_9db24fe462_z.jpg', #000000347693\n", + " 'http://farm6.staticflickr.com/5143/5589997131_22f51b308c_z.jpg', #000000058029\n", + " 'http://farm5.staticflickr.com/4061/4376326145_7ef66603e3_z.jpg', #000000389933\n", + " 'http://farm3.staticflickr.com/2028/2188480725_5fbf27a5b3_z.jpg', #000000311789\n", + " 'http://farm1.staticflickr.com/172/421715600_666b0f6a2b_z.jpg', #000000506004\n", + " 'http://farm9.staticflickr.com/8331/8100320407_6044d243a5_z.jpg', #000000076648\n", + " 'http://farm4.staticflickr.com/3236/2487649513_1ef6a6d5c9_z.jpg', #000000201646\n", + " 'http://farm4.staticflickr.com/3094/2684280938_a5b59c0fac_z.jpg', #000000447187\n", + " 
'http://farm1.staticflickr.com/42/100911501_005e4d3aa8_z.jpg', #000000126107\n", + " 'http://farm1.staticflickr.com/56/147795701_40d7bc8331_z.jpg', #000000505942\n", + " 'http://farm5.staticflickr.com/4103/5074895283_71a73d77e5_z.jpg', #000000360951\n", + " 'http://farm1.staticflickr.com/160/404335548_3bdc1f2ed9_z.jpg', #000000489764\n", + " 'http://farm9.staticflickr.com/8446/7857456044_401a257790_z.jpg', #000000407574\n", + " ]\n", + "\n", + "ROOT_MSCOCO = 'datasets'\n", + "os.makedirs(ROOT_MSCOCO, exist_ok = True)\n", + "for idx, img_url in enumerate(list_url):\n", + " response = requests.get(img_url)\n", + " with open(f'{ROOT_MSCOCO}/{idx:03d}.jpg', 'wb') as f:\n", + " f.write(response.content)\n", + " time.sleep(0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0: 640x640 1 person, 1 stop sign, 359.4ms\n", + "Speed: 0.0ms preprocess, 359.4ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 2\n", + "\n", + "0: 640x640 12 persons, 295.8ms\n", + "Speed: 0.0ms preprocess, 295.8ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 12\n", + "\n", + "0: 640x640 4 persons, 321.3ms\n", + "Speed: 0.0ms preprocess, 321.3ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 4\n", + "\n", + "0: 640x640 1 person, 1 remote, 279.5ms\n", + "Speed: 0.0ms preprocess, 279.5ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 2\n", + "\n", + "0: 640x640 1 couch, 1 tv, 327.3ms\n", + "Speed: 0.0ms preprocess, 327.3ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 2\n", + "\n", + "0: 640x640 4 persons, 376.3ms\n", + "Speed: 
0.0ms preprocess, 376.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 4\n", + "\n", + "0: 640x640 2 traffic lights, 326.8ms\n", + "Speed: 0.0ms preprocess, 326.8ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 2\n", + "\n", + "0: 640x640 1 bicycle, 1 car, 3 boats, 291.8ms\n", + "Speed: 0.0ms preprocess, 291.8ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 5\n", + "\n", + "0: 640x640 1 toilet, 386.5ms\n", + "Speed: 0.0ms preprocess, 386.5ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 1\n", + "\n", + "0: 640x640 10 persons, 300.6ms\n", + "Speed: 0.0ms preprocess, 300.6ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 10\n", + "\n", + "0: 640x640 3 bananas, 295.7ms\n", + "Speed: 0.0ms preprocess, 295.7ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 3\n", + "\n", + "0: 640x640 4 persons, 1 boat, 1 fire hydrant, 1 bird, 287.3ms\n", + "Speed: 0.0ms preprocess, 287.3ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 7\n", + "\n", + "0: 640x640 1 person, 1 umbrella, 2 kites, 282.3ms\n", + "Speed: 0.0ms preprocess, 282.3ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 4\n", + "\n", + "0: 640x640 1 bench, 1 bed, 345.9ms\n", + "Speed: 0.0ms preprocess, 345.9ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 2\n", + "\n", + "0: 640x640 1 dog, 318.5ms\n", + "Speed: 0.0ms preprocess, 318.5ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 1\n", + "\n", + "0: 640x640 1 keyboard, 283.9ms\n", + "Speed: 0.0ms 
preprocess, 283.9ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 1\n", + "\n", + "0: 640x640 (no detections), 307.8ms\n", + "Speed: 0.0ms preprocess, 307.8ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 0\n", + "\n", + "0: 640x640 (no detections), 313.3ms\n", + "Speed: 0.0ms preprocess, 313.3ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 0\n", + "\n", + "0: 640x640 2 airplanes, 2 trucks, 300.5ms\n", + "Speed: 0.0ms preprocess, 300.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 4\n", + "\n", + "0: 640x640 1 boat, 284.9ms\n", + "Speed: 0.0ms preprocess, 284.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 1\n", + "\n", + "0: 640x640 1 stop sign, 315.9ms\n", + "Speed: 0.0ms preprocess, 315.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 1\n", + "\n", + "0: 640x640 2 persons, 295.9ms\n", + "Speed: 0.0ms preprocess, 295.9ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 2\n", + "\n", + "0: 640x640 1 laptop, 1 mouse, 1 keyboard, 1 cell phone, 326.6ms\n", + "Speed: 0.0ms preprocess, 326.6ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "Number of objects are detected: 4\n", + "74\n" + ] + } + ], + "source": [ + "#%% Collect patches\n", + "import glob\n", + "from torchvision import transforms\n", + "from torchvision.datasets.vision import VisionDataset\n", + "\n", + "class CustomDatasetFolder(VisionDataset):\n", + " def __init__(self, root, transform=None):\n", + " super(CustomDatasetFolder, self).__init__(root)\n", + " self.transform = transform\n", + " samples = glob.glob(f\"{root}/*.jpg\")\n", + "\n", + " self.samples = samples\n", + 
"\n", + " def __getitem__(self, index):\n", + " sample = self._loader(self.samples[index])\n", + " if self.transform is not None:\n", + " sample = self.transform(sample)\n", + " return sample\n", + " \n", + " def __len__(self):\n", + " return len(self.samples)\n", + "\n", + " def _loader(self, path):\n", + " return Image.open(path).convert(\"RGB\")\n", + "\n", + "img_dataset = CustomDatasetFolder(\n", + " ROOT_MSCOCO,\n", + " transforms.Compose([\n", + " transforms.RandomResizedCrop((640,640)),\n", + " transforms.AutoAugment(),\n", + " transforms.RandomHorizontalFlip(),\n", + " transforms.ToTensor(),\n", + " ]))\n", + "img_loader = torch.utils.data.DataLoader(img_dataset, batch_size=1, shuffle=True)\n", + "\n", + "candidates_list = []\n", + "TILE_SIZE = 64\n", + "MAX_IMGS = 25\n", + "img_count = 0\n", + "for x in iter(img_loader):\n", + " img_count = img_count + 1\n", + " if img_count == MAX_IMGS:\n", + " break\n", + "\n", + " candidates, _ = collect_patches_from_images(py_model, x.to(py_model.device))\n", + " print(f'Number of objects are detected: {len(candidates[0])}')\n", + " candidates_list = candidates_list + candidates[0]\n", + "\n", + "print(len(candidates_list))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "0: 640x640 3 persons, 2 bicycles, 1 bottle, 312.4ms\n", + "1: 640x640 7 persons, 1 bicycle, 312.4ms\n", + "2: 640x640 3 persons, 1 bicycle, 312.4ms\n", + "3: 640x640 9 persons, 4 bicycles, 312.4ms\n", + "4: 640x640 16 persons, 7 bicycles, 312.4ms\n", + "5: 640x640 8 persons, 1 bicycle, 1 bottle, 1 sink, 312.4ms\n", + "6: 640x640 8 persons, 3 bicycles, 312.4ms\n", + "7: 640x640 11 persons, 2 bicycles, 312.4ms\n", + "8: 640x640 6 persons, 4 bicycles, 1 bus, 2 sports balls, 1 apple, 312.4ms\n", + "9: 640x640 12 persons, 3 bicycles, 1 bus, 312.4ms\n", + "Speed: 0.0ms preprocess, 312.4ms inference, 3.1ms postprocess per image at shape (1, 
3, 640, 640)\n", + "\n", + "0: 640x640 16 persons, 2 airplanes, 1 bus, 1 truck, 328.7ms\n", + "1: 640x640 28 persons, 6 airplanes, 1 stop sign, 1 skis, 1 baseball glove, 328.7ms\n", + "2: 640x640 14 persons, 2 airplanes, 1 bus, 2 trucks, 1 skis, 328.7ms\n", + "3: 640x640 15 persons, 4 airplanes, 1 bus, 1 truck, 3 traffic lights, 2 horses, 1 baseball glove, 328.7ms\n", + "4: 640x640 16 persons, 1 airplane, 2 buss, 1 truck, 1 stop sign, 328.7ms\n", + "5: 640x640 45 persons, 1 airplane, 2 buss, 1 traffic light, 328.7ms\n", + "6: 640x640 20 persons, 1 airplane, 1 truck, 328.7ms\n", + "7: 640x640 25 persons, 2 airplanes, 3 buss, 1 stop sign, 1 skis, 328.7ms\n", + "8: 640x640 31 persons, 3 airplanes, 2 trucks, 1 stop sign, 328.7ms\n", + "9: 640x640 21 persons, 2 airplanes, 1 truck, 1 traffic light, 2 stop signs, 1 baseball bat, 1 baseball glove, 328.7ms\n", + "Speed: 0.0ms preprocess, 328.7ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 48 persons, 3 airplanes, 2 buss, 1 sports ball, 1 baseball glove, 2 bowls, 1 keyboard, 313.2ms\n", + "1: 640x640 21 persons, 3 airplanes, 3 buss, 1 toilet, 313.2ms\n", + "2: 640x640 16 persons, 4 airplanes, 313.2ms\n", + "3: 640x640 25 persons, 1 airplane, 1 bus, 1 bowl, 313.2ms\n", + "4: 640x640 24 persons, 2 airplanes, 2 buss, 2 toilets, 313.2ms\n", + "5: 640x640 24 persons, 313.2ms\n", + "6: 640x640 25 persons, 1 car, 3 airplanes, 2 buss, 1 bowl, 1 book, 313.2ms\n", + "7: 640x640 26 persons, 1 airplane, 3 buss, 1 bird, 1 dog, 1 skateboard, 3 bowls, 313.2ms\n", + "8: 640x640 23 persons, 1 car, 4 buss, 1 traffic light, 1 baseball glove, 1 cell phone, 313.2ms\n", + "9: 640x640 15 persons, 1 car, 4 airplanes, 4 buss, 2 tennis rackets, 313.2ms\n", + "Speed: 0.0ms preprocess, 313.2ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 2 persons, 1 cat, 322.4ms\n", + "1: 640x640 4 persons, 1 orange, 1 carrot, 1 chair, 322.4ms\n", + "2: 640x640 1 person, 5 
buss, 1 truck, 322.4ms\n", + "3: 640x640 2 persons, 1 car, 1 bus, 1 traffic light, 322.4ms\n", + "4: 640x640 1 person, 1 bus, 1 baseball bat, 1 bottle, 322.4ms\n", + "5: 640x640 9 persons, 1 bus, 322.4ms\n", + "6: 640x640 9 persons, 1 bus, 1 keyboard, 322.4ms\n", + "7: 640x640 1 person, 1 vase, 322.4ms\n", + "8: 640x640 2 persons, 2 buss, 1 chair, 1 keyboard, 322.4ms\n", + "9: 640x640 2 persons, 1 bus, 1 chair, 322.4ms\n", + "Speed: 0.0ms preprocess, 322.4ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 26 persons, 1 traffic light, 2 skiss, 1 snowboard, 2 bottles, 1 bowl, 328.6ms\n", + "1: 640x640 19 persons, 1 traffic light, 1 baseball glove, 328.6ms\n", + "2: 640x640 25 persons, 1 suitcase, 1 sports ball, 328.6ms\n", + "3: 640x640 18 persons, 1 bus, 328.6ms\n", + "4: 640x640 31 persons, 1 traffic light, 1 suitcase, 4 bottles, 328.6ms\n", + "5: 640x640 20 persons, 1 bench, 1 tie, 4 suitcases, 1 sports ball, 9 bottles, 328.6ms\n", + "6: 640x640 6 persons, 1 baseball glove, 328.6ms\n", + "7: 640x640 17 persons, 3 suitcases, 328.6ms\n", + "8: 640x640 18 persons, 1 bus, 6 bottles, 328.6ms\n", + "9: 640x640 21 persons, 1 baseball bat, 1 bottle, 328.6ms\n", + "Speed: 0.0ms preprocess, 328.6ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 10 persons, 3 buss, 1 baseball bat, 1 tv, 1 clock, 312.4ms\n", + "1: 640x640 6 persons, 1 car, 1 umbrella, 1 keyboard, 1 teddy bear, 312.4ms\n", + "2: 640x640 6 persons, 1 bus, 1 traffic light, 1 sports ball, 2 teddy bears, 312.4ms\n", + "3: 640x640 15 persons, 1 traffic light, 1 parking meter, 1 bottle, 1 keyboard, 312.4ms\n", + "4: 640x640 6 persons, 1 car, 1 bus, 1 bottle, 2 keyboards, 312.4ms\n", + "5: 640x640 13 persons, 2 buss, 1 sports ball, 312.4ms\n", + "6: 640x640 15 persons, 2 buss, 1 parking meter, 1 baseball glove, 7 bottles, 1 keyboard, 1 vase, 312.4ms\n", + "7: 640x640 14 persons, 1 car, 1 bus, 1 traffic light, 1 parking meter, 1 tv, 
2 vases, 312.4ms\n", + "8: 640x640 10 persons, 2 cars, 2 buss, 1 baseball glove, 1 keyboard, 312.4ms\n", + "9: 640x640 10 persons, 1 car, 2 airplanes, 2 buss, 1 traffic light, 1 tv, 1 vase, 312.4ms\n", + "Speed: 0.0ms preprocess, 312.4ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 5 persons, 1 traffic light, 1 surfboard, 3 bottles, 1 mouse, 320.6ms\n", + "1: 640x640 3 persons, 1 airplane, 1 traffic light, 320.6ms\n", + "2: 640x640 7 persons, 1 traffic light, 1 baseball glove, 1 surfboard, 1 sink, 320.6ms\n", + "3: 640x640 7 persons, 1 car, 1 traffic light, 2 teddy bears, 320.6ms\n", + "4: 640x640 12 persons, 4 traffic lights, 1 surfboard, 320.6ms\n", + "5: 640x640 13 persons, 1 car, 2 airplanes, 4 traffic lights, 1 mouse, 2 teddy bears, 320.6ms\n", + "6: 640x640 7 persons, 1 frisbee, 1 baseball glove, 1 surfboard, 3 mouses, 320.6ms\n", + "7: 640x640 4 persons, 1 traffic light, 320.6ms\n", + "8: 640x640 8 persons, 1 car, 1 bus, 3 traffic lights, 3 bottles, 1 banana, 320.6ms\n", + "9: 640x640 8 persons, 5 traffic lights, 1 cow, 1 baseball glove, 1 bottle, 320.6ms\n", + "Speed: 0.0ms preprocess, 320.6ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 4 persons, 1 fire hydrant, 357.5ms\n", + "1: 640x640 8 persons, 1 banana, 1 remote, 357.5ms\n", + "2: 640x640 6 persons, 1 bus, 357.5ms\n", + "3: 640x640 12 persons, 1 car, 1 sports ball, 1 baseball bat, 357.5ms\n", + "4: 640x640 7 persons, 357.5ms\n", + "5: 640x640 13 persons, 1 book, 357.5ms\n", + "6: 640x640 10 persons, 2 fire hydrants, 2 baseball bats, 1 banana, 357.5ms\n", + "7: 640x640 5 persons, 1 baseball glove, 1 keyboard, 357.5ms\n", + "8: 640x640 4 persons, 2 traffic lights, 357.5ms\n", + "9: 640x640 8 persons, 357.5ms\n", + "Speed: 0.0ms preprocess, 357.5ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 4 persons, 321.8ms\n", + "1: 640x640 4 persons, 1 airplane, 1 sports ball, 1 
baseball bat, 1 baseball glove, 1 bottle, 1 book, 321.8ms\n", + "2: 640x640 5 persons, 321.8ms\n", + "3: 640x640 11 persons, 1 baseball glove, 1 refrigerator, 1 teddy bear, 321.8ms\n", + "4: 640x640 7 persons, 1 baseball glove, 321.8ms\n", + "5: 640x640 8 persons, 1 baseball glove, 1 keyboard, 321.8ms\n", + "6: 640x640 4 persons, 1 airplane, 321.8ms\n", + "7: 640x640 7 persons, 1 chair, 1 keyboard, 321.8ms\n", + "8: 640x640 13 persons, 2 horses, 1 sports ball, 2 bottles, 321.8ms\n", + "9: 640x640 5 persons, 1 baseball glove, 321.8ms\n", + "Speed: 0.0ms preprocess, 321.8ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 10 persons, 3 buss, 1 truck, 3 bottles, 323.3ms\n", + "1: 640x640 23 persons, 7 buss, 2 ties, 2 scissorss, 323.3ms\n", + "2: 640x640 8 persons, 5 buss, 1 truck, 1 umbrella, 1 tie, 8 bottles, 323.3ms\n", + "3: 640x640 12 persons, 1 bus, 1 umbrella, 1 tie, 1 skis, 1 bottle, 323.3ms\n", + "4: 640x640 8 persons, 3 buss, 1 truck, 1 tie, 1 skis, 323.3ms\n", + "5: 640x640 23 persons, 1 car, 4 buss, 1 umbrella, 1 tie, 2 sports balls, 323.3ms\n", + "6: 640x640 5 persons, 2 buss, 1 bird, 1 horse, 323.3ms\n", + "7: 640x640 13 persons, 1 bus, 6 ties, 323.3ms\n", + "8: 640x640 18 persons, 9 buss, 1 umbrella, 1 tie, 1 scissors, 323.3ms\n", + "9: 640x640 21 persons, 4 buss, 2 ties, 1 baseball glove, 323.3ms\n", + "Speed: 0.0ms preprocess, 323.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 99 persons, 3 buss, 1 traffic light, 4 bottles, 2 bowls, 351.7ms\n", + "Speed: 0.0ms preprocess, 351.7ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 95 persons, 3 buss, 8 snowboards, 1 sports ball, 1 baseball bat, 1 skateboard, 280.4ms\n", + "Speed: 0.0ms preprocess, 280.4ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 103 persons, 3 bicycles, 3 buss, 5 traffic lights, 4 bottles, 2 refrigerators, 
273.1ms\n", + "Speed: 0.0ms preprocess, 273.1ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 72 persons, 1 bus, 4 traffic lights, 1 baseball glove, 1 refrigerator, 273.9ms\n", + "Speed: 0.0ms preprocess, 273.9ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 97 persons, 1 bus, 1 train, 1 bowl, 325.0ms\n", + "Speed: 0.0ms preprocess, 325.0ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 70 persons, 3 buss, 5 traffic lights, 1 baseball glove, 320.5ms\n", + "Speed: 0.0ms preprocess, 320.5ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 56 persons, 2 buss, 1 train, 1 cow, 1 baseball glove, 1 chair, 281.5ms\n", + "Speed: 0.0ms preprocess, 281.5ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 58 persons, 1 bus, 1 baseball glove, 1 bowl, 324.0ms\n", + "Speed: 0.0ms preprocess, 324.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 41 persons, 2 buss, 2 trains, 1 traffic light, 1 baseball glove, 278.0ms\n", + "Speed: 0.0ms preprocess, 278.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 37 persons, 4 buss, 1 traffic light, 1 baseball glove, 309.1ms\n", + "Speed: 0.0ms preprocess, 309.1ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 25 persons, 1 baseball glove, 1 bowl, 273.9ms\n", + "Speed: 0.0ms preprocess, 273.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 36 persons, 1 bus, 1 baseball glove, 1 bowl, 295.4ms\n", + "Speed: 0.0ms preprocess, 295.4ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 32 persons, 1 bus, 2 trains, 1 baseball glove, 315.9ms\n", + "Speed: 0.0ms preprocess, 315.9ms inference, 2.8ms postprocess per image 
at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 34 persons, 1 traffic light, 1 baseball glove, 275.3ms\n", + "Speed: 0.0ms preprocess, 275.3ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 21 persons, 1 traffic light, 1 baseball glove, 298.4ms\n", + "Speed: 0.0ms preprocess, 298.4ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 32 persons, 1 truck, 1 traffic light, 1 baseball glove, 278.6ms\n", + "Speed: 0.0ms preprocess, 278.6ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 28 persons, 1 traffic light, 1 baseball glove, 304.6ms\n", + "Speed: 0.0ms preprocess, 304.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 20 persons, 1 traffic light, 1 baseball glove, 319.6ms\n", + "Speed: 0.0ms preprocess, 319.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 27 persons, 2 traffic lights, 1 baseball glove, 286.8ms\n", + "Speed: 0.0ms preprocess, 286.8ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 25 persons, 2 traffic lights, 1 horse, 1 sports ball, 1 baseball glove, 299.5ms\n", + "Speed: 0.0ms preprocess, 299.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 23 persons, 3 traffic lights, 1 baseball glove, 276.5ms\n", + "Speed: 0.0ms preprocess, 276.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 20 persons, 1 traffic light, 1 baseball glove, 279.6ms\n", + "Speed: 0.0ms preprocess, 279.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 23 persons, 1 traffic light, 1 baseball glove, 330.8ms\n", + "Speed: 0.0ms preprocess, 330.8ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 23 persons, 2 trains, 3 traffic lights, 1 baseball 
glove, 296.2ms\n", + "Speed: 0.0ms preprocess, 296.2ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 18 persons, 1 traffic light, 1 elephant, 1 baseball glove, 306.0ms\n", + "Speed: 0.0ms preprocess, 306.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 25 persons, 1 truck, 2 traffic lights, 1 baseball glove, 275.4ms\n", + "Speed: 0.0ms preprocess, 275.4ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 14 persons, 3 traffic lights, 1 baseball glove, 276.6ms\n", + "Speed: 0.0ms preprocess, 276.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 289.3ms\n", + "Speed: 0.0ms preprocess, 289.3ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 17 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 312.9ms\n", + "Speed: 0.0ms preprocess, 312.9ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 16 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 345.9ms\n", + "Speed: 0.0ms preprocess, 345.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 3 traffic lights, 1 sports ball, 1 baseball glove, 284.7ms\n", + "Speed: 0.0ms preprocess, 284.7ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 19 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 282.2ms\n", + "Speed: 0.0ms preprocess, 282.2ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 276.7ms\n", + "Speed: 0.0ms preprocess, 276.7ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 2 traffic lights, 1 sports ball, 1 baseball 
glove, 281.3ms\n", + "Speed: 0.0ms preprocess, 281.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 14 persons, 1 traffic light, 1 sports ball, 1 baseball glove, 313.0ms\n", + "Speed: 0.0ms preprocess, 313.0ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 20 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 297.8ms\n", + "Speed: 0.0ms preprocess, 297.8ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 11 persons, 1 truck, 1 traffic light, 1 sports ball, 1 baseball glove, 286.6ms\n", + "Speed: 0.0ms preprocess, 286.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 20 persons, 1 train, 1 traffic light, 1 baseball glove, 279.0ms\n", + "Speed: 0.0ms preprocess, 279.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 1 traffic light, 1 sports ball, 1 baseball glove, 276.0ms\n", + "Speed: 0.0ms preprocess, 276.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 22 persons, 1 train, 1 traffic light, 1 sports ball, 1 baseball glove, 293.2ms\n", + "Speed: 0.0ms preprocess, 293.2ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 17 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 308.6ms\n", + "Speed: 0.0ms preprocess, 308.6ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 14 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 1 chair, 305.6ms\n", + "Speed: 0.0ms preprocess, 305.6ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 20 persons, 1 truck, 4 traffic lights, 1 sports ball, 1 baseball glove, 287.1ms\n", + "Speed: 0.0ms preprocess, 287.1ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 17 
persons, 1 truck, 1 traffic light, 1 sports ball, 1 baseball glove, 284.7ms\n", + "Speed: 0.0ms preprocess, 284.7ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 12 persons, 1 truck, 1 traffic light, 1 baseball glove, 446.5ms\n", + "Speed: 0.0ms preprocess, 446.5ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 17 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 275.4ms\n", + "Speed: 0.0ms preprocess, 275.4ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 17 persons, 3 traffic lights, 1 baseball glove, 334.5ms\n", + "Speed: 0.0ms preprocess, 334.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 19 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 291.1ms\n", + "Speed: 0.0ms preprocess, 291.1ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 19 persons, 1 car, 2 traffic lights, 1 baseball glove, 277.8ms\n", + "Speed: 0.0ms preprocess, 277.8ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 24 persons, 1 traffic light, 1 sports ball, 1 baseball glove, 282.8ms\n", + "Speed: 0.0ms preprocess, 282.8ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 20 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 284.4ms\n", + "Speed: 0.0ms preprocess, 284.4ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 19 persons, 1 truck, 2 traffic lights, 1 sports ball, 1 baseball glove, 293.2ms\n", + "Speed: 0.0ms preprocess, 293.2ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 19 persons, 1 traffic light, 1 baseball glove, 325.8ms\n", + "Speed: 0.0ms preprocess, 325.8ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 23 persons, 
4 traffic lights, 1 sports ball, 1 baseball glove, 359.3ms\n", + "Speed: 0.0ms preprocess, 359.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 10 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 278.6ms\n", + "Speed: 0.0ms preprocess, 278.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 281.9ms\n", + "Speed: 0.0ms preprocess, 281.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 25 persons, 5 traffic lights, 1 sports ball, 1 baseball glove, 322.6ms\n", + "Speed: 0.0ms preprocess, 322.6ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 18 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 273.6ms\n", + "Speed: 0.0ms preprocess, 273.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 11 persons, 2 traffic lights, 1 baseball glove, 337.3ms\n", + "Speed: 0.0ms preprocess, 337.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 17 persons, 1 traffic light, 1 sports ball, 1 baseball glove, 314.1ms\n", + "Speed: 0.0ms preprocess, 314.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 3 traffic lights, 1 sports ball, 1 baseball glove, 462.5ms\n", + "Speed: 0.0ms preprocess, 462.5ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 20 persons, 1 truck, 1 traffic light, 1 baseball glove, 279.0ms\n", + "Speed: 0.0ms preprocess, 279.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 1 traffic light, 1 sports ball, 1 baseball glove, 281.4ms\n", + "Speed: 0.0ms preprocess, 281.4ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 2 
traffic lights, 1 sports ball, 1 baseball glove, 273.4ms\n", + "Speed: 0.0ms preprocess, 273.4ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 22 persons, 1 traffic light, 1 sports ball, 1 baseball glove, 284.6ms\n", + "Speed: 0.0ms preprocess, 284.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 18 persons, 1 truck, 2 traffic lights, 1 sports ball, 1 baseball glove, 305.3ms\n", + "Speed: 0.0ms preprocess, 305.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 16 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 279.3ms\n", + "Speed: 0.0ms preprocess, 279.3ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 22 persons, 2 traffic lights, 1 sports ball, 1 baseball glove, 298.1ms\n", + "Speed: 0.0ms preprocess, 298.1ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 11 persons, 1 train, 1 traffic light, 1 sports ball, 1 baseball glove, 392.5ms\n", + "Speed: 0.0ms preprocess, 392.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 14 persons, 1 train, 1 traffic light, 1 sports ball, 1 baseball glove, 282.5ms\n", + "Speed: 0.0ms preprocess, 282.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 13 persons, 1 bus, 1 truck, 1 boat, 1 traffic light, 1 sports ball, 1 baseball glove, 274.9ms\n", + "Speed: 0.0ms preprocess, 274.9ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 19 persons, 1 train, 2 traffic lights, 1 sports ball, 1 baseball glove, 297.7ms\n", + "Speed: 0.0ms preprocess, 297.7ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 13 persons, 1 train, 1 sports ball, 1 baseball glove, 292.6ms\n", + "Speed: 0.0ms preprocess, 292.6ms inference, 2.9ms postprocess per 
image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 16 persons, 1 train, 1 sports ball, 1 baseball glove, 303.9ms\n", + "Speed: 0.0ms preprocess, 303.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 11 persons, 1 train, 1 sports ball, 1 baseball glove, 275.1ms\n", + "Speed: 0.0ms preprocess, 275.1ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 12 persons, 2 trains, 1 sports ball, 1 baseball glove, 277.5ms\n", + "Speed: 0.0ms preprocess, 277.5ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 13 persons, 1 train, 1 baseball glove, 321.6ms\n", + "Speed: 0.0ms preprocess, 321.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 8 persons, 1 train, 1 truck, 1 sports ball, 1 baseball glove, 378.1ms\n", + "Speed: 0.0ms preprocess, 378.1ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 10 persons, 1 train, 1 elephant, 1 baseball glove, 291.5ms\n", + "Speed: 0.0ms preprocess, 291.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 17 persons, 1 train, 1 sports ball, 1 baseball glove, 351.4ms\n", + "Speed: 0.0ms preprocess, 351.4ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 13 persons, 1 train, 1 boat, 1 sports ball, 1 baseball glove, 295.6ms\n", + "Speed: 0.0ms preprocess, 295.6ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 11 persons, 1 train, 1 baseball glove, 272.8ms\n", + "Speed: 0.0ms preprocess, 272.8ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 13 persons, 2 trains, 1 sports ball, 1 baseball glove, 280.0ms\n", + "Speed: 0.0ms preprocess, 280.0ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 9 persons, 1 train, 1 
sports ball, 1 baseball glove, 313.3ms\n", + "Speed: 0.0ms preprocess, 313.3ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 11 persons, 1 train, 1 baseball glove, 298.9ms\n", + "Speed: 0.0ms preprocess, 298.9ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 10 persons, 1 train, 1 sports ball, 1 baseball glove, 280.6ms\n", + "Speed: 0.0ms preprocess, 280.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 1 train, 1 horse, 1 sports ball, 1 baseball glove, 277.6ms\n", + "Speed: 0.0ms preprocess, 277.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 14 persons, 3 trains, 1 sports ball, 1 baseball glove, 276.9ms\n", + "Speed: 0.0ms preprocess, 276.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 14 persons, 1 train, 1 baseball glove, 326.7ms\n", + "Speed: 0.0ms preprocess, 326.7ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 22 persons, 1 train, 1 sports ball, 1 baseball glove, 309.7ms\n", + "Speed: 0.0ms preprocess, 309.7ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 9 persons, 1 train, 1 sports ball, 1 baseball glove, 297.3ms\n", + "Speed: 0.0ms preprocess, 297.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 11 persons, 1 train, 1 boat, 1 sports ball, 1 baseball glove, 286.2ms\n", + "Speed: 0.0ms preprocess, 286.2ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 18 persons, 1 train, 1 horse, 1 sports ball, 1 baseball glove, 281.2ms\n", + "Speed: 0.0ms preprocess, 281.2ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 16 persons, 1 train, 1 sports ball, 1 baseball glove, 293.2ms\n", + "Speed: 0.0ms preprocess, 
293.2ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 12 persons, 1 train, 1 sports ball, 1 baseball glove, 305.1ms\n", + "Speed: 0.0ms preprocess, 305.1ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 1 train, 1 truck, 1 sports ball, 1 baseball glove, 274.7ms\n", + "Speed: 0.0ms preprocess, 274.7ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 14 persons, 1 train, 1 truck, 1 sports ball, 1 baseball glove, 277.9ms\n", + "Speed: 0.0ms preprocess, 277.9ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 15 persons, 1 train, 1 truck, 1 sports ball, 1 baseball glove, 275.6ms\n", + "Speed: 0.0ms preprocess, 275.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 10 persons, 1 train, 1 sports ball, 1 baseball glove, 323.3ms\n", + "Speed: 0.0ms preprocess, 323.3ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 17 persons, 1 train, 1 sports ball, 1 baseball glove, 274.7ms\n", + "Speed: 0.0ms preprocess, 274.7ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 12 persons, 1 train, 1 elephant, 1 sports ball, 1 baseball glove, 312.2ms\n", + "Speed: 0.0ms preprocess, 312.2ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 134 persons, 3 buss, 1 traffic light, 3 snowboards, 1 baseball glove, 8 bottles, 3 bowls, 1 chair, 292.0ms\n", + "Speed: 0.0ms preprocess, 292.0ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 640)\n", + "\n", + "0: 640x640 1 person, 1 sports ball, 1 baseball glove, 286.7ms\n", + "1: 640x640 105 persons, 1 bus, 1 traffic light, 1 snowboard, 1 baseball glove, 286.7ms\n", + "Speed: 4.9ms preprocess, 286.7ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)\n" + ] + } + ], + 
"source": [ + "#%% Apply attack\n", + "from art.attacks.evasion import SNAL\n", + "attack = SNAL(py_model,\n", + " eps = 16.0 /255.0,\n", + " max_iter = 100,\n", + " num_grid = 10,\n", + " candidates=candidates_list,\n", + " collector=collect_patches_from_images)\n", + "x_adv = (attack.generate(org_x / 255.0) * 255.0)\n", + "adv_np = np.transpose(x_adv[0, :], (1, 2, 0)).astype(np.uint8)\n", + "Image.fromarray(adv_np).save(f'output.png')\n", + "Image.fromarray(org_img).save(f'target.png')\n", + "\n", + "# Visualize the results\n", + "from IPython.display import Image as PyImage\n", + "results = model(['target.png', 'output.png'])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n" + ] + }, + { + "data": { + "image/jpeg": "", + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results[0].save(\"results.jpg\")\n", + "logger.info(len(results[0].boxes.xyxy))\n", + "PyImage(filename=f\"results.jpg\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "109\n" + ] + }, + { + "data": { + "image/jpeg": "", + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results[1].save(\"results.jpg\")\n", + "logger.info(len(results[1].boxes.xyxy))\n", + "PyImage(filename=f\"results.jpg\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 
+} diff --git a/requirements_test.txt b/requirements_test.txt index a623075fe8..be4223444d 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -6,7 +6,7 @@ matplotlib==3.7.1 scikit-learn==1.4.1.post1 six==1.16.0 Pillow==10.3.0 -tqdm==4.66.4 +tqdm==4.67.1 statsmodels==0.14.2 pydub==0.25.1 resampy==0.4.3 @@ -64,7 +64,7 @@ types-PyYAML==6.0.12.20240917 types-setuptools==71.1.0.20240726 # other -requests~=2.31.0 +requests~=2.32.3 ultralytics==8.0.217 ipython==8.25.0 diff --git a/run_tests.sh b/run_tests.sh index 71ae377a42..81d1cea461 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -146,6 +146,10 @@ else "tests/defences/test_rounded.py" \ "tests/defences/test_thermometer_encoding.py" \ "tests/defences/test_variance_minimization.py" \ + "tests/defences/detector/evasion/test_beyond_detector.py" \ + "tests/defences/detector/evasion/test_binary_activation_detector.py" \ + "tests/defences/detector/evasion/test_binary_input_detector.py" \ + "tests/defences/detector/evasion/test_subsetscanning_detector.py" \ "tests/defences/detector/poison/test_activation_defence.py" \ "tests/defences/detector/poison/test_clustering_analyzer.py" \ "tests/defences/detector/poison/test_ground_truth_evaluator.py" \ diff --git a/tests/attacks/evasion/test_auto_attack.py b/tests/attacks/evasion/test_auto_attack.py index 52e76274a6..f98847bfd0 100644 --- a/tests/attacks/evasion/test_auto_attack.py +++ b/tests/attacks/evasion/test_auto_attack.py @@ -273,7 +273,7 @@ def test_generate_parallel(art_warning, fix_get_mnist_subset, image_dl_estimator batch_size=batch_size, estimator_orig=None, targeted=False, - parallel=True, + parallel_pool_size=3, ) attack_noparallel = AutoAttack( @@ -285,7 +285,7 @@ def test_generate_parallel(art_warning, fix_get_mnist_subset, image_dl_estimator batch_size=batch_size, estimator_orig=None, targeted=False, - parallel=False, + parallel_pool_size=0, ) x_train_mnist_adv = attack.generate(x=x_train_mnist, y=y_train_mnist) @@ -310,7 +310,7 @@ def 
test_generate_parallel(art_warning, fix_get_mnist_subset, image_dl_estimator batch_size=batch_size, estimator_orig=None, targeted=True, - parallel=True, + parallel_pool_size=3, ) x_train_mnist_adv = attack.generate(x=x_train_mnist, y=y_train_mnist) diff --git a/tests/attacks/evasion/test_rescaling_auto_conjugate_gradient.py b/tests/attacks/evasion/test_rescaling_auto_conjugate_gradient.py new file mode 100644 index 0000000000..9bb9026b1c --- /dev/null +++ b/tests/attacks/evasion/test_rescaling_auto_conjugate_gradient.py @@ -0,0 +1,166 @@ +# MIT License + +# Copyright (c) 2024 Keiichiro Yamamura + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +# MIT License +# +# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2024 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+import logging +import pytest + +import numpy as np + +from art.attacks.evasion import RescalingAutoConjugateGradient +from art.estimators.estimator import BaseEstimator, LossGradientsMixin +from art.estimators.classification.classifier import ClassifierMixin + +from tests.attacks.utils import backend_test_classifier_type_check_fail +from tests.utils import ARTTestException + +logger = logging.getLogger(__name__) + + +@pytest.fixture() +def fix_get_mnist_subset(get_mnist_dataset): + (x_train_mnist, y_train_mnist), (x_test_mnist, y_test_mnist) = get_mnist_dataset + n_train = 100 + n_test = 10 + yield x_train_mnist[:n_train], y_train_mnist[:n_train], x_test_mnist[:n_test], y_test_mnist[:n_test] + + +@pytest.mark.parametrize("loss_type", ["cross_entropy", "difference_logits_ratio"]) +@pytest.mark.parametrize("norm", ["inf", np.inf, 1, 2]) +@pytest.mark.skip_framework("keras", "non_dl_frameworks", "mxnet", "kerastf", "tensorflow1", "tensorflow2v1") +def test_generate(art_warning, fix_get_mnist_subset, image_dl_estimator_for_attack, framework, loss_type, norm): + print("test_generate") + try: + classifier = image_dl_estimator_for_attack(RescalingAutoConjugateGradient, from_logits=True) + + print("framework", framework) + + if framework in ["tensorflow1", "tensorflow2v1"] and loss_type == "difference_logits_ratio": + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient( + estimator=classifier, + norm=norm, + eps=0.3, + eps_step=0.1, + max_iter=5, + targeted=False, + nb_random_init=1, + batch_size=32, + loss_type=loss_type, + verbose=False, + ) + else: + + attack = RescalingAutoConjugateGradient( + estimator=classifier, + norm=norm, + eps=0.3, + eps_step=0.1, + max_iter=5, + targeted=False, + nb_random_init=1, + batch_size=32, + loss_type=loss_type, + verbose=False, + ) + + (x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist) = fix_get_mnist_subset + + x_train_mnist_adv = attack.generate(x=x_train_mnist, y=y_train_mnist) + + assert 
np.max(np.abs(x_train_mnist_adv - x_train_mnist)) > 0.0 + + except ARTTestException as e: + art_warning(e) + + +@pytest.mark.framework_agnostic +def test_check_params(art_warning, image_dl_estimator_for_attack): + try: + + classifier = image_dl_estimator_for_attack(RescalingAutoConjugateGradient, from_logits=True) + + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, norm=0) + + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, eps="1") + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, eps=-1.0) + + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, eps_step="1") + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, eps_step=-1.0) + + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, max_iter=1.0) + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, max_iter=-1) + + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, targeted="true") + + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, nb_random_init=1.0) + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, nb_random_init=-1) + + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, batch_size=1.0) + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, batch_size=-1) + + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, loss_type="test") + + with pytest.raises(ValueError): + _ = RescalingAutoConjugateGradient(classifier, verbose="true") + + except ARTTestException as e: + art_warning(e) + + +@pytest.mark.framework_agnostic +def test_classifier_type_check_fail(art_warning): + try: + backend_test_classifier_type_check_fail( + RescalingAutoConjugateGradient, [BaseEstimator, LossGradientsMixin, ClassifierMixin] + ) + except 
@pytest.mark.only_with_platform("pytorch")
def test_generate(art_warning):
    """
    Run the SNAL attack end-to-end against a YOLOv8 detector.

    The test downloads one target image and a set of sample images, collects object patches from the sample
    images with the detector, runs the attack on the target image and checks that the adversarial image keeps
    the input shape, stays inside [0, 1] and makes the detector report more than 25 boxes.

    :param art_warning: Fixture receiving any `ARTTestException` raised while the test runs.
    """
    try:
        # The ultralytics package does not support Python versions earlier than 3.8.
        # To avoid an import error with the TF 1.x pipeline, it is imported only within the function scope.
        import glob
        import os
        import time
        from io import BytesIO
        from itertools import islice

        import requests
        import torch
        from PIL import Image
        from torchvision import transforms
        from torchvision.datasets.vision import VisionDataset
        from ultralytics import YOLO

        # `requests` never times out by default; bound every download so a stalled server cannot hang the test.
        download_timeout = 30

        def download(url):
            """Return the body served at `url`, raising on timeouts and on HTTP error statuses."""
            response = requests.get(url, timeout=download_timeout)
            # Fail here rather than later when an error page would be decoded as an image.
            response.raise_for_status()
            return response.content

        model = YOLO("yolov8m")
        py_model = PyTorchYolo(model=model, input_shape=(3, 640, 640), channels_first=True, is_yolov8=True)

        # Define a custom function to collect patches from images
        def collect_patches_from_images(model, imgs):
            """Crop every box predicted by `model` out of `imgs`; return per-image patches and box positions."""
            bs = imgs.shape[0]
            with torch.no_grad():
                pred = model.model(imgs)
            y = [obj.boxes.xyxy for obj in pred]

            candidates_patch = []
            candidates_position = []
            for i in range(bs):
                patch = []
                if y[i].shape[0] == 0:
                    # No detection for this image: keep the outputs aligned with an empty entry.
                    candidates_patch.append(patch)
                    candidates_position.append(torch.zeros((0, 4), device=model.device))
                    continue

                # Integer box corners (x1, y1, x2, y2), clamped to the image extent before slicing.
                pos_matrix = y[i][:, :4].clone().int()
                pos_matrix[:, 0] = torch.clamp_min(pos_matrix[:, 0], 0)
                pos_matrix[:, 1] = torch.clamp_min(pos_matrix[:, 1], 0)
                pos_matrix[:, 2] = torch.clamp_max(pos_matrix[:, 2], imgs.shape[3])
                pos_matrix[:, 3] = torch.clamp_max(pos_matrix[:, 3], imgs.shape[2])
                for e in pos_matrix:
                    p = imgs[i, :, e[1] : e[3], e[0] : e[2]]
                    patch.append(p.to(model.device))

                candidates_patch.append(patch)
                candidates_position.append(pos_matrix)

            return candidates_patch, candidates_position

        # Download a sample image
        target_url = "https://farm2.staticflickr.com/1065/705706084_39a7f28fc9_z.jpg"  # val2017/000000552842.jpg
        with Image.open(BytesIO(download(target_url))) as target_img:
            # Force three channels so the channel-first transpose below is always valid.
            org_img = np.asarray(target_img.convert("RGB").resize((640, 640)))
        x_org = np.stack([org_img.transpose((2, 0, 1))], axis=0).astype(np.float32)

        # Prepare dataset
        # Select images randomly from COCO dataset
        list_url = [
            "http://farm4.staticflickr.com/3572/5744200926_082c11c43c_z.jpg",  # 000000460229
            "http://farm4.staticflickr.com/3010/2749181045_ed450e5d36_z.jpg",  # 000000057760
            "http://farm4.staticflickr.com/3826/9451771633_f14cef3a8b_z.jpg",  # 000000468332
            "http://farm7.staticflickr.com/6194/6106161903_e505cbc192_z.jpg",  # 000000190841
            "http://farm1.staticflickr.com/48/140268688_947e2bcc96_z.jpg",  # 000000078420
            "http://farm6.staticflickr.com/5011/5389083366_fdf13f2ee6_z.jpg",  # 000000309655
            "http://farm4.staticflickr.com/3552/5812461870_eb24c8eac5_z.jpg",  # 000000293324
            "http://farm4.staticflickr.com/3610/3361019695_1005dd49fd_z.jpg",  # 000000473821
            "http://farm8.staticflickr.com/7323/9725958435_3359641442_z.jpg",  # 000000025386
            "http://farm4.staticflickr.com/3317/3427794620_9db24fe462_z.jpg",  # 000000347693
            "http://farm6.staticflickr.com/5143/5589997131_22f51b308c_z.jpg",  # 000000058029
            "http://farm5.staticflickr.com/4061/4376326145_7ef66603e3_z.jpg",  # 000000389933
            "http://farm3.staticflickr.com/2028/2188480725_5fbf27a5b3_z.jpg",  # 000000311789
            "http://farm1.staticflickr.com/172/421715600_666b0f6a2b_z.jpg",  # 000000506004
            "http://farm9.staticflickr.com/8331/8100320407_6044d243a5_z.jpg",  # 000000076648
            "http://farm4.staticflickr.com/3236/2487649513_1ef6a6d5c9_z.jpg",  # 000000201646
            "http://farm4.staticflickr.com/3094/2684280938_a5b59c0fac_z.jpg",  # 000000447187
            "http://farm1.staticflickr.com/42/100911501_005e4d3aa8_z.jpg",  # 000000126107
            "http://farm1.staticflickr.com/56/147795701_40d7bc8331_z.jpg",  # 000000505942
            "http://farm5.staticflickr.com/4103/5074895283_71a73d77e5_z.jpg",  # 000000360951
            "http://farm1.staticflickr.com/160/404335548_3bdc1f2ed9_z.jpg",  # 000000489764
            "http://farm9.staticflickr.com/8446/7857456044_401a257790_z.jpg",  # 000000407574
        ]

        root_mscoco = "datasets"
        os.makedirs(root_mscoco, exist_ok=True)
        for idx, img_url in enumerate(list_url):
            content = download(img_url)
            with open(f"{root_mscoco}/{idx:03d}.jpg", "wb") as f:
                f.write(content)
            # Short pause between requests to avoid hammering the image host.
            time.sleep(0.5)

        # Collect patches
        class CustomDatasetFolder(VisionDataset):
            """Dataset yielding every ``*.jpg`` file found directly under `root` as an RGB image."""

            def __init__(self, root, transform=None):
                super().__init__(root)
                self.transform = transform
                self.samples = glob.glob(f"{root}/*.jpg")

            def __getitem__(self, index):
                sample = self._loader(self.samples[index])
                if self.transform is not None:
                    sample = self.transform(sample)
                return sample

            def __len__(self):
                return len(self.samples)

            def _loader(self, path):
                # `convert` returns a fully loaded copy, so the file handle can be closed right away.
                with Image.open(path) as img:
                    return img.convert("RGB")

        img_dataset = CustomDatasetFolder(
            root_mscoco,
            transforms.Compose(
                [
                    transforms.RandomResizedCrop((640, 640)),
                    transforms.AutoAugment(),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                ]
            ),
        )
        img_loader = torch.utils.data.DataLoader(img_dataset, batch_size=1, shuffle=True)

        candidates_list = []
        max_imgs = 25
        # `islice` processes at most `max_imgs` batches; the previous counter stopped one batch early.
        for x in islice(img_loader, max_imgs):
            candidates, _ = collect_patches_from_images(py_model, x.to(py_model.device))
            logger.info("Number of objects are detected: %d", len(candidates[0]))
            candidates_list.extend(candidates[0])

        attack = SNAL(
            py_model,
            eps=16.0 / 255.0,
            max_iter=100,
            num_grid=10,
            candidates=candidates_list,
            collector=collect_patches_from_images,
        )

        x_adv = attack.generate(x_org / 255.0)
        assert x_org.shape == x_adv.shape
        assert np.min(x_adv) >= 0.0
        assert np.max(x_adv) <= 1.0

        # Back to a channel-last uint8 image so the adversarial result can be fed to the raw YOLO model.
        adv_np = np.transpose(x_adv[0, :] * 255, (1, 2, 0)).astype(np.uint8)
        result = model(adv_np)
        assert len(result[0].boxes.xyxy) > 25

    except ARTTestException as e:
        art_warning(e)
@pytest.mark.only_with_platform("pytorch")
def test_check_params(art_warning):
    """
    Verify that `SNAL` rejects invalid constructor arguments with the expected exception type.

    Each case replaces exactly one argument of an otherwise valid configuration.

    :param art_warning: Fixture receiving any `ARTTestException` raised while the test runs.
    """
    try:
        # The ultralytics package does not support Python versions earlier than 3.8.
        # To avoid an import error with the TF 1.x pipeline, it is imported only within the function scope.
        from ultralytics import YOLO

        model = YOLO("yolov8m")
        py_model = PyTorchYolo(model=model, input_shape=(3, 640, 640), channels_first=True, is_yolov8=True)

        def dummy_func(model, imags):
            # Placeholder collector: no patches and no positions.
            return [], []

        dummy_list = [[], []]

        # Valid baseline; every case below overrides a single entry of it.
        valid_kwargs = {
            "eps": 8 / 255.0,
            "max_iter": 5,
            "num_grid": 10,
            "candidates": dummy_list,
            "collector": dummy_func,
        }

        # (expected exception, overridden argument), in the order the cases are exercised.
        invalid_cases = (
            (ValueError, {"eps": -1.0}),
            (ValueError, {"eps": 2.0}),
            (TypeError, {"max_iter": 1.0}),
            (ValueError, {"max_iter": 0}),
            (TypeError, {"num_grid": 1.0}),
            (ValueError, {"num_grid": 0}),
            (TypeError, {"candidates": 1.0}),
            (ValueError, {"candidates": []}),
        )

        for expected_error, override in invalid_cases:
            case_kwargs = {**valid_kwargs, **override}
            with pytest.raises(expected_error):
                _ = SNAL(estimator=py_model, **case_kwargs)

    except ARTTestException as e:
        art_warning(e)
def get_ssl_model(weights_path):
    """
    Load the SSL model (SimSiamWithCls).

    :param weights_path: Path of the file holding the state dict to load into the model.
    :return: A `SimSiamWithCls` module with the stored weights loaded.
    """
    import torch
    import torch.nn as nn

    class SimSiamWithCls(nn.Module):
        """
        SimSiam with Classifier
        """

        def __init__(self, arch="resnet18", feat_dim=2048, num_proj_layers=2):
            """
            Build the backbone, classifier head, projector and predictor.

            :param arch: Currently unused; a ResNet-18 backbone is always created.
            :param feat_dim: Width of the projector output and of the predictor input/output.
            :param num_proj_layers: Currently unused; the projector layout below is fixed.
            """
            from torchvision import models

            super().__init__()
            self.backbone = models.resnet18()
            # Input width of the original fully connected layer, i.e. the backbone feature size.
            out_dim = self.backbone.fc.weight.shape[1]
            self.backbone.conv1 = nn.Conv2d(
                in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=2, bias=False
            )
            # Max-pooling and the original head are replaced by identities so the backbone returns features.
            self.backbone.maxpool = nn.Identity()
            self.backbone.fc = nn.Identity()
            self.classifier = nn.Linear(out_dim, out_features=10)

            pred_hidden_dim = feat_dim // 4

            self.projector = nn.Sequential(
                nn.Linear(out_dim, feat_dim, bias=False),
                nn.BatchNorm1d(feat_dim),
                nn.ReLU(),
                nn.Linear(feat_dim, feat_dim, bias=False),
                nn.BatchNorm1d(feat_dim),
                nn.ReLU(),
                nn.Linear(feat_dim, feat_dim),
                nn.BatchNorm1d(feat_dim, affine=False),
            )
            # Freeze the bias of the last projector linear layer.
            self.projector[6].bias.requires_grad = False

            self.predictor = nn.Sequential(
                nn.Linear(feat_dim, pred_hidden_dim, bias=False),
                nn.BatchNorm1d(pred_hidden_dim),
                nn.ReLU(),
                nn.Linear(pred_hidden_dim, feat_dim),
            )

        def forward(self, img, im_aug1=None, im_aug2=None):
            """
            Run the model on `img` alone, or on a pair of augmented views.

            :param img: Input batch passed through the backbone.
            :param im_aug1: Optional first augmented view.
            :param im_aug2: Optional second augmented view.
            :return: ``{"cls", "rep"}`` when no augmented view is given, otherwise ``{"z1", "z2", "p1", "p2"}``.
            """
            # Kept ahead of the branch so the backbone sees `img` in both modes, exactly as before.
            r_ori = self.backbone(img)
            if im_aug1 is None and im_aug2 is None:
                cls = self.classifier(r_ori)
                rep = self.projector(r_ori)
                return {"cls": cls, "rep": rep}

            r1 = self.backbone(im_aug1)
            r2 = self.backbone(im_aug2)

            z1 = self.projector(r1)
            z2 = self.projector(r2)

            p1 = self.predictor(z1)
            p2 = self.predictor(z2)

            return {"z1": z1, "z2": z2, "p1": p1, "p2": p2}

    model = SimSiamWithCls()
    # Map all tensors to the CPU so that a checkpoint saved from a CUDA device also loads on CPU-only hosts.
    # NOTE: `torch.load` unpickles the file; only use it on trusted checkpoints.
    state_dict = torch.load(weights_path, map_location=torch.device("cpu"))
    model.load_state_dict(state_dict)
    return model
@pytest.mark.only_with_platform("pytorch")
def test_beyond_detector(art_warning, get_default_cifar10_subset):
    """
    Fit `BeyondDetectorPyTorch` on a CIFAR-10 subset and check it on clean and FGSM-perturbed samples.

    The test requires at least one flagged adversarial sample and at least one unflagged clean sample.

    :param art_warning: Fixture receiving any `ARTTestException` raised while the test runs.
    :param get_default_cifar10_subset: Fixture providing ``(x_train, y_train), (x_test, y_test)``.
    """
    try:
        import torch
        from torchvision import models, transforms

        # Load CIFAR10 data and keep the first 100 samples of each split
        nb_samples = 100
        (x_train, y_train), (x_test, _) = get_default_cifar10_subset
        x_train, y_train, x_test = x_train[:nb_samples], y_train[:nb_samples], x_test[:nb_samples]

        # Load models
        # Download pretrained weights from
        # https://drive.google.com/drive/folders/1ieEdd7hOj2CIl1FQfu4-3RGZmEj-mesi?usp=sharing
        # NOTE(review): the target model is used without loading "./utils/resources/models/resnet_c10.pth"
        # (the corresponding `load_state_dict` call was commented out) - confirm this is intentional.
        target_model = models.resnet18()
        ssl_model = get_ssl_model(weights_path="./utils/resources/models/simsiam_c10.pth")

        def wrap(network):
            # Both networks are wrapped with identical classifier settings.
            return PyTorchClassifier(
                model=network, nb_classes=10, input_shape=(3, 32, 32), loss=torch.nn.CrossEntropyLoss()
            )

        target_classifier = wrap(target_model)
        ssl_classifier = wrap(ssl_model)

        # Generate adversarial samples
        x_test_adv = FastGradientMethod(estimator=target_classifier, eps=0.05).generate(x_test)

        augmentation_steps = [
            transforms.RandomResizedCrop(32, scale=(0.2, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),  # not strengthened
            transforms.RandomGrayscale(p=0.2),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ]
        img_augmentations = transforms.Compose(augmentation_steps)

        # Initialize BeyondDetector
        detector = BeyondDetectorPyTorch(
            target_classifier=target_classifier,
            ssl_classifier=ssl_classifier,
            augmentations=img_augmentations,
            aug_num=50,
            alpha=0.8,
            var_K=20,
            percentile=5,
        )

        # Fit the detector
        detector.fit(x_train, y_train, batch_size=128)

        # Apply detector on clean and adversarial test data
        _, clean_flags = detector.detect(x_test)
        _, adv_flags = detector.detect(x_test_adv)

        # Assert there is at least one true positive and negative
        nb_true_positives = np.sum(adv_flags)
        nb_true_negatives = len(clean_flags) - np.sum(clean_flags)

        assert nb_true_positives > 0
        assert nb_true_negatives > 0

        # Fraction of clean samples left unflagged and of adversarial samples flagged
        clean_accuracy = 1 - np.mean(clean_flags)
        adv_accuracy = np.mean(adv_flags)

        assert clean_accuracy > 0.0
        assert adv_accuracy > 0.0

    except ARTTestException as e:
        art_warning(e)
def test_multi_label(self):
    """
    Fit a decision tree on multi-label binary targets and check the wrapped ART classifier.

    The prediction must cover every test sample, `nb_classes` must be a NumPy array, and
    `check_and_transform_label_format` must raise a `TypeError` when given that array as `nb_classes`.
    """

    def to_multi_label_binary(labels):
        # Repeat the label columns three times side by side and cap every entry at 1.
        stacked = np.column_stack((labels,) * 3)
        stacked[stacked > 1] = 1
        return stacked

    x_train, x_test = self.x_train_iris, self.x_test_iris

    # make multi-label binary
    y_train = to_multi_label_binary(self.y_train_iris)
    y_test = to_multi_label_binary(self.y_test_iris)

    tree = DecisionTreeClassifier()
    tree.fit(x_train, y_train)
    model = ScikitlearnDecisionTreeClassifier(model=tree)

    pred = model.predict(x_test)
    nb_test_samples = x_test.shape[0]
    assert pred[0].shape[0] == nb_test_samples
    assert isinstance(model.nb_classes, np.ndarray)
    with self.assertRaises(TypeError):
        check_and_transform_label_format(y_train, nb_classes=model.nb_classes)
b/utils/resources/models/simsiam_c10.pth differ