From 460ff9b29f6ad05d6703382e28a4b4a53da5ac77 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Wed, 28 Feb 2024 14:28:20 -0800 Subject: [PATCH 01/25] Export AnomalyDetector --- src/cupbearer/detectors/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cupbearer/detectors/__init__.py b/src/cupbearer/detectors/__init__.py index 93df129f..04ea74ab 100644 --- a/src/cupbearer/detectors/__init__.py +++ b/src/cupbearer/detectors/__init__.py @@ -1,5 +1,6 @@ # ruff: noqa: F401 from .abstraction import AbstractionDetectorConfig +from .anomaly_detector import AnomalyDetector from .config import DetectorConfig, StoredDetector from .finetuning import FinetuningConfig from .statistical import ( From dbae3bfcb75510cb88935971b3ad57184a204dba Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Wed, 28 Feb 2024 17:31:46 -0800 Subject: [PATCH 02/25] Make tasks more flexible --- src/cupbearer/data/__init__.py | 5 +- src/cupbearer/data/_shared.py | 49 +++-- src/cupbearer/data/adversarial.py | 44 ++-- src/cupbearer/data/backdoor_data.py | 5 + src/cupbearer/data/pytorch.py | 9 + .../abstraction/abstraction_detector.py | 7 +- src/cupbearer/detectors/anomaly_detector.py | 30 +-- src/cupbearer/detectors/finetuning.py | 13 +- .../statistical/mahalanobis_detector.py | 4 +- .../detectors/statistical/que_detector.py | 4 +- .../statistical/spectral_detector.py | 4 +- .../detectors/statistical/statistical.py | 32 ++- .../scripts/conf/eval_detector_conf.py | 4 +- .../scripts/conf/train_detector_conf.py | 4 +- src/cupbearer/scripts/eval_detector.py | 4 +- src/cupbearer/scripts/train_detector.py | 36 ++-- src/cupbearer/tasks/__init__.py | 2 +- src/cupbearer/tasks/_config.py | 202 ++++++++---------- src/cupbearer/tasks/adversarial_examples.py | 21 +- src/cupbearer/tasks/backdoor_detection.py | 32 +-- src/cupbearer/utils/train.py | 4 +- tests/test_data.py | 11 +- tests/test_detectors.py | 4 +- tests/test_pipeline.py | 28 +-- 24 files changed, 285 insertions(+), 273 deletions(-) diff --git 
a/src/cupbearer/data/__init__.py b/src/cupbearer/data/__init__.py index a92441b9..1cf8f1a7 100644 --- a/src/cupbearer/data/__init__.py +++ b/src/cupbearer/data/__init__.py @@ -1,9 +1,8 @@ # ruff: noqa: F401 from ._shared import ( DatasetConfig, - RemoveMixLabelDataset, - TestDataConfig, - TestDataMix, + MixedData, + MixedDataConfig, TrainDataFromRun, ) from .adversarial import AdversarialExampleConfig diff --git a/src/cupbearer/data/_shared.py b/src/cupbearer/data/_shared.py index 8d5df10d..e6a9a65e 100644 --- a/src/cupbearer/data/_shared.py +++ b/src/cupbearer/data/_shared.py @@ -24,6 +24,10 @@ class DatasetConfig(BaseConfig, ABC): def num_classes(self) -> int: # type: ignore pass + def get_test_split(self) -> "DatasetConfig": + # Not every dataset will define this + raise NotImplementedError + def get_transforms(self) -> list[Transform]: """Return a list of transforms that should be applied to this dataset. @@ -68,6 +72,9 @@ def __getitem__(self, index): class TrainDataFromRun(DatasetConfig): path: Path + def get_test_split(self) -> DatasetConfig: + return self.cfg.get_test_split() + def __post_init__(self): self._cfg = None @@ -95,16 +102,18 @@ def get_transforms(self) -> list[Transform]: return transforms -class TestDataMix(Dataset): +class MixedData(Dataset): def __init__( self, normal: Dataset, anomalous: Dataset, normal_weight: float = 0.5, + return_anomaly_labels: bool = True, ): self.normal_data = normal self.anomalous_data = anomalous self.normal_weight = normal_weight + self.return_anomaly_labels = return_anomaly_labels self._length = min( int(len(normal) / normal_weight), int(len(anomalous) / (1 - normal_weight)) ) @@ -116,23 +125,36 @@ def __len__(self): def __getitem__(self, index): if index < self.normal_len: - return self.normal_data[index], 0 + if self.return_anomaly_labels: + return self.normal_data[index], 0 + return self.normal_data[index] else: - return self.anomalous_data[index - self.normal_len], 1 + if self.return_anomaly_labels: + return 
self.anomalous_data[index - self.normal_len], 1 + return self.anomalous_data[index - self.normal_len] @dataclass -class TestDataConfig(DatasetConfig): +class MixedDataConfig(DatasetConfig): normal: DatasetConfig anomalous: DatasetConfig normal_weight: float = 0.5 + return_anomaly_labels: bool = True + + def get_test_split(self) -> "MixedDataConfig": + return MixedDataConfig( + normal=self.normal.get_test_split(), + anomalous=self.anomalous.get_test_split(), + normal_weight=self.normal_weight, + return_anomaly_labels=self.return_anomaly_labels, + ) @property def num_classes(self): assert (n := self.normal.num_classes) == self.anomalous.num_classes return n - def build(self) -> TestDataMix: + def build(self) -> MixedData: # We need to override this method because max_size needs to be applied in a # different way: TestDataMix just has normal data first and then anomalous data, # if we just used a Subset with indices 1...n, we'd get an incorrect ratio. @@ -145,22 +167,11 @@ def build(self) -> TestDataMix: anomalous_size = self.max_size - normal_size assert anomalous_size <= len(anomalous) anomalous = Subset(anomalous, range(anomalous_size)) - dataset = TestDataMix(normal, anomalous, self.normal_weight) + dataset = MixedData( + normal, anomalous, self.normal_weight, self.return_anomaly_labels + ) # We don't want to return a TransformDataset here. Transforms should be applied # directly to the normal and anomalous data. 
if self.transforms: raise ValueError("Transforms are not supported for TestDataConfig.") return dataset - - -class RemoveMixLabelDataset(Dataset): - """Help class to only return the first element of each item""" - - def __init__(self, dataset: Dataset): - self._dataset = dataset - - def __len__(self): - return len(self._dataset) - - def __getitem__(self, index): - return self._dataset[index][0] diff --git a/src/cupbearer/data/adversarial.py b/src/cupbearer/data/adversarial.py index 75fbb3c3..efad5001 100644 --- a/src/cupbearer/data/adversarial.py +++ b/src/cupbearer/data/adversarial.py @@ -17,19 +17,27 @@ def make_adversarial_example( path: Path, + filename: str, batch_size: int = 128, eps: float = 8 / 255, max_examples: Optional[int] = None, success_threshold: float = 0.1, steps: int = 40, + use_test_data: bool = False, ): - save_path = path / "adv_examples.pt" + save_path = path / f"{filename}.pt" if os.path.exists(save_path): logger.info("Adversarial examples already exist, skipping attack") return + else: + logger.info( + "Adversarial examples not found, running attack with default settings" + ) model_cfg = StoredModel(path=path) data_cfg = TrainDataFromRun(path=path) + if use_test_data: + data_cfg = data_cfg.get_test_split() dataset = data_cfg.build() if max_examples: @@ -76,22 +84,24 @@ class AdversarialExampleConfig(DatasetConfig): success_threshold: float = 0.1 steps: int = 40 eps: float = 8 / 255 + use_test_data: bool = False def _build(self) -> Dataset: - if not (self.path / "adv_examples").exists(): - logger.info( - "Adversarial examples not found, running attack with default settings" - ) - make_adversarial_example( - path=self.path, - batch_size=self.attack_batch_size, - eps=self.eps, - max_examples=self.max_size, - success_threshold=self.success_threshold, - steps=self.steps, - ) + filename = f"adv_examples_{'test' if self.use_test_data else 'train'}" + make_adversarial_example( + path=self.path, + filename=filename, + 
batch_size=self.attack_batch_size, + eps=self.eps, + max_examples=self.max_size, + success_threshold=self.success_threshold, + steps=self.steps, + use_test_data=self.use_test_data, + ) - return AdversarialExampleDataset(base_run=self.path, num_examples=self.max_size) + return AdversarialExampleDataset( + filepath=self.path / filename, num_examples=self.max_size + ) @property def num_classes(self): @@ -100,10 +110,8 @@ def num_classes(self): class AdversarialExampleDataset(Dataset): - def __init__(self, base_run: Path, num_examples=None): - self.base_run = base_run - - data = utils.load(base_run / "adv_examples") + def __init__(self, filepath: Path, num_examples=None): + data = utils.load(filepath) assert isinstance(data, dict) self.examples = data["adv_inputs"] self.labels = data["labels"] diff --git a/src/cupbearer/data/backdoor_data.py b/src/cupbearer/data/backdoor_data.py index a75f370d..44e79c4b 100644 --- a/src/cupbearer/data/backdoor_data.py +++ b/src/cupbearer/data/backdoor_data.py @@ -12,6 +12,11 @@ class BackdoorData(DatasetConfig): original: DatasetConfig backdoor: Backdoor + def get_test_split(self) -> DatasetConfig: + return BackdoorData( + original=self.original.get_test_split(), backdoor=self.backdoor + ) + @property def num_classes(self): return self.original.num_classes diff --git a/src/cupbearer/data/pytorch.py b/src/cupbearer/data/pytorch.py index 95725f09..804ce80e 100644 --- a/src/cupbearer/data/pytorch.py +++ b/src/cupbearer/data/pytorch.py @@ -1,3 +1,4 @@ +import dataclasses from dataclasses import dataclass from torch.utils.data import Dataset @@ -25,6 +26,14 @@ class PytorchConfig(DatasetConfig): transforms: dict[str, Transform] = mutable_field({"to_tensor": ToTensor()}) default_augmentations: bool = True + def get_test_split(self) -> DatasetConfig: + if self.train: + # TODO: this will keep the augmentations around, + # which we probably don't want? 
+ return dataclasses.replace(self, train=False) + else: + raise ValueError("This dataset is already a test split.") + def __post_init__(self): super().__post_init__() if self.default_augmentations and self.train: diff --git a/src/cupbearer/detectors/abstraction/abstraction_detector.py b/src/cupbearer/detectors/abstraction/abstraction_detector.py index 34062256..df49ddea 100644 --- a/src/cupbearer/detectors/abstraction/abstraction_detector.py +++ b/src/cupbearer/detectors/abstraction/abstraction_detector.py @@ -146,11 +146,14 @@ def should_train_on_clean_data(self) -> bool: def train( self, - dataset, + trusted_data, + untrusted_data, *, num_classes: int, train_config: TrainConfig, ): + if trusted_data is None: + raise ValueError("Abstraction detector requires trusted training data.") # Possibly we should store this as a submodule to save optimizers and continue # training later. But as long as we don't actually make use of that, # this is easiest. @@ -160,7 +163,7 @@ def train( optim_cfg=train_config.optimizer, ) - train_loader = train_config.get_dataloader(dataset) + train_loader = train_config.get_dataloader(trusted_data) # TODO: implement validation data # val_loaders = { diff --git a/src/cupbearer/detectors/anomaly_detector.py b/src/cupbearer/detectors/anomaly_detector.py index e70e5d50..4fbb8724 100644 --- a/src/cupbearer/detectors/anomaly_detector.py +++ b/src/cupbearer/detectors/anomaly_detector.py @@ -13,7 +13,7 @@ from torch.utils.data import DataLoader, Dataset from tqdm.auto import tqdm -from cupbearer.data import TestDataMix +from cupbearer.data import MixedData from cupbearer.models.models import HookedModel from cupbearer.utils import utils @@ -33,18 +33,19 @@ def __init__( self.trained = False - @property @abstractmethod - def should_train_on_clean_data(self) -> bool: - pass - - @property - def should_train_on_poisoned_data(self) -> bool: - return not self.should_train_on_clean_data + def train( + self, + trusted_data: Dataset | None, + 
untrusted_data: Dataset | None, + *, + num_classes: int, + train_config: utils.BaseConfig, + ): + """Train the anomaly detector with the given datasets. - @abstractmethod - def train(self, dataset, *, num_classes: int, train_config: utils.BaseConfig): - """Train the anomaly detector with the given dataset as "normal" data.""" + At least one of trusted_data or untrusted_data must be provided. + """ @contextmanager def finetune(self, **kwargs): @@ -91,15 +92,18 @@ def eval( self, # Don't need train_dataset here, but e.g. adversarial abstractions need it, # and in general there's no reason to deny detectors access to it during eval. + # TODO: I think we can/should remove this and require detectors to handle + # anything involving training data during training (now that they get access + # to untrusted data then). train_dataset: Dataset, - test_dataset: TestDataMix, + test_dataset: MixedData, histogram_percentile: float = 95, num_bins: int = 100, pbar: bool = False, ): # Check this explicitly because otherwise things can break in weird ways # when we assume that anomaly labels are included. - assert isinstance(test_dataset, TestDataMix), type(test_dataset) + assert isinstance(test_dataset, MixedData), type(test_dataset) test_loader = DataLoader( test_dataset, diff --git a/src/cupbearer/detectors/finetuning.py b/src/cupbearer/detectors/finetuning.py index d76f512b..24ff4bbd 100644 --- a/src/cupbearer/detectors/finetuning.py +++ b/src/cupbearer/detectors/finetuning.py @@ -19,26 +19,25 @@ def __init__(self, model, max_batch_size, save_path): # detector or load weights for inference, we'll need to copy in both cases. 
self.finetuned_model = copy.deepcopy(self.model) - @property - def should_train_on_clean_data(self) -> bool: - return True - def train( self, - clean_dataset, + trusted_data, + untrusted_data, *, num_classes: int, train_config: TrainConfig, ): + if trusted_data is None: + raise ValueError("Finetuning detector requires trusted training data.") classifier = Classifier( self.finetuned_model, num_classes=num_classes, - optim_cfg=train_config, + optim_cfg=train_config.optimizer, save_hparams=False, ) # Create a DataLoader for the clean dataset - clean_loader = train_config.get_dataloader(clean_dataset) + clean_loader = train_config.get_dataloader(trusted_data) # Finetune the model on the clean dataset trainer = train_config.get_trainer(path=self.save_path) diff --git a/src/cupbearer/detectors/statistical/mahalanobis_detector.py b/src/cupbearer/detectors/statistical/mahalanobis_detector.py index d18e8835..b91d4287 100644 --- a/src/cupbearer/detectors/statistical/mahalanobis_detector.py +++ b/src/cupbearer/detectors/statistical/mahalanobis_detector.py @@ -8,9 +8,7 @@ class MahalanobisDetector(ActivationCovarianceBasedDetector): - @property - def should_train_on_clean_data(self) -> bool: - return True + use_trusted: bool = True def post_covariance_training(self, train_config: MahalanobisTrainConfig): self.inv_covariances = { diff --git a/src/cupbearer/detectors/statistical/que_detector.py b/src/cupbearer/detectors/statistical/que_detector.py index a3d4cf2f..7bc8dd15 100644 --- a/src/cupbearer/detectors/statistical/que_detector.py +++ b/src/cupbearer/detectors/statistical/que_detector.py @@ -8,9 +8,7 @@ class QuantumEntropyDetector(ActivationCovarianceBasedDetector): - @property - def should_train_on_clean_data(self) -> bool: - return True + use_trusted: bool = True def post_covariance_training(self, train_config: ActivationCovarianceTrainConfig): whitening_matrices = {} diff --git a/src/cupbearer/detectors/statistical/spectral_detector.py 
b/src/cupbearer/detectors/statistical/spectral_detector.py index eaf74a3a..7774721d 100644 --- a/src/cupbearer/detectors/statistical/spectral_detector.py +++ b/src/cupbearer/detectors/statistical/spectral_detector.py @@ -13,9 +13,7 @@ class SpectralSignatureDetector(ActivationCovarianceBasedDetector): Neural Information Processing Systems (2018). """ - @property - def should_train_on_clean_data(self) -> bool: - return False + use_trusted: bool = False def post_covariance_training(self, train_config: ActivationCovarianceTrainConfig): # Calculate top right singular vectors from covariance matrices diff --git a/src/cupbearer/detectors/statistical/statistical.py b/src/cupbearer/detectors/statistical/statistical.py index 80914c8d..2e686777 100644 --- a/src/cupbearer/detectors/statistical/statistical.py +++ b/src/cupbearer/detectors/statistical/statistical.py @@ -2,7 +2,7 @@ from dataclasses import dataclass import torch -from torch.utils.data import DataLoader +from torch.utils.data import DataLoader, Dataset from tqdm import tqdm from cupbearer.detectors.anomaly_detector import ActivationBasedDetector @@ -20,7 +20,7 @@ class StatisticalTrainConfig(BaseConfig, ABC): # robust: bool = False # TODO spectre uses # https://www.semanticscholar.org/paper/Being-Robust-(in-High-Dimensions)-Can-Be-Practical-Diakonikolas-Kamath/2a6de51d86f13e9eb7efa85491682dad0ccd65e8?utm_source=direct_link - def get_dataloader(self, dataset, train=True): + def get_dataloader(self, dataset: Dataset, train=True): if train: return DataLoader( dataset, @@ -39,7 +39,7 @@ def get_dataloader(self, dataset, train=True): @dataclass class DebugStatisticalTrainConfig(StatisticalTrainConfig): - max_batchs: int = 3 + max_batches: int = 3 batch_size: int = 5 max_batch_size: int = 5 @@ -67,6 +67,8 @@ class DebugMahalanobisTrainConfig(DebugStatisticalTrainConfig, MahalanobisTrainC class StatisticalDetector(ActivationBasedDetector, ABC): + use_trusted: bool + @abstractmethod def init_variables(self, 
activation_sizes: dict[str, torch.Size]): pass @@ -77,7 +79,8 @@ def batch_update(self, activations: dict[str, torch.Tensor]): def train( self, - dataset, + trusted_data, + untrusted_data, *, num_classes: int, train_config: StatisticalTrainConfig, @@ -85,7 +88,20 @@ def train( # Common for statistical methods is that the training does not require # gradients, but instead computes summary statistics or similar with torch.inference_mode(): - data_loader = train_config.get_dataloader(dataset) + if self.use_trusted: + if trusted_data is None: + raise ValueError( + f"{self.__class__.__name__} requires trusted training data." + ) + data = trusted_data + else: + if untrusted_data is None: + raise ValueError( + f"{self.__class__.__name__} requires untrusted training data." + ) + data = untrusted_data + + data_loader = train_config.get_dataloader(data) example_batch = next(iter(data_loader)) _, example_activations = self.get_activations(example_batch) @@ -131,13 +147,15 @@ def post_covariance_training(self, train_config: ActivationCovarianceTrainConfig def train( self, - dataset, + trusted_data, + untrusted_data, *, num_classes: int, train_config: ActivationCovarianceTrainConfig, ): super().train( - dataset, + trusted_data=trusted_data, + untrusted_data=untrusted_data, num_classes=num_classes, train_config=train_config, ) diff --git a/src/cupbearer/scripts/conf/eval_detector_conf.py b/src/cupbearer/scripts/conf/eval_detector_conf.py index 980c768a..ca5f7d2c 100644 --- a/src/cupbearer/scripts/conf/eval_detector_conf.py +++ b/src/cupbearer/scripts/conf/eval_detector_conf.py @@ -1,13 +1,13 @@ from dataclasses import dataclass from cupbearer.detectors import DetectorConfig, StoredDetector -from cupbearer.tasks import TaskConfigBase +from cupbearer.tasks import TaskConfig from cupbearer.utils.scripts import ScriptConfig @dataclass(kw_only=True) class Config(ScriptConfig): - task: TaskConfigBase + task: TaskConfig detector: DetectorConfig | None = None save_config: bool = False 
pbar: bool = False diff --git a/src/cupbearer/scripts/conf/train_detector_conf.py b/src/cupbearer/scripts/conf/train_detector_conf.py index ed854e32..0b51379c 100644 --- a/src/cupbearer/scripts/conf/train_detector_conf.py +++ b/src/cupbearer/scripts/conf/train_detector_conf.py @@ -1,11 +1,11 @@ from dataclasses import dataclass from cupbearer.detectors import DetectorConfig -from cupbearer.tasks import TaskConfigBase +from cupbearer.tasks import TaskConfig from cupbearer.utils.scripts import ScriptConfig @dataclass(kw_only=True) class Config(ScriptConfig): - task: TaskConfigBase + task: TaskConfig detector: DetectorConfig diff --git a/src/cupbearer/scripts/eval_detector.py b/src/cupbearer/scripts/eval_detector.py index f9e87b74..fe20b245 100644 --- a/src/cupbearer/scripts/eval_detector.py +++ b/src/cupbearer/scripts/eval_detector.py @@ -6,8 +6,8 @@ def main(cfg: Config): assert cfg.detector is not None # make type checker happy # Init - train_data = cfg.task.build_train_data() - test_data = cfg.task.build_test_data() + train_data = cfg.task.trusted_data.build() + test_data = cfg.task.test_data.build() # train_data[0] is the first sample, which is (input, ...), so we need another [0] example_input = train_data[0][0] model = cfg.task.build_model(input_shape=example_input.shape) diff --git a/src/cupbearer/scripts/train_detector.py b/src/cupbearer/scripts/train_detector.py index 3f0dd720..a0e099c7 100644 --- a/src/cupbearer/scripts/train_detector.py +++ b/src/cupbearer/scripts/train_detector.py @@ -1,5 +1,3 @@ -import warnings - from cupbearer.utils.scripts import script from . 
import EvalDetectorConfig, eval_detector @@ -8,29 +6,27 @@ @script def main(cfg: Config): - reference_data = cfg.task.build_train_data() - # reference_data[0] is the first sample, which is (input, ...), so we need another + trusted_data = untrusted_data = None + + if cfg.task.allow_trusted: + trusted_data = cfg.task.trusted_data.build() + if cfg.task.allow_untrusted: + untrusted_data = cfg.task.untrusted_data.build() + + example_data = trusted_data or untrusted_data + if example_data is None: + raise ValueError( + f"{type(cfg.task).__name__} does not allow trusted nor untrusted data." + ) + # example_data[0] is the first sample, which is (input, ...), so we need another # [0] index - example_input = reference_data[0][0] + example_input = example_data[0][0] model = cfg.task.build_model(input_shape=example_input.shape) detector = cfg.detector.build(model=model, save_dir=cfg.path) - if cfg.task.normal_weight_when_training < 1.0: - if not detector.should_train_on_poisoned_data: - warnings.warn( - f"Detector of type {type(detector).__name__} is not meant" - + " to be trained on poisoned samples." - ) - else: - if not detector.should_train_on_clean_data: - warnings.warn( - f"Detector of type {type(detector).__name__} is not meant" - + " to be trained without poisoned samples." - ) - - # We want to convert the train dataclass to a dict, but *not* recursively. 
detector.train( - reference_data, + trusted_data=trusted_data, + untrusted_data=untrusted_data, num_classes=cfg.task.num_classes, train_config=cfg.detector.train, ) diff --git a/src/cupbearer/tasks/__init__.py b/src/cupbearer/tasks/__init__.py index 60328543..09baff94 100644 --- a/src/cupbearer/tasks/__init__.py +++ b/src/cupbearer/tasks/__init__.py @@ -1,5 +1,5 @@ # ruff: noqa: F401 -from ._config import CustomTask, TaskConfig, TaskConfigBase +from ._config import CustomTask, TaskConfig from .adversarial_examples import AdversarialExampleTask from .backdoor_detection import BackdoorDetection from .toy_features import ToyFeaturesTask diff --git a/src/cupbearer/tasks/_config.py b/src/cupbearer/tasks/_config.py index 54dae1b9..525e688f 100644 --- a/src/cupbearer/tasks/_config.py +++ b/src/cupbearer/tasks/_config.py @@ -1,150 +1,130 @@ -from abc import ABC, abstractmethod, abstractproperty -from copy import deepcopy +from abc import ABC from dataclasses import dataclass from typing import Optional -from torch.utils.data import Dataset - from cupbearer.data import ( DatasetConfig, - RemoveMixLabelDataset, - TestDataConfig, - TestDataMix, + MixedDataConfig, ) from cupbearer.models import ModelConfig from cupbearer.models.models import HookedModel -from cupbearer.utils.utils import BaseConfig @dataclass(kw_only=True) -class TaskConfigBase(BaseConfig, ABC): - @abstractmethod - def build_train_data(self) -> Dataset: - pass - - @abstractmethod - def build_model(self, input_shape: list[int] | tuple[int]) -> HookedModel: - pass - - @abstractmethod - def build_test_data(self) -> TestDataMix: - pass - - @abstractproperty - def num_classes(self) -> int: # type: ignore - pass - +class TaskConfig(ABC): + # Proportion of clean data in untrusted datasets: + clean_test_weight: float = 0.5 + clean_train_weight: float = 0.5 + # Whether to allow using trusted and untrusted data for training: + allow_trusted: bool = True + allow_untrusted: bool = True -@dataclass(kw_only=True) -class 
TaskConfig(TaskConfigBase, ABC): - normal_weight: float = 0.5 - normal_weight_when_training: float = 1.0 max_train_size: Optional[int] = None max_test_size: Optional[int] = None def __post_init__(self): # We'll only actually instantiate these when we need them, in case relevant # attributes get changed after initialization. - self._train_data: Optional[DatasetConfig] = None - self._test_data: Optional[DatasetConfig] = None + + # TODO: I think this is no longer necessary after the config refactor. + self._trusted_data: Optional[DatasetConfig] = None + self._untrusted_data: Optional[DatasetConfig] = None + self._test_data: Optional[MixedDataConfig] = None self._model: Optional[ModelConfig] = None - @abstractmethod - def _init_train_data(self): - pass - - def _get_normal_test_data(self) -> DatasetConfig: - # Default implementation: just use the training data, but the test split - # if possible. May be overridden, e.g. if normal test data is meant to be - # harder or otherwise out-of-distribution. - if not self._train_data: - self._init_train_data() - assert self._train_data is not None, "init_train_data must set _train_data" - normal = deepcopy(self._train_data) - if hasattr(normal, "train"): - # TODO: this is a bit of a hack, maybe there should be a nicer interface - # for this. - normal.train = False # type: ignore - - return normal - - @abstractmethod - def _get_anomalous_test_data(self) -> DatasetConfig: - pass - - @abstractmethod - def _init_model(self): - pass - - def build_train_data(self) -> Dataset: - if not self._train_data: - self._init_train_data() - assert self._train_data is not None, "init_train_data must set _train_data" - self._train_data.max_size = self.max_train_size - - if self.normal_weight_when_training == 1.0: - return self._train_data.build() - else: - # E.g. 
SpectralDetector should use poisoned data when training - anomalous = self._get_anomalous_test_data() - - # As TestDataMix adds a label for poisoned or not, we remove this here - train_data = RemoveMixLabelDataset( - TestDataConfig( - normal=self._train_data, - anomalous=anomalous, - normal_weight=self.normal_weight_when_training, - ).build() + def _get_clean_data(self, train: bool) -> DatasetConfig: + raise NotImplementedError + + def _get_anomalous_data(self, train: bool) -> DatasetConfig: + raise NotImplementedError + + def _get_model(self) -> ModelConfig: + raise NotImplementedError + + @property + def trusted_data(self) -> DatasetConfig: + """Clean data that may be used for training.""" + if not self.allow_trusted: + raise ValueError( + "Using trusted training data is not allowed for this task." ) - return train_data + if not self._trusted_data: + self._trusted_data = self._get_clean_data(train=True) + self._trusted_data.max_size = self.max_train_size + return self._trusted_data + + @property + def untrusted_data(self) -> DatasetConfig: + """A mix of clean and anomalous data that may be used for training.""" + if not self.allow_untrusted: + raise ValueError( + "Using untrusted training data is not allowed for this task." 
+ ) + if not self._untrusted_data: + anomalous_data = self._get_anomalous_data(train=True) + clean_data = self._get_clean_data(train=True) + self._untrusted_data = MixedDataConfig( + normal=clean_data, + anomalous=anomalous_data, + normal_weight=self.clean_train_weight, + max_size=self.max_train_size, + return_anomaly_labels=False, + ) + return self._untrusted_data def build_model(self, input_shape: list[int] | tuple[int]) -> HookedModel: if not self._model: - self._init_model() - assert self._model is not None, "init_model must set _model" + self._model = self._get_model() return self._model.build_model(input_shape) - def build_test_data(self) -> TestDataMix: - normal = self._get_normal_test_data() - anomalous = self._get_anomalous_test_data() - self._test_data = TestDataConfig( - normal=normal, - anomalous=anomalous, - normal_weight=self.normal_weight, - max_size=self.max_test_size, - ) - return self._test_data.build() + @property + def test_data(self) -> MixedDataConfig: + if not self._test_data: + normal = self._get_clean_data(train=False) + anomalous = self._get_anomalous_data(train=False) + self._test_data = MixedDataConfig( + normal=normal, + anomalous=anomalous, + normal_weight=self.clean_test_weight, + max_size=self.max_test_size, + ) + return self._test_data @property def num_classes(self): - if not self._train_data: - self._init_train_data() - assert self._train_data is not None, "init_train_data must set _train_data" - return self._train_data.num_classes + try: + return self.trusted_data.num_classes + except ValueError: + return self.untrusted_data.num_classes -@dataclass(kw_only=True) +@dataclass class CustomTask(TaskConfig): """A fully customizable task config, where all datasets are specified directly.""" - train_data: DatasetConfig - anomalous_data: DatasetConfig - normal_test_data: Optional[DatasetConfig] = None + clean_test_data: DatasetConfig + anomalous_test_data: DatasetConfig model: ModelConfig + clean_train_data: Optional[DatasetConfig] = 
None + anomalous_train_data: Optional[DatasetConfig] = None - def _init_train_data(self): - self._train_data = self.train_data - - def _get_anomalous_test_data(self) -> DatasetConfig: - return self.anomalous_data - - def _get_normal_test_data(self) -> DatasetConfig: - if self.normal_test_data: - return self.normal_test_data - return super()._get_normal_test_data() - - def _init_model(self): - self._model = self.model + def __post_init__(self): + super(CustomTask, self).__post_init__() + self.allow_trusted = self.clean_train_data is not None + self.allow_untrusted = self.anomalous_train_data is not None + + def _get_clean_data(self, train: bool) -> DatasetConfig: + # This is a bit of a hack because it might return `None`, but that only + # becomes important if illegal training data is used. + return self.clean_train_data if train else self.clean_test_data + + def _get_anomalous_data(self, train: bool) -> DatasetConfig: + # This is a bit of a hack because it might return `None`, but that only + # becomes important if illegal training data is used. 
+ return self.anomalous_train_data if train else self.anomalous_test_data + + def _get_model(self) -> ModelConfig: + return self.model @dataclass(kw_only=True) diff --git a/src/cupbearer/tasks/adversarial_examples.py b/src/cupbearer/tasks/adversarial_examples.py index f8196d76..907967d2 100644 --- a/src/cupbearer/tasks/adversarial_examples.py +++ b/src/cupbearer/tasks/adversarial_examples.py @@ -2,9 +2,8 @@ from dataclasses import dataclass from pathlib import Path -from cupbearer.data._shared import TrainDataFromRun -from cupbearer.data.adversarial import AdversarialExampleConfig -from cupbearer.models import StoredModel +from cupbearer.data import AdversarialExampleConfig, DatasetConfig, TrainDataFromRun +from cupbearer.models import ModelConfig, StoredModel from ._config import DebugTaskConfig, TaskConfig @@ -17,15 +16,18 @@ class AdversarialExampleTask(TaskConfig): steps: int = 40 eps: float = 8 / 255 - def _init_train_data(self): - self._train_data = TrainDataFromRun(path=self.path) + def _get_clean_data(self, train: bool) -> DatasetConfig: + if train: + return TrainDataFromRun(path=self.path) + else: + return TrainDataFromRun(path=self.path).get_test_split() - def _get_anomalous_test_data(self): + def _get_anomalous_data(self, train: bool) -> DatasetConfig: max_size = None if self.max_test_size: # This isn't strictly necessary, but it lets us avoid generating more # adversarial examples than needed. 
- max_size = math.ceil(self.max_test_size * (1 - self.normal_weight)) + max_size = math.ceil(self.max_test_size * (1 - self.clean_test_weight)) return AdversarialExampleConfig( path=self.path, max_size=max_size, @@ -33,10 +35,11 @@ def _get_anomalous_test_data(self): success_threshold=self.success_threshold, steps=self.steps, eps=self.eps, + use_test_data=not train, ) - def _init_model(self): - self._model = StoredModel(path=self.path) + def _get_model(self) -> ModelConfig: + return StoredModel(path=self.path) @dataclass(kw_only=True) diff --git a/src/cupbearer/tasks/backdoor_detection.py b/src/cupbearer/tasks/backdoor_detection.py index b1d05762..0291e847 100644 --- a/src/cupbearer/tasks/backdoor_detection.py +++ b/src/cupbearer/tasks/backdoor_detection.py @@ -1,10 +1,9 @@ -from copy import deepcopy from dataclasses import dataclass from pathlib import Path -from cupbearer.data import Backdoor +from cupbearer.data import Backdoor, DatasetConfig from cupbearer.data.backdoor_data import BackdoorData -from cupbearer.models import StoredModel +from cupbearer.models import ModelConfig, StoredModel from cupbearer.utils.scripts import load_config from ._config import DebugTaskConfig, TaskConfig @@ -16,19 +15,28 @@ class BackdoorDetection(TaskConfig): backdoor: Backdoor no_load: bool = False - def _init_train_data(self): - data_cfg = load_config(self.path, "train_data", BackdoorData) - # Remove the backdoor - self._train_data = data_cfg.original + def __post_init__(self): + super().__post_init__() + self.backdoored_train_data = load_config(self.path, "train_data", BackdoorData) - def _get_anomalous_test_data(self): - copy = deepcopy(self._train_data) + def _get_clean_data(self, train: bool) -> DatasetConfig: + if train: + return self.backdoored_train_data.original + else: + return self.backdoored_train_data.original.get_test_split() + + def _get_anomalous_data(self, train: bool) -> DatasetConfig: if not self.no_load: self.backdoor.load(self.path) - return 
BackdoorData(original=copy, backdoor=self.backdoor) - def _init_model(self): - self._model = StoredModel(path=self.path) + # TODO: should we get rid of `self.backdoor` and just use the existing one + # from the training run? + return BackdoorData( + original=self._get_clean_data(train), backdoor=self.backdoor + ) + + def _get_model(self) -> ModelConfig: + return StoredModel(path=self.path) @dataclass diff --git a/src/cupbearer/utils/train.py b/src/cupbearer/utils/train.py index 7f827b7e..a87b9b6d 100644 --- a/src/cupbearer/utils/train.py +++ b/src/cupbearer/utils/train.py @@ -4,7 +4,7 @@ import lightning as L from lightning.pytorch import callbacks, loggers -from torch.utils.data import DataLoader +from torch.utils.data import DataLoader, Dataset from cupbearer.utils.optimizers import OptimizerConfig from cupbearer.utils.utils import BaseConfig @@ -37,7 +37,7 @@ def callbacks(self): return callback_list - def get_dataloader(self, dataset, train=True): + def get_dataloader(self, dataset: Dataset, train=True): if train: return DataLoader( dataset, diff --git a/tests/test_data.py b/tests/test_data.py index fcc05d78..66a31b5f 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -3,9 +3,6 @@ import numpy as np import pytest import torch - -# We shouldn't import TestDataMix directly because that will make pytest think -# it's a test. 
from cupbearer import data from torch.utils.data import DataLoader, Dataset from torchvision.transforms.functional import InterpolationMode @@ -87,7 +84,7 @@ def anomalous_dataset(): @pytest.fixture def mixed_dataset(clean_dataset, anomalous_dataset): - return data.TestDataMix(clean_dataset, anomalous_dataset) + return data.MixedData(clean_dataset, anomalous_dataset) @pytest.fixture @@ -102,7 +99,7 @@ def anomalous_config(): @pytest.fixture def mixed_config(clean_config, anomalous_config): - return data.TestDataConfig(clean_config, anomalous_config) + return data.MixedDataConfig(clean_config, anomalous_config) def test_len(mixed_dataset): @@ -118,7 +115,7 @@ def test_contents(mixed_dataset): def test_uneven_weight(clean_dataset, anomalous_dataset): - mixed_data = data.TestDataMix(clean_dataset, anomalous_dataset, normal_weight=0.3) + mixed_data = data.MixedData(clean_dataset, anomalous_dataset, normal_weight=0.3) # The 7 anomalous datapoints should be 70% of the dataset, so total length should # be 10. assert len(mixed_data) == 10 @@ -149,7 +146,7 @@ def test_mixed_max_size(clean_config, anomalous_config): anomalous_config.max_size = 23 # The actual mixed dataset we build now is the same as before: 10 datapoints, # 3 normal and 7 anomalous. 
- mixed_config = data.TestDataConfig(clean_config, anomalous_config) + mixed_config = data.MixedDataConfig(clean_config, anomalous_config) mixed_config.max_size = 10 mixed_config.normal_weight = 0.3 mixed_data = mixed_config.build() diff --git a/tests/test_detectors.py b/tests/test_detectors.py index 66964f52..5d53a2b4 100644 --- a/tests/test_detectors.py +++ b/tests/test_detectors.py @@ -51,7 +51,9 @@ def train_detector(self, dataset, Model, Detector, **kwargs): detector = Detector(model=model) detector.train( - dataset=dataset, + # Just make sure all detectors get the data they need: + trusted_data=dataset, + untrusted_data=dataset, num_classes=7, train_config=self.train_config, ) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index f8be68c1..2efb3ced 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,9 +1,6 @@ -import warnings - import pytest import torch from cupbearer import data, detectors, models, tasks -from cupbearer.data import RemoveMixLabelDataset from cupbearer.scripts import ( eval_classifier, train_classifier, @@ -109,7 +106,7 @@ def test_train_mahalanobis_advex(backdoor_classifier_path, tmp_path): path=tmp_path, ) train_detector(cfg) - assert (backdoor_classifier_path / "adv_examples.pt").is_file() + assert (backdoor_classifier_path / "adv_examples_train.pt").is_file() assert (backdoor_classifier_path / "adv_examples.pdf").is_file() assert (tmp_path / "config.yaml").is_file() assert (tmp_path / "detector.pt").is_file() @@ -127,39 +124,18 @@ def test_train_mahalanobis_advex(backdoor_classifier_path, tmp_path): detectors.DebugQuantumEntropyConfig, ], ) -@pytest.mark.parametrize("train_on_clean", [False, True]) -def test_train_statistical_backdoor( - backdoor_classifier_path, tmp_path, detector_type, train_on_clean -): +def test_train_statistical_backdoor(backdoor_classifier_path, tmp_path, detector_type): cfg = train_detector_conf.Config( task=tasks.backdoor_detection.DebugBackdoorDetection( 
path=backdoor_classifier_path, backdoor=data.CornerPixelBackdoor(), - normal_weight_when_training=1.0 if train_on_clean else 0.9, ), detector=detector_type(), path=tmp_path, ) train_detector(cfg) - # Check that data is mixed when it should be - assert train_on_clean ^ isinstance( - cfg.task.build_train_data(), RemoveMixLabelDataset - ) - # Train detector - warning_message = ( - r".*Detector of type \w+ is not meant to be trained \w+ poisoned samples[.].*" - ) - if train_on_clean ^ (detector_type != detectors.DebugSpectralSignatureConfig): - # Should warn for incompatibility - with pytest.warns(match=warning_message): - train_detector(cfg) - else: - # Should not warn for incompatibility - with warnings.catch_warnings(): - warnings.filterwarnings(action="error", message=warning_message) - train_detector(cfg) assert (tmp_path / "config.yaml").is_file() assert (tmp_path / "detector.pt").is_file() # Eval outputs: From f16b9ca264d92d2dc0a338ad10d7a3bbf637eb40 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Thu, 29 Feb 2024 00:03:42 -0800 Subject: [PATCH 03/25] Iterating on tasks --- src/cupbearer/data/__init__.py | 2 + src/cupbearer/data/_shared.py | 68 ++++++++++- src/cupbearer/scripts/train_detector.py | 4 + src/cupbearer/tasks/_config.py | 130 +++++++++++++++++----- src/cupbearer/tasks/backdoor_detection.py | 35 +++--- src/cupbearer/utils/__init__.py | 2 +- 6 files changed, 191 insertions(+), 50 deletions(-) diff --git a/src/cupbearer/data/__init__.py b/src/cupbearer/data/__init__.py index 1cf8f1a7..479558ef 100644 --- a/src/cupbearer/data/__init__.py +++ b/src/cupbearer/data/__init__.py @@ -3,7 +3,9 @@ DatasetConfig, MixedData, MixedDataConfig, + SubsetConfig, TrainDataFromRun, + split_dataset_cfg, ) from .adversarial import AdversarialExampleConfig from .backdoor_data import BackdoorData diff --git a/src/cupbearer/data/_shared.py b/src/cupbearer/data/_shared.py index e6a9a65e..d004f22c 100644 --- a/src/cupbearer/data/_shared.py +++ b/src/cupbearer/data/_shared.py 
@@ -53,6 +53,70 @@ def _build(self) -> Dataset: raise NotImplementedError +@dataclass +class SubsetConfig(DatasetConfig): + full_dataset: DatasetConfig + start_fraction: float = 0.0 + end_fraction: float = 1.0 + + def __post_init__(self): + super().__post_init__() + if self.max_size: + raise ValueError( + "max_size should be set on the full dataset, not the subset." + ) + if self.start_fraction > self.end_fraction: + raise ValueError( + f"{self.start_fraction=} must be less than or equal " + f"to {self.end_fraction=}." + ) + if self.start_fraction < 0 or self.end_fraction > 1: + raise ValueError( + "Fractions must be between 0 and 1, " + f"got {self.start_fraction} and {self.end_fraction}." + ) + if self.transforms: + raise ValueError( + "Transforms should be applied to the full dataset, not the subset." + ) + + def _build(self) -> Dataset: + full = self.full_dataset.build() + start = int(self.start_fraction * len(full)) + end = int(self.end_fraction * len(full)) + return Subset(full, range(start, end)) + + def num_classes(self) -> int: # type: ignore + return self.full_dataset.num_classes + + def get_test_split(self) -> "DatasetConfig": + return SubsetConfig( + full_dataset=self.full_dataset.get_test_split(), + start_fraction=self.start_fraction, + end_fraction=self.end_fraction, + ) + + # Mustn't inherit get_transforms() from full_dataset, they're already applied + # to the full dataset on build. 
+ + +def split_dataset_cfg(cfg: DatasetConfig, *fractions: float) -> list[SubsetConfig]: + if not fractions: + raise ValueError("At least one fraction must be provided.") + if not all(0 <= f <= 1 for f in fractions): + raise ValueError("Fractions must be between 0 and 1.") + if not sum(fractions) == 1: + fractions = fractions + (1 - sum(fractions),) + + subsets = [] + current_start = 0.0 + for fraction in fractions: + subsets.append(SubsetConfig(cfg, current_start, current_start + fraction)) + current_start += fraction + assert current_start == 1.0 + return subsets + + class TransformDataset(Dataset): """Dataset that applies a transform to another dataset.""" @@ -162,10 +226,10 @@ def build(self) -> MixedData: anomalous = self.anomalous.build() if self.max_size: normal_size = int(self.max_size * self.normal_weight) - assert normal_size <= len(normal) + normal_size = min(len(normal), normal_size) normal = Subset(normal, range(normal_size)) anomalous_size = self.max_size - normal_size - assert anomalous_size <= len(anomalous) + anomalous_size = min(len(anomalous), anomalous_size) anomalous = Subset(anomalous, range(anomalous_size)) dataset = MixedData( normal, anomalous, self.normal_weight, self.return_anomaly_labels diff --git a/src/cupbearer/scripts/train_detector.py b/src/cupbearer/scripts/train_detector.py index a0e099c7..f8641e3e 100644 --- a/src/cupbearer/scripts/train_detector.py +++ b/src/cupbearer/scripts/train_detector.py @@ -10,8 +10,12 @@ def main(cfg: Config): if cfg.task.allow_trusted: trusted_data = cfg.task.trusted_data.build() + if len(trusted_data) == 0: + trusted_data = None if cfg.task.allow_untrusted: untrusted_data = cfg.task.untrusted_data.build() + if len(untrusted_data) == 0: + untrusted_data = None example_data = trusted_data or untrusted_data if example_data is None: diff --git a/src/cupbearer/tasks/_config.py b/src/cupbearer/tasks/_config.py index 525e688f..b6309f2d 100644 --- a/src/cupbearer/tasks/_config.py +++ 
b/src/cupbearer/tasks/_config.py @@ -1,10 +1,12 @@ -from abc import ABC +from abc import ABC, abstractmethod +from copy import deepcopy from dataclasses import dataclass from typing import Optional from cupbearer.data import ( DatasetConfig, MixedDataConfig, + split_dataset_cfg, ) from cupbearer.models import ModelConfig from cupbearer.models.models import HookedModel @@ -32,10 +34,21 @@ def __post_init__(self): self._test_data: Optional[MixedDataConfig] = None self._model: Optional[ModelConfig] = None - def _get_clean_data(self, train: bool) -> DatasetConfig: + def _get_trusted_data(self) -> DatasetConfig: raise NotImplementedError - def _get_anomalous_data(self, train: bool) -> DatasetConfig: + def _get_clean_untrusted_data(self) -> DatasetConfig: + raise NotImplementedError + + def _get_anomalous_data(self) -> DatasetConfig: + raise NotImplementedError + + # The following two methods don't need to be implemented, the task will use + # get_test_split() on the untrusted data by default. + def _get_clean_test_data(self) -> DatasetConfig: + raise NotImplementedError + + def _get_anomalous_test_data(self) -> DatasetConfig: raise NotImplementedError def _get_model(self) -> ModelConfig: @@ -49,7 +62,7 @@ def trusted_data(self) -> DatasetConfig: "Using trusted training data is not allowed for this task." ) if not self._trusted_data: - self._trusted_data = self._get_clean_data(train=True) + self._trusted_data = deepcopy(self._get_trusted_data()) self._trusted_data.max_size = self.max_train_size return self._trusted_data @@ -61,8 +74,8 @@ def untrusted_data(self) -> DatasetConfig: "Using untrusted training data is not allowed for this task." 
) if not self._untrusted_data: - anomalous_data = self._get_anomalous_data(train=True) - clean_data = self._get_clean_data(train=True) + anomalous_data = self._get_anomalous_data() + clean_data = self._get_clean_untrusted_data() self._untrusted_data = MixedDataConfig( normal=clean_data, anomalous=anomalous_data, @@ -80,11 +93,15 @@ def build_model(self, input_shape: list[int] | tuple[int]) -> HookedModel: @property def test_data(self) -> MixedDataConfig: if not self._test_data: - normal = self._get_clean_data(train=False) - anomalous = self._get_anomalous_data(train=False) + try: + anomalous_data = self._get_anomalous_test_data() + clean_data = self._get_clean_test_data() + except NotImplementedError: + anomalous_data = self._get_anomalous_data().get_test_split() + clean_data = self._get_clean_untrusted_data().get_test_split() self._test_data = MixedDataConfig( - normal=normal, - anomalous=anomalous, + normal=clean_data, + anomalous=anomalous_data, normal_weight=self.clean_test_weight, max_size=self.max_test_size, ) @@ -99,29 +116,86 @@ def num_classes(self): @dataclass +class FuzzedTask(TaskConfig): + """A task where the anomalous inputs are some modified version of clean ones.""" + + trusted_fraction: float = 1.0 + + def __post_init__(self): + super().__post_init__() + + # First we get the base (unmodified) data and its test split. + train_data = self._get_base_data() + test_data = train_data.get_test_split() + + # We split the training data up into three parts: + # 1. A `trusted_fraction` part will be used as trusted data. + # 2. Out of the remaining part, a `clean_untrusted_fraction` part will be used + # as clean untrusted data. + # 3. The rest will be used as anomalous training data. + ( + self._trusted_data, + self._clean_untrusted_data, + _anomalous_base, + ) = split_dataset_cfg( + train_data, + self.trusted_fraction, + # Using clean_train_weight here means we'll end up using all our data, + # since this is also what's used later in the MixedDataConfig. 
+ (1 - self.trusted_fraction) * self.clean_train_weight, + (1 - self.trusted_fraction) * (1 - self.clean_train_weight), + ) + + # Similarly, we plit up the test data, except there is no trusted subset. + self._clean_test_data, _anomalous_test_base = split_dataset_cfg( + test_data, + self.clean_test_weight, + ) + + self._anomalous_data = self.fuzz(_anomalous_base) + self._anomalous_test_data = self.fuzz(_anomalous_test_base) + + @abstractmethod + def fuzz(self, data: DatasetConfig) -> DatasetConfig: + pass + + @abstractmethod + def _get_base_data(self) -> DatasetConfig: + pass + + def _get_trusted_data(self) -> DatasetConfig: + return self._trusted_data + + def _get_clean_untrusted_data(self) -> DatasetConfig: + return self._clean_untrusted_data + + def _get_anomalous_data(self) -> DatasetConfig: + return self._anomalous_data + + def _get_clean_test_data(self) -> DatasetConfig: + return self._clean_test_data + + def _get_anomalous_test_data(self) -> DatasetConfig: + return self._anomalous_test_data + + +@dataclass(kw_only=True) class CustomTask(TaskConfig): """A fully customizable task config, where all datasets are specified directly.""" - clean_test_data: DatasetConfig - anomalous_test_data: DatasetConfig + trusted_data: DatasetConfig + clean_untrusted_data: DatasetConfig + anomalous_data: DatasetConfig model: ModelConfig - clean_train_data: Optional[DatasetConfig] = None - anomalous_train_data: Optional[DatasetConfig] = None - def __post_init__(self): - super(CustomTask, self).__post_init__() - self.allow_trusted = self.clean_train_data is not None - self.allow_untrusted = self.anomalous_train_data is not None - - def _get_clean_data(self, train: bool) -> DatasetConfig: - # This is a bit of a hack because it might return `None`, but that only - # becomes important if illegal training data is used. 
- return self.clean_train_data if train else self.clean_test_data - - def _get_anomalous_data(self, train: bool) -> DatasetConfig: - # This is a bit of a hack because it might return `None`, but that only - # becomes important if illegal training data is used. - return self.anomalous_train_data if train else self.anomalous_test_data + def _get_clean_untrusted_data(self) -> DatasetConfig: + return self.clean_untrusted_data + + def _get_trusted_data(self) -> DatasetConfig: + return self.trusted_data + + def _get_anomalous_data(self) -> DatasetConfig: + return self.anomalous_data def _get_model(self) -> ModelConfig: return self.model diff --git a/src/cupbearer/tasks/backdoor_detection.py b/src/cupbearer/tasks/backdoor_detection.py index 0291e847..cec9fdcc 100644 --- a/src/cupbearer/tasks/backdoor_detection.py +++ b/src/cupbearer/tasks/backdoor_detection.py @@ -1,39 +1,36 @@ from dataclasses import dataclass from pathlib import Path -from cupbearer.data import Backdoor, DatasetConfig +from cupbearer.data import DatasetConfig from cupbearer.data.backdoor_data import BackdoorData from cupbearer.models import ModelConfig, StoredModel from cupbearer.utils.scripts import load_config -from ._config import DebugTaskConfig, TaskConfig +from ._config import DebugTaskConfig, FuzzedTask @dataclass(kw_only=True) -class BackdoorDetection(TaskConfig): +class BackdoorDetection(FuzzedTask): path: Path - backdoor: Backdoor no_load: bool = False def __post_init__(self): - super().__post_init__() - self.backdoored_train_data = load_config(self.path, "train_data", BackdoorData) - - def _get_clean_data(self, train: bool) -> DatasetConfig: - if train: - return self.backdoored_train_data.original - else: - return self.backdoored_train_data.original.get_test_split() + backdoor_data = load_config(self.path, "train_data", BackdoorData) + self._original = backdoor_data.original + self._backdoor = backdoor_data.backdoor + self._backdoor.p_backdoor = 1.0 - def _get_anomalous_data(self, train: 
bool) -> DatasetConfig: if not self.no_load: - self.backdoor.load(self.path) + self._backdoor.load(self.path) + + # Call this only now that _original and _backdoor are set. + super().__post_init__() + + def _get_base_data(self) -> DatasetConfig: + return self._original - # TODO: should we get rid of `self.backdoor` and just use the existing one - # from the training run? - return BackdoorData( - original=self._get_clean_data(train), backdoor=self.backdoor - ) + def fuzz(self, data: DatasetConfig) -> DatasetConfig: + return BackdoorData(original=data, backdoor=self._backdoor) def _get_model(self) -> ModelConfig: return StoredModel(path=self.path) diff --git a/src/cupbearer/utils/__init__.py b/src/cupbearer/utils/__init__.py index 27d7d21c..5ca825cb 100644 --- a/src/cupbearer/utils/__init__.py +++ b/src/cupbearer/utils/__init__.py @@ -1,4 +1,4 @@ # ruff: noqa: F401 from .optimizers import OptimizerConfig from .train import DebugTrainConfig, TrainConfig -from .utils import load, save +from .utils import inputs_from_batch, load, save From 9073a8564a444041a11bc2701593af1214f2642a Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Thu, 29 Feb 2024 15:05:43 -0800 Subject: [PATCH 04/25] Mostly fix tests Adversarial examples are broken, I think they might be easier to fix after some bigger changes --- src/cupbearer/data/_shared.py | 1 + tests/test_pipeline.py | 23 ++++++++--------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/cupbearer/data/_shared.py b/src/cupbearer/data/_shared.py index d004f22c..911a77c6 100644 --- a/src/cupbearer/data/_shared.py +++ b/src/cupbearer/data/_shared.py @@ -86,6 +86,7 @@ def _build(self) -> Dataset: end = int(self.end_fraction * len(full)) return Subset(full, range(start, end)) + @property def num_classes(self) -> int: # type: ignore return self.full_dataset.num_classes diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 2efb3ced..4fdad0ea 100644 --- a/tests/test_pipeline.py +++ 
b/tests/test_pipeline.py @@ -56,9 +56,7 @@ def test_eval_classifier(backdoor_classifier_path): @pytest.mark.slow def test_train_abstraction_corner_backdoor(backdoor_classifier_path, tmp_path): cfg = train_detector_conf.Config( - task=tasks.BackdoorDetection( - path=backdoor_classifier_path, backdoor=data.CornerPixelBackdoor() - ), + task=tasks.BackdoorDetection(path=backdoor_classifier_path), detector=detectors.AbstractionDetectorConfig(train=DebugTrainConfig()), path=tmp_path, ) @@ -75,9 +73,7 @@ def test_train_abstraction_corner_backdoor(backdoor_classifier_path, tmp_path): @pytest.mark.slow def test_train_autoencoder_corner_backdoor(backdoor_classifier_path, tmp_path): cfg = train_detector_conf.Config( - task=tasks.BackdoorDetection( - path=backdoor_classifier_path, backdoor=data.CornerPixelBackdoor() - ), + task=tasks.BackdoorDetection(path=backdoor_classifier_path), detector=detectors.AbstractionDetectorConfig( train=DebugTrainConfig(), abstraction=detectors.abstraction.AutoencoderAbstractionConfig(), @@ -127,8 +123,9 @@ def test_train_mahalanobis_advex(backdoor_classifier_path, tmp_path): def test_train_statistical_backdoor(backdoor_classifier_path, tmp_path, detector_type): cfg = train_detector_conf.Config( task=tasks.backdoor_detection.DebugBackdoorDetection( + # Need some untrusted data for SpectralSignatureConfig path=backdoor_classifier_path, - backdoor=data.CornerPixelBackdoor(), + trusted_fraction=0.5, ), detector=detector_type(), path=tmp_path, @@ -146,9 +143,7 @@ def test_train_statistical_backdoor(backdoor_classifier_path, tmp_path, detector @pytest.mark.slow def test_finetuning_detector(backdoor_classifier_path, tmp_path): cfg = train_detector_conf.Config( - task=tasks.BackdoorDetection( - path=backdoor_classifier_path, backdoor=data.CornerPixelBackdoor() - ), + task=tasks.BackdoorDetection(path=backdoor_classifier_path), detector=detectors.finetuning.FinetuningConfig(train=DebugTrainConfig()), path=tmp_path, ) @@ -195,16 +190,14 @@ def 
test_wanet(tmp_path): # Check that from_run can load WanetBackdoor properly train_detector_cfg = train_detector_conf.Config( - task=tasks.backdoor_detection.DebugBackdoorDetection( - path=tmp_path / "wanet", backdoor=data.WanetBackdoor() - ), + task=tasks.backdoor_detection.DebugBackdoorDetection(path=tmp_path / "wanet"), detector=detectors.DebugMahalanobisConfig(), path=tmp_path / "wanet-mahalanobis", ) train_detector(train_detector_cfg) assert isinstance(train_detector_cfg.task, tasks.BackdoorDetection) - assert isinstance(train_detector_cfg.task.backdoor, data.WanetBackdoor) + assert isinstance(train_detector_cfg.task._backdoor, data.WanetBackdoor) assert torch.allclose( - train_detector_cfg.task.backdoor.control_grid, + train_detector_cfg.task._backdoor.control_grid, cfg.train_data.backdoor.control_grid, ) From 54c34a682c6518a19958c3251d551d5edf74155f Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Thu, 29 Feb 2024 23:46:51 -0800 Subject: [PATCH 05/25] [WIP] Remove configs --- demo.ipynb | 505 ----------------- notebooks/simple_demo.ipynb | 514 ++++++++++++++++++ src/cupbearer/data/__init__.py | 17 +- src/cupbearer/data/_shared.py | 75 ++- src/cupbearer/data/adversarial.py | 131 ++--- src/cupbearer/data/backdoor_data.py | 35 -- src/cupbearer/data/backdoors.py | 12 +- src/cupbearer/data/pytorch.py | 51 +- src/cupbearer/data/toy_ambiguous_features.py | 15 - src/cupbearer/data/transforms.py | 8 +- src/cupbearer/detectors/__init__.py | 14 +- .../detectors/abstraction/__init__.py | 3 +- src/cupbearer/detectors/anomaly_detector.py | 42 +- src/cupbearer/detectors/config.py | 57 -- src/cupbearer/detectors/finetuning.py | 19 +- .../detectors/statistical/__init__.py | 67 +-- src/cupbearer/models/__init__.py | 100 +--- src/cupbearer/scripts/_shared.py | 22 +- .../scripts/conf/eval_classifier_conf.py | 13 +- .../scripts/conf/eval_detector_conf.py | 15 +- .../scripts/conf/train_classifier_conf.py | 23 +- .../scripts/conf/train_detector_conf.py | 12 +- 
src/cupbearer/scripts/eval_classifier.py | 20 +- src/cupbearer/scripts/eval_detector.py | 14 +- src/cupbearer/scripts/train_classifier.py | 20 +- src/cupbearer/scripts/train_detector.py | 39 +- src/cupbearer/tasks/__init__.py | 7 +- src/cupbearer/tasks/_config.py | 283 +++------- src/cupbearer/tasks/adversarial_examples.py | 79 ++- src/cupbearer/tasks/backdoor_detection.py | 79 ++- src/cupbearer/tasks/toy_features.py | 27 - src/cupbearer/utils/scripts.py | 55 +- 32 files changed, 927 insertions(+), 1446 deletions(-) delete mode 100644 demo.ipynb create mode 100644 notebooks/simple_demo.ipynb delete mode 100644 src/cupbearer/data/backdoor_data.py delete mode 100644 src/cupbearer/detectors/config.py delete mode 100644 src/cupbearer/tasks/toy_features.py diff --git a/demo.ipynb b/demo.ipynb deleted file mode 100644 index fa2a4e44..00000000 --- a/demo.ipynb +++ /dev/null @@ -1,505 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from pathlib import Path\n", - "\n", - "from cupbearer import data, detectors, models, scripts, tasks, utils\n", - "from tensorboard import notebook" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Training a backdoored classifier\n", - "First, we train a classifier on poisoned data:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "GPU available: True (mps), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n", - "/Users/erik/.pyenv/versions/3.10.9/envs/cupbearer/lib/python3.10/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:639: Checkpoint directory 
logs/demo/classifier/checkpoints exists and is not empty.\n", - "\n", - " | Name | Type | Params\n", - "------------------------------------------------------\n", - "0 | model | MLP | 118 K \n", - "1 | train_accuracy | MulticlassAccuracy | 0 \n", - "2 | val_accuracy | ModuleList | 0 \n", - "3 | test_accuracy | ModuleList | 0 \n", - "------------------------------------------------------\n", - "118 K Trainable params\n", - "0 Non-trainable params\n", - "118 K Total params\n", - "0.473 Total estimated model params size (MB)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2478edb97e774ca097a31f195f31c032", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Sanity Checking: | | 0/? [00:00\n", - " \n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "notebook.display(port=6006, height=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also explicitly evaluate the trained model (right now this is pretty limited and doesn't support multiple datasets at once):" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-02-14 14:33:59.006\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcupbearer.scripts.eval_classifier\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m18\u001b[0m - \u001b[34m\u001b[1mLoading transform: ToTensor()\u001b[0m\n", - "\u001b[32m2024-02-14 14:33:59.006\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcupbearer.scripts.eval_classifier\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m18\u001b[0m - \u001b[34m\u001b[1mLoading transform: RandomCrop(p=0.8, padding=5, fill=0, padding_mode='constant')\u001b[0m\n", - "\u001b[32m2024-02-14 14:33:59.007\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mcupbearer.scripts.eval_classifier\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m18\u001b[0m - \u001b[34m\u001b[1mLoading transform: RandomRotation(p=0.5, degrees=10, interpolation=, expand=False, center=None, fill=0)\u001b[0m\n", - "GPU available: True (mps), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "531198feafe142feb9c826b7c9886331", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Testing: | | 0/? [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", - "┃ Test metric DataLoader 0 ┃\n", - "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", - "│ test/acc_epoch 0.9509999752044678 │\n", - "│ test/acc_step 0.9509999752044678 │\n", - "│ test/loss 0.15841300785541534 │\n", - "└───────────────────────────┴───────────────────────────┘\n", - "\n" - ], - "text/plain": [ - "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", - "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", - "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", - "│\u001b[36m \u001b[0m\u001b[36m test/acc_epoch \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9509999752044678 \u001b[0m\u001b[35m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36m test/acc_step \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9509999752044678 \u001b[0m\u001b[35m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36m test/loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.15841300785541534 \u001b[0m\u001b[35m \u001b[0m│\n", - "└───────────────────────────┴───────────────────────────┘\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "scripts.eval_classifier(\n", - " 
scripts.EvalClassifierConfig(\n", - " path=Path(\"logs/demo/classifier\"), data=data.MNIST(train=False)\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "These results will also have been stored to `logs/demo/classifier/metrics.json` if we want to process them further (e.g. to compare many runs):" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'test/loss': 0.15841300785541534, 'test/acc_step': 0.9509999752044678, 'test/acc_epoch': 0.9509999752044678}]\n" - ] - } - ], - "source": [ - "with open(\"logs/demo/classifier/eval.json\") as f:\n", - " print(json.load(f))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Training a backdoor detector\n", - "We'll train a very simple detector using the Mahalanobis distance:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-02-14 14:39:19.332\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcupbearer.utils.scripts\u001b[0m:\u001b[36mload_config\u001b[0m:\u001b[36m55\u001b[0m - \u001b[34m\u001b[1mLoading config 'train_data' from logs/demo/classifier\u001b[0m\n", - "\u001b[32m2024-02-14 14:39:19.356\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcupbearer.utils.scripts\u001b[0m:\u001b[36mload_config\u001b[0m:\u001b[36m55\u001b[0m - \u001b[34m\u001b[1mLoading config 'model' from logs/demo/classifier\u001b[0m\n", - "100%|██████████| 15/15 [00:06<00:00, 2.32it/s]\n", - "\u001b[32m2024-02-14 14:39:26.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36msave_weights\u001b[0m:\u001b[36m205\u001b[0m - \u001b[1mSaving detector to logs/demo/detector/detector\u001b[0m\n", - "\u001b[32m2024-02-14 14:39:26.360\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mcupbearer.utils.scripts\u001b[0m:\u001b[36mload_config\u001b[0m:\u001b[36m55\u001b[0m - \u001b[34m\u001b[1mLoading config 'model' from logs/demo/classifier\u001b[0m\n", - "\u001b[32m2024-02-14 14:39:26.375\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcupbearer.utils.scripts\u001b[0m:\u001b[36mload_config\u001b[0m:\u001b[36m55\u001b[0m - \u001b[34m\u001b[1mLoading config 'detector' from logs/demo/detector\u001b[0m\n", - "\u001b[32m2024-02-14 14:39:26.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36mload_weights\u001b[0m:\u001b[36m209\u001b[0m - \u001b[1mLoading detector from logs/demo/detector/detector\u001b[0m\n", - "\u001b[32m2024-02-14 14:39:28.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m128\u001b[0m - \u001b[1mAUC_ROC: 1.0000\u001b[0m\n", - "\u001b[32m2024-02-14 14:39:28.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mAP: 1.0000\u001b[0m\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "scripts.train_detector(\n", - " scripts.TrainDetectorConfig(\n", - " path=Path(\"logs/demo/detector\"),\n", - " task=tasks.BackdoorDetection(\n", - " # We pass in the path of the trained classifier, as well as what backdoor\n", - " # to use. The backdoor is the same one we used for training in this case,\n", - " # we could also have stored that.\n", - " path=Path(\"logs/demo/classifier\"),\n", - " backdoor=data.CornerPixelBackdoor(),\n", - " ),\n", - " detector=detectors.MahalanobisConfig(),\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As we can see, this was a trivial detection task. As an ablation, we can test whether the detector specifically flags backdoored inputs as anomalous, or just anything out of distribution:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-02-14 19:38:05.113\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcupbearer.utils.scripts\u001b[0m:\u001b[36mload_config\u001b[0m:\u001b[36m55\u001b[0m - \u001b[34m\u001b[1mLoading config 'model' from logs/demo/classifier\u001b[0m\n", - "\u001b[32m2024-02-14 19:38:05.132\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcupbearer.utils.scripts\u001b[0m:\u001b[36mload_config\u001b[0m:\u001b[36m55\u001b[0m - \u001b[34m\u001b[1mLoading config 'detector' from logs/demo/detector\u001b[0m\n", - "\u001b[32m2024-02-14 19:38:05.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36mload_weights\u001b[0m:\u001b[36m209\u001b[0m - \u001b[1mLoading detector from logs/demo/detector/detector\u001b[0m\n", - "\u001b[32m2024-02-14 19:38:07.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m128\u001b[0m - 
\u001b[1mAUC_ROC: 0.9998\u001b[0m\n", - "\u001b[32m2024-02-14 19:38:07.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mAP: 0.9994\u001b[0m\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "scripts.eval_detector(\n", - " scripts.EvalDetectorConfig(\n", - " path=Path(\"logs/demo/detector\"),\n", - " task=tasks.CustomTask(\n", - " # TODO: this won't actually be used, plausibly Tasks should be split better\n", - " # into their training and test data.\n", - " train_data=data.MNIST(),\n", - " # Our anomalous data is the backdoor data from above, except we use the\n", - " # MNIST test split.\n", - " anomalous_data=data.BackdoorData(\n", - " original=data.MNIST(train=False),\n", - " backdoor=data.CornerPixelBackdoor(),\n", - " ),\n", - " # Our normal data is MNIST with added noise, this makes the images OOD\n", - " # but they shouldn't be mechanistically anomalous.\n", - " normal_test_data=data.MNIST(\n", - " train=False,\n", - " transforms={\n", - " \"to_tensor\": data.ToTensor(),\n", - " \"noise\": data.GaussianNoise(0.3),\n", - " },\n", - " ),\n", - " model=models.StoredModel(Path(\"logs/demo/classifier\")),\n", - " ),\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As we can see, adding noise did make the images quite a bit more \"anomalous\" according to our detector (the blue histogram has shifted to the right to higher anomaly scores). But we still have a very clear separation between these \"merely noisy\" inputs and the backdoored inputs. 
(This is a very easy to detect backdoor.)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "cupbearer", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/simple_demo.ipynb b/notebooks/simple_demo.ipynb new file mode 100644 index 00000000..3f6a2bc6 --- /dev/null +++ b/notebooks/simple_demo.ipynb @@ -0,0 +1,514 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from datetime import datetime\n", + "from pathlib import Path\n", + "\n", + "from cupbearer import data, detectors, models, scripts, tasks, utils" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def get_path(base=\"logs\", time=True):\n", + " if time:\n", + " timestamp = datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")\n", + " else:\n", + " timestamp = datetime.now().strftime(\"%Y-%m-%d\")\n", + " return Path(base) / timestamp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training a backdoored classifier\n", + "First, we train a classifier on poisoned data:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = data.MNIST()\n", + "val_data = data.MNIST(train=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "model = models.MLP(input_shape=(28, 28), hidden_dims=[128, 128], output_dim=10)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + " | Name | Type | Params\n", + "------------------------------------------------------\n", + "0 | model | MLP | 118 K \n", + "1 | train_accuracy | MulticlassAccuracy | 0 \n", + "2 | val_accuracy | ModuleList | 0 \n", + "3 | test_accuracy | ModuleList | 0 \n", + "------------------------------------------------------\n", + "118 K Trainable params\n", + "0 Non-trainable params\n", + "118 K Total params\n", + "0.473 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "48219c8359284728a9ec6a2144927c0a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | | 0/? 
[00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test/acc_epoch 0.9520999789237976 │\n", + "│ test/acc_step 0.9520999789237976 │\n", + "│ test/loss 0.15424881875514984 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/acc_epoch \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9520999789237976 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/acc_step \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9520999789237976 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.15424881875514984 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "scripts.eval_classifier(\n", + " scripts.EvalClassifierConfig(path=classifier_path, data=val_data, model=model)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These results will also have been stored to `logs/demo/classifier/eval.json` if we want to process them further (e.g. 
to compare many runs):" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'test/loss': 0.15424881875514984, 'test/acc_step': 0.9520999789237976, 'test/acc_epoch': 0.9520999789237976}]\n" + ] + } + ], + "source": [ + "with open(classifier_path / \"eval.json\") as f:\n", + " print(json.load(f))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training a backdoor detector\n", + "We'll train a very simple detector using the Mahalanobis distance. Our model is still in memory, but just for demonstration let's load it again:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "models.load(model, classifier_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 15/15 [00:06<00:00, 2.30it/s]\n", + "\u001b[32m2024-02-29 22:14:34.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36msave_weights\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mSaving detector to logs/demo/detector/2024-02-29_22-14-27/detector\u001b[0m\n", + "\u001b[32m2024-02-29 22:14:35.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mAUC_ROC: 1.0000\u001b[0m\n", + "\u001b[32m2024-02-29 22:14:35.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mAP: 1.0000\u001b[0m\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "scripts.train_detector(\n", + " scripts.TrainDetectorConfig(\n", + " path=(detector_path := get_path(\"logs/demo/detector\")),\n", + " task=tasks.backdoor_detection(\n", + " model, train_data, val_data, data.CornerPixelBackdoor()\n", + " ),\n", + " detector=detectors.MahalanobisDetector(save_path=detector_path),\n", + " train=detectors.MahalanobisTrainConfig(),\n", + " num_classes=10,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see, this was a trivial detection task. As an ablation, we can test whether the detector specifically flags backdoored inputs as anomalous, or just anything out of distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-02-29 22:14:35.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36mload_weights\u001b[0m:\u001b[36m232\u001b[0m - \u001b[1mLoading detector from logs/demo/detector/2024-02-29_22-14-27/detector\u001b[0m\n" + ] + } + ], + "source": [ + "detector = detectors.MahalanobisDetector(save_path=detector_path / \"ood_eval\")\n", + "# TODO: The fact that weights are saved in \"detector\" is just a convention used by\n", + "# the train_detector script, this is kind of weird.\n", + "detector.load_weights(detector_path / \"detector\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-02-29 22:14:36.725\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mAUC_ROC: 0.9934\u001b[0m\n", + "\u001b[32m2024-02-29 22:14:36.726\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mAP: 0.9779\u001b[0m\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "scripts.eval_detector(\n", + " scripts.EvalDetectorConfig(\n", + " detector=detector,\n", + " task=tasks.Task.from_separate_data(\n", + " model=model,\n", + " # TODO: this won't actually be used, plausibly Tasks should be split better\n", + " # into their training and test data.\n", + " trusted_data=train_data,\n", + " # Our anomalous data is the backdoor data from above, except we use the\n", + " # MNIST test split.\n", + " anomalous_test_data=data.BackdoorDataset(\n", + " original=val_data,\n", + " backdoor=data.CornerPixelBackdoor(),\n", + " ),\n", + " # Our normal data is MNIST with added noise, this makes the images OOD\n", + " # but they shouldn't be mechanistically anomalous.\n", + " clean_test_data=data.TransformDataset(val_data, data.GaussianNoise(0.3)),\n", + " ),\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see, adding noise did make the images quite a bit more \"anomalous\" according to our detector (the blue histogram has shifted to the right to higher anomaly scores). But we still have a very clear separation between these \"merely noisy\" inputs and the backdoored inputs. 
(This is a very easy to detect backdoor.)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cupbearer", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/cupbearer/data/__init__.py b/src/cupbearer/data/__init__.py index 479558ef..706d0ae8 100644 --- a/src/cupbearer/data/__init__.py +++ b/src/cupbearer/data/__init__.py @@ -4,14 +4,19 @@ MixedData, MixedDataConfig, SubsetConfig, - TrainDataFromRun, + TransformDataset, split_dataset_cfg, ) -from .adversarial import AdversarialExampleConfig -from .backdoor_data import BackdoorData -from .backdoors import Backdoor, CornerPixelBackdoor, NoiseBackdoor, WanetBackdoor -from .pytorch import CIFAR10, GTSRB, MNIST, PytorchConfig -from .toy_ambiguous_features import ToyFeaturesConfig +from .adversarial import AdversarialExampleDataset, make_adversarial_examples +from .backdoors import ( + Backdoor, + BackdoorDataset, + CornerPixelBackdoor, + NoiseBackdoor, + WanetBackdoor, +) +from .pytorch import CIFAR10, GTSRB, MNIST, PytorchDataset +from .toy_ambiguous_features import ToyDataset from .transforms import ( GaussianNoise, RandomCrop, diff --git a/src/cupbearer/data/_shared.py b/src/cupbearer/data/_shared.py index 911a77c6..fedbdce4 100644 --- a/src/cupbearer/data/_shared.py +++ b/src/cupbearer/data/_shared.py @@ -1,13 +1,11 @@ from abc import ABC, abstractproperty from dataclasses import dataclass, field -from pathlib import Path from typing import Optional from torch.utils.data import Dataset, Subset from torchvision.transforms import Compose from cupbearer.data.transforms import Transform -from cupbearer.utils.scripts import load_config from cupbearer.utils.utils import BaseConfig @@ -101,6 +99,27 
@@ def get_test_split(self) -> "DatasetConfig": # to the full dataset on build. +# def split_dataset(dataset: Dataset, *fractions: float) -> list[Subset]: +# if not fractions: +# raise ValueError("At least one fraction must be provided.") +# if not all(0 <= f <= 1 for f in fractions): +# raise ValueError("Fractions must be between 0 and 1.") +# if not sum(fractions) == 1: +# fractions = fractions + (1 - sum(fractions),) + +# total = len(dataset) + +# markers = [int(total * fraction) for fraction in fractions] + +# subsets = [] +# current_start = 0 +# for marker in markers: +# subsets.append(Subset(dataset, range(current_start, current_start + marker))) +# current_start += marker +# assert current_start == total +# return subsets + + def split_dataset_cfg(cfg: DatasetConfig, *fractions: float) -> list[SubsetConfig]: if not fractions: raise ValueError("At least one fraction must be provided.") @@ -133,57 +152,29 @@ def __getitem__(self, index): return self.transform(sample) -@dataclass -class TrainDataFromRun(DatasetConfig): - path: Path - - def get_test_split(self) -> DatasetConfig: - return self.cfg.get_test_split() - - def __post_init__(self): - self._cfg = None - - @property - def cfg(self): - if self._cfg is None: - # It's important we cache this, not mainly for performance reasons, - # but because otherwise we'd get different instances every time. - # Mostly that would be fine, but e.g. 
the Wanet backdoor transform - # actually has state not captured by its fields - # (it's not a "real" dataclass) - self._cfg = load_config(self.path, "train_data", DatasetConfig) - - return self._cfg - - @property - def num_classes(self): - return self.cfg.num_classes - - def _build(self) -> Dataset: - return self.cfg._build() - - def get_transforms(self) -> list[Transform]: - transforms = self.cfg.get_transforms() + super().get_transforms() - return transforms - - class MixedData(Dataset): def __init__( self, normal: Dataset, anomalous: Dataset, - normal_weight: float = 0.5, + normal_weight: Optional[float] = 0.5, return_anomaly_labels: bool = True, ): self.normal_data = normal self.anomalous_data = anomalous self.normal_weight = normal_weight self.return_anomaly_labels = return_anomaly_labels - self._length = min( - int(len(normal) / normal_weight), int(len(anomalous) / (1 - normal_weight)) - ) - self.normal_len = int(self._length * normal_weight) - self.anomalous_len = self._length - self.normal_len + if normal_weight is None: + self.normal_len = len(normal) + self.anomalous_len = len(anomalous) + self._length = self.normal_len + self.anomalous_len + else: + self._length = min( + int(len(normal) / normal_weight), + int(len(anomalous) / (1 - normal_weight)), + ) + self.normal_len = int(self._length * normal_weight) + self.anomalous_len = self._length - self.normal_len def __len__(self): return self._length diff --git a/src/cupbearer/data/adversarial.py b/src/cupbearer/data/adversarial.py index efad5001..7847c1c4 100644 --- a/src/cupbearer/data/adversarial.py +++ b/src/cupbearer/data/adversarial.py @@ -1,5 +1,4 @@ import os -from dataclasses import dataclass from pathlib import Path from typing import Optional @@ -9,41 +8,62 @@ from matplotlib import pyplot as plt from torch.utils.data import DataLoader, Dataset, Subset -from cupbearer.models import StoredModel from cupbearer.utils import utils -from . 
import DatasetConfig, TrainDataFromRun +class AdversarialExampleDataset(Dataset): + def __init__(self, advexes: torch.Tensor, labels: torch.Tensor): + self.advexes = advexes + self.labels = labels + + @classmethod + def from_file(cls, filepath: Path, num_examples=None): + data = utils.load(filepath) + assert isinstance(data, dict) + advexes = data["adv_inputs"] + labels = data["labels"] + + if num_examples is None: + num_examples = len(advexes) + if len(advexes) < num_examples: + raise ValueError( + f"Only {len(advexes)} adversarial examples exist, " + f"but {num_examples} were requested" + ) + + return cls(advexes[:num_examples], labels[:num_examples]) + + def __len__(self): + return len(self.advexes) -def make_adversarial_example( - path: Path, - filename: str, + def __getitem__(self, idx): + # Labels are the original ones. We need to return them mainly for implementation + # reasons: for eval, normal and anomalous data will be batched together, so + # since the normal data includes labels, the anomalous one needs to as well. + # TODO: Probably detectors should just never have access to labels during evals + # (none of the current ones make use of them anyway). If a detector needs them, + # it should use the model-generated labels, not ground truth ones. 
+ return self.advexes[idx], int(self.labels[idx]) + + +def make_adversarial_examples( + model: torch.nn.Module, + dataset: Dataset, + save_path: Path | str, batch_size: int = 128, eps: float = 8 / 255, max_examples: Optional[int] = None, success_threshold: float = 0.1, steps: int = 40, - use_test_data: bool = False, -): - save_path = path / f"{filename}.pt" +) -> AdversarialExampleDataset: + save_path = Path(save_path).with_suffix(".pt") if os.path.exists(save_path): logger.info("Adversarial examples already exist, skipping attack") - return - else: - logger.info( - "Adversarial examples not found, running attack with default settings" - ) - - model_cfg = StoredModel(path=path) - data_cfg = TrainDataFromRun(path=path) - if use_test_data: - data_cfg = data_cfg.get_test_split() + return AdversarialExampleDataset.from_file(save_path, num_examples=max_examples) - dataset = data_cfg.build() if max_examples: dataset = Subset(dataset, range(max_examples)) - image, _ = dataset[0] - model = model_cfg.build_model(input_shape=image.shape) + dataloader = DataLoader( dataset, batch_size=batch_size, @@ -57,6 +77,8 @@ def make_adversarial_example( # N.B. rob_acc is in percent while success_threshold is not if rob_acc > 100 * success_threshold: + # Make sure we delete the unsuccessful data so we don't load it later + save_path.unlink() raise RuntimeError( "Attack failed, new accuracy is" f" {rob_acc}% > {100 * success_threshold}%." 
@@ -74,67 +96,6 @@ def make_adversarial_example( except IndexError: pass plt.tight_layout() - plt.savefig(path / "adv_examples.pdf") - - -@dataclass -class AdversarialExampleConfig(DatasetConfig): - path: Path - attack_batch_size: int = 128 - success_threshold: float = 0.1 - steps: int = 40 - eps: float = 8 / 255 - use_test_data: bool = False - - def _build(self) -> Dataset: - filename = f"adv_examples_{'test' if self.use_test_data else 'train'}" - make_adversarial_example( - path=self.path, - filename=filename, - batch_size=self.attack_batch_size, - eps=self.eps, - max_examples=self.max_size, - success_threshold=self.success_threshold, - steps=self.steps, - use_test_data=self.use_test_data, - ) - - return AdversarialExampleDataset( - filepath=self.path / filename, num_examples=self.max_size - ) - - @property - def num_classes(self): - data_cfg = TrainDataFromRun(path=self.path) - return data_cfg.num_classes - - -class AdversarialExampleDataset(Dataset): - def __init__(self, filepath: Path, num_examples=None): - data = utils.load(filepath) - assert isinstance(data, dict) - self.examples = data["adv_inputs"] - self.labels = data["labels"] + plt.savefig(save_path.with_suffix(".pdf")) - if num_examples is None: - num_examples = len(self.examples) - self.num_examples = num_examples - if len(self.examples) < num_examples: - raise ValueError( - f"Only {len(self.examples)} adversarial examples exist, " - f"but {num_examples} were requested" - ) - - def __len__(self): - return self.num_examples - - def __getitem__(self, idx): - if idx >= self.num_examples: - raise IndexError(f"Index {idx} is out of range") - # Labels are the original ones. We need to return them mainly for implementation - # reasons: for eval, normal and anomalous data will be batched together, so - # since the normal data includes labels, the anomalous one needs to as well. 
- # TODO: Probably detectors should just never have access to labels during evals - # (none of the current ones make use of them anyway). If a detector needs them, - # it should use the model-generated labels, not ground truth ones. - return self.examples[idx], int(self.labels[idx]) + return AdversarialExampleDataset.from_file(save_path) diff --git a/src/cupbearer/data/backdoor_data.py b/src/cupbearer/data/backdoor_data.py deleted file mode 100644 index 44e79c4b..00000000 --- a/src/cupbearer/data/backdoor_data.py +++ /dev/null @@ -1,35 +0,0 @@ -# This needs to be in a separate file from backdoors.py because of circularity issues -# with the config groups. See __init__.py. -from dataclasses import dataclass - -from cupbearer.data import DatasetConfig -from cupbearer.data.backdoors import Backdoor -from cupbearer.data.transforms import Transform - - -@dataclass -class BackdoorData(DatasetConfig): - original: DatasetConfig - backdoor: Backdoor - - def get_test_split(self) -> DatasetConfig: - return BackdoorData( - original=self.original.get_test_split(), backdoor=self.backdoor - ) - - @property - def num_classes(self): - return self.original.num_classes - - def get_transforms(self) -> list[Transform]: - # We can't set this in __post_init__, since then the backdoor would be part of - # transforms in the config that's stored to disk. If we then load this config, - # another backdoor would be added to the transforms. 
- transforms = [] - transforms += self.original.get_transforms() - transforms += super().get_transforms() - transforms += [self.backdoor] - return transforms - - def _build(self): - return self.original._build() diff --git a/src/cupbearer/data/backdoors.py b/src/cupbearer/data/backdoors.py index 417e3488..04bb161a 100644 --- a/src/cupbearer/data/backdoors.py +++ b/src/cupbearer/data/backdoors.py @@ -6,8 +6,9 @@ import torch import torch.nn.functional as F from loguru import logger +from torch.utils.data import Dataset -from ._shared import Transform +from ._shared import Transform, TransformDataset @dataclass @@ -34,6 +35,15 @@ def __call__(self, sample: Tuple[torch.Tensor, int]) -> Tuple[torch.Tensor, int] return self.inject_backdoor(img), self.target_class +class BackdoorDataset(TransformDataset): + """Just a wrapper around TransformDataset with aliases and more specific types.""" + + def __init__(self, original: Dataset, backdoor: Backdoor): + super().__init__(dataset=original, transform=backdoor) + self.original = original + self.backdoor = backdoor + + @dataclass class CornerPixelBackdoor(Backdoor): """Adds a white/red pixel to the specified corner of the image and sets the target. diff --git a/src/cupbearer/data/pytorch.py b/src/cupbearer/data/pytorch.py index 804ce80e..43366d17 100644 --- a/src/cupbearer/data/pytorch.py +++ b/src/cupbearer/data/pytorch.py @@ -1,11 +1,9 @@ -import dataclasses from dataclasses import dataclass from torch.utils.data import Dataset from cupbearer.utils.utils import get_object, mutable_field -from . import DatasetConfig from .transforms import ( RandomCrop, RandomHorizontalFlip, @@ -17,29 +15,28 @@ @dataclass(kw_only=True) -class PytorchConfig(DatasetConfig): +class PytorchDataset(Dataset): name: str - # This is an abstractproperty on the parent class, but it's a bit more - # convenient to just make it a field here. 
- num_classes: int train: bool = True - transforms: dict[str, Transform] = mutable_field({"to_tensor": ToTensor()}) + transforms: list[Transform] = mutable_field([ToTensor()]) default_augmentations: bool = True - def get_test_split(self) -> DatasetConfig: - if self.train: - # TODO: this will keep the augmentations around, - # which we probably don't want? - return dataclasses.replace(self, train=False) - else: - raise ValueError("This dataset is already a test split.") - def __post_init__(self): - super().__post_init__() if self.default_augmentations and self.train: # Defaults from WaNet https://openreview.net/pdf?id=eEn8KTtJOx - self.transforms["random_crop"] = RandomCrop(p=0.8, padding=5) - self.transforms["random_rotation"] = RandomRotation(p=0.5, degrees=10) + self.transforms.append(RandomCrop(p=0.8, padding=5)) + self.transforms.append(RandomRotation(p=0.5, degrees=10)) + + self._dataset = self._build() + + def __len__(self): + return len(self._dataset) + + def __getitem__(self, index): + sample = self._dataset[index] + for transform in self.transforms: + sample = transform(sample) + return sample @property def _dataset_kws(self): @@ -57,31 +54,31 @@ def _build(self) -> Dataset: @dataclass -class MNIST(PytorchConfig): +class MNIST(PytorchDataset): name: str = "torchvision.datasets.MNIST" num_classes: int = 10 @dataclass -class CIFAR10(PytorchConfig): +class CIFAR10(PytorchDataset): name: str = "torchvision.datasets.CIFAR10" num_classes: int = 10 def __post_init__(self): super().__post_init__() if self.default_augmentations and self.train: - self.transforms["random_horizontal_flip"] = RandomHorizontalFlip(p=0.5) + self.transforms.append(RandomHorizontalFlip(p=0.5)) @dataclass -class GTSRB(PytorchConfig): +class GTSRB(PytorchDataset): name: str = "torchvision.datasets.GTSRB" num_classes: int = 43 - transforms: dict[str, Transform] = mutable_field( - { - "resize": Resize(size=(32, 32)), - "to_tensor": ToTensor(), - } + transforms: list[Transform] = mutable_field( 
+ [ + Resize(size=(32, 32)), + ToTensor(), + ] ) @property diff --git a/src/cupbearer/data/toy_ambiguous_features.py b/src/cupbearer/data/toy_ambiguous_features.py index dbbd176d..bd3c334c 100644 --- a/src/cupbearer/data/toy_ambiguous_features.py +++ b/src/cupbearer/data/toy_ambiguous_features.py @@ -1,21 +1,6 @@ -from dataclasses import dataclass - import numpy as np from torch.utils.data import Dataset -from ._shared import DatasetConfig - - -@dataclass -class ToyFeaturesConfig(DatasetConfig): - correlated: bool = True - size: int = 1000 - noise: float = 0.1 - num_classes: int = 2 - - def _build(self): - return ToyDataset(self.size, self.correlated, self.noise) - class ToyDataset(Dataset): def __init__(self, size: int, correlated: bool, noise: float): diff --git a/src/cupbearer/data/transforms.py b/src/cupbearer/data/transforms.py index 92a21144..6b15cde3 100644 --- a/src/cupbearer/data/transforms.py +++ b/src/cupbearer/data/transforms.py @@ -5,11 +5,8 @@ import torch import torchvision.transforms.functional as F -from cupbearer.utils.utils import BaseConfig - -@dataclass -class Transform(BaseConfig, ABC): +class Transform(ABC): @abstractmethod def __call__(self, sample): pass @@ -23,7 +20,6 @@ def load(self, basepath): pass -@dataclass class AdaptedTransform(Transform, ABC): """Adapt a transform designed to work on inputs to work on img, label pairs.""" @@ -51,8 +47,6 @@ def __call__(self, sample): return (img, *rest) -# Needs to be a dataclass to make simple_parsing's serialization work correctly. 
-@dataclass class ToTensor(AdaptedTransform): def __img_call__(self, img): out = F.to_tensor(img) diff --git a/src/cupbearer/detectors/__init__.py b/src/cupbearer/detectors/__init__.py index 04ea74ab..2da3d794 100644 --- a/src/cupbearer/detectors/__init__.py +++ b/src/cupbearer/detectors/__init__.py @@ -1,13 +1,11 @@ # ruff: noqa: F401 from .abstraction import AbstractionDetectorConfig from .anomaly_detector import AnomalyDetector -from .config import DetectorConfig, StoredDetector -from .finetuning import FinetuningConfig +from .finetuning import FinetuningAnomalyDetector from .statistical import ( - DebugMahalanobisConfig, - DebugQuantumEntropyConfig, - DebugSpectralSignatureConfig, - MahalanobisConfig, - QuantumEntropyConfig, - SpectralSignatureConfig, + ActivationCovarianceTrainConfig, + MahalanobisDetector, + MahalanobisTrainConfig, + QuantumEntropyDetector, + SpectralSignatureDetector, ) diff --git a/src/cupbearer/detectors/abstraction/__init__.py b/src/cupbearer/detectors/abstraction/__init__.py index 0c8f9075..ba48e172 100644 --- a/src/cupbearer/detectors/abstraction/__init__.py +++ b/src/cupbearer/detectors/abstraction/__init__.py @@ -5,7 +5,6 @@ from cupbearer.utils.train import TrainConfig from cupbearer.utils.utils import BaseConfig -from ..config import DetectorConfig from .abstraction import ( Abstraction, AutoencoderAbstraction, @@ -45,7 +44,7 @@ def build(self, model: HookedModel) -> AutoencoderAbstraction: @dataclass -class AbstractionDetectorConfig(DetectorConfig): +class AbstractionDetectorConfig: abstraction: AbstractionConfig = field( default_factory=LocallyConsistentAbstractionConfig ) diff --git a/src/cupbearer/detectors/anomaly_detector.py b/src/cupbearer/detectors/anomaly_detector.py index 4fbb8724..75c5569e 100644 --- a/src/cupbearer/detectors/anomaly_detector.py +++ b/src/cupbearer/detectors/anomaly_detector.py @@ -21,11 +21,9 @@ class AnomalyDetector(ABC): def __init__( self, - model: HookedModel, max_batch_size: int = 4096, save_path: 
Optional[Path | str] = None, ): - self.model = model # For storing the original detector variables when finetuning self._original_variables = None self.max_batch_size = max_batch_size @@ -33,6 +31,16 @@ def __init__( self.trained = False + def set_model(self, model: HookedModel): + # This is separate from __init__ because we want to be able to set the model + # automatically based on the task, instead of letting the user pass it in. + # On the other hand, it's separate from train() because we might need to set + # the model even when just using the detector for inference. + # + # Subclasses can implement more complex logic here. + self.model = model + self.trained = False + @abstractmethod def train( self, @@ -42,7 +50,7 @@ def train( num_classes: int, train_config: utils.BaseConfig, ): - """Train the anomaly detector with the given datasets. + """Train the anomaly detector with the given datasets on the given model. At least one of trusted_data or untrusted_data must be provided. """ @@ -153,6 +161,8 @@ def eval( if not self.save_path: return + self.save_path.mkdir(parents=True, exist_ok=True) + # Everything from here is just saving metrics and creating figures # (which we skip if they aren't going to be saved anyway). 
with open(self.save_path / "eval.json", "w") as f: @@ -223,25 +233,35 @@ def load_weights(self, path: str | Path): self._set_trained_variables(utils.load(path)) +def default_activation_name_func(model): + return model.default_names + + class ActivationBasedDetector(AnomalyDetector): """AnomalyDetector using activations.""" def __init__( self, - model: HookedModel, - activation_name_func: Callable[[HookedModel], Collection[str]] | None = None, + activation_name_func: str + | Callable[[HookedModel], Collection[str]] + | None = None, max_batch_size: int = 4096, save_path: Path | str | None = None, ): - super().__init__( - model=model, max_batch_size=max_batch_size, save_path=save_path - ) + super().__init__(max_batch_size=max_batch_size, save_path=save_path) + if activation_name_func is None: + activation_name_func = default_activation_name_func + elif isinstance(activation_name_func, str): + activation_name_func = utils.get_object(activation_name_func) + + assert callable(activation_name_func) # make type checker happy - def activation_name_func(model): - return model.default_names + self.activation_name_func = activation_name_func - self.activation_names = activation_name_func(model) + def set_model(self, model: HookedModel): + super().set_model(model) + self.activation_names = self.activation_name_func(model) def get_activations(self, batch): inputs = utils.inputs_from_batch(batch) diff --git a/src/cupbearer/detectors/config.py b/src/cupbearer/detectors/config.py deleted file mode 100644 index c04f163c..00000000 --- a/src/cupbearer/detectors/config.py +++ /dev/null @@ -1,57 +0,0 @@ -from abc import ABC, abstractmethod -from collections.abc import Collection -from dataclasses import dataclass, field -from pathlib import Path -from typing import Callable, Optional - -from loguru import logger - -from cupbearer.detectors.anomaly_detector import AnomalyDetector -from cupbearer.models.models import HookedModel -from cupbearer.utils.scripts import load_config -from 
cupbearer.utils.train import TrainConfig -from cupbearer.utils.utils import BaseConfig, get_object - - -@dataclass(kw_only=True) -class DetectorConfig(BaseConfig, ABC): - train: TrainConfig = field(default_factory=TrainConfig) - - @abstractmethod - def build(self, model: HookedModel, save_dir: Path | None) -> AnomalyDetector: - pass - - -# TODO: this feels like unnecessary indirection, can maybe integrate this elsewhere -@dataclass(kw_only=True) -class ActivationBasedDetectorConfig(DetectorConfig): - name_func: Optional[str] = None - - def resolve_name_func(self) -> Callable[[HookedModel], Collection[str]] | None: - if isinstance(self.name_func, str): - return get_object(self.name_func) - return self.name_func - - -@dataclass(kw_only=True) -class StoredDetector(DetectorConfig): - path: Path - - def build(self, model, save_dir) -> AnomalyDetector: - detector_cfg = load_config(self.path, "detector", DetectorConfig) - if isinstance(detector_cfg, StoredDetector) and detector_cfg.path == self.path: - raise RuntimeError( - f"It looks like the detector you're trying to load from {self.path} " - "is a stored detector pointing to itself. This probably means " - "a configuration file is broken." - ) - detector = detector_cfg.build(model, save_dir) - try: - detector.load_weights(self.path / "detector") - except FileNotFoundError: - logger.warning( - f"Didn't find weights for detector from {self.path}. " - "This is normal if the detector doesn't have learned parameters." 
- ) - - return detector diff --git a/src/cupbearer/detectors/finetuning.py b/src/cupbearer/detectors/finetuning.py index 24ff4bbd..d0f2e014 100644 --- a/src/cupbearer/detectors/finetuning.py +++ b/src/cupbearer/detectors/finetuning.py @@ -1,20 +1,21 @@ import copy import warnings -from dataclasses import dataclass import torch import torch.nn.functional as F from cupbearer.detectors.anomaly_detector import AnomalyDetector -from cupbearer.detectors.config import DetectorConfig from cupbearer.scripts._shared import Classifier from cupbearer.utils import utils from cupbearer.utils.train import TrainConfig class FinetuningAnomalyDetector(AnomalyDetector): - def __init__(self, model, max_batch_size, save_path): - super().__init__(model, max_batch_size, save_path) + def __init__(self, max_batch_size, save_path): + super().__init__(max_batch_size, save_path) + + def set_model(self, model): + super().set_model(model) # We might as well make a copy here already, since whether we'll train this # detector or load weights for inference, we'll need to copy in both cases. 
self.finetuned_model = copy.deepcopy(self.model) @@ -92,13 +93,3 @@ def _get_trained_variables(self, saving: bool = False): def _set_trained_variables(self, variables): self.finetuned_model.load_state_dict(variables) - - -@dataclass -class FinetuningConfig(DetectorConfig): - def build(self, model, save_dir) -> FinetuningAnomalyDetector: - return FinetuningAnomalyDetector( - model=model, - max_batch_size=self.train.max_batch_size, - save_path=save_dir, - ) diff --git a/src/cupbearer/detectors/statistical/__init__.py b/src/cupbearer/detectors/statistical/__init__.py index 736c2b11..76bc19a2 100644 --- a/src/cupbearer/detectors/statistical/__init__.py +++ b/src/cupbearer/detectors/statistical/__init__.py @@ -1,7 +1,4 @@ -from dataclasses import dataclass, field - -from cupbearer.detectors.config import ActivationBasedDetectorConfig - +# flake8: noqa from .mahalanobis_detector import MahalanobisDetector from .que_detector import QuantumEntropyDetector from .spectral_detector import SpectralSignatureDetector @@ -11,65 +8,3 @@ DebugMahalanobisTrainConfig, MahalanobisTrainConfig, ) - - -@dataclass -class MahalanobisConfig(ActivationBasedDetectorConfig): - train: MahalanobisTrainConfig = field(default_factory=MahalanobisTrainConfig) - - def build(self, model, save_dir) -> MahalanobisDetector: - return MahalanobisDetector( - model=model, - activation_name_func=self.resolve_name_func(), - max_batch_size=self.train.max_batch_size, - save_path=save_dir, - ) - - -@dataclass -class DebugMahalanobisConfig(MahalanobisConfig): - train: MahalanobisTrainConfig = field(default_factory=DebugMahalanobisTrainConfig) - - -@dataclass -class SpectralSignatureConfig(ActivationBasedDetectorConfig): - train: ActivationCovarianceTrainConfig = field( - default_factory=ActivationCovarianceTrainConfig - ) - - def build(self, model, save_dir) -> SpectralSignatureDetector: - return SpectralSignatureDetector( - model=model, - activation_name_func=self.resolve_name_func(), - 
max_batch_size=self.train.max_batch_size, - save_path=save_dir, - ) - - -@dataclass -class DebugSpectralSignatureConfig(SpectralSignatureConfig): - train: ActivationCovarianceTrainConfig = field( - default_factory=DebugActivationCovarianceTrainConfig - ) - - -@dataclass -class QuantumEntropyConfig(ActivationBasedDetectorConfig): - train: ActivationCovarianceTrainConfig = field( - default_factory=ActivationCovarianceTrainConfig - ) - - def build(self, model, save_dir) -> QuantumEntropyDetector: - return QuantumEntropyDetector( - model=model, - activation_name_func=self.resolve_name_func(), - max_batch_size=self.train.max_batch_size, - save_path=save_dir, - ) - - -@dataclass -class DebugQuantumEntropyConfig(QuantumEntropyConfig): - train: ActivationCovarianceTrainConfig = field( - default_factory=DebugActivationCovarianceTrainConfig - ) diff --git a/src/cupbearer/models/__init__.py b/src/cupbearer/models/__init__.py index 1c847846..f1585f0e 100644 --- a/src/cupbearer/models/__init__.py +++ b/src/cupbearer/models/__init__.py @@ -1,91 +1,19 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass +# ruff: noqa: F401 from pathlib import Path import torch -from cupbearer.utils.scripts import load_config -from cupbearer.utils.utils import BaseConfig, mutable_field - from .hooked_model import HookedModel -from .models import CNN, MLP, PreActBlock, PreActResNet - - -@dataclass(kw_only=True) -class ModelConfig(BaseConfig, ABC): - @abstractmethod - def build_model(self, input_shape: list[int] | tuple[int]) -> HookedModel: - pass - - -@dataclass -class StoredModel(ModelConfig): - path: Path - - def build_model(self, input_shape) -> HookedModel: - model_cfg = load_config(self.path, "model", ModelConfig) - model = model_cfg.build_model(input_shape) - - # Our convention is that LightningModules store the actual pytorch model - # as a `model` attribute. We use the last checkpoint (generated via the - # save_last=True option to the ModelCheckpoint callback). 
- state_dict = torch.load(self.path / "checkpoints" / "last.ckpt")["state_dict"] - # We want the state_dict for the 'model' submodule, so remove - # the 'model.' prefix from the keys. - state_dict = {k[6:]: v for k, v in state_dict.items() if k.startswith("model.")} - assert isinstance(model, torch.nn.Module) - model.load_state_dict(state_dict) - return model - - -@dataclass -class MLPConfig(ModelConfig): - output_dim: int = 10 - hidden_dims: list[int] = mutable_field([256, 256]) - - def build_model(self, input_shape: list[int] | tuple[int]) -> HookedModel: - return MLP( - input_shape=input_shape, - output_dim=self.output_dim, - hidden_dims=self.hidden_dims, - ) - - -@dataclass -class DebugMLPConfig(MLPConfig): - # TODO: we need at least two layers here because abstractions currently - # only work in that case. Abstraction implementation should be fixed. - # Additionally, we make network with some width to reduce chance that all - # neurons are dead. - hidden_dims: list[int] = mutable_field([5, 5]) - - -@dataclass -class CNNConfig(ModelConfig): - output_dim: int = 10 - channels: list[int] = mutable_field([32, 64]) - dense_dims: list[int] = mutable_field([256, 256]) - - def build_model(self, input_shape: list[int] | tuple[int]) -> HookedModel: - return CNN( - input_shape=input_shape, - output_dim=self.output_dim, - channels=self.channels, - dense_dims=self.dense_dims, - ) - - -@dataclass -class DebugCNNConfig(CNNConfig): - channels: list[int] = mutable_field([2]) - dense_dims: list[int] = mutable_field([2]) - - -@dataclass -class ResnetConfig(ModelConfig): - output_dim: int = 10 - # ResNet18 default: - num_blocks: list[int] = mutable_field([2, 2, 2, 2]) - - def build_model(self, input_shape) -> HookedModel: - return PreActResNet(PreActBlock, self.num_blocks, num_classes=self.output_dim) +from .models import CNN, MLP, PreActResNet + + +def load(model: HookedModel, path: Path | str): + path = Path(path) + # Our convention is that LightningModules store the actual 
pytorch model + # as a `model` attribute. We use the last checkpoint (generated via the + # save_last=True option to the ModelCheckpoint callback). + state_dict = torch.load(path / "checkpoints" / "last.ckpt")["state_dict"] + # We want the state_dict for the 'model' submodule, so remove + # the 'model.' prefix from the keys. + state_dict = {k[6:]: v for k, v in state_dict.items() if k.startswith("model.")} + model.load_state_dict(state_dict) diff --git a/src/cupbearer/scripts/_shared.py b/src/cupbearer/scripts/_shared.py index a25e13d5..62729606 100644 --- a/src/cupbearer/scripts/_shared.py +++ b/src/cupbearer/scripts/_shared.py @@ -2,43 +2,29 @@ import torch from torchmetrics.classification import Accuracy -from cupbearer.models import HookedModel, ModelConfig +from cupbearer.models import HookedModel from cupbearer.utils.optimizers import OptimizerConfig class Classifier(L.LightningModule): def __init__( self, - model: ModelConfig | HookedModel, + model: HookedModel, num_classes: int, optim_cfg: OptimizerConfig, - input_shape: tuple[int, ...] | None = None, val_loader_names: list[str] | None = None, test_loader_names: list[str] | None = None, save_hparams: bool = True, ): super().__init__() - if isinstance(model, HookedModel) and save_hparams: - raise ValueError( - "Cannot save hyperparameters when model is already instantiated. " - "Either pass a ModelConfig or set save_hparams=False." - ) if save_hparams: - self.save_hyperparameters() + self.save_hyperparameters(ignore=["model"]) if val_loader_names is None: val_loader_names = [] if test_loader_names is None: test_loader_names = [] - if isinstance(model, HookedModel): - self.model = model - elif input_shape is None: - raise ValueError( - "Must provide input_shape when passing a ModelConfig " - "instead of an instantiated model." 
- ) - else: - self.model = model.build_model(input_shape=input_shape) + self.model = model self.optim_cfg = optim_cfg self.val_loader_names = val_loader_names self.test_loader_names = test_loader_names diff --git a/src/cupbearer/scripts/conf/eval_classifier_conf.py b/src/cupbearer/scripts/conf/eval_classifier_conf.py index fb17bbfb..10a365b1 100644 --- a/src/cupbearer/scripts/conf/eval_classifier_conf.py +++ b/src/cupbearer/scripts/conf/eval_classifier_conf.py @@ -1,13 +1,15 @@ from dataclasses import dataclass from typing import Optional -from cupbearer.data import DatasetConfig, TrainDataFromRun +from cupbearer.models import HookedModel from cupbearer.utils.scripts import ScriptConfig +from torch.utils.data import Dataset @dataclass(kw_only=True) class Config(ScriptConfig): - data: DatasetConfig | None = None + data: Dataset + model: HookedModel max_batches: Optional[int] = None max_batch_size: int = 2048 save_config: bool = False @@ -18,13 +20,6 @@ class Config(ScriptConfig): def __post_init__(self): if self.path is None: raise ValueError("Path must be set") - if self.data is None: - self.data = TrainDataFromRun(self.path) - - @property - def num_classes(self): - assert self.data is not None - return self.data.num_classes @dataclass diff --git a/src/cupbearer/scripts/conf/eval_detector_conf.py b/src/cupbearer/scripts/conf/eval_detector_conf.py index ca5f7d2c..33d6bbf8 100644 --- a/src/cupbearer/scripts/conf/eval_detector_conf.py +++ b/src/cupbearer/scripts/conf/eval_detector_conf.py @@ -1,19 +1,12 @@ from dataclasses import dataclass -from cupbearer.detectors import DetectorConfig, StoredDetector -from cupbearer.tasks import TaskConfig +from cupbearer.detectors import AnomalyDetector +from cupbearer.tasks import Task from cupbearer.utils.scripts import ScriptConfig @dataclass(kw_only=True) class Config(ScriptConfig): - task: TaskConfig - detector: DetectorConfig | None = None - save_config: bool = False + task: Task + detector: AnomalyDetector pbar: bool = False 
- - def __post_init__(self): - if self.detector is None: - if self.path is None: - raise ValueError("Path or detector must be set") - self.detector = StoredDetector(path=self.path) diff --git a/src/cupbearer/scripts/conf/train_classifier_conf.py b/src/cupbearer/scripts/conf/train_classifier_conf.py index b8209e61..5fcd3473 100644 --- a/src/cupbearer/scripts/conf/train_classifier_conf.py +++ b/src/cupbearer/scripts/conf/train_classifier_conf.py @@ -1,41 +1,36 @@ from dataclasses import dataclass, field -from cupbearer.data import BackdoorData, DatasetConfig, WanetBackdoor -from cupbearer.models import CNNConfig, MLPConfig, ModelConfig +from cupbearer.data import BackdoorDataset, WanetBackdoor +from cupbearer.models import HookedModel from cupbearer.utils.scripts import ScriptConfig from cupbearer.utils.train import DebugTrainConfig, TrainConfig +from torch.utils.data import Dataset @dataclass(kw_only=True) class Config(ScriptConfig): - model: ModelConfig + model: HookedModel train_config: TrainConfig = field(default_factory=TrainConfig) - train_data: DatasetConfig - val_data: dict[str, DatasetConfig] = field(default_factory=dict) + train_data: Dataset + num_classes: int + val_data: dict[str, Dataset] = field(default_factory=dict) # If True, returns the Lighting Trainer object (which has the model and a bunch # of other information, this may be useful when using interactively). # Otherwise (default), return only a dictionary of latest metrics, to avoid e.g. # submitit trying to pickle the entire Trainer object. return_trainer: bool = False - @property - def num_classes(self): - return self.train_data.num_classes - def __post_init__(self): super().__post_init__() - # HACK: Need to add new architectures here as they get implemented. 
- if isinstance(self.model, (MLPConfig, CNNConfig)): - self.model.output_dim = self.num_classes # For datasets that are not necessarily deterministic based only on # arguments, this is where validation sets are set to follow train_data - if isinstance(self.train_data, BackdoorData): + if isinstance(self.train_data, BackdoorDataset): for name, val_config in self.val_data.items(): # WanetBackdoor if ( isinstance(self.train_data.backdoor, WanetBackdoor) - and isinstance(val_config, BackdoorData) + and isinstance(val_config, BackdoorDataset) and isinstance(val_config.backdoor, WanetBackdoor) ): str_factor = ( diff --git a/src/cupbearer/scripts/conf/train_detector_conf.py b/src/cupbearer/scripts/conf/train_detector_conf.py index 0b51379c..84a362b0 100644 --- a/src/cupbearer/scripts/conf/train_detector_conf.py +++ b/src/cupbearer/scripts/conf/train_detector_conf.py @@ -1,11 +1,15 @@ from dataclasses import dataclass -from cupbearer.detectors import DetectorConfig -from cupbearer.tasks import TaskConfig +from cupbearer.detectors import AnomalyDetector +from cupbearer.tasks import Task from cupbearer.utils.scripts import ScriptConfig +from cupbearer.utils.train import TrainConfig +from cupbearer.utils.utils import BaseConfig, mutable_field @dataclass(kw_only=True) class Config(ScriptConfig): - task: TaskConfig - detector: DetectorConfig + task: Task + detector: AnomalyDetector + num_classes: int + train: BaseConfig = mutable_field(TrainConfig()) diff --git a/src/cupbearer/scripts/eval_classifier.py b/src/cupbearer/scripts/eval_classifier.py index 5a2b8259..021d866d 100644 --- a/src/cupbearer/scripts/eval_classifier.py +++ b/src/cupbearer/scripts/eval_classifier.py @@ -1,32 +1,34 @@ import json import lightning as L -from cupbearer.scripts._shared import Classifier -from cupbearer.utils.scripts import script from loguru import logger from torch.utils.data import DataLoader +from cupbearer.data import BackdoorDataset +from cupbearer.scripts._shared import Classifier +from 
cupbearer.utils.scripts import script + from .conf.eval_classifier_conf import Config @script def main(cfg: Config): - assert cfg.data is not None # make type checker happy assert cfg.path is not None # make type checker happy - for trafo in cfg.data.get_transforms(): - logger.debug(f"Loading transform: {trafo}") - trafo.load(cfg.path) + if isinstance(cfg.data, BackdoorDataset): + logger.debug(f"Loading transform: {cfg.data.backdoor}") + cfg.data.backdoor.load(cfg.path) - dataset = cfg.data.build() dataloader = DataLoader( - dataset, + cfg.data, batch_size=cfg.max_batch_size, shuffle=False, ) classifier = Classifier.load_from_checkpoint( - cfg.path / "checkpoints" / "last.ckpt", test_loader_names=["test"] + cfg.path / "checkpoints" / "last.ckpt", + model=cfg.model, + test_loader_names=["test"], ) trainer = L.Trainer( logger=False, diff --git a/src/cupbearer/scripts/eval_detector.py b/src/cupbearer/scripts/eval_detector.py index fe20b245..31217a7e 100644 --- a/src/cupbearer/scripts/eval_detector.py +++ b/src/cupbearer/scripts/eval_detector.py @@ -6,18 +6,12 @@ def main(cfg: Config): assert cfg.detector is not None # make type checker happy # Init - train_data = cfg.task.trusted_data.build() - test_data = cfg.task.test_data.build() - # train_data[0] is the first sample, which is (input, ...), so we need another [0] - example_input = train_data[0][0] - model = cfg.task.build_model(input_shape=example_input.shape) - detector = cfg.detector.build( - model=model, - save_dir=cfg.path, - ) + train_data = cfg.task.trusted_data + test_data = cfg.task.test_data + cfg.detector.set_model(cfg.task.model) # Evaluate detector - detector.eval( + cfg.detector.eval( train_dataset=train_data, test_dataset=test_data, pbar=cfg.pbar, diff --git a/src/cupbearer/scripts/train_classifier.py b/src/cupbearer/scripts/train_classifier.py index 5eb56eda..dce13a9e 100644 --- a/src/cupbearer/scripts/train_classifier.py +++ b/src/cupbearer/scripts/train_classifier.py @@ -4,6 +4,7 @@ import 
lightning as L from lightning.pytorch.callbacks import ModelCheckpoint +from cupbearer.data import BackdoorDataset from cupbearer.scripts._shared import Classifier from cupbearer.utils.scripts import script @@ -12,27 +13,20 @@ @script def main(cfg: Config) -> dict[str, Any] | L.Trainer: - dataset = cfg.train_data.build() - - train_loader = cfg.train_config.get_dataloader(dataset) + train_loader = cfg.train_config.get_dataloader(cfg.train_data) val_loaders = { - k: cfg.train_config.get_dataloader(v.build(), train=False) + k: cfg.train_config.get_dataloader(v, train=False) for k, v in cfg.val_data.items() } - # Store transforms to be used in training - if cfg.path: - for trafo in cfg.train_data.get_transforms(): - trafo.store(cfg.path) - - # Dataloader returns images and labels, only images get passed to model - images, _ = next(iter(train_loader)) - example_input = images[0] + # The WaNet backdoor (and maybe others in the future) has randomly generated state + # that needs to be stored if we want to load it later. 
+ if isinstance(cfg.train_data, BackdoorDataset): + cfg.train_data.backdoor.store(cfg.path) classifier = Classifier( model=cfg.model, - input_shape=example_input.shape, num_classes=cfg.num_classes, optim_cfg=cfg.train_config.optimizer, val_loader_names=list(val_loaders.keys()), diff --git a/src/cupbearer/scripts/train_detector.py b/src/cupbearer/scripts/train_detector.py index f8641e3e..fbc4151c 100644 --- a/src/cupbearer/scripts/train_detector.py +++ b/src/cupbearer/scripts/train_detector.py @@ -6,38 +6,19 @@ @script def main(cfg: Config): - trusted_data = untrusted_data = None + cfg.detector.set_model(cfg.task.model) - if cfg.task.allow_trusted: - trusted_data = cfg.task.trusted_data.build() - if len(trusted_data) == 0: - trusted_data = None - if cfg.task.allow_untrusted: - untrusted_data = cfg.task.untrusted_data.build() - if len(untrusted_data) == 0: - untrusted_data = None - - example_data = trusted_data or untrusted_data - if example_data is None: - raise ValueError( - f"{type(cfg.task).__name__} does not allow trusted nor untrusted data." 
- ) - # example_data[0] is the first sample, which is (input, ...), so we need another - # [0] index - example_input = example_data[0][0] - model = cfg.task.build_model(input_shape=example_input.shape) - detector = cfg.detector.build(model=model, save_dir=cfg.path) - - detector.train( - trusted_data=trusted_data, - untrusted_data=untrusted_data, - num_classes=cfg.task.num_classes, - train_config=cfg.detector.train, + cfg.detector.train( + trusted_data=cfg.task.trusted_data, + untrusted_data=cfg.task.untrusted_train_data, + num_classes=cfg.num_classes, + train_config=cfg.train, ) - if cfg.path: - detector.save_weights(cfg.path / "detector") + path = cfg.detector.save_path + if path: + cfg.detector.save_weights(path / "detector") eval_cfg = EvalDetectorConfig( - path=cfg.path, + detector=cfg.detector, task=cfg.task, seed=cfg.seed, ) diff --git a/src/cupbearer/tasks/__init__.py b/src/cupbearer/tasks/__init__.py index 09baff94..635f0049 100644 --- a/src/cupbearer/tasks/__init__.py +++ b/src/cupbearer/tasks/__init__.py @@ -1,5 +1,4 @@ # ruff: noqa: F401 -from ._config import CustomTask, TaskConfig -from .adversarial_examples import AdversarialExampleTask -from .backdoor_detection import BackdoorDetection -from .toy_features import ToyFeaturesTask +from ._config import Task +from .adversarial_examples import adversarial_examples +from .backdoor_detection import backdoor_detection diff --git a/src/cupbearer/tasks/_config.py b/src/cupbearer/tasks/_config.py index b6309f2d..d5cc8f38 100644 --- a/src/cupbearer/tasks/_config.py +++ b/src/cupbearer/tasks/_config.py @@ -1,216 +1,95 @@ -from abc import ABC, abstractmethod -from copy import deepcopy from dataclasses import dataclass -from typing import Optional +from typing import Callable, Optional -from cupbearer.data import ( - DatasetConfig, - MixedDataConfig, - split_dataset_cfg, -) -from cupbearer.models import ModelConfig +from torch.utils.data import Dataset, random_split + +from cupbearer.data import MixedData from 
cupbearer.models.models import HookedModel @dataclass(kw_only=True) -class TaskConfig(ABC): - # Proportion of clean data in untrusted datasets: - clean_test_weight: float = 0.5 - clean_train_weight: float = 0.5 - # Whether to allow using trusted and untrusted data for training: - allow_trusted: bool = True - allow_untrusted: bool = True - - max_train_size: Optional[int] = None - max_test_size: Optional[int] = None - - def __post_init__(self): - # We'll only actually instantiate these when we need them, in case relevant - # attributes get changed after initialization. - - # TODO: I think this is no longer necessary after the config refactor. - self._trusted_data: Optional[DatasetConfig] = None - self._untrusted_data: Optional[DatasetConfig] = None - self._test_data: Optional[MixedDataConfig] = None - self._model: Optional[ModelConfig] = None - - def _get_trusted_data(self) -> DatasetConfig: - raise NotImplementedError - - def _get_clean_untrusted_data(self) -> DatasetConfig: - raise NotImplementedError - - def _get_anomalous_data(self) -> DatasetConfig: - raise NotImplementedError - - # The following two methods don't need to be implemented, the task will use - # get_test_split() on the untrusted data by default. - def _get_clean_test_data(self) -> DatasetConfig: - raise NotImplementedError - - def _get_anomalous_test_data(self) -> DatasetConfig: - raise NotImplementedError - - def _get_model(self) -> ModelConfig: - raise NotImplementedError - - @property - def trusted_data(self) -> DatasetConfig: - """Clean data that may be used for training.""" - if not self.allow_trusted: - raise ValueError( - "Using trusted training data is not allowed for this task." 
- ) - if not self._trusted_data: - self._trusted_data = deepcopy(self._get_trusted_data()) - self._trusted_data.max_size = self.max_train_size - return self._trusted_data - - @property - def untrusted_data(self) -> DatasetConfig: - """A mix of clean and anomalous data that may be used for training.""" - if not self.allow_untrusted: - raise ValueError( - "Using untrusted training data is not allowed for this task." - ) - if not self._untrusted_data: - anomalous_data = self._get_anomalous_data() - clean_data = self._get_clean_untrusted_data() - self._untrusted_data = MixedDataConfig( - normal=clean_data, +class Task: + trusted_data: Dataset + untrusted_train_data: Optional[MixedData] = None + test_data: MixedData + model: HookedModel + + @classmethod + def from_separate_data( + cls, + model: HookedModel, + trusted_data: Dataset, + clean_test_data: Dataset, + anomalous_test_data: Dataset, + clean_untrusted_data: Optional[Dataset] = None, + anomalous_data: Optional[Dataset] = None, + clean_train_weight: Optional[float] = 0.5, + clean_test_weight: Optional[float] = 0.5, + ): + untrusted_train_data = None + if clean_untrusted_data and anomalous_data: + untrusted_train_data = MixedData( + normal=clean_untrusted_data, anomalous=anomalous_data, - normal_weight=self.clean_train_weight, - max_size=self.max_train_size, + normal_weight=clean_train_weight, return_anomaly_labels=False, ) - return self._untrusted_data - - def build_model(self, input_shape: list[int] | tuple[int]) -> HookedModel: - if not self._model: - self._model = self._get_model() - return self._model.build_model(input_shape) - - @property - def test_data(self) -> MixedDataConfig: - if not self._test_data: - try: - anomalous_data = self._get_anomalous_test_data() - clean_data = self._get_clean_test_data() - except NotImplementedError: - anomalous_data = self._get_anomalous_data().get_test_split() - clean_data = self._get_clean_untrusted_data().get_test_split() - self._test_data = MixedDataConfig( - 
normal=clean_data, - anomalous=anomalous_data, - normal_weight=self.clean_test_weight, - max_size=self.max_test_size, - ) - return self._test_data - - @property - def num_classes(self): - try: - return self.trusted_data.num_classes - except ValueError: - return self.untrusted_data.num_classes - - -@dataclass -class FuzzedTask(TaskConfig): - """A task where the anomalous inputs are some modified version of clean ones.""" - - trusted_fraction: float = 1.0 - - def __post_init__(self): - super().__post_init__() - - # First we get the base (unmodified) data and its test split. - train_data = self._get_base_data() - test_data = train_data.get_test_split() - # We split the training data up into three parts: - # 1. A `trusted_fraction` part will be used as trusted data. - # 2. Out of the remaining part, a `clean_untrusted_fraction` part will be used - # as clean untrusted data. - # 3. The rest will be used as anomalous training data. - ( - self._trusted_data, - self._clean_untrusted_data, - _anomalous_base, - ) = split_dataset_cfg( - train_data, - self.trusted_fraction, - # Using clean_train_weight here means we'll end up using all our data, - # since this is also what's used later in the MixedDataConfig. - (1 - self.trusted_fraction) * self.clean_train_weight, - (1 - self.trusted_fraction) * (1 - self.clean_train_weight), + test_data = MixedData( + normal=clean_test_data, + anomalous=anomalous_test_data, + normal_weight=clean_test_weight, ) - - # Similarly, we plit up the test data, except there is no trusted subset. 
- self._clean_test_data, _anomalous_test_base = split_dataset_cfg( - test_data, - self.clean_test_weight, + return Task( + trusted_data=trusted_data, + untrusted_train_data=untrusted_train_data, + test_data=test_data, + model=model, ) - self._anomalous_data = self.fuzz(_anomalous_base) - self._anomalous_test_data = self.fuzz(_anomalous_test_base) - - @abstractmethod - def fuzz(self, data: DatasetConfig) -> DatasetConfig: - pass - - @abstractmethod - def _get_base_data(self) -> DatasetConfig: - pass - - def _get_trusted_data(self) -> DatasetConfig: - return self._trusted_data - - def _get_clean_untrusted_data(self) -> DatasetConfig: - return self._clean_untrusted_data - - def _get_anomalous_data(self) -> DatasetConfig: - return self._anomalous_data - - def _get_clean_test_data(self) -> DatasetConfig: - return self._clean_test_data - - def _get_anomalous_test_data(self) -> DatasetConfig: - return self._anomalous_test_data - - -@dataclass(kw_only=True) -class CustomTask(TaskConfig): - """A fully customizable task config, where all datasets are specified directly.""" - - trusted_data: DatasetConfig - clean_untrusted_data: DatasetConfig - anomalous_data: DatasetConfig - model: ModelConfig - - def _get_clean_untrusted_data(self) -> DatasetConfig: - return self.clean_untrusted_data - - def _get_trusted_data(self) -> DatasetConfig: - return self.trusted_data - - def _get_anomalous_data(self) -> DatasetConfig: - return self.anomalous_data - - def _get_model(self) -> ModelConfig: - return self.model - - -@dataclass(kw_only=True) -class DebugTaskConfig(TaskConfig): - """Debug configs for specific tasks can inherit from this for convenience. - - Note that children should inherit this first, to make sure MRO picks up on - the overriden defaults below! 
- """ + @classmethod + def from_base_data( + cls, + model: HookedModel, + train_data: Dataset, + test_data: Dataset, + anomaly_func: Callable[[Dataset, bool], Dataset], + clean_untrusted_func: Optional[Callable[[Dataset], Dataset]] = None, + trusted_fraction: float = 1.0, + clean_train_weight: float = 0.5, + clean_test_weight: float = 0.5, + ): + if trusted_fraction == 1.0: + trusted_data = train_data + clean_untrusted_data = anomalous_data = None + else: + untrusted_fraction = 1 - trusted_fraction + train_fractions = ( + trusted_fraction, + untrusted_fraction * clean_train_weight, + untrusted_fraction * (1 - clean_train_weight), + ) + trusted_data, clean_untrusted_data, anomalous_data = random_split( + train_data, train_fractions + ) - # Needs to be at least two because otherwise Mahalanobis distance scores are - # NaN. - max_train_size: int = 2 - # Needs to be at least two so it can contain both normal and anomalous data. - max_test_size: int = 2 + if clean_untrusted_func: + clean_untrusted_data = clean_untrusted_func(clean_untrusted_data) + # Second argument to anomaly_func is whether this is training data + anomalous_data = anomaly_func(anomalous_data, True) + + test_fractions = (clean_test_weight, 1 - clean_test_weight) + clean_test_data, anomalous_test_data = random_split(test_data, test_fractions) + + if clean_untrusted_func: + clean_test_data = clean_untrusted_func(clean_test_data) + anomalous_test_data = anomaly_func(anomalous_test_data, False) + + return Task.from_separate_data( + model=model, + trusted_data=trusted_data, + clean_untrusted_data=clean_untrusted_data, + anomalous_data=anomalous_data, + clean_test_data=clean_test_data, + anomalous_test_data=anomalous_test_data, + ) diff --git a/src/cupbearer/tasks/adversarial_examples.py b/src/cupbearer/tasks/adversarial_examples.py index 907967d2..c9bd23cb 100644 --- a/src/cupbearer/tasks/adversarial_examples.py +++ b/src/cupbearer/tasks/adversarial_examples.py @@ -1,49 +1,34 @@ -import math -from 
dataclasses import dataclass from pathlib import Path -from cupbearer.data import AdversarialExampleConfig, DatasetConfig, TrainDataFromRun -from cupbearer.models import ModelConfig, StoredModel - -from ._config import DebugTaskConfig, TaskConfig - - -@dataclass -class AdversarialExampleTask(TaskConfig): - path: Path - attack_batch_size: int = 128 - success_threshold: float = 0.1 - steps: int = 40 - eps: float = 8 / 255 - - def _get_clean_data(self, train: bool) -> DatasetConfig: - if train: - return TrainDataFromRun(path=self.path) - else: - return TrainDataFromRun(path=self.path).get_test_split() - - def _get_anomalous_data(self, train: bool) -> DatasetConfig: - max_size = None - if self.max_test_size: - # This isn't strictly necessary, but it lets us avoid generating more - # adversarial examples than needed. - max_size = math.ceil(self.max_test_size * (1 - self.clean_test_weight)) - return AdversarialExampleConfig( - path=self.path, - max_size=max_size, - attack_batch_size=self.attack_batch_size, - success_threshold=self.success_threshold, - steps=self.steps, - eps=self.eps, - use_test_data=not train, - ) - - def _get_model(self) -> ModelConfig: - return StoredModel(path=self.path) - - -@dataclass(kw_only=True) -class DebugAdversarialExampleTask(DebugTaskConfig, AdversarialExampleTask): - attack_batch_size: int = 1 - success_threshold: float = 1.0 - steps: int = 1 +from torch.utils.data import Dataset + +from cupbearer.data import make_adversarial_examples +from cupbearer.models import HookedModel + +from ._config import Task + + +def adversarial_examples( + model: HookedModel, + train_data: Dataset, + test_data: Dataset, + cache_path: Path, + trusted_fraction: float = 1.0, + clean_train_weight: float = 0.5, + clean_test_weight: float = 0.5, + **kwargs, +) -> Task: + return Task.from_base_data( + model=model, + train_data=train_data, + test_data=test_data, + anomaly_func=lambda dataset, train: make_adversarial_examples( + model, + dataset, + cache_path / 
f"advexes_{'train' if train else 'test'}", + **kwargs, + ), + trusted_fraction=trusted_fraction, + clean_train_weight=clean_train_weight, + clean_test_weight=clean_test_weight, + ) diff --git a/src/cupbearer/tasks/backdoor_detection.py b/src/cupbearer/tasks/backdoor_detection.py index cec9fdcc..d0e94b62 100644 --- a/src/cupbearer/tasks/backdoor_detection.py +++ b/src/cupbearer/tasks/backdoor_detection.py @@ -1,41 +1,38 @@ -from dataclasses import dataclass -from pathlib import Path - -from cupbearer.data import DatasetConfig -from cupbearer.data.backdoor_data import BackdoorData -from cupbearer.models import ModelConfig, StoredModel -from cupbearer.utils.scripts import load_config - -from ._config import DebugTaskConfig, FuzzedTask - - -@dataclass(kw_only=True) -class BackdoorDetection(FuzzedTask): - path: Path - no_load: bool = False - - def __post_init__(self): - backdoor_data = load_config(self.path, "train_data", BackdoorData) - self._original = backdoor_data.original - self._backdoor = backdoor_data.backdoor - self._backdoor.p_backdoor = 1.0 - - if not self.no_load: - self._backdoor.load(self.path) - - # Call this only now that _original and _backdoor are set. 
- super().__post_init__() - - def _get_base_data(self) -> DatasetConfig: - return self._original - - def fuzz(self, data: DatasetConfig) -> DatasetConfig: - return BackdoorData(original=data, backdoor=self._backdoor) - - def _get_model(self) -> ModelConfig: - return StoredModel(path=self.path) - - -@dataclass -class DebugBackdoorDetection(DebugTaskConfig, BackdoorDetection): - pass +from torch.utils.data import Dataset + +from cupbearer.data import Backdoor, BackdoorDataset +from cupbearer.models import HookedModel + +from ._config import Task + + +def backdoor_detection( + model: HookedModel, + train_data: Dataset, + test_data: Dataset, + backdoor: Backdoor, + trusted_fraction: float = 1.0, + clean_train_weight: float = 0.5, + clean_test_weight: float = 0.5, +): + assert backdoor.p_backdoor == 1.0, ( + "Your anomalous data is not pure backdoor data, " + "this is probably unintentional." + ) + + # TODO: for WaNet, we currently expect the user to load the control grid. + # (Otherwise we'd have to always take in a path here, and also when working + # in a notebook it might just be easier to pass in the existing backdoor object.) + # But we should somehow check somewhere that it's loaded to avoid silent errors. 
+ + return Task.from_base_data( + model=model, + train_data=train_data, + test_data=test_data, + anomaly_func=lambda dataset, _: BackdoorDataset( + original=dataset, backdoor=backdoor + ), + trusted_fraction=trusted_fraction, + clean_train_weight=clean_train_weight, + clean_test_weight=clean_test_weight, + ) diff --git a/src/cupbearer/tasks/toy_features.py b/src/cupbearer/tasks/toy_features.py deleted file mode 100644 index 7a3f5c61..00000000 --- a/src/cupbearer/tasks/toy_features.py +++ /dev/null @@ -1,27 +0,0 @@ -from dataclasses import dataclass -from pathlib import Path - -from cupbearer.data.toy_ambiguous_features import ToyFeaturesConfig -from cupbearer.models import StoredModel - -from ._config import DebugTaskConfig, TaskConfig - - -@dataclass -class ToyFeaturesTask(TaskConfig): - path: Path - noise: float = 0.1 - - def _init_train_data(self): - self._train_data = ToyFeaturesConfig(correlated=True, noise=self.noise) - - def _get_anomalous_test_data(self): - return ToyFeaturesConfig(correlated=False, noise=self.noise) - - def _init_model(self): - self._model = StoredModel(path=self.path) - - -@dataclass -class DebugToyFeaturesTask(DebugTaskConfig, ToyFeaturesTask): - pass diff --git a/src/cupbearer/utils/scripts.py b/src/cupbearer/utils/scripts.py index e3fde2ef..488254be 100644 --- a/src/cupbearer/utils/scripts.py +++ b/src/cupbearer/utils/scripts.py @@ -1,11 +1,9 @@ import functools from dataclasses import dataclass from pathlib import Path -from typing import Any, Callable, Optional, Type, TypeVar +from typing import Any, Callable, Optional, TypeVar -import simple_parsing from cupbearer.utils.utils import BaseConfig -from loguru import logger @dataclass(kw_only=True) @@ -30,44 +28,19 @@ def run_script(cfg: ConfigType): def save_cfg(cfg: ScriptConfig, save_config: bool = True): - if cfg.path: - cfg.path.mkdir(parents=True, exist_ok=True) - if save_config: - # TODO: replace this with cfg.save if/when that exposes save_dc_types. 
- # Note that we need save_dc_types here even though `BaseConfig` already - # enables that, since `save` calls `to_dict` directly, not `obj.to_dict`. - simple_parsing.helpers.serialization.serializable.save( - cfg, - cfg.path / "config.yaml", - save_dc_types=True, - sort_keys=False, - ) + # if cfg.path: + # cfg.path.mkdir(parents=True, exist_ok=True) + # if save_config: + # # TODO: replace this with cfg.save if/when that exposes save_dc_types. + # # Note that we need save_dc_types here even though `BaseConfig` already + # # enables that, since `save` calls `to_dict` directly, not `obj.to_dict`. + # simple_parsing.helpers.serialization.serializable.save( + # cfg, + # cfg.path / "config.yaml", + # save_dc_types=True, + # sort_keys=False, + # ) + pass T = TypeVar("T") - - -def load_config( - path: str | Path, - name: Optional[str] = None, - expected_type: Type[T] = ScriptConfig, -) -> T: - logger.debug(f"Loading config '{name}' from {path}") - path = Path(path) - cfg = ScriptConfig.load(path / "config.yaml", drop_extra_fields=False) - - if name is None: - if not isinstance(cfg, expected_type): - raise ValueError(f"Expected config to be a {expected_type}, got {cfg}") - - return cfg - - if not hasattr(cfg, name): - raise ValueError(f"Expected {name} to be in config, got {cfg}") - - sub_cfg = getattr(cfg, name) - - if not isinstance(sub_cfg, expected_type): - raise ValueError(f"Expected {name} to be a {expected_type}, got {sub_cfg}") - - return sub_cfg From 51e6a25da56dcc493aa6426cc625e981cabbfb50 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Thu, 29 Feb 2024 23:54:58 -0800 Subject: [PATCH 06/25] Remove unused DatasetConfigs --- src/cupbearer/data/__init__.py | 9 +- src/cupbearer/data/_shared.py | 177 +-------------------------------- 2 files changed, 2 insertions(+), 184 deletions(-) diff --git a/src/cupbearer/data/__init__.py b/src/cupbearer/data/__init__.py index 706d0ae8..1747c96c 100644 --- a/src/cupbearer/data/__init__.py +++ b/src/cupbearer/data/__init__.py @@ 
-1,12 +1,5 @@ # ruff: noqa: F401 -from ._shared import ( - DatasetConfig, - MixedData, - MixedDataConfig, - SubsetConfig, - TransformDataset, - split_dataset_cfg, -) +from ._shared import MixedData, TransformDataset from .adversarial import AdversarialExampleDataset, make_adversarial_examples from .backdoors import ( Backdoor, diff --git a/src/cupbearer/data/_shared.py b/src/cupbearer/data/_shared.py index fedbdce4..ac516e53 100644 --- a/src/cupbearer/data/_shared.py +++ b/src/cupbearer/data/_shared.py @@ -1,140 +1,8 @@ -from abc import ABC, abstractproperty -from dataclasses import dataclass, field from typing import Optional -from torch.utils.data import Dataset, Subset -from torchvision.transforms import Compose +from torch.utils.data import Dataset from cupbearer.data.transforms import Transform -from cupbearer.utils.utils import BaseConfig - - -@dataclass(kw_only=True) -class DatasetConfig(BaseConfig, ABC): - # Only the values of the transforms dict are used, but simple_parsing doesn't - # support lists of dataclasses, which is why we use a dict. One advantage - # of this is also that it's easier to override specific transforms. - # TODO: We should probably make this a list now that we're abandoning CLI. - transforms: dict[str, Transform] = field(default_factory=dict) - max_size: Optional[int] = None - - @abstractproperty - def num_classes(self) -> int: # type: ignore - pass - - def get_test_split(self) -> "DatasetConfig": - # Not every dataset will define this - raise NotImplementedError - - def get_transforms(self) -> list[Transform]: - """Return a list of transforms that should be applied to this dataset. - - Most subclasses won't need to override this, since it just returns - the transforms field by default. But in some cases, we need to apply custom - processing to this that can't be handled in __post_init__ (see BackdoorData - for an example). 
- """ - return list(self.transforms.values()) - - def build(self) -> Dataset: - """Create an instance of the Dataset described by this config.""" - dataset = self._build() - transform = Compose(self.get_transforms()) - dataset = TransformDataset(dataset, transform) - if self.max_size: - assert self.max_size <= len(dataset) - dataset = Subset(dataset, range(self.max_size)) - return dataset - - def _build(self) -> Dataset: - # Not an abstractmethod because e.g. TestDataConfig overrides build() instead. - raise NotImplementedError - - -@dataclass -class SubsetConfig(DatasetConfig): - full_dataset: DatasetConfig - start_fraction: float = 0.0 - end_fraction: float = 1.0 - - def __post_init__(self): - super().__post_init__() - if self.max_size: - raise ValueError( - "max_size should be set on the full dataset, not the subset." - ) - if self.start_fraction > self.end_fraction: - raise ValueError( - f"{self.start_fraction=} must be less than or equal " - f"to {self.end_fraction=}." - ) - if self.start_fraction < 0 or self.end_fraction > 1: - raise ValueError( - "Fractions must be between 0 and 1, " - f"got {self.start_fraction} and {self.end_fraction}." - ) - if self.transforms: - raise ValueError( - "Transforms should be applied to the full dataset, not the subset." - ) - - def _build(self) -> Dataset: - full = self.full_dataset.build() - start = int(self.start_fraction * len(full)) - end = int(self.end_fraction * len(full)) - return Subset(full, range(start, end)) - - @property - def num_classes(self) -> int: # type: ignore - return self.full_dataset.num_classes - - def get_test_split(self) -> "DatasetConfig": - return SubsetConfig( - full_dataset=self.full_dataset.get_test_split(), - start_fraction=self.start_fraction, - end_fraction=self.end_fraction, - ) - - # Mustn't inherit get_transforms() from full_dataset, they're already applied - # to the full dataset on build. 
- - -# def split_dataset(dataset: Dataset, *fractions: float) -> list[Subset]: -# if not fractions: -# raise ValueError("At least one fraction must be provided.") -# if not all(0 <= f <= 1 for f in fractions): -# raise ValueError("Fractions must be between 0 and 1.") -# if not sum(fractions) == 1: -# fractions = fractions + (1 - sum(fractions),) - -# total = len(dataset) - -# markers = [int(total * fraction) for fraction in fractions] - -# subsets = [] -# current_start = 0 -# for marker in markers: -# subsets.append(Subset(dataset, range(current_start, current_start + marker))) -# current_start += marker -# assert current_start == total -# return subsets - - -def split_dataset_cfg(cfg: DatasetConfig, *fractions: float) -> list[SubsetConfig]: - if not fractions: - raise ValueError("At least one fraction must be provided.") - if not all(0 <= f <= 1 for f in fractions): - raise ValueError("Fractions must be between 0 and 1.") - if not sum(fractions) == 1: - fractions = fractions + (1 - sum(fractions),) - - subsets = [] - current_start = 0.0 - for fraction in fractions: - subsets.append(SubsetConfig(cfg, current_start, current_start + fraction)) - current_start += fraction - assert current_start == 1.0 - return subsets class TransformDataset(Dataset): @@ -188,46 +56,3 @@ def __getitem__(self, index): if self.return_anomaly_labels: return self.anomalous_data[index - self.normal_len], 1 return self.anomalous_data[index - self.normal_len] - - -@dataclass -class MixedDataConfig(DatasetConfig): - normal: DatasetConfig - anomalous: DatasetConfig - normal_weight: float = 0.5 - return_anomaly_labels: bool = True - - def get_test_split(self) -> "MixedDataConfig": - return MixedDataConfig( - normal=self.normal.get_test_split(), - anomalous=self.anomalous.get_test_split(), - normal_weight=self.normal_weight, - return_anomaly_labels=self.return_anomaly_labels, - ) - - @property - def num_classes(self): - assert (n := self.normal.num_classes) == self.anomalous.num_classes - return 
n - - def build(self) -> MixedData: - # We need to override this method because max_size needs to be applied in a - # different way: TestDataMix just has normal data first and then anomalous data, - # if we just used a Subset with indices 1...n, we'd get an incorrect ratio. - normal = self.normal.build() - anomalous = self.anomalous.build() - if self.max_size: - normal_size = int(self.max_size * self.normal_weight) - normal_size = min(len(normal), normal_size) - normal = Subset(normal, range(normal_size)) - anomalous_size = self.max_size - normal_size - anomalous_size = min(len(anomalous), anomalous_size) - anomalous = Subset(anomalous, range(anomalous_size)) - dataset = MixedData( - normal, anomalous, self.normal_weight, self.return_anomaly_labels - ) - # We don't want to return a TransformDataset here. Transforms should be applied - # directly to the normal and anomalous data. - if self.transforms: - raise ValueError("Transforms are not supported for TestDataConfig.") - return dataset From 48f8292f788df5acb546b40ed207d811fd87813b Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Thu, 29 Feb 2024 23:57:12 -0800 Subject: [PATCH 07/25] Rename task file More appropriate now to not call it `_config.py` --- src/cupbearer/tasks/__init__.py | 2 +- src/cupbearer/tasks/adversarial_examples.py | 2 +- src/cupbearer/tasks/backdoor_detection.py | 2 +- src/cupbearer/tasks/{_config.py => task.py} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename src/cupbearer/tasks/{_config.py => task.py} (100%) diff --git a/src/cupbearer/tasks/__init__.py b/src/cupbearer/tasks/__init__.py index 635f0049..9fe5b58f 100644 --- a/src/cupbearer/tasks/__init__.py +++ b/src/cupbearer/tasks/__init__.py @@ -1,4 +1,4 @@ # ruff: noqa: F401 -from ._config import Task from .adversarial_examples import adversarial_examples from .backdoor_detection import backdoor_detection +from .task import Task diff --git a/src/cupbearer/tasks/adversarial_examples.py b/src/cupbearer/tasks/adversarial_examples.py 
index c9bd23cb..ee593558 100644 --- a/src/cupbearer/tasks/adversarial_examples.py +++ b/src/cupbearer/tasks/adversarial_examples.py @@ -5,7 +5,7 @@ from cupbearer.data import make_adversarial_examples from cupbearer.models import HookedModel -from ._config import Task +from .task import Task def adversarial_examples( diff --git a/src/cupbearer/tasks/backdoor_detection.py b/src/cupbearer/tasks/backdoor_detection.py index d0e94b62..51942285 100644 --- a/src/cupbearer/tasks/backdoor_detection.py +++ b/src/cupbearer/tasks/backdoor_detection.py @@ -3,7 +3,7 @@ from cupbearer.data import Backdoor, BackdoorDataset from cupbearer.models import HookedModel -from ._config import Task +from .task import Task def backdoor_detection( diff --git a/src/cupbearer/tasks/_config.py b/src/cupbearer/tasks/task.py similarity index 100% rename from src/cupbearer/tasks/_config.py rename to src/cupbearer/tasks/task.py From 79b51ecf9fe57067d5e6552826102e8087f13381 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Fri, 1 Mar 2024 20:50:58 -0800 Subject: [PATCH 08/25] WIP on removing ScriptConfig and TrainConfig --- src/cupbearer/scripts/__init__.py | 5 +- src/cupbearer/scripts/_shared.py | 7 +- .../scripts/conf/eval_classifier_conf.py | 30 ----- .../scripts/conf/eval_detector_conf.py | 12 -- .../scripts/conf/train_classifier_conf.py | 47 -------- .../scripts/conf/train_detector_conf.py | 15 --- src/cupbearer/scripts/eval_classifier.py | 39 +++--- src/cupbearer/scripts/eval_detector.py | 24 ++-- src/cupbearer/scripts/train_classifier.py | 111 ++++++++++++++---- src/cupbearer/scripts/train_detector.py | 37 ++++-- src/cupbearer/utils/scripts.py | 37 ++---- 11 files changed, 163 insertions(+), 201 deletions(-) delete mode 100644 src/cupbearer/scripts/conf/eval_classifier_conf.py delete mode 100644 src/cupbearer/scripts/conf/eval_detector_conf.py delete mode 100644 src/cupbearer/scripts/conf/train_classifier_conf.py delete mode 100644 src/cupbearer/scripts/conf/train_detector_conf.py diff --git 
a/src/cupbearer/scripts/__init__.py b/src/cupbearer/scripts/__init__.py index 51003cff..1666400e 100644 --- a/src/cupbearer/scripts/__init__.py +++ b/src/cupbearer/scripts/__init__.py @@ -1,8 +1,5 @@ # ruff: noqa: F401 -from .conf.eval_classifier_conf import Config as EvalClassifierConfig -from .conf.eval_detector_conf import Config as EvalDetectorConfig -from .conf.train_classifier_conf import Config as TrainClassifierConfig -from .conf.train_detector_conf import Config as TrainDetectorConfig +from ._shared import Classifier from .eval_classifier import main as eval_classifier from .eval_detector import main as eval_detector from .train_classifier import main as train_classifier diff --git a/src/cupbearer/scripts/_shared.py b/src/cupbearer/scripts/_shared.py index 62729606..539d88d1 100644 --- a/src/cupbearer/scripts/_shared.py +++ b/src/cupbearer/scripts/_shared.py @@ -3,7 +3,6 @@ from torchmetrics.classification import Accuracy from cupbearer.models import HookedModel -from cupbearer.utils.optimizers import OptimizerConfig class Classifier(L.LightningModule): @@ -11,7 +10,7 @@ def __init__( self, model: HookedModel, num_classes: int, - optim_cfg: OptimizerConfig, + lr: float, val_loader_names: list[str] | None = None, test_loader_names: list[str] | None = None, save_hparams: bool = True, @@ -25,7 +24,7 @@ def __init__( test_loader_names = [] self.model = model - self.optim_cfg = optim_cfg + self.lr = lr self.val_loader_names = val_loader_names self.test_loader_names = test_loader_names self.train_accuracy = Accuracy(task="multiclass", num_classes=num_classes) @@ -81,4 +80,4 @@ def on_validation_epoch_end(self): self.log(f"{name}/acc_epoch", self.val_accuracy[i]) def configure_optimizers(self): - return self.optim_cfg.get_optimizer(self.parameters()) + return torch.optim.Adam(self.parameters(), lr=self.lr) diff --git a/src/cupbearer/scripts/conf/eval_classifier_conf.py b/src/cupbearer/scripts/conf/eval_classifier_conf.py deleted file mode 100644 index 
10a365b1..00000000 --- a/src/cupbearer/scripts/conf/eval_classifier_conf.py +++ /dev/null @@ -1,30 +0,0 @@ -from dataclasses import dataclass -from typing import Optional - -from cupbearer.models import HookedModel -from cupbearer.utils.scripts import ScriptConfig -from torch.utils.data import Dataset - - -@dataclass(kw_only=True) -class Config(ScriptConfig): - data: Dataset - model: HookedModel - max_batches: Optional[int] = None - max_batch_size: int = 2048 - save_config: bool = False - pbar: bool = True - wandb: bool = False - log_every_n_steps: Optional[int] = None - - def __post_init__(self): - if self.path is None: - raise ValueError("Path must be set") - - -@dataclass -class DebugConfig(Config): - max_batches: int = 1 - max_batch_size: int = 2 - wandb: bool = False - log_every_n_steps: int = 1 diff --git a/src/cupbearer/scripts/conf/eval_detector_conf.py b/src/cupbearer/scripts/conf/eval_detector_conf.py deleted file mode 100644 index 33d6bbf8..00000000 --- a/src/cupbearer/scripts/conf/eval_detector_conf.py +++ /dev/null @@ -1,12 +0,0 @@ -from dataclasses import dataclass - -from cupbearer.detectors import AnomalyDetector -from cupbearer.tasks import Task -from cupbearer.utils.scripts import ScriptConfig - - -@dataclass(kw_only=True) -class Config(ScriptConfig): - task: Task - detector: AnomalyDetector - pbar: bool = False diff --git a/src/cupbearer/scripts/conf/train_classifier_conf.py b/src/cupbearer/scripts/conf/train_classifier_conf.py deleted file mode 100644 index 5fcd3473..00000000 --- a/src/cupbearer/scripts/conf/train_classifier_conf.py +++ /dev/null @@ -1,47 +0,0 @@ -from dataclasses import dataclass, field - -from cupbearer.data import BackdoorDataset, WanetBackdoor -from cupbearer.models import HookedModel -from cupbearer.utils.scripts import ScriptConfig -from cupbearer.utils.train import DebugTrainConfig, TrainConfig -from torch.utils.data import Dataset - - -@dataclass(kw_only=True) -class Config(ScriptConfig): - model: HookedModel - 
train_config: TrainConfig = field(default_factory=TrainConfig) - train_data: Dataset - num_classes: int - val_data: dict[str, Dataset] = field(default_factory=dict) - # If True, returns the Lighting Trainer object (which has the model and a bunch - # of other information, this may be useful when using interactively). - # Otherwise (default), return only a dictionary of latest metrics, to avoid e.g. - # submitit trying to pickle the entire Trainer object. - return_trainer: bool = False - - def __post_init__(self): - super().__post_init__() - - # For datasets that are not necessarily deterministic based only on - # arguments, this is where validation sets are set to follow train_data - if isinstance(self.train_data, BackdoorDataset): - for name, val_config in self.val_data.items(): - # WanetBackdoor - if ( - isinstance(self.train_data.backdoor, WanetBackdoor) - and isinstance(val_config, BackdoorDataset) - and isinstance(val_config.backdoor, WanetBackdoor) - ): - str_factor = ( - val_config.backdoor.warping_strength - / self.train_data.backdoor.warping_strength - ) - val_config.backdoor.control_grid = ( - str_factor * self.train_data.backdoor.control_grid - ) - - -@dataclass -class DebugConfig(Config): - train_config: DebugTrainConfig = field(default_factory=DebugTrainConfig) diff --git a/src/cupbearer/scripts/conf/train_detector_conf.py b/src/cupbearer/scripts/conf/train_detector_conf.py deleted file mode 100644 index 84a362b0..00000000 --- a/src/cupbearer/scripts/conf/train_detector_conf.py +++ /dev/null @@ -1,15 +0,0 @@ -from dataclasses import dataclass - -from cupbearer.detectors import AnomalyDetector -from cupbearer.tasks import Task -from cupbearer.utils.scripts import ScriptConfig -from cupbearer.utils.train import TrainConfig -from cupbearer.utils.utils import BaseConfig, mutable_field - - -@dataclass(kw_only=True) -class Config(ScriptConfig): - task: Task - detector: AnomalyDetector - num_classes: int - train: BaseConfig = mutable_field(TrainConfig()) diff 
--git a/src/cupbearer/scripts/eval_classifier.py b/src/cupbearer/scripts/eval_classifier.py index 021d866d..d30d1316 100644 --- a/src/cupbearer/scripts/eval_classifier.py +++ b/src/cupbearer/scripts/eval_classifier.py @@ -1,41 +1,48 @@ import json +from pathlib import Path +from typing import Optional import lightning as L from loguru import logger -from torch.utils.data import DataLoader +from torch.utils.data import DataLoader, Dataset from cupbearer.data import BackdoorDataset +from cupbearer.models import HookedModel from cupbearer.scripts._shared import Classifier from cupbearer.utils.scripts import script -from .conf.eval_classifier_conf import Config - @script -def main(cfg: Config): - assert cfg.path is not None # make type checker happy - - if isinstance(cfg.data, BackdoorDataset): - logger.debug(f"Loading transform: {cfg.data.backdoor}") - cfg.data.backdoor.load(cfg.path) +def main( + data: Dataset, + model: HookedModel, + path: Path | str, + max_batches: Optional[int] = None, + max_batch_size: int = 2048, +): + path = Path(path) + + if isinstance(data, BackdoorDataset): + logger.debug(f"Loading transform: {data.backdoor}") + data.backdoor.load(path) dataloader = DataLoader( - cfg.data, - batch_size=cfg.max_batch_size, + data, + batch_size=max_batch_size, shuffle=False, ) classifier = Classifier.load_from_checkpoint( - cfg.path / "checkpoints" / "last.ckpt", - model=cfg.model, + path / "checkpoints" / "last.ckpt", + model=model, test_loader_names=["test"], ) trainer = L.Trainer( logger=False, - default_root_dir=cfg.path, - limit_test_batches=cfg.max_batches, + default_root_dir=path, + limit_test_batches=max_batches, ) metrics = trainer.test(classifier, [dataloader]) - with open(cfg.path / "eval.json", "w") as f: + with open(path / "eval.json", "w") as f: json.dump(metrics, f) diff --git a/src/cupbearer/scripts/eval_detector.py b/src/cupbearer/scripts/eval_detector.py index 31217a7e..b20774ad 100644 --- a/src/cupbearer/scripts/eval_detector.py +++ 
b/src/cupbearer/scripts/eval_detector.py @@ -1,18 +1,18 @@ -from cupbearer.scripts.conf.eval_detector_conf import Config +from cupbearer.detectors import AnomalyDetector +from cupbearer.tasks import Task from cupbearer.utils.scripts import script @script -def main(cfg: Config): - assert cfg.detector is not None # make type checker happy - # Init - train_data = cfg.task.trusted_data - test_data = cfg.task.test_data - cfg.detector.set_model(cfg.task.model) +def main( + task: Task, + detector: AnomalyDetector, + pbar: bool = False, +): + detector.set_model(task.model) - # Evaluate detector - cfg.detector.eval( - train_dataset=train_data, - test_dataset=test_data, - pbar=cfg.pbar, + detector.eval( + train_dataset=task.trusted_data, + test_dataset=task.test_data, + pbar=pbar, ) diff --git a/src/cupbearer/scripts/train_classifier.py b/src/cupbearer/scripts/train_classifier.py index dce13a9e..05b8f3f2 100644 --- a/src/cupbearer/scripts/train_classifier.py +++ b/src/cupbearer/scripts/train_classifier.py @@ -1,49 +1,118 @@ import warnings +from pathlib import Path from typing import Any import lightning as L +from lightning.pytorch import loggers from lightning.pytorch.callbacks import ModelCheckpoint +from torch.utils.data import DataLoader -from cupbearer.data import BackdoorDataset +from cupbearer.models import HookedModel from cupbearer.scripts._shared import Classifier from cupbearer.utils.scripts import script -from .conf.train_classifier_conf import Config - @script -def main(cfg: Config) -> dict[str, Any] | L.Trainer: - train_loader = cfg.train_config.get_dataloader(cfg.train_data) +def main( + model: HookedModel, + train_loader: DataLoader, + num_classes: int, + path: Path | str, + lr: float = 1e-3, + val_loaders: DataLoader | dict[str, DataLoader] | None = None, + # If True, returns the Lighting Trainer object (which has the model and a bunch + # of other information, this may be useful when using interactively). 
+ # Otherwise (default), return only a dictionary of latest metrics, to avoid e.g. + # submitit trying to pickle the entire Trainer object. + return_trainer: bool = False, + wandb: bool = False, + **trainer_kwargs, +) -> dict[str, Any] | L.Trainer: + path = Path(path) + + if trainer_kwargs is None: + trainer_kwargs = {} + if val_loaders is None: + val_loaders = {} + elif isinstance(val_loaders, DataLoader): + val_loaders = {"val": val_loaders} - val_loaders = { - k: cfg.train_config.get_dataloader(v, train=False) - for k, v in cfg.val_data.items() - } + # arguments, this is where validation sets are set to follow train_data + # TODO: we could get weird bugs here if e.g. train_data is a Subset of some + # BackdoorDataset. + # if isinstance(train_data, BackdoorDataset): + # for name, val_config in val_data.items(): + # # WanetBackdoor + # if ( + # isinstance(train_data.backdoor, WanetBackdoor) + # and isinstance(val_config, BackdoorDataset) + # and isinstance(val_config.backdoor, WanetBackdoor) + # ): + # str_factor = ( + # val_config.backdoor.warping_strength + # / train_data.backdoor.warping_strength + # ) + # val_config.backdoor.control_grid = ( + # str_factor * train_data.backdoor.control_grid + # ) - # The WaNet backdoor (and maybe others in the future) has randomly generated state - # that needs to be stored if we want to load it later. - if isinstance(cfg.train_data, BackdoorDataset): - cfg.train_data.backdoor.store(cfg.path) + # # The WaNet backdoor (and maybe others in the future) has randomly generated state + # # that needs to be stored if we want to load it later. 
+ # if isinstance(train_data, BackdoorDataset): + # train_data.backdoor.store(path) classifier = Classifier( - model=cfg.model, - num_classes=cfg.num_classes, - optim_cfg=cfg.train_config.optimizer, + model=model, + num_classes=num_classes, + lr=lr, val_loader_names=list(val_loaders.keys()), ) + callbacks = trainer_kwargs.pop("callbacks", []) + # TODO: once we do longer training runs we'll want to have multiple # checkpoints, potentially based on validation loss - callbacks = cfg.train_config.callbacks - if cfg.path: + if ( + path + # If the user already provided a custom checkpoint config, we'll use that: + and not any(isinstance(c, ModelCheckpoint) for c in callbacks) + # If the user explicitly disabled checkpointing, we don't want to override that: + and trainer_kwargs.get("enable_checkpointing", True) + ): callbacks.append( ModelCheckpoint( - dirpath=cfg.path / "checkpoints", + dirpath=path / "checkpoints", save_last=True, ) ) - trainer = cfg.train_config.get_trainer(callbacks=callbacks, path=cfg.path) + # Define metrics logger + # TODO: make adjustable and set config correctly + if wandb: + metrics_logger = loggers.WandbLogger(project="cupbearer") + metrics_logger.experiment.config.update(trainer_kwargs) + metrics_logger.experiment.config.update( + { + "model": repr(model), + "train_data": repr(train_loader.dataset), + "batch_size": train_loader.batch_size, + } + ) + if path: + metrics_logger = loggers.TensorBoardLogger( + save_dir=path, + name="", + version="", + sub_dir="tensorboard", + ) + else: + metrics_logger = None + + trainer = L.Trainer( + default_root_dir=path, + **trainer_kwargs, + ) + with warnings.catch_warnings(): if not val_loaders: warnings.filterwarnings( @@ -59,7 +128,7 @@ def main(cfg: Config) -> dict[str, Any] | L.Trainer: val_dataloaders=list(val_loaders.values()) or None, ) - if cfg.return_trainer: + if return_trainer: return trainer else: return trainer.logged_metrics diff --git a/src/cupbearer/scripts/train_detector.py 
b/src/cupbearer/scripts/train_detector.py index fbc4151c..d7dfb00d 100644 --- a/src/cupbearer/scripts/train_detector.py +++ b/src/cupbearer/scripts/train_detector.py @@ -1,25 +1,36 @@ +from cupbearer.detectors import AnomalyDetector +from cupbearer.tasks import Task from cupbearer.utils.scripts import script +from cupbearer.utils.train import TrainConfig +from cupbearer.utils.utils import BaseConfig from . import EvalDetectorConfig, eval_detector -from .conf.train_detector_conf import Config @script -def main(cfg: Config): - cfg.detector.set_model(cfg.task.model) +def main( + task: Task, + detector: AnomalyDetector, + num_classes: int, + train: BaseConfig | None = None, + seed: int = 0, +): + if train is None: + train = TrainConfig() + detector.set_model(task.model) - cfg.detector.train( - trusted_data=cfg.task.trusted_data, - untrusted_data=cfg.task.untrusted_train_data, - num_classes=cfg.num_classes, - train_config=cfg.train, + detector.train( + trusted_data=task.trusted_data, + untrusted_data=task.untrusted_train_data, + num_classes=num_classes, + train_config=train, ) - path = cfg.detector.save_path + path = detector.save_path if path: - cfg.detector.save_weights(path / "detector") + detector.save_weights(path / "detector") eval_cfg = EvalDetectorConfig( - detector=cfg.detector, - task=cfg.task, - seed=cfg.seed, + detector=detector, + task=task, + seed=seed, ) eval_detector(eval_cfg) diff --git a/src/cupbearer/utils/scripts.py b/src/cupbearer/utils/scripts.py index 488254be..246d46e8 100644 --- a/src/cupbearer/utils/scripts.py +++ b/src/cupbearer/utils/scripts.py @@ -1,33 +1,19 @@ -import functools -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Callable, Optional, TypeVar - -from cupbearer.utils.utils import BaseConfig - - -@dataclass(kw_only=True) -class ScriptConfig(BaseConfig): - seed: int = 0 - path: Optional[Path] = None - save_config: bool = True - - -ConfigType = TypeVar("ConfigType", bound=ScriptConfig) +from 
typing import Callable def script( - script_fn: Callable[[ConfigType], Any], -) -> Callable[[ConfigType], Any]: - @functools.wraps(script_fn) - def run_script(cfg: ConfigType): - save_cfg(cfg, save_config=cfg.save_config) - return script_fn(cfg) + script_fn: Callable, +) -> Callable: + # @functools.wraps(script_fn) + # def run_script(cfg: ConfigType): + # save_cfg(cfg, save_config=cfg.save_config) + # return script_fn(cfg) - return run_script + # return run_script + return script_fn -def save_cfg(cfg: ScriptConfig, save_config: bool = True): +def save_cfg(cfg, save_config: bool = True): # if cfg.path: # cfg.path.mkdir(parents=True, exist_ok=True) # if save_config: @@ -41,6 +27,3 @@ def save_cfg(cfg: ScriptConfig, save_config: bool = True): # sort_keys=False, # ) pass - - -T = TypeVar("T") From bdd56fbcc3ec315eb045818aa45ac00b8f75eeb1 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 12:50:42 -0800 Subject: [PATCH 09/25] Remove backdoor loading/storing logic I think we should let the user handle this and just have big warning flags around WaNet---making sure we always do this correctly automatically seems nearly impossible so better to be explicit about that --- src/cupbearer/data/backdoors.py | 53 ++++++++++------------- src/cupbearer/data/transforms.py | 8 ---- src/cupbearer/scripts/eval_classifier.py | 6 --- src/cupbearer/scripts/train_classifier.py | 24 ---------- 4 files changed, 23 insertions(+), 68 deletions(-) diff --git a/src/cupbearer/data/backdoors.py b/src/cupbearer/data/backdoors.py index 04bb161a..6751235b 100644 --- a/src/cupbearer/data/backdoors.py +++ b/src/cupbearer/data/backdoors.py @@ -1,7 +1,8 @@ import os from abc import ABC from dataclasses import dataclass -from typing import Optional, Tuple +from pathlib import Path +from typing import Tuple import torch import torch.nn.functional as F @@ -91,28 +92,23 @@ def inject_backdoor(self, img: torch.Tensor): return img -@dataclass +@dataclass(kw_only=True) class 
WanetBackdoor(Backdoor): """Implements trigger transform from "Wanet - Imperceptible Warping-based Backdoor Attack" by Anh Tuan Nguyen and Anh Tuan Tran, ICLR, 2021.""" + # Path to load control grid from, or None to generate a new one. + # Deliberartely non-optional to avoid accidentally generating a new grid! + path: Path | str | None p_noise: float = 0.0 # Probability of non-backdoor warping control_grid_width: int = 4 # Side length of unscaled warping field warping_strength: float = 0.5 # Strength of warping effect grid_rescale: float = 1.0 # Factor to rescale grid from warping effect - _control_grid: Optional[ - tuple[ - list[list[float]], - list[list[float]], - ] - ] = None # Used for reproducibility, typically not set manually def __post_init__(self): super().__post_init__() self._warping_field = None - - # Init control_grid so that it is saved in config - self.control_grid + self._control_grid = None assert 0 <= self.p_noise <= 1, "Probability must be between 0 and 1" assert ( @@ -121,7 +117,10 @@ def __post_init__(self): @property def control_grid(self) -> torch.Tensor: - if self._control_grid is None: + if self._control_grid is not None: + return self._control_grid + + if self.path: logger.debug("Generating new control grid for warping field.") control_grid_shape = (2, self.control_grid_width, self.control_grid_width) control_grid = 2 * torch.rand(*control_grid_shape) - 1 @@ -129,7 +128,14 @@ def control_grid(self) -> torch.Tensor: control_grid = control_grid * self.warping_strength self.control_grid = control_grid else: - control_grid = torch.tensor(self._control_grid) + logger.debug( + f"Loading control grid from {self._get_savefile_fullpath(self.path)}" + ) + control_grid = torch.load(self._get_savefile_fullpath(self.path)) + if control_grid.shape[-1] != self.control_grid_width: + logger.warning("Control grid width updated from load.") + self.control_grid_width = control_grid.shape[-1] + self.control_grid = control_grid control_grid_shape = (2, 
self.control_grid_width, self.control_grid_width) assert control_grid.shape == control_grid_shape @@ -143,8 +149,7 @@ def control_grid(self, control_grid: torch.Tensor): if control_grid.shape != control_grid_shape: raise ValueError("Control grid shape is incompatible.") - # We keep self._control_grid serializable - self._control_grid = tuple(control_grid.tolist()) + self._control_grid = control_grid @property def warping_field(self) -> torch.Tensor: @@ -177,21 +182,9 @@ def init_warping_field(self, px: int, py: int): def _get_savefile_fullpath(basepath): return os.path.join(basepath, "wanet_backdoor.pt") - def store(self, basepath): - super().store(basepath) - logger.debug(f"Storing control grid to {self._get_savefile_fullpath(basepath)}") - torch.save(self.control_grid, self._get_savefile_fullpath(basepath)) - - def load(self, basepath): - super().load(basepath) - logger.debug( - f"Loading control grid from {self._get_savefile_fullpath(basepath)}" - ) - control_grid = torch.load(self._get_savefile_fullpath(basepath)) - if control_grid.shape[-1] != self.control_grid_width: - logger.warning("Control grid width updated from load.") - self.control_grid_width = control_grid.shape[-1] - self.control_grid = control_grid + def store(self, path: Path | str): + logger.debug(f"Storing control grid to {self._get_savefile_fullpath(path)}") + torch.save(self.control_grid, self._get_savefile_fullpath(path)) def _warp(self, img: torch.Tensor, warping_field: torch.Tensor) -> torch.Tensor: if img.ndim == 3: diff --git a/src/cupbearer/data/transforms.py b/src/cupbearer/data/transforms.py index 6b15cde3..ebaae3cb 100644 --- a/src/cupbearer/data/transforms.py +++ b/src/cupbearer/data/transforms.py @@ -11,14 +11,6 @@ class Transform(ABC): def __call__(self, sample): pass - def store(self, basepath): - """Save transform state to reproduce instance later.""" - pass - - def load(self, basepath): - """Load transform state to reproduce stored instance.""" - pass - class 
AdaptedTransform(Transform, ABC): """Adapt a transform designed to work on inputs to work on img, label pairs.""" diff --git a/src/cupbearer/scripts/eval_classifier.py b/src/cupbearer/scripts/eval_classifier.py index d30d1316..ac0a42c3 100644 --- a/src/cupbearer/scripts/eval_classifier.py +++ b/src/cupbearer/scripts/eval_classifier.py @@ -3,10 +3,8 @@ from typing import Optional import lightning as L -from loguru import logger from torch.utils.data import DataLoader, Dataset -from cupbearer.data import BackdoorDataset from cupbearer.models import HookedModel from cupbearer.scripts._shared import Classifier from cupbearer.utils.scripts import script @@ -22,10 +20,6 @@ def main( ): path = Path(path) - if isinstance(data, BackdoorDataset): - logger.debug(f"Loading transform: {data.backdoor}") - data.backdoor.load(path) - dataloader = DataLoader( data, batch_size=max_batch_size, diff --git a/src/cupbearer/scripts/train_classifier.py b/src/cupbearer/scripts/train_classifier.py index 05b8f3f2..c55a4fc4 100644 --- a/src/cupbearer/scripts/train_classifier.py +++ b/src/cupbearer/scripts/train_classifier.py @@ -37,30 +37,6 @@ def main( elif isinstance(val_loaders, DataLoader): val_loaders = {"val": val_loaders} - # arguments, this is where validation sets are set to follow train_data - # TODO: we could get weird bugs here if e.g. train_data is a Subset of some - # BackdoorDataset. 
- # if isinstance(train_data, BackdoorDataset): - # for name, val_config in val_data.items(): - # # WanetBackdoor - # if ( - # isinstance(train_data.backdoor, WanetBackdoor) - # and isinstance(val_config, BackdoorDataset) - # and isinstance(val_config.backdoor, WanetBackdoor) - # ): - # str_factor = ( - # val_config.backdoor.warping_strength - # / train_data.backdoor.warping_strength - # ) - # val_config.backdoor.control_grid = ( - # str_factor * train_data.backdoor.control_grid - # ) - - # # The WaNet backdoor (and maybe others in the future) has randomly generated state - # # that needs to be stored if we want to load it later. - # if isinstance(train_data, BackdoorDataset): - # train_data.backdoor.store(path) - classifier = Classifier( model=model, num_classes=num_classes, From 62e618a8294bbfd3f31953e723a5d3c497f1d97e Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 14:55:12 -0800 Subject: [PATCH 10/25] Remove TrainConfig Abstractions and tests are still very broken --- src/cupbearer/detectors/__init__.py | 2 - .../detectors/abstraction/__init__.py | 8 +- .../abstraction/abstraction_detector.py | 20 ++-- src/cupbearer/detectors/anomaly_detector.py | 7 +- src/cupbearer/detectors/finetuning.py | 13 ++- .../detectors/statistical/__init__.py | 6 -- .../statistical/mahalanobis_detector.py | 13 ++- .../detectors/statistical/que_detector.py | 9 +- .../statistical/spectral_detector.py | 3 +- .../detectors/statistical/statistical.py | 93 +++------------- src/cupbearer/scripts/train_classifier.py | 5 +- src/cupbearer/scripts/train_detector.py | 20 +--- src/cupbearer/utils/__init__.py | 2 - src/cupbearer/utils/optimizers.py | 19 ---- src/cupbearer/utils/train.py | 100 ------------------ src/cupbearer/utils/utils.py | 23 ---- 16 files changed, 51 insertions(+), 292 deletions(-) delete mode 100644 src/cupbearer/utils/optimizers.py delete mode 100644 src/cupbearer/utils/train.py diff --git a/src/cupbearer/detectors/__init__.py 
b/src/cupbearer/detectors/__init__.py index 2da3d794..775b2dbe 100644 --- a/src/cupbearer/detectors/__init__.py +++ b/src/cupbearer/detectors/__init__.py @@ -3,9 +3,7 @@ from .anomaly_detector import AnomalyDetector from .finetuning import FinetuningAnomalyDetector from .statistical import ( - ActivationCovarianceTrainConfig, MahalanobisDetector, - MahalanobisTrainConfig, QuantumEntropyDetector, SpectralSignatureDetector, ) diff --git a/src/cupbearer/detectors/abstraction/__init__.py b/src/cupbearer/detectors/abstraction/__init__.py index ba48e172..33519c79 100644 --- a/src/cupbearer/detectors/abstraction/__init__.py +++ b/src/cupbearer/detectors/abstraction/__init__.py @@ -2,8 +2,6 @@ from dataclasses import dataclass, field from cupbearer.models import HookedModel -from cupbearer.utils.train import TrainConfig -from cupbearer.utils.utils import BaseConfig from .abstraction import ( Abstraction, @@ -19,7 +17,7 @@ # let users specify a path to a python function that gets called # to construct the abstraction. (With get_default_abstraction being the default.) 
@dataclass -class AbstractionConfig(BaseConfig, ABC): +class AbstractionConfig(ABC): size_reduction: int = 4 @abstractmethod @@ -48,13 +46,13 @@ class AbstractionDetectorConfig: abstraction: AbstractionConfig = field( default_factory=LocallyConsistentAbstractionConfig ) - train: TrainConfig = field(default_factory=TrainConfig) + max_batch_size: int = 4096 def build(self, model, save_dir) -> AbstractionDetector: abstraction = self.abstraction.build(model) return AbstractionDetector( model=model, abstraction=abstraction, - max_batch_size=self.train.max_batch_size, + max_batch_size=self.max_batch_size, save_path=save_dir, ) diff --git a/src/cupbearer/detectors/abstraction/abstraction_detector.py b/src/cupbearer/detectors/abstraction/abstraction_detector.py index df49ddea..cec80e4f 100644 --- a/src/cupbearer/detectors/abstraction/abstraction_detector.py +++ b/src/cupbearer/detectors/abstraction/abstraction_detector.py @@ -15,8 +15,6 @@ ActivationBasedDetector, ) from cupbearer.models import HookedModel -from cupbearer.utils.optimizers import OptimizerConfig -from cupbearer.utils.train import TrainConfig def per_layer(func: Callable): @@ -94,14 +92,14 @@ def __init__( self, get_activations: Callable[[torch.Tensor], tuple[Any, dict[str, torch.Tensor]]], abstraction: Abstraction, - optim_cfg: OptimizerConfig, + lr: float = 1e-3, ): super().__init__() self.save_hyperparameters(ignore=["get_activations", "abstraction"]) self.get_activations = get_activations self.abstraction = abstraction - self.optim_cfg = optim_cfg + self.lr = lr def _shared_step(self, batch): _, activations = self.get_activations(batch) @@ -118,7 +116,7 @@ def training_step(self, batch, batch_idx): def configure_optimizers(self): # Note we only optimize over the abstraction parameters, the model is frozen - return self.optim_cfg.get_optimizer(self.abstraction.parameters()) + return torch.optim.Adam(self.abstraction.parameters(), lr=self.lr) class AbstractionDetector(ActivationBasedDetector): @@ -150,7 
+148,9 @@ def train( untrusted_data, *, num_classes: int, - train_config: TrainConfig, + lr: float = 1e-3, + batch_size: int = 64, + **trainer_kwargs, ): if trusted_data is None: raise ValueError("Abstraction detector requires trusted training data.") @@ -160,10 +160,12 @@ def train( module = AbstractionModule( self.get_activations, self.abstraction, - optim_cfg=train_config.optimizer, + lr=lr, ) - train_loader = train_config.get_dataloader(trusted_data) + train_loader = torch.utils.data.DataLoader( + trusted_data, batch_size=batch_size, shuffle=True + ) # TODO: implement validation data # val_loaders = { @@ -186,7 +188,7 @@ def train( # (which seems tricky to do manually). module.model = self.model - trainer = train_config.get_trainer(path=self.save_path) + trainer = L.Trainer(default_root_dir=self.save_path, **trainer_kwargs) trainer.fit( model=module, train_dataloaders=train_loader, diff --git a/src/cupbearer/detectors/anomaly_detector.py b/src/cupbearer/detectors/anomaly_detector.py index 75c5569e..c97db6f1 100644 --- a/src/cupbearer/detectors/anomaly_detector.py +++ b/src/cupbearer/detectors/anomaly_detector.py @@ -43,12 +43,7 @@ def set_model(self, model: HookedModel): @abstractmethod def train( - self, - trusted_data: Dataset | None, - untrusted_data: Dataset | None, - *, - num_classes: int, - train_config: utils.BaseConfig, + self, trusted_data: Dataset | None, untrusted_data: Dataset | None, **kwargs ): """Train the anomaly detector with the given datasets on the given model. 
diff --git a/src/cupbearer/detectors/finetuning.py b/src/cupbearer/detectors/finetuning.py index d0f2e014..50b224dd 100644 --- a/src/cupbearer/detectors/finetuning.py +++ b/src/cupbearer/detectors/finetuning.py @@ -1,13 +1,14 @@ import copy import warnings +import lightning as L import torch import torch.nn.functional as F +from torch.utils.data import DataLoader from cupbearer.detectors.anomaly_detector import AnomalyDetector from cupbearer.scripts._shared import Classifier from cupbearer.utils import utils -from cupbearer.utils.train import TrainConfig class FinetuningAnomalyDetector(AnomalyDetector): @@ -26,22 +27,24 @@ def train( untrusted_data, *, num_classes: int, - train_config: TrainConfig, + lr: float = 1e-3, + batch_size: int = 64, + **trainer_kwargs, ): if trusted_data is None: raise ValueError("Finetuning detector requires trusted training data.") classifier = Classifier( self.finetuned_model, num_classes=num_classes, - optim_cfg=train_config.optimizer, + lr=lr, save_hparams=False, ) # Create a DataLoader for the clean dataset - clean_loader = train_config.get_dataloader(trusted_data) + clean_loader = DataLoader(trusted_data, batch_size=batch_size, shuffle=True) # Finetune the model on the clean dataset - trainer = train_config.get_trainer(path=self.save_path) + trainer = L.Trainer(default_root_dir=self.save_path, **trainer_kwargs) with warnings.catch_warnings(): warnings.filterwarnings( "ignore", diff --git a/src/cupbearer/detectors/statistical/__init__.py b/src/cupbearer/detectors/statistical/__init__.py index 76bc19a2..9b56b1c5 100644 --- a/src/cupbearer/detectors/statistical/__init__.py +++ b/src/cupbearer/detectors/statistical/__init__.py @@ -2,9 +2,3 @@ from .mahalanobis_detector import MahalanobisDetector from .que_detector import QuantumEntropyDetector from .spectral_detector import SpectralSignatureDetector -from .statistical import ( - ActivationCovarianceTrainConfig, - DebugActivationCovarianceTrainConfig, - DebugMahalanobisTrainConfig, - 
MahalanobisTrainConfig, -) diff --git a/src/cupbearer/detectors/statistical/mahalanobis_detector.py b/src/cupbearer/detectors/statistical/mahalanobis_detector.py index b91d4287..9dad4343 100644 --- a/src/cupbearer/detectors/statistical/mahalanobis_detector.py +++ b/src/cupbearer/detectors/statistical/mahalanobis_detector.py @@ -3,22 +3,21 @@ from cupbearer.detectors.statistical.helpers import mahalanobis from cupbearer.detectors.statistical.statistical import ( ActivationCovarianceBasedDetector, - MahalanobisTrainConfig, ) class MahalanobisDetector(ActivationCovarianceBasedDetector): - use_trusted: bool = True - - def post_covariance_training(self, train_config: MahalanobisTrainConfig): + def post_covariance_training( + self, rcond: float = 1e-5, relative: bool = False, **kwargs + ): self.inv_covariances = { - k: torch.linalg.pinv(C, rcond=train_config.rcond, hermitian=True) + k: torch.linalg.pinv(C, rcond=rcond, hermitian=True) for k, C in self.covariances.items() } self.inv_diag_covariances = None - if train_config.relative: + if relative: self.inv_diag_covariances = { - k: torch.where(torch.diag(C) > train_config.rcond, 1 / torch.diag(C), 0) + k: torch.where(torch.diag(C) > rcond, 1 / torch.diag(C), 0) for k, C in self.covariances.items() } diff --git a/src/cupbearer/detectors/statistical/que_detector.py b/src/cupbearer/detectors/statistical/que_detector.py index 7bc8dd15..1207161e 100644 --- a/src/cupbearer/detectors/statistical/que_detector.py +++ b/src/cupbearer/detectors/statistical/que_detector.py @@ -3,14 +3,11 @@ from cupbearer.detectors.statistical.helpers import quantum_entropy from cupbearer.detectors.statistical.statistical import ( ActivationCovarianceBasedDetector, - ActivationCovarianceTrainConfig, ) class QuantumEntropyDetector(ActivationCovarianceBasedDetector): - use_trusted: bool = True - - def post_covariance_training(self, train_config: ActivationCovarianceTrainConfig): + def post_covariance_training(self, rcond: float = 1e-5, **kwargs): 
whitening_matrices = {} for k, cov in self.covariances.items(): # Compute decomposition @@ -18,9 +15,7 @@ def post_covariance_training(self, train_config: ActivationCovarianceTrainConfig # Zero entries corresponding to eigenvalues smaller than rcond vals_rsqrt = eigs.eigenvalues.rsqrt() - vals_rsqrt[ - eigs.eigenvalues < train_config.rcond * eigs.eigenvalues.max() - ] = 0 + vals_rsqrt[eigs.eigenvalues < rcond * eigs.eigenvalues.max()] = 0 # PCA whitening # following https://doi.org/10.1080/00031305.2016.1277159 diff --git a/src/cupbearer/detectors/statistical/spectral_detector.py b/src/cupbearer/detectors/statistical/spectral_detector.py index 7774721d..5a79f630 100644 --- a/src/cupbearer/detectors/statistical/spectral_detector.py +++ b/src/cupbearer/detectors/statistical/spectral_detector.py @@ -2,7 +2,6 @@ from cupbearer.detectors.statistical.statistical import ( ActivationCovarianceBasedDetector, - ActivationCovarianceTrainConfig, ) @@ -15,7 +14,7 @@ class SpectralSignatureDetector(ActivationCovarianceBasedDetector): use_trusted: bool = False - def post_covariance_training(self, train_config: ActivationCovarianceTrainConfig): + def post_covariance_training(self, **kwargs): # Calculate top right singular vectors from covariance matrices self.top_singular_vectors = { k: torch.linalg.eigh(cov).eigenvectors[:, -1] diff --git a/src/cupbearer/detectors/statistical/statistical.py b/src/cupbearer/detectors/statistical/statistical.py index 2e686777..031bbdb6 100644 --- a/src/cupbearer/detectors/statistical/statistical.py +++ b/src/cupbearer/detectors/statistical/statistical.py @@ -1,73 +1,15 @@ from abc import ABC, abstractmethod -from dataclasses import dataclass import torch -from torch.utils.data import DataLoader, Dataset +from torch.utils.data import DataLoader from tqdm import tqdm from cupbearer.detectors.anomaly_detector import ActivationBasedDetector from cupbearer.detectors.statistical.helpers import update_covariance -from cupbearer.utils.utils import 
BaseConfig - - -@dataclass -class StatisticalTrainConfig(BaseConfig, ABC): - max_batches: int = 0 - batch_size: int = 4096 - max_batch_size: int = 4096 - pbar: bool = True - num_workers: int = 0 - # robust: bool = False # TODO spectre uses - # https://www.semanticscholar.org/paper/Being-Robust-(in-High-Dimensions)-Can-Be-Practical-Diakonikolas-Kamath/2a6de51d86f13e9eb7efa85491682dad0ccd65e8?utm_source=direct_link - - def get_dataloader(self, dataset: Dataset, train=True): - if train: - return DataLoader( - dataset, - batch_size=self.batch_size, - shuffle=True, - num_workers=self.num_workers, - persistent_workers=self.num_workers > 0, - ) - else: - return DataLoader( - dataset, - batch_size=self.batch_size, - shuffle=False, - ) - - -@dataclass -class DebugStatisticalTrainConfig(StatisticalTrainConfig): - max_batches: int = 3 - batch_size: int = 5 - max_batch_size: int = 5 - - -@dataclass -class ActivationCovarianceTrainConfig(StatisticalTrainConfig): - rcond: float = 1e-5 - - -@dataclass -class DebugActivationCovarianceTrainConfig( - DebugStatisticalTrainConfig, ActivationCovarianceTrainConfig -): - pass - - -@dataclass -class MahalanobisTrainConfig(ActivationCovarianceTrainConfig): - relative: bool = False - - -@dataclass -class DebugMahalanobisTrainConfig(DebugStatisticalTrainConfig, MahalanobisTrainConfig): - pass class StatisticalDetector(ActivationBasedDetector, ABC): - use_trusted: bool + use_trusted: bool = True @abstractmethod def init_variables(self, activation_sizes: dict[str, torch.Size]): @@ -82,8 +24,10 @@ def train( trusted_data, untrusted_data, *, - num_classes: int, - train_config: StatisticalTrainConfig, + batch_size: int = 1024, + pbar: bool = True, + max_batches: int | None = None, + **kwargs, ): # Common for statistical methods is that the training does not require # gradients, but instead computes summary statistics or similar @@ -101,7 +45,8 @@ def train( ) data = untrusted_data - data_loader = train_config.get_dataloader(data) + # No reason to 
shuffle, we're just computing statistics + data_loader = DataLoader(data, batch_size=batch_size, shuffle=False) example_batch = next(iter(data_loader)) _, example_activations = self.get_activations(example_batch) @@ -109,11 +54,11 @@ def train( activation_sizes = {k: v[0].size() for k, v in example_activations.items()} self.init_variables(activation_sizes) - if train_config.pbar: + if pbar: data_loader = tqdm(data_loader) for i, batch in enumerate(data_loader): - if train_config.max_batches and i >= train_config.max_batches: + if max_batches and i >= max_batches: break _, activations = self.get_activations(batch) self.batch_update(activations) @@ -142,22 +87,12 @@ def batch_update(self, activations: dict[str, torch.Tensor]): ) @abstractmethod - def post_covariance_training(self, train_config: ActivationCovarianceTrainConfig): + def post_covariance_training(self, **kwargs): pass - def train( - self, - trusted_data, - untrusted_data, - *, - num_classes: int, - train_config: ActivationCovarianceTrainConfig, - ): + def train(self, trusted_data, untrusted_data, **kwargs): super().train( - trusted_data=trusted_data, - untrusted_data=untrusted_data, - num_classes=num_classes, - train_config=train_config, + trusted_data=trusted_data, untrusted_data=untrusted_data, **kwargs ) # Post process @@ -167,4 +102,4 @@ def train( if any(torch.count_nonzero(C) == 0 for C in self.covariances.values()): raise RuntimeError("All zero covariance matrix detected.") - self.post_covariance_training(train_config=train_config) + self.post_covariance_training(**kwargs) diff --git a/src/cupbearer/scripts/train_classifier.py b/src/cupbearer/scripts/train_classifier.py index c55a4fc4..39303c35 100644 --- a/src/cupbearer/scripts/train_classifier.py +++ b/src/cupbearer/scripts/train_classifier.py @@ -84,10 +84,7 @@ def main( else: metrics_logger = None - trainer = L.Trainer( - default_root_dir=path, - **trainer_kwargs, - ) + trainer = L.Trainer(default_root_dir=path, **trainer_kwargs) with 
warnings.catch_warnings(): if not val_loaders: diff --git a/src/cupbearer/scripts/train_detector.py b/src/cupbearer/scripts/train_detector.py index d7dfb00d..f9392d74 100644 --- a/src/cupbearer/scripts/train_detector.py +++ b/src/cupbearer/scripts/train_detector.py @@ -1,36 +1,24 @@ from cupbearer.detectors import AnomalyDetector from cupbearer.tasks import Task from cupbearer.utils.scripts import script -from cupbearer.utils.train import TrainConfig -from cupbearer.utils.utils import BaseConfig -from . import EvalDetectorConfig, eval_detector +from . import eval_detector @script def main( task: Task, detector: AnomalyDetector, - num_classes: int, - train: BaseConfig | None = None, - seed: int = 0, + **train_kwargs, ): - if train is None: - train = TrainConfig() detector.set_model(task.model) detector.train( trusted_data=task.trusted_data, untrusted_data=task.untrusted_train_data, - num_classes=num_classes, - train_config=train, + **train_kwargs, ) path = detector.save_path if path: detector.save_weights(path / "detector") - eval_cfg = EvalDetectorConfig( - detector=detector, - task=task, - seed=seed, - ) - eval_detector(eval_cfg) + eval_detector(detector=detector, task=task, pbar=True) diff --git a/src/cupbearer/utils/__init__.py b/src/cupbearer/utils/__init__.py index 5ca825cb..1d326396 100644 --- a/src/cupbearer/utils/__init__.py +++ b/src/cupbearer/utils/__init__.py @@ -1,4 +1,2 @@ # ruff: noqa: F401 -from .optimizers import OptimizerConfig -from .train import DebugTrainConfig, TrainConfig from .utils import inputs_from_batch, load, save diff --git a/src/cupbearer/utils/optimizers.py b/src/cupbearer/utils/optimizers.py deleted file mode 100644 index 3adf4fbf..00000000 --- a/src/cupbearer/utils/optimizers.py +++ /dev/null @@ -1,19 +0,0 @@ -from dataclasses import dataclass - -import torch - -from cupbearer.utils.utils import BaseConfig - - -@dataclass -class OptimizerConfig(BaseConfig): - name: str = "adam" - lr: float = 1e-3 - - def get_optimizer(self, params) 
-> torch.optim.Optimizer: - if self.name == "adam": - return torch.optim.Adam(params, lr=self.lr) - elif self.name == "sgd": - return torch.optim.SGD(params, lr=self.lr) - else: - raise ValueError(f"Unknown optimizer {self.name}") diff --git a/src/cupbearer/utils/train.py b/src/cupbearer/utils/train.py deleted file mode 100644 index a87b9b6d..00000000 --- a/src/cupbearer/utils/train.py +++ /dev/null @@ -1,100 +0,0 @@ -from dataclasses import asdict, dataclass, field -from pathlib import Path -from typing import Optional - -import lightning as L -from lightning.pytorch import callbacks, loggers -from torch.utils.data import DataLoader, Dataset - -from cupbearer.utils.optimizers import OptimizerConfig -from cupbearer.utils.utils import BaseConfig - - -@dataclass(kw_only=True) -class TrainConfig(BaseConfig): - num_epochs: int = 10 - batch_size: int = 128 - max_batch_size: int = 2048 - optimizer: OptimizerConfig = field(default_factory=OptimizerConfig) - num_workers: int = 0 - pin_memory: bool = True - max_steps: int = -1 - check_val_every_n_epoch: int = 1 - pbar: bool = False - log_every_n_steps: Optional[int] = None - wandb: bool = False - devices: int | list[int] | str = "auto" - accelerator: str = "auto" - precision: int | str = 32 - monitor_device_stats: bool = False - profiler: Optional[str] = None - - @property - def callbacks(self): - callback_list = [] - if self.monitor_device_stats: - callback_list.append(callbacks.DeviceStatsMonitor(cpu_stats=True)) - - return callback_list - - def get_dataloader(self, dataset: Dataset, train=True): - if train: - return DataLoader( - dataset, - batch_size=self.batch_size, - shuffle=True, - num_workers=self.num_workers, - persistent_workers=self.num_workers > 0, - pin_memory=self.pin_memory, - ) - else: - return DataLoader( - dataset, - batch_size=self.max_batch_size, - shuffle=False, - ) - - # We deliberately don't make the `path` argument optional, since that makes it - # easy to forget passing it on (and this will likely 
only be used in internal - # code anyway). - def get_trainer(self, path: Path | None, **kwargs): - # Define metrics logger - if self.wandb: - metrics_logger = loggers.WandbLogger(project="abstractions") - metrics_logger.experiment.config.update(asdict(self)) - if path: - metrics_logger = loggers.TensorBoardLogger( - save_dir=path, - name="", - version="", - sub_dir="tensorboard", - ) - else: - metrics_logger = None - - trainer_kwargs = dict( - max_epochs=self.num_epochs, - max_steps=self.max_steps, - callbacks=self.callbacks, - logger=metrics_logger, - default_root_dir=path, - check_val_every_n_epoch=self.check_val_every_n_epoch, - enable_progress_bar=self.pbar, - log_every_n_steps=self.log_every_n_steps, - devices=self.devices, - accelerator=self.accelerator, - precision=self.precision, - profiler=self.profiler, - ) - trainer_kwargs.update(kwargs) # override defaults if given - return L.Trainer(**trainer_kwargs) - - -@dataclass(kw_only=True) -class DebugTrainConfig(TrainConfig): - num_epochs: int = 1 - max_steps: int = 1 - max_batch_size: int = 2 - wandb: bool = False - batch_size: int = 2 - log_every_n_steps: int = 1 diff --git a/src/cupbearer/utils/utils.py b/src/cupbearer/utils/utils.py index aa6013f3..7d519c14 100644 --- a/src/cupbearer/utils/utils.py +++ b/src/cupbearer/utils/utils.py @@ -4,12 +4,10 @@ import functools import importlib import pickle -from dataclasses import dataclass from pathlib import Path from typing import Iterable, TypeVar, Union import torch -from simple_parsing.helpers import serialization SUFFIX = ".pt" TYPE_PREFIX = "__TYPE__:" @@ -139,27 +137,6 @@ def dict_field(): return dataclasses.field(default_factory=dict) -@dataclass(kw_only=True) -class BaseConfig(serialization.serializable.Serializable): - def __post_init__(self): - pass - - def to_dict( - self, - dict_factory: type[dict] = dict, - recurse: bool = True, - save_dc_types: bool = True, - ) -> dict: - # This is the only change we make: default is for save_dc_types to be False. 
- # Instead, we always pass `True`. (We don't want the default elsewhere - # to get passed here and override this.) - # We could pass save_dc_types to `save`, but that doesn't propagate into - # lists of dataclasses. - return serialization.serializable.to_dict( - self, dict_factory, recurse, save_dc_types=True - ) - - def get_object(path: str): """Get an object from a string. From 94c54ed78e0134ec3037b77ec2d8a9afb4587ea3 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 15:10:28 -0800 Subject: [PATCH 11/25] Adjust abstractions --- .../detectors/abstraction/__init__.py | 52 +------------------ .../detectors/abstraction/abstraction.py | 2 + .../abstraction/abstraction_detector.py | 26 +--------- 3 files changed, 5 insertions(+), 75 deletions(-) diff --git a/src/cupbearer/detectors/abstraction/__init__.py b/src/cupbearer/detectors/abstraction/__init__.py index 33519c79..563f8d5f 100644 --- a/src/cupbearer/detectors/abstraction/__init__.py +++ b/src/cupbearer/detectors/abstraction/__init__.py @@ -1,7 +1,4 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass, field - -from cupbearer.models import HookedModel +# ruff: noqa: F401 from .abstraction import ( Abstraction, @@ -9,50 +6,3 @@ LocallyConsistentAbstraction, ) from .abstraction_detector import AbstractionDetector - - -# This is all unnessarily verbose right now, it's a remnant from when we had -# robust optimization for abstractions and I experimented with some variations. -# Leaving it like this for now, but ultimately, the way to go is probably to just -# let users specify a path to a python function that gets called -# to construct the abstraction. (With get_default_abstraction being the default.) 
-@dataclass -class AbstractionConfig(ABC): - size_reduction: int = 4 - - @abstractmethod - def build(self, model: HookedModel) -> Abstraction: - pass - - -class LocallyConsistentAbstractionConfig(AbstractionConfig): - def build(self, model: HookedModel) -> LocallyConsistentAbstraction: - return LocallyConsistentAbstraction.get_default( - model, - self.size_reduction, - ) - - -class AutoencoderAbstractionConfig(AbstractionConfig): - def build(self, model: HookedModel) -> AutoencoderAbstraction: - return AutoencoderAbstraction.get_default( - model, - self.size_reduction, - ) - - -@dataclass -class AbstractionDetectorConfig: - abstraction: AbstractionConfig = field( - default_factory=LocallyConsistentAbstractionConfig - ) - max_batch_size: int = 4096 - - def build(self, model, save_dir) -> AbstractionDetector: - abstraction = self.abstraction.build(model) - return AbstractionDetector( - model=model, - abstraction=abstraction, - max_batch_size=self.max_batch_size, - save_path=save_dir, - ) diff --git a/src/cupbearer/detectors/abstraction/abstraction.py b/src/cupbearer/detectors/abstraction/abstraction.py index 53587d77..766b33b0 100644 --- a/src/cupbearer/detectors/abstraction/abstraction.py +++ b/src/cupbearer/detectors/abstraction/abstraction.py @@ -54,6 +54,8 @@ def visit(node): class Abstraction(nn.Module): + # TODO: I think we should likely get rid of get_default and instead just have some + # informal collection of helper functions for building reasonable abstractions. 
@classmethod @abstractmethod def get_default(cls, model: HookedModel, size_reduction: int) -> Abstraction: diff --git a/src/cupbearer/detectors/abstraction/abstraction_detector.py b/src/cupbearer/detectors/abstraction/abstraction_detector.py index cec80e4f..83098eb8 100644 --- a/src/cupbearer/detectors/abstraction/abstraction_detector.py +++ b/src/cupbearer/detectors/abstraction/abstraction_detector.py @@ -14,7 +14,6 @@ from cupbearer.detectors.anomaly_detector import ( ActivationBasedDetector, ) -from cupbearer.models import HookedModel def per_layer(func: Callable): @@ -60,12 +59,7 @@ def compute_cosine_losses(input: torch.Tensor, target: torch.Tensor) -> torch.Te @per_layer def compute_kl_losses(input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - return F.kl_div( - input, - target, - reduction="none", - log_target=True, - ).sum(dim=1) + return F.kl_div(input, target, reduction="none", log_target=True).sum(dim=1) def compute_losses( @@ -92,10 +86,9 @@ def __init__( self, get_activations: Callable[[torch.Tensor], tuple[Any, dict[str, torch.Tensor]]], abstraction: Abstraction, - lr: float = 1e-3, + lr: float, ): super().__init__() - self.save_hyperparameters(ignore=["get_activations", "abstraction"]) self.get_activations = get_activations self.abstraction = abstraction @@ -124,7 +117,6 @@ class AbstractionDetector(ActivationBasedDetector): def __init__( self, - model: HookedModel, abstraction: Abstraction, max_batch_size: int = 4096, save_path: str | Path | None = None, @@ -132,22 +124,16 @@ def __init__( self.abstraction = abstraction names = list(abstraction.tau_maps.keys()) super().__init__( - model, activation_name_func=lambda _: names, max_batch_size=max_batch_size, save_path=save_path, ) - @property - def should_train_on_clean_data(self) -> bool: - return True - def train( self, trusted_data, untrusted_data, *, - num_classes: int, lr: float = 1e-3, batch_size: int = 64, **trainer_kwargs, @@ -168,14 +154,6 @@ def train( ) # TODO: implement validation 
data - # val_loaders = { - # k: train_config.get_dataloader(v.build, train=False) - # for k, v in self.val_data.items() - # } - # checkpoint_callback = ModelCheckpoint( - # dirpath=self.save_path, - # filename="detector", - # ) self.model.eval() # We don't need gradients for base model parameters: From 4c7e0c2bfd0538149b095a72eda3382cb3077ea7 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 16:59:46 -0800 Subject: [PATCH 12/25] Remove loggers I think we haven't been using these for a while --- src/cupbearer/utils/logger.py | 66 ----------------------------------- 1 file changed, 66 deletions(-) delete mode 100644 src/cupbearer/utils/logger.py diff --git a/src/cupbearer/utils/logger.py b/src/cupbearer/utils/logger.py deleted file mode 100644 index 4e33f271..00000000 --- a/src/cupbearer/utils/logger.py +++ /dev/null @@ -1,66 +0,0 @@ -from abc import ABC -from typing import Any, Dict, Mapping, Optional - - -class Logger(ABC): - """Base class for all metric loggers. - - Subclasses need to override either `_log_scalar` or `log_metrics`. 
- """ - - def _log_scalar(self, name: str, value: Any, step: int, **kwargs): - raise NotImplementedError - - def log_metrics(self, metrics: Mapping[str, Any], step: int): - for name, value in metrics.items(): - self._log_scalar(name, value, step) - - def close(self): - pass - - -class DummyLogger(Logger): - def _log_scalar(self, name: str, value: Any, step: int, **kwargs): - pass - - -class ClearMLLogger(Logger): - def __init__(self, project_name: str, task_name: str): - super().__init__() - # Import here instead of at the top so this isn't a hard dependency - from clearml import Task - - # Don't seed anything here, that should be handled elsewhere - Task.set_random_seed(None) - self.task = Task.init(project_name=project_name, task_name=task_name) - self.logger = self.task.get_logger() - - def _log_scalar(self, name: str, value: Any, step: int, **kwargs): - # ClearML takes a name for a plot and then separately a name - # for the series in that plot. For now, we just make an extra - # plot for every series. 
- return self.logger.report_scalar(name, name, value, step) - - def close(self): - self.task.close() - - -class WandbLogger(Logger): - def __init__( - self, - project_name: str, - task_name: Optional[str] = None, - config: Optional[Dict[str, Any]] = None, - **kwargs, - ): - super().__init__() - import wandb - - wandb.init(project=project_name, name=task_name, config=config, **kwargs) - self.logger = wandb - - def log_metrics(self, metrics: Dict[str, Any], step: int): - return self.logger.log(metrics, step) - - def close(self): - self.logger.finish() From 6809a7e71d1f655a447864e7021d20957b706934 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 17:18:30 -0800 Subject: [PATCH 13/25] Fix bugs and tests Tests all pass now; I removed one or two that aren't applicable anymore (notably checking whether WaNet loads correctly out of the box) --- src/cupbearer/data/backdoors.py | 7 +- src/cupbearer/detectors/__init__.py | 2 +- .../detectors/abstraction/abstraction.py | 2 +- .../detectors/statistical/statistical.py | 4 +- src/cupbearer/scripts/eval_classifier.py | 6 +- src/cupbearer/scripts/train_classifier.py | 47 ++-- src/cupbearer/scripts/train_detector.py | 2 - src/cupbearer/tasks/adversarial_examples.py | 2 +- tests/test_data.py | 251 +++++------------- tests/test_detectors.py | 28 +- tests/test_pipeline.py | 236 ++++++++-------- 11 files changed, 220 insertions(+), 367 deletions(-) diff --git a/src/cupbearer/data/backdoors.py b/src/cupbearer/data/backdoors.py index 6751235b..c4f1298b 100644 --- a/src/cupbearer/data/backdoors.py +++ b/src/cupbearer/data/backdoors.py @@ -110,6 +110,11 @@ def __post_init__(self): self._warping_field = None self._control_grid = None + # Load or generate control grid; important to do this now before we might + # create multiple workers---we wouldn't want to generate different random + # control grids in each one. 
+ self.control_grid + assert 0 <= self.p_noise <= 1, "Probability must be between 0 and 1" assert ( 0 <= self.p_noise + self.p_backdoor <= 1 @@ -120,7 +125,7 @@ def control_grid(self) -> torch.Tensor: if self._control_grid is not None: return self._control_grid - if self.path: + if self.path is None: logger.debug("Generating new control grid for warping field.") control_grid_shape = (2, self.control_grid_width, self.control_grid_width) control_grid = 2 * torch.rand(*control_grid_shape) - 1 diff --git a/src/cupbearer/detectors/__init__.py b/src/cupbearer/detectors/__init__.py index 775b2dbe..0cd36fec 100644 --- a/src/cupbearer/detectors/__init__.py +++ b/src/cupbearer/detectors/__init__.py @@ -1,5 +1,5 @@ # ruff: noqa: F401 -from .abstraction import AbstractionDetectorConfig +from .abstraction import AbstractionDetector from .anomaly_detector import AnomalyDetector from .finetuning import FinetuningAnomalyDetector from .statistical import ( diff --git a/src/cupbearer/detectors/abstraction/abstraction.py b/src/cupbearer/detectors/abstraction/abstraction.py index 766b33b0..bbeb421d 100644 --- a/src/cupbearer/detectors/abstraction/abstraction.py +++ b/src/cupbearer/detectors/abstraction/abstraction.py @@ -195,7 +195,7 @@ def get_mlp_abstraction( return cls(tau_maps, steps) -class AutoencoderAbstraction(nn.Module): +class AutoencoderAbstraction(Abstraction): def __init__( self, tau_maps: dict[str, nn.Module], # encoders diff --git a/src/cupbearer/detectors/statistical/statistical.py b/src/cupbearer/detectors/statistical/statistical.py index 031bbdb6..ee25eb88 100644 --- a/src/cupbearer/detectors/statistical/statistical.py +++ b/src/cupbearer/detectors/statistical/statistical.py @@ -26,7 +26,7 @@ def train( *, batch_size: int = 1024, pbar: bool = True, - max_batches: int | None = None, + max_steps: int | None = None, **kwargs, ): # Common for statistical methods is that the training does not require @@ -58,7 +58,7 @@ def train( data_loader = tqdm(data_loader) for i, 
batch in enumerate(data_loader): - if max_batches and i >= max_batches: + if max_steps and i >= max_steps: break _, activations = self.get_activations(batch) self.batch_update(activations) diff --git a/src/cupbearer/scripts/eval_classifier.py b/src/cupbearer/scripts/eval_classifier.py index ac0a42c3..dd08aacf 100644 --- a/src/cupbearer/scripts/eval_classifier.py +++ b/src/cupbearer/scripts/eval_classifier.py @@ -7,22 +7,20 @@ from cupbearer.models import HookedModel from cupbearer.scripts._shared import Classifier -from cupbearer.utils.scripts import script -@script def main( data: Dataset, model: HookedModel, path: Path | str, max_batches: Optional[int] = None, - max_batch_size: int = 2048, + batch_size: int = 2048, ): path = Path(path) dataloader = DataLoader( data, - batch_size=max_batch_size, + batch_size=batch_size, shuffle=False, ) diff --git a/src/cupbearer/scripts/train_classifier.py b/src/cupbearer/scripts/train_classifier.py index 39303c35..aaed8fc3 100644 --- a/src/cupbearer/scripts/train_classifier.py +++ b/src/cupbearer/scripts/train_classifier.py @@ -9,10 +9,8 @@ from cupbearer.models import HookedModel from cupbearer.scripts._shared import Classifier -from cupbearer.utils.scripts import script -@script def main( model: HookedModel, train_loader: DataLoader, @@ -49,9 +47,8 @@ def main( # TODO: once we do longer training runs we'll want to have multiple # checkpoints, potentially based on validation loss if ( - path # If the user already provided a custom checkpoint config, we'll use that: - and not any(isinstance(c, ModelCheckpoint) for c in callbacks) + not any(isinstance(c, ModelCheckpoint) for c in callbacks) # If the user explicitly disabled checkpointing, we don't want to override that: and trainer_kwargs.get("enable_checkpointing", True) ): @@ -62,27 +59,31 @@ def main( ) ) + trainer_kwargs["callbacks"] = callbacks + # Define metrics logger # TODO: make adjustable and set config correctly - if wandb: - metrics_logger = 
loggers.WandbLogger(project="cupbearer") - metrics_logger.experiment.config.update(trainer_kwargs) - metrics_logger.experiment.config.update( - { - "model": repr(model), - "train_data": repr(train_loader.dataset), - "batch_size": train_loader.batch_size, - } - ) - if path: - metrics_logger = loggers.TensorBoardLogger( - save_dir=path, - name="", - version="", - sub_dir="tensorboard", - ) - else: - metrics_logger = None + if "logger" not in trainer_kwargs: + if wandb: + metrics_logger = loggers.WandbLogger(project="cupbearer") + metrics_logger.experiment.config.update(trainer_kwargs) + metrics_logger.experiment.config.update( + { + "model": repr(model), + "train_data": repr(train_loader.dataset), + "batch_size": train_loader.batch_size, + } + ) + elif path: + metrics_logger = loggers.TensorBoardLogger( + save_dir=path, + name="", + version="", + sub_dir="tensorboard", + ) + else: + metrics_logger = None + trainer_kwargs["logger"] = metrics_logger trainer = L.Trainer(default_root_dir=path, **trainer_kwargs) diff --git a/src/cupbearer/scripts/train_detector.py b/src/cupbearer/scripts/train_detector.py index f9392d74..350d12ff 100644 --- a/src/cupbearer/scripts/train_detector.py +++ b/src/cupbearer/scripts/train_detector.py @@ -1,11 +1,9 @@ from cupbearer.detectors import AnomalyDetector from cupbearer.tasks import Task -from cupbearer.utils.scripts import script from . 
import eval_detector -@script def main( task: Task, detector: AnomalyDetector, diff --git a/src/cupbearer/tasks/adversarial_examples.py b/src/cupbearer/tasks/adversarial_examples.py index ee593558..496deab8 100644 --- a/src/cupbearer/tasks/adversarial_examples.py +++ b/src/cupbearer/tasks/adversarial_examples.py @@ -25,7 +25,7 @@ def adversarial_examples( anomaly_func=lambda dataset, train: make_adversarial_examples( model, dataset, - cache_path / f"advexes_{'train' if train else 'test'}", + cache_path / f"adversarial_examples_{'train' if train else 'test'}", **kwargs, ), trusted_fraction=trusted_fraction, diff --git a/tests/test_data.py b/tests/test_data.py index 66a31b5f..b323b89d 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -1,3 +1,4 @@ +import functools from dataclasses import dataclass import numpy as np @@ -22,17 +23,6 @@ def __getitem__(self, index): return self.value -@dataclass -class DummyConfig(data.DatasetConfig): - length: int - value: str - # Doesn't apply or matter - num_classes: int = 0 - - def _build(self) -> Dataset: - return DummyDataset(self.length, self.value) - - class DummyImageData(Dataset): def __init__(self, length: int, num_classes: int, shape: tuple[int, int]): self.length = length @@ -45,7 +35,7 @@ def __init__(self, length: int, num_classes: int, shape: tuple[int, int]): dtype=torch.float32, # Move channel dimension to front ).permute(2, 0, 1) - # Need any seed so that labels are (somewhat) consitent over instances + # Need any seed so that labels are (somewhat) consistent over instances self._rng = np.random.default_rng(seed=5965) def __len__(self): @@ -57,16 +47,6 @@ def __getitem__(self, index) -> tuple[torch.Tensor, int]: return self.img, self._rng.integers(self.num_classes) -@dataclass -class DummyImageConfig(data.DatasetConfig): - length: int - num_classes: int = 10 - shape: tuple[int, int] = (8, 12) - - def _build(self) -> Dataset: - return DummyImageData(self.length, self.num_classes, self.shape) - - 
######################### # Tests for TestDataMix ######################### @@ -87,21 +67,6 @@ def mixed_dataset(clean_dataset, anomalous_dataset): return data.MixedData(clean_dataset, anomalous_dataset) -@pytest.fixture -def clean_config(): - return DummyConfig(9, "a") - - -@pytest.fixture -def anomalous_config(): - return DummyConfig(7, "b") - - -@pytest.fixture -def mixed_config(clean_config, anomalous_config): - return data.MixedDataConfig(clean_config, anomalous_config) - - def test_len(mixed_dataset): assert len(mixed_dataset) == 14 assert mixed_dataset.normal_len == mixed_dataset.anomalous_len == 7 @@ -127,96 +92,50 @@ def test_uneven_weight(clean_dataset, anomalous_dataset): assert mixed_data[i] == ("b", 1) -def test_simple_mixed_build(mixed_config): - mixed_data = mixed_config.build() - assert len(mixed_data) == 14 - assert mixed_data.normal_len == mixed_data.anomalous_len == 7 - for i in range(7): - assert mixed_data[i] == ("a", 0) - for i in range(7, 14): - assert mixed_data[i] == ("b", 1) - - -def test_mixed_max_size(clean_config, anomalous_config): - # Just some random big enough numbers: - clean_config.length = 105 - anomalous_config.length = 97 - # These max sizes shouldn't affect anything, but why not throw them into the mix. - clean_config.max_size = 51 - anomalous_config.max_size = 23 - # The actual mixed dataset we build now is the same as before: 10 datapoints, - # 3 normal and 7 anomalous. 
- mixed_config = data.MixedDataConfig(clean_config, anomalous_config) - mixed_config.max_size = 10 - mixed_config.normal_weight = 0.3 - mixed_data = mixed_config.build() - - assert len(mixed_data) == 10 - assert mixed_data.normal_len == 3 - assert mixed_data.anomalous_len == 7 - for i in range(3): - assert mixed_data[i] == ("a", 0) - for i in range(3, 10): - assert mixed_data[i] == ("b", 1) - - ####################### # Tests for Backdoors ####################### @pytest.fixture -def clean_image_config(): - return DummyImageConfig(9) +def clean_image_dataset(): + return DummyImageData(9, 10, (8, 12)) @pytest.fixture( params=[ data.backdoors.CornerPixelBackdoor, data.backdoors.NoiseBackdoor, - data.backdoors.WanetBackdoor, + functools.partial(data.backdoors.WanetBackdoor, path=None), ] ) -def BackdoorConfig(request): +def backdoor_type(request): return request.param -def test_backdoor_relabeling(clean_image_config, BackdoorConfig): - clean_image_config.num_classes = 2**63 - 1 +def test_backdoor_relabeling(clean_image_dataset, backdoor_type): target_class = 1 - data_config = data.BackdoorData( - original=clean_image_config, - backdoor=BackdoorConfig( - p_backdoor=1.0, - target_class=target_class, - ), + dataset = data.BackdoorDataset( + original=clean_image_dataset, + backdoor=backdoor_type(p_backdoor=1.0, target_class=target_class), ) - for img, label in data_config.build(): + for img, label in dataset: assert label == target_class -def test_backdoor_img_changes(clean_image_config, BackdoorConfig): - clean_config = data.BackdoorData( - original=clean_image_config, - backdoor=BackdoorConfig( - p_backdoor=0.0, - ), +def test_backdoor_img_changes(clean_image_dataset, backdoor_type): + clean_data = data.BackdoorDataset( + original=clean_image_dataset, backdoor=backdoor_type(p_backdoor=0.0) ) - anomalous_config = data.BackdoorData( - original=clean_image_config, - backdoor=BackdoorConfig( - p_backdoor=1.0, - ), + anomalous_data = data.BackdoorDataset( + 
original=clean_image_dataset, backdoor=backdoor_type(p_backdoor=1.0) ) - for clean_sample, (anomalous_img, _) in zip( - clean_config.build(), - anomalous_config.build(), - ): + for clean_sample, (anomalous_img, _) in zip(clean_data, anomalous_data): clean_img, _ = clean_sample # Check that something has changed - assert clean_img is not anomalous_config.backdoor(clean_sample)[0] - assert torch.any(clean_img != anomalous_config.backdoor(clean_sample)[0]) + assert clean_img is not anomalous_data.backdoor(clean_sample)[0] + assert torch.any(clean_img != anomalous_data.backdoor(clean_sample)[0]) assert torch.any(clean_img != anomalous_img) # Check that pixel values still in valid range @@ -231,26 +150,30 @@ def test_backdoor_img_changes(clean_image_config, BackdoorConfig): ) -def test_wanet_backdoor(clean_image_config): - clean_image_config.num_classes = 2**63 - 1 - target_class = 1 - clean_config = data.BackdoorData( - original=clean_image_config, +def test_wanet_backdoor(clean_image_dataset): + # Pick a target class outside the actual range so we can later tell whether it + # was set correctly. 
+ target_class = 10_000 + clean_data = data.BackdoorDataset( + original=clean_image_dataset, backdoor=data.backdoors.WanetBackdoor( + path=None, p_backdoor=0.0, target_class=target_class, ), ) - anomalous_config = data.BackdoorData( - original=clean_image_config, + anomalous_data = data.BackdoorDataset( + original=clean_image_dataset, backdoor=data.backdoors.WanetBackdoor( + path=None, p_backdoor=1.0, target_class=target_class, ), ) - noise_config = data.BackdoorData( - original=clean_image_config, + noise_data = data.BackdoorDataset( + original=clean_image_dataset, backdoor=data.backdoors.WanetBackdoor( + path=None, p_backdoor=0.0, p_noise=1.0, target_class=target_class, @@ -260,12 +183,9 @@ def test_wanet_backdoor(clean_image_config): (clean_img, clean_label), (anoma_img, anoma_label), (noise_img, noise_label), - ) in zip( - clean_config.build(), - anomalous_config.build(), - noise_config.build(), - ): - # Check labels + ) in zip(clean_data, anomalous_data, noise_data): + # Check labels. Our target class is outside the valid range, + # so no chance it got randomly chosen. 
assert clean_label != target_class assert anoma_label == target_class assert noise_label != target_class @@ -285,27 +205,17 @@ def test_wanet_backdoor(clean_image_config): def test_wanet_backdoor_on_multiple_workers( - clean_image_config, + clean_image_dataset, ): - clean_image_config.num_classes = 1 - target_class = 1 - anomalous_config = data.BackdoorData( - original=clean_image_config, - backdoor=data.backdoors.WanetBackdoor( - p_backdoor=1.0, - p_noise=0.0, - target_class=target_class, - ), - ) - data_loader = DataLoader( - dataset=anomalous_config.build(), - num_workers=2, - batch_size=1, + anomalous_data = data.BackdoorDataset( + original=clean_image_dataset, + backdoor=data.backdoors.WanetBackdoor(path=None, p_backdoor=1.0, p_noise=0.0), ) + data_loader = DataLoader(dataset=anomalous_data, num_workers=2, batch_size=1) imgs = [img for img_batch, label_batch in data_loader for img in img_batch] assert all(torch.allclose(imgs[0], img) for img in imgs) - clean_image = clean_image_config.build().dataset.img + clean_image = clean_image_dataset.img assert not any(torch.allclose(clean_image, img) for img in imgs) @@ -325,19 +235,16 @@ def augmentation(request): return request.param -def test_augmentation(clean_image_config, augmentation): +def test_augmentation(clean_image_dataset, augmentation): # See that augmentation does something unless dud - for img, label in clean_image_config.build(): + for img, label in clean_image_dataset: aug_img, aug_label = augmentation((img, label)) assert label == aug_label assert not torch.allclose(aug_img, img) # Try with multiple workers and batches data_loader = DataLoader( - dataset=clean_image_config.build(), - num_workers=2, - batch_size=3, - drop_last=False, + dataset=clean_image_dataset, num_workers=2, batch_size=3, drop_last=False ) for img, label in data_loader: aug_img, aug_label = augmentation((img, label)) @@ -352,50 +259,48 @@ def test_augmentation(clean_image_config, augmentation): assert torch.all(aug_img == img) -def 
test_random_crop(clean_image_config): +def test_random_crop(clean_image_dataset): fill_val = 2.75 augmentation = data.RandomCrop( padding=100, # huge padding so that chance of no change is small fill=fill_val, ) - for img, label in clean_image_config.build(): + for img, label in clean_image_dataset: aug_img, aug_label = augmentation((img, label)) assert torch.any(aug_img == fill_val) @dataclass -class DummyPytorchImageConfig(data.PytorchConfig): +class DummyPytorchDataset(data.PytorchDataset): name: str = "dummy" length: int = 32 num_classes: int = 10 shape: tuple[int, int] = (8, 12) + default_augmentations: bool = True - def get_transforms(self): - transforms = super().get_transforms() - assert isinstance(transforms[0], data.transforms.ToTensor) - return transforms[1:] + def __post_init__(self): + # Because our data are already tensors, we need to disable the default ToTensor + assert len(self.transforms) == 1 + assert isinstance(self.transforms[0], data.transforms.ToTensor) + self.transforms = [] + # Now call super to add the augmentations + super().__post_init__() def _build(self) -> Dataset: return DummyImageData(self.length, self.num_classes, self.shape) -@pytest.fixture -def pytorch_data_config(): - return DummyPytorchImageConfig() - - -def test_pytorch_dataset_transforms(pytorch_data_config, BackdoorConfig): - for (_img, _label), (img, label) in zip( - pytorch_data_config._build(), pytorch_data_config.build() - ): +def test_pytorch_dataset_transforms(): + pytorch_dataset = DummyPytorchDataset() + for (_img, _label), (img, label) in zip(pytorch_dataset._build(), pytorch_dataset): assert _label == label assert _img.size() == img.size() assert _img is not img, "Transforms does not seem to have been applied" - transforms = pytorch_data_config.get_transforms() + transforms = pytorch_dataset.transforms transform_typereps = [repr(type(t)) for t in transforms] augmentation_used = False - for trafo in pytorch_data_config.get_transforms(): + for trafo in transforms: # 
Check that transform is unique in list assert transforms.count(trafo) == 1 assert transform_typereps.count(repr(type(trafo))) == 1 @@ -409,38 +314,8 @@ def test_pytorch_dataset_transforms(pytorch_data_config, BackdoorConfig): assert not augmentation_used, "Transform applied after augmentation" assert augmentation_used - # Test for BackdoorData - data_config = data.BackdoorData( - original=pytorch_data_config, - backdoor=BackdoorConfig(), - ) - transforms = data_config.get_transforms() - transform_typereps = [repr(type(t)) for t in transforms] - augmentation_used = False - backdoor_used = False - for trafo in data_config.get_transforms(): - # Check that transform is unique in list - assert transforms.count(trafo) == 1 - assert transform_typereps.count(repr(type(trafo))) == 1 - - # Check transform types - assert not backdoor_used, "Multiple backdoors in transforms" - assert isinstance(trafo, data.transforms.Transform) - if isinstance(trafo, data.transforms.ProbabilisticTransform): - augmentation_used = True - elif isinstance(trafo, data.backdoors.Backdoor): - backdoor_used = True - else: - assert not augmentation_used, "Transform applied after augmentation" - assert augmentation_used - assert backdoor_used - -def test_no_augmentations(BackdoorConfig): - pytorch_data_config = DummyPytorchImageConfig(default_augmentations=False) - data_config = data.BackdoorData( - original=pytorch_data_config, - backdoor=BackdoorConfig(), - ) - for trafo in data_config.get_transforms(): +def test_no_augmentations(): + dataset = DummyPytorchDataset(default_augmentations=False) + for trafo in dataset.transforms: assert not isinstance(trafo, data.transforms.ProbabilisticTransform) diff --git a/tests/test_detectors.py b/tests/test_detectors.py index 5d53a2b4..caaddc9a 100644 --- a/tests/test_detectors.py +++ b/tests/test_detectors.py @@ -4,7 +4,6 @@ import torch from cupbearer.detectors.statistical import ( MahalanobisDetector, - MahalanobisTrainConfig, QuantumEntropyDetector, 
SpectralSignatureDetector, ) @@ -39,23 +38,22 @@ ], ) class TestTrainedStatisticalDetectors: - # Currently MahalanobisTrainConfig works for all statistical detectors - train_config = MahalanobisTrainConfig( - batch_size=16, - rcond=1e-5, - ) + rcond: float = 1e-5 def train_detector(self, dataset, Model, Detector, **kwargs): example_input, _ = next(iter(dataset)) + detector = Detector() model = Model(input_shape=example_input.shape, output_dim=7) - detector = Detector(model=model) + detector.set_model(model) detector.train( # Just make sure all detectors get the data they need: trusted_data=dataset, untrusted_data=dataset, num_classes=7, - train_config=self.train_config, + batch_size=16, + rcond=self.rcond, + max_steps=1, ) return detector @@ -87,10 +85,8 @@ def test_inverse_covariance_matrices(self, dataset, Model): assert inv_cov.size() == cov.size() # Check that inverse is (pseudo) inverse - rank = torch.linalg.matrix_rank(cov, rtol=self.train_config.rcond) - assert ( - torch.linalg.matrix_rank(inv_cov, rtol=self.train_config.rcond) == rank - ) + rank = torch.linalg.matrix_rank(cov, rtol=self.rcond) + assert torch.linalg.matrix_rank(inv_cov, rtol=self.rcond) == rank # TODO I'm uncertain which tolerances to use here, this is a # guesstimate based on some of the computations that are done and @@ -111,12 +107,10 @@ def test_whitening_matrices(self, dataset, Model): assert W.size() == cov.size() # Check that Whitening matrix computes (pseudo) inverse - rank = torch.linalg.matrix_rank(cov, rtol=self.train_config.rcond) - assert torch.linalg.matrix_rank(W, rtol=self.train_config.rcond) == rank + rank = torch.linalg.matrix_rank(cov, rtol=self.rcond) + assert torch.linalg.matrix_rank(W, rtol=self.rcond) == rank inv_cov = W @ W.mT - assert ( - torch.linalg.matrix_rank(inv_cov, rtol=self.train_config.rcond) == rank - ) + assert torch.linalg.matrix_rank(inv_cov, rtol=self.rcond) == rank # TODO I'm uncertain which tolerances to use here, this is a # guesstimate based on some 
of the computations that are done and diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 4fdad0ea..c9033417 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,17 +1,7 @@ import pytest import torch from cupbearer import data, detectors, models, tasks -from cupbearer.scripts import ( - eval_classifier, - train_classifier, - train_detector, -) -from cupbearer.scripts.conf import ( - eval_classifier_conf, - train_classifier_conf, - train_detector_conf, -) -from cupbearer.utils.train import DebugTrainConfig +from cupbearer.scripts import eval_classifier, train_classifier, train_detector # Ignore warnings about num_workers pytestmark = pytest.mark.filterwarnings( @@ -24,87 +14,131 @@ @pytest.fixture(scope="module") -def backdoor_classifier_path(module_tmp_path): +def model(): + return models.MLP(input_shape=(1, 28, 28), hidden_dims=[5, 5], output_dim=10) + + +@pytest.fixture(scope="module") +def mnist(): + # 10 samples will be plenty for all our tests + return torch.utils.data.Subset(data.MNIST(train=False), range(10)) + + +@pytest.fixture +def backdoor_task(model, mnist): + return tasks.backdoor_detection( + model=model, + train_data=mnist, + test_data=mnist, + backdoor=data.CornerPixelBackdoor(), + # For detectors that need untrusted data + trusted_fraction=0.5, + ) + + +@pytest.fixture(scope="module") +def backdoor_classifier_path(model, mnist, module_tmp_path): """Trains a backdoored classifier and returns the path to the run directory.""" - cfg = train_classifier_conf.DebugConfig( - train_data=data.BackdoorData( - original=data.MNIST(), backdoor=data.CornerPixelBackdoor() - ), - model=models.DebugMLPConfig(), + dataset = data.BackdoorDataset(original=mnist, backdoor=data.CornerPixelBackdoor()) + train_loader = torch.utils.data.DataLoader(dataset, batch_size=2) + train_classifier( + train_loader=train_loader, + model=model, + num_classes=10, path=module_tmp_path, + max_steps=1, + logger=False, ) - train_classifier(cfg) - assert 
(module_tmp_path / "config.yaml").is_file() assert (module_tmp_path / "checkpoints" / "last.ckpt").is_file() - assert (module_tmp_path / "tensorboard").is_dir() return module_tmp_path @pytest.mark.slow -def test_eval_classifier(backdoor_classifier_path): - cfg = eval_classifier_conf.DebugConfig( - path=backdoor_classifier_path, data=data.MNIST(train=False) +def test_eval_classifier(model, mnist, backdoor_classifier_path): + # Test model loading once here; other tests will just use whatever state the model + # happens to have at that point instead of constantly loading the trained version. + models.load(model, backdoor_classifier_path) + + eval_classifier( + data=mnist, + model=model, + path=backdoor_classifier_path, + max_batches=1, + batch_size=2, ) - eval_classifier(cfg) - assert (backdoor_classifier_path / "eval.json").is_file() @pytest.mark.slow -def test_train_abstraction_corner_backdoor(backdoor_classifier_path, tmp_path): - cfg = train_detector_conf.Config( - task=tasks.BackdoorDetection(path=backdoor_classifier_path), - detector=detectors.AbstractionDetectorConfig(train=DebugTrainConfig()), - path=tmp_path, +def test_train_abstraction_corner_backdoor(model, backdoor_task, tmp_path): + train_detector( + task=backdoor_task, + detector=detectors.AbstractionDetector( + abstraction=detectors.abstraction.LocallyConsistentAbstraction.get_default( + model, size_reduction=2 + ), + max_batch_size=2, + save_path=tmp_path, + ), + batch_size=2, + max_steps=1, ) - train_detector(cfg) - assert (tmp_path / "config.yaml").is_file() assert (tmp_path / "detector.pt").is_file() assert (tmp_path / "histogram.pdf").is_file() assert (tmp_path / "eval.json").is_file() - assert (tmp_path / "tensorboard").is_dir() - @pytest.mark.slow -def test_train_autoencoder_corner_backdoor(backdoor_classifier_path, tmp_path): - cfg = train_detector_conf.Config( - task=tasks.BackdoorDetection(path=backdoor_classifier_path), - detector=detectors.AbstractionDetectorConfig( - 
train=DebugTrainConfig(), - abstraction=detectors.abstraction.AutoencoderAbstractionConfig(), +def test_train_autoencoder_corner_backdoor(model, backdoor_task, tmp_path): + train_detector( + task=backdoor_task, + detector=detectors.AbstractionDetector( + abstraction=detectors.abstraction.AutoencoderAbstraction.get_default( + model, size_reduction=2 + ), + max_batch_size=2, + save_path=tmp_path, ), - path=tmp_path, + batch_size=2, + max_steps=1, ) - train_detector(cfg) - assert (tmp_path / "config.yaml").is_file() assert (tmp_path / "detector.pt").is_file() assert (tmp_path / "histogram.pdf").is_file() assert (tmp_path / "eval.json").is_file() - assert (tmp_path / "tensorboard").is_dir() - @pytest.mark.slow -def test_train_mahalanobis_advex(backdoor_classifier_path, tmp_path): - # This test doesn't need a backdoored classifier, but we already have one - # and it doesn't hurt, so reusing it makes execution faster. - cfg = train_detector_conf.Config( - task=tasks.adversarial_examples.DebugAdversarialExampleTask( - path=backdoor_classifier_path +def test_train_mahalanobis_advex(model, mnist, tmp_path): + train_detector( + task=tasks.adversarial_examples( + model, + train_data=mnist, + test_data=mnist, + cache_path=tmp_path, + batch_size=2, + max_examples=2, + # Success threshold=1.0 means it's fine even if the classifier gets 100% + # accuracy after the attack---we don't want to error out because of this. 
+ success_threshold=1.0, + steps=1, ), - detector=detectors.DebugMahalanobisConfig(), - path=tmp_path, + detector=detectors.MahalanobisDetector( + max_batch_size=2, + save_path=tmp_path, + ), + batch_size=2, + max_steps=1, ) - train_detector(cfg) - assert (backdoor_classifier_path / "adv_examples_train.pt").is_file() - assert (backdoor_classifier_path / "adv_examples.pdf").is_file() - assert (tmp_path / "config.yaml").is_file() + # Note: we don't expect train samples to exist since we have no untrusted train data + assert not (tmp_path / "adversarial_examples_train.pt").is_file() + assert not (tmp_path / "adversarial_examples_train.pdf").is_file() + assert (tmp_path / "adversarial_examples_test.pt").is_file() + assert (tmp_path / "adversarial_examples_test.pdf").is_file() assert (tmp_path / "detector.pt").is_file() # Eval outputs: assert (tmp_path / "histogram.pdf").is_file() @@ -115,25 +149,19 @@ def test_train_mahalanobis_advex(backdoor_classifier_path, tmp_path): @pytest.mark.parametrize( "detector_type", [ - detectors.DebugMahalanobisConfig, - detectors.DebugSpectralSignatureConfig, - detectors.DebugQuantumEntropyConfig, + detectors.MahalanobisDetector, + detectors.SpectralSignatureDetector, + detectors.QuantumEntropyDetector, ], ) -def test_train_statistical_backdoor(backdoor_classifier_path, tmp_path, detector_type): - cfg = train_detector_conf.Config( - task=tasks.backdoor_detection.DebugBackdoorDetection( - # Need some untrusted data for SpectralSignatureConfig - path=backdoor_classifier_path, - trusted_fraction=0.5, - ), - detector=detector_type(), - path=tmp_path, +def test_train_statistical_backdoor(tmp_path, backdoor_task, detector_type): + train_detector( + task=backdoor_task, + detector=detector_type(max_batch_size=2, save_path=tmp_path), + batch_size=2, + max_steps=1, ) - train_detector(cfg) - - assert (tmp_path / "config.yaml").is_file() assert (tmp_path / "detector.pt").is_file() # Eval outputs: assert (tmp_path / "histogram.pdf").is_file() @@ 
-141,63 +169,17 @@ def test_train_statistical_backdoor(backdoor_classifier_path, tmp_path, detector @pytest.mark.slow -def test_finetuning_detector(backdoor_classifier_path, tmp_path): - cfg = train_detector_conf.Config( - task=tasks.BackdoorDetection(path=backdoor_classifier_path), - detector=detectors.finetuning.FinetuningConfig(train=DebugTrainConfig()), - path=tmp_path, +def test_finetuning_detector(backdoor_task, tmp_path): + train_detector( + task=backdoor_task, + detector=detectors.FinetuningAnomalyDetector( + max_batch_size=2, save_path=tmp_path + ), + num_classes=10, + batch_size=2, + max_steps=1, ) - train_detector(cfg) - assert (tmp_path / "config.yaml").is_file() assert (tmp_path / "detector.pt").is_file() assert (tmp_path / "histogram.pdf").is_file() assert (tmp_path / "eval.json").is_file() - - assert (tmp_path / "tensorboard").is_dir() - - -@pytest.mark.slow -def test_wanet(tmp_path): - cfg = train_classifier_conf.DebugConfig( - train_data=data.BackdoorData( - original=data.GTSRB(), backdoor=data.WanetBackdoor() - ), - model=models.DebugMLPConfig(), - path=tmp_path / "wanet", - val_data={ - "backdoor": data.BackdoorData( - original=data.GTSRB(), backdoor=data.WanetBackdoor() - ) - }, - train_config=DebugTrainConfig(num_workers=1), - ) - train_classifier(cfg) - - assert (tmp_path / "wanet" / "config.yaml").is_file() - assert (tmp_path / "wanet" / "checkpoints" / "last.ckpt").is_file() - assert (tmp_path / "wanet" / "tensorboard").is_dir() - - # Checks mostly to make the type checker happy for the allclose assert - assert isinstance(cfg.val_data["backdoor"], data.BackdoorData) - assert isinstance(cfg.val_data["backdoor"].backdoor, data.WanetBackdoor) - assert isinstance(cfg.train_data, data.BackdoorData) - assert isinstance(cfg.train_data.backdoor, data.WanetBackdoor) - assert torch.allclose( - cfg.val_data["backdoor"].backdoor.control_grid, - cfg.train_data.backdoor.control_grid, - ) - - # Check that from_run can load WanetBackdoor properly - 
train_detector_cfg = train_detector_conf.Config( - task=tasks.backdoor_detection.DebugBackdoorDetection(path=tmp_path / "wanet"), - detector=detectors.DebugMahalanobisConfig(), - path=tmp_path / "wanet-mahalanobis", - ) - train_detector(train_detector_cfg) - assert isinstance(train_detector_cfg.task, tasks.BackdoorDetection) - assert isinstance(train_detector_cfg.task._backdoor, data.WanetBackdoor) - assert torch.allclose( - train_detector_cfg.task._backdoor.control_grid, - cfg.train_data.backdoor.control_grid, - ) From 6f0e4724d305a896ddf20d7dcc7c29b7756b0cc0 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 17:41:05 -0800 Subject: [PATCH 14/25] Move save_path and max_batch_size arguments I think it doesn't make much sense intuitively to have them be arguments to the detector --- .../abstraction/abstraction_detector.py | 16 +++------ src/cupbearer/detectors/anomaly_detector.py | 35 +++++++++---------- src/cupbearer/detectors/finetuning.py | 7 ++-- src/cupbearer/scripts/eval_detector.py | 6 ++++ src/cupbearer/scripts/train_detector.py | 19 +++++++--- tests/test_pipeline.py | 27 +++++++------- 6 files changed, 59 insertions(+), 51 deletions(-) diff --git a/src/cupbearer/detectors/abstraction/abstraction_detector.py b/src/cupbearer/detectors/abstraction/abstraction_detector.py index 83098eb8..b89a5a4d 100644 --- a/src/cupbearer/detectors/abstraction/abstraction_detector.py +++ b/src/cupbearer/detectors/abstraction/abstraction_detector.py @@ -115,24 +115,16 @@ def configure_optimizers(self): class AbstractionDetector(ActivationBasedDetector): """Anomaly detector based on an abstraction.""" - def __init__( - self, - abstraction: Abstraction, - max_batch_size: int = 4096, - save_path: str | Path | None = None, - ): + def __init__(self, abstraction: Abstraction): self.abstraction = abstraction names = list(abstraction.tau_maps.keys()) - super().__init__( - activation_name_func=lambda _: names, - max_batch_size=max_batch_size, - save_path=save_path, - ) + 
super().__init__(activation_name_func=lambda _: names) def train( self, trusted_data, untrusted_data, + save_path: Path | str, *, lr: float = 1e-3, batch_size: int = 64, @@ -166,7 +158,7 @@ def train( # (which seems tricky to do manually). module.model = self.model - trainer = L.Trainer(default_root_dir=self.save_path, **trainer_kwargs) + trainer = L.Trainer(default_root_dir=save_path, **trainer_kwargs) trainer.fit( model=module, train_dataloaders=train_loader, diff --git a/src/cupbearer/detectors/anomaly_detector.py b/src/cupbearer/detectors/anomaly_detector.py index c97db6f1..f1e917c1 100644 --- a/src/cupbearer/detectors/anomaly_detector.py +++ b/src/cupbearer/detectors/anomaly_detector.py @@ -3,7 +3,7 @@ from collections.abc import Collection from contextlib import contextmanager from pathlib import Path -from typing import Callable, Optional +from typing import Callable import numpy as np import sklearn.metrics @@ -19,16 +19,9 @@ class AnomalyDetector(ABC): - def __init__( - self, - max_batch_size: int = 4096, - save_path: Optional[Path | str] = None, - ): + def __init__(self): # For storing the original detector variables when finetuning self._original_variables = None - self.max_batch_size = max_batch_size - self.save_path = None if save_path is None else Path(save_path) - self.trained = False def set_model(self, model: HookedModel): @@ -43,7 +36,11 @@ def set_model(self, model: HookedModel): @abstractmethod def train( - self, trusted_data: Dataset | None, untrusted_data: Dataset | None, **kwargs + self, + trusted_data: Dataset | None, + untrusted_data: Dataset | None, + save_path: Path | str | None, + **kwargs, ): """Train the anomaly detector with the given datasets on the given model. @@ -100,7 +97,9 @@ def eval( # to untrusted data then). 
train_dataset: Dataset, test_dataset: MixedData, + batch_size: int = 1024, histogram_percentile: float = 95, + save_path: Path | str | None = None, num_bins: int = 100, pbar: bool = False, ): @@ -110,7 +109,7 @@ def eval( test_loader = DataLoader( test_dataset, - batch_size=self.max_batch_size, + batch_size=batch_size, # For some methods, such as adversarial abstractions, it might matter how # normal/anomalous data is distributed into batches. In that case, we want # to mix them by default. @@ -153,14 +152,16 @@ def eval( bins = np.linspace(lower_lim, upper_lim, num_bins) - if not self.save_path: + if not save_path: return - self.save_path.mkdir(parents=True, exist_ok=True) + save_path = Path(save_path) + + save_path.mkdir(parents=True, exist_ok=True) # Everything from here is just saving metrics and creating figures # (which we skip if they aren't going to be saved anyway). - with open(self.save_path / "eval.json", "w") as f: + with open(save_path / "eval.json", "w") as f: json.dump(metrics, f) # Visualizations for anomaly scores @@ -176,7 +177,7 @@ def eval( plt.xlabel("Anomaly score") plt.ylabel("Frequency") plt.title("Anomaly score distribution") - plt.savefig(self.save_path / "histogram.pdf") + plt.savefig(save_path / "histogram.pdf") @abstractmethod def layerwise_scores(self, batch) -> dict[str, torch.Tensor]: @@ -240,10 +241,8 @@ def __init__( activation_name_func: str | Callable[[HookedModel], Collection[str]] | None = None, - max_batch_size: int = 4096, - save_path: Path | str | None = None, ): - super().__init__(max_batch_size=max_batch_size, save_path=save_path) + super().__init__() if activation_name_func is None: activation_name_func = default_activation_name_func diff --git a/src/cupbearer/detectors/finetuning.py b/src/cupbearer/detectors/finetuning.py index 50b224dd..8dfe14c0 100644 --- a/src/cupbearer/detectors/finetuning.py +++ b/src/cupbearer/detectors/finetuning.py @@ -1,5 +1,6 @@ import copy import warnings +from pathlib import Path import 
lightning as L import torch @@ -12,9 +13,6 @@ class FinetuningAnomalyDetector(AnomalyDetector): - def __init__(self, max_batch_size, save_path): - super().__init__(max_batch_size, save_path) - def set_model(self, model): super().set_model(model) # We might as well make a copy here already, since whether we'll train this @@ -25,6 +23,7 @@ def train( self, trusted_data, untrusted_data, + save_path: Path | str, *, num_classes: int, lr: float = 1e-3, @@ -44,7 +43,7 @@ def train( clean_loader = DataLoader(trusted_data, batch_size=batch_size, shuffle=True) # Finetune the model on the clean dataset - trainer = L.Trainer(default_root_dir=self.save_path, **trainer_kwargs) + trainer = L.Trainer(default_root_dir=save_path, **trainer_kwargs) with warnings.catch_warnings(): warnings.filterwarnings( "ignore", diff --git a/src/cupbearer/scripts/eval_detector.py b/src/cupbearer/scripts/eval_detector.py index b20774ad..8f94d267 100644 --- a/src/cupbearer/scripts/eval_detector.py +++ b/src/cupbearer/scripts/eval_detector.py @@ -1,3 +1,5 @@ +from pathlib import Path + from cupbearer.detectors import AnomalyDetector from cupbearer.tasks import Task from cupbearer.utils.scripts import script @@ -7,7 +9,9 @@ def main( task: Task, detector: AnomalyDetector, + save_path: Path | str | None, pbar: bool = False, + batch_size: int = 1024, ): detector.set_model(task.model) @@ -15,4 +19,6 @@ def main( train_dataset=task.trusted_data, test_dataset=task.test_data, pbar=pbar, + save_path=save_path, + batch_size=batch_size, ) diff --git a/src/cupbearer/scripts/train_detector.py b/src/cupbearer/scripts/train_detector.py index 350d12ff..30f6b722 100644 --- a/src/cupbearer/scripts/train_detector.py +++ b/src/cupbearer/scripts/train_detector.py @@ -1,3 +1,5 @@ +from pathlib import Path + from cupbearer.detectors import AnomalyDetector from cupbearer.tasks import Task @@ -7,6 +9,8 @@ def main( task: Task, detector: AnomalyDetector, + save_path: Path | str | None, + eval_batch_size: int = 1024, 
**train_kwargs, ): detector.set_model(task.model) @@ -14,9 +18,16 @@ def main( detector.train( trusted_data=task.trusted_data, untrusted_data=task.untrusted_train_data, + save_path=save_path, **train_kwargs, ) - path = detector.save_path - if path: - detector.save_weights(path / "detector") - eval_detector(detector=detector, task=task, pbar=True) + if save_path: + save_path = Path(save_path) + detector.save_weights(save_path / "detector") + eval_detector( + detector=detector, + task=task, + pbar=True, + batch_size=eval_batch_size, + save_path=save_path, + ) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index c9033417..9f7d23e0 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -80,10 +80,10 @@ def test_train_abstraction_corner_backdoor(model, backdoor_task, tmp_path): abstraction=detectors.abstraction.LocallyConsistentAbstraction.get_default( model, size_reduction=2 ), - max_batch_size=2, - save_path=tmp_path, ), + save_path=tmp_path, batch_size=2, + eval_batch_size=2, max_steps=1, ) assert (tmp_path / "detector.pt").is_file() @@ -99,11 +99,11 @@ def test_train_autoencoder_corner_backdoor(model, backdoor_task, tmp_path): detector=detectors.AbstractionDetector( abstraction=detectors.abstraction.AutoencoderAbstraction.get_default( model, size_reduction=2 - ), - max_batch_size=2, - save_path=tmp_path, + ) ), batch_size=2, + eval_batch_size=2, + save_path=tmp_path, max_steps=1, ) assert (tmp_path / "detector.pt").is_file() @@ -127,11 +127,10 @@ def test_train_mahalanobis_advex(model, mnist, tmp_path): success_threshold=1.0, steps=1, ), - detector=detectors.MahalanobisDetector( - max_batch_size=2, - save_path=tmp_path, - ), + detector=detectors.MahalanobisDetector(), + save_path=tmp_path, batch_size=2, + eval_batch_size=2, max_steps=1, ) # Note: we don't expect train samples to exist since we have no untrusted train data @@ -157,8 +156,10 @@ def test_train_mahalanobis_advex(model, mnist, tmp_path): def 
test_train_statistical_backdoor(tmp_path, backdoor_task, detector_type): train_detector( task=backdoor_task, - detector=detector_type(max_batch_size=2, save_path=tmp_path), + detector=detector_type(), batch_size=2, + eval_batch_size=2, + save_path=tmp_path, max_steps=1, ) @@ -172,11 +173,11 @@ def test_train_statistical_backdoor(tmp_path, backdoor_task, detector_type): def test_finetuning_detector(backdoor_task, tmp_path): train_detector( task=backdoor_task, - detector=detectors.FinetuningAnomalyDetector( - max_batch_size=2, save_path=tmp_path - ), + detector=detectors.FinetuningAnomalyDetector(), + save_path=tmp_path, num_classes=10, batch_size=2, + eval_batch_size=2, max_steps=1, ) assert (tmp_path / "detector.pt").is_file() From ae98812f4cc01132f82f4691b55286b07104c333 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 17:42:17 -0800 Subject: [PATCH 15/25] Remove another unused file --- src/cupbearer/utils/custom_transforms.py | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 src/cupbearer/utils/custom_transforms.py diff --git a/src/cupbearer/utils/custom_transforms.py b/src/cupbearer/utils/custom_transforms.py deleted file mode 100644 index 0b09346e..00000000 --- a/src/cupbearer/utils/custom_transforms.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Tuple - -# We use torch to generate random numbers, to keep things consistent -# with torchvision transforms. -from PIL.Image import Image - - -class AddInfoDict: - """Adds an info dict to the sample, in which other transforms can store information. - - This is meant to be used as the first transform, so that the info dict is - always present and other transforms can rely on it. - """ - - def __call__(self, sample: Tuple[Image, int]): - img, target = sample - # Some metrics need the original target (which CornerPixelToWhite changes). 
- # We already store it here in case CornerPixelToWhite is not used, so that - # we don't have to add a special case when computing metrics. - return img, target, {"original_target": target} From 31a79939fdeeb3b22b7ca976219570aae70736b3 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 17:49:12 -0800 Subject: [PATCH 16/25] Remove more unused code --- src/cupbearer/data/adversarial.py | 2 +- src/cupbearer/data/pytorch.py | 10 +- src/cupbearer/detectors/anomaly_detector.py | 2 +- src/cupbearer/detectors/finetuning.py | 4 +- src/cupbearer/scripts/eval_detector.py | 2 - src/cupbearer/utils/__init__.py | 121 ++++++++++++++- src/cupbearer/utils/scripts.py | 29 ---- src/cupbearer/utils/utils.py | 159 -------------------- 8 files changed, 128 insertions(+), 201 deletions(-) delete mode 100644 src/cupbearer/utils/scripts.py delete mode 100644 src/cupbearer/utils/utils.py diff --git a/src/cupbearer/data/adversarial.py b/src/cupbearer/data/adversarial.py index 7847c1c4..f9b4a672 100644 --- a/src/cupbearer/data/adversarial.py +++ b/src/cupbearer/data/adversarial.py @@ -8,7 +8,7 @@ from matplotlib import pyplot as plt from torch.utils.data import DataLoader, Dataset, Subset -from cupbearer.utils import utils +from cupbearer import utils class AdversarialExampleDataset(Dataset): diff --git a/src/cupbearer/data/pytorch.py b/src/cupbearer/data/pytorch.py index 43366d17..626ba2df 100644 --- a/src/cupbearer/data/pytorch.py +++ b/src/cupbearer/data/pytorch.py @@ -1,8 +1,8 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from torch.utils.data import Dataset -from cupbearer.utils.utils import get_object, mutable_field +from cupbearer.utils import get_object from .transforms import ( RandomCrop, @@ -18,7 +18,7 @@ class PytorchDataset(Dataset): name: str train: bool = True - transforms: list[Transform] = mutable_field([ToTensor()]) + transforms: list[Transform] = field(default_factory=lambda: [ToTensor()]) default_augmentations: bool = True 
def __post_init__(self): @@ -74,8 +74,8 @@ def __post_init__(self): class GTSRB(PytorchDataset): name: str = "torchvision.datasets.GTSRB" num_classes: int = 43 - transforms: list[Transform] = mutable_field( - [ + transforms: list[Transform] = field( + default_factory=lambda: [ Resize(size=(32, 32)), ToTensor(), ] diff --git a/src/cupbearer/detectors/anomaly_detector.py b/src/cupbearer/detectors/anomaly_detector.py index f1e917c1..571d19ab 100644 --- a/src/cupbearer/detectors/anomaly_detector.py +++ b/src/cupbearer/detectors/anomaly_detector.py @@ -13,9 +13,9 @@ from torch.utils.data import DataLoader, Dataset from tqdm.auto import tqdm +from cupbearer import utils from cupbearer.data import MixedData from cupbearer.models.models import HookedModel -from cupbearer.utils import utils class AnomalyDetector(ABC): diff --git a/src/cupbearer/detectors/finetuning.py b/src/cupbearer/detectors/finetuning.py index 8dfe14c0..063995d4 100644 --- a/src/cupbearer/detectors/finetuning.py +++ b/src/cupbearer/detectors/finetuning.py @@ -9,7 +9,7 @@ from cupbearer.detectors.anomaly_detector import AnomalyDetector from cupbearer.scripts._shared import Classifier -from cupbearer.utils import utils +from cupbearer.utils import inputs_from_batch class FinetuningAnomalyDetector(AnomalyDetector): @@ -63,7 +63,7 @@ def layerwise_scores(self, batch): ) def scores(self, batch): - inputs = utils.inputs_from_batch(batch) + inputs = inputs_from_batch(batch) original_output = self.model(inputs) finetuned_output = self.finetuned_model(inputs) diff --git a/src/cupbearer/scripts/eval_detector.py b/src/cupbearer/scripts/eval_detector.py index 8f94d267..ed9604f5 100644 --- a/src/cupbearer/scripts/eval_detector.py +++ b/src/cupbearer/scripts/eval_detector.py @@ -2,10 +2,8 @@ from cupbearer.detectors import AnomalyDetector from cupbearer.tasks import Task -from cupbearer.utils.scripts import script -@script def main( task: Task, detector: AnomalyDetector, diff --git a/src/cupbearer/utils/__init__.py 
b/src/cupbearer/utils/__init__.py index 1d326396..61fcceaa 100644 --- a/src/cupbearer/utils/__init__.py +++ b/src/cupbearer/utils/__init__.py @@ -1,2 +1,119 @@ -# ruff: noqa: F401 -from .utils import inputs_from_batch, load, save +import codecs +import importlib +import pickle +from pathlib import Path +from typing import Union + +import torch + +SUFFIX = ".pt" +TYPE_PREFIX = "__TYPE__:" +PICKLE_PREFIX = "__PICKLE__:" + + +def from_string(s): + # Doesn't restore Paths but all the code should be able to handle getting strings + # instead. + if not isinstance(s, str): + return s + if s.startswith(TYPE_PREFIX): + s = s[len(TYPE_PREFIX) :] + return get_object(s) + if s.startswith(PICKLE_PREFIX): + s = s[len(PICKLE_PREFIX) :] + pickled = codecs.decode(s.encode(), "base64") + return pickle.loads(pickled) + + return s + + +def validate_and_convert_leaf(leaf): + if isinstance(leaf, (str, int, float, bool, torch.Tensor)): + return leaf + if isinstance(leaf, Path): + return str(leaf) + if isinstance(leaf, type): + return TYPE_PREFIX + leaf.__module__ + "." + leaf.__name__ + + try: + pickled = pickle.dumps(leaf) + except Exception as e: + raise ValueError(f"Could not pickle object {leaf}") from e + # Make sure we're not accidentally encoding huge objects inefficiently into strings: + if len(pickled) > 1e6: + raise ValueError( + f"Object of type {type(leaf)} has {round(len(pickled) / 1e6, 1)} MB " + "when pickled. This is probably a mistake." + ) + pickle_str = codecs.encode(pickled, "base64").decode() + return PICKLE_PREFIX + pickle_str + + +def tree_map(f, tree): + """Like jax.tree_map, but simpler and for pytorch.""" + # We could use https://github.com/metaopt/optree in the future, + # which would be faster and generally add support for various tree operations. 
+ if isinstance(tree, list): + return [tree_map(f, x) for x in tree] + if isinstance(tree, tuple): + return tuple(tree_map(f, x) for x in tree) + if isinstance(tree, dict): + return {k: tree_map(f, v) for k, v in tree.items()} + try: + return f(tree) + except Exception as e: + raise ValueError( + f"Could not apply {f} to leaf {tree} of type {type(tree)}" + ) from e + + +def save(data, path: Union[str, Path], overwrite: bool = False): + data = tree_map(validate_and_convert_leaf, data) + path = Path(path) + directory = path.parent + directory.mkdir(parents=True, exist_ok=True) + if path.exists(): + if overwrite: + assert not path.is_dir(), f"{path} is a directory, won't overwrite" + path.unlink() + else: + raise RuntimeError(f"File {path} already exists.") + torch.save(data, path.with_suffix(SUFFIX)) + + +def load(path: Union[str, Path]): + path = Path(path) + if path.is_dir(): + raise ValueError( + f"Expected a file, got directory {path}. " + "Maybe this is in the legacy Jax format?" + ) + + if path.suffix != SUFFIX: + path = path.with_suffix(SUFFIX) + with open(path, "rb") as file: + data = torch.load(file) + data = tree_map(from_string, data) + return data + + +def get_object(path: str): + """Get an object from a string. + + Args: + path: A string of the form "module.submodule.object_name". + + Returns: + The object named by `path`. 
+ """ + module_name, object_name = path.rsplit(".", 1) + module = importlib.import_module(module_name) + return getattr(module, object_name) + + +def inputs_from_batch(batch): + # batch may contain labels or other info, if so we strip it out + if isinstance(batch, (tuple, list)): + return batch[0] + else: + return batch diff --git a/src/cupbearer/utils/scripts.py b/src/cupbearer/utils/scripts.py deleted file mode 100644 index 246d46e8..00000000 --- a/src/cupbearer/utils/scripts.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import Callable - - -def script( - script_fn: Callable, -) -> Callable: - # @functools.wraps(script_fn) - # def run_script(cfg: ConfigType): - # save_cfg(cfg, save_config=cfg.save_config) - # return script_fn(cfg) - - # return run_script - return script_fn - - -def save_cfg(cfg, save_config: bool = True): - # if cfg.path: - # cfg.path.mkdir(parents=True, exist_ok=True) - # if save_config: - # # TODO: replace this with cfg.save if/when that exposes save_dc_types. - # # Note that we need save_dc_types here even though `BaseConfig` already - # # enables that, since `save` calls `to_dict` directly, not `obj.to_dict`. - # simple_parsing.helpers.serialization.serializable.save( - # cfg, - # cfg.path / "config.yaml", - # save_dc_types=True, - # sort_keys=False, - # ) - pass diff --git a/src/cupbearer/utils/utils.py b/src/cupbearer/utils/utils.py deleted file mode 100644 index 7d519c14..00000000 --- a/src/cupbearer/utils/utils.py +++ /dev/null @@ -1,159 +0,0 @@ -import codecs -import copy -import dataclasses -import functools -import importlib -import pickle -from pathlib import Path -from typing import Iterable, TypeVar, Union - -import torch - -SUFFIX = ".pt" -TYPE_PREFIX = "__TYPE__:" -PICKLE_PREFIX = "__PICKLE__:" - - -def from_string(s): - # Doesn't restore Paths but all the code should be able to handle getting strings - # instead. 
- if not isinstance(s, str): - return s - if s.startswith(TYPE_PREFIX): - s = s[len(TYPE_PREFIX) :] - return get_object(s) - if s.startswith(PICKLE_PREFIX): - s = s[len(PICKLE_PREFIX) :] - pickled = codecs.decode(s.encode(), "base64") - return pickle.loads(pickled) - - return s - - -def validate_and_convert_leaf(leaf): - if isinstance(leaf, (str, int, float, bool, torch.Tensor)): - return leaf - if isinstance(leaf, Path): - return str(leaf) - if isinstance(leaf, type): - return TYPE_PREFIX + leaf.__module__ + "." + leaf.__name__ - - try: - pickled = pickle.dumps(leaf) - except Exception as e: - raise ValueError(f"Could not pickle object {leaf}") from e - # Make sure we're not accidentally encoding huge objects inefficiently into strings: - if len(pickled) > 1e6: - raise ValueError( - f"Object of type {type(leaf)} has {round(len(pickled) / 1e6, 1)} MB " - "when pickled. This is probably a mistake." - ) - pickle_str = codecs.encode(pickled, "base64").decode() - return PICKLE_PREFIX + pickle_str - - -def tree_map(f, tree): - """Like jax.tree_map, but simpler and for pytorch.""" - # We could use https://github.com/metaopt/optree in the future, - # which would be faster and generally add support for various tree operations. 
- if isinstance(tree, list): - return [tree_map(f, x) for x in tree] - if isinstance(tree, tuple): - return tuple(tree_map(f, x) for x in tree) - if isinstance(tree, dict): - return {k: tree_map(f, v) for k, v in tree.items()} - try: - return f(tree) - except Exception as e: - raise ValueError( - f"Could not apply {f} to leaf {tree} of type {type(tree)}" - ) from e - - -def save(data, path: Union[str, Path], overwrite: bool = False): - data = tree_map(validate_and_convert_leaf, data) - path = Path(path) - directory = path.parent - directory.mkdir(parents=True, exist_ok=True) - if path.exists(): - if overwrite: - assert not path.is_dir(), f"{path} is a directory, won't overwrite" - path.unlink() - else: - raise RuntimeError(f"File {path} already exists.") - torch.save(data, path.with_suffix(SUFFIX)) - - -def load(path: Union[str, Path]): - path = Path(path) - if path.is_dir(): - raise ValueError( - f"Expected a file, got directory {path}. " - "Maybe this is in the legacy Jax format?" - ) - - if path.suffix != SUFFIX: - path = path.with_suffix(SUFFIX) - with open(path, "rb") as file: - data = torch.load(file) - data = tree_map(from_string, data) - return data - - -def product(xs: Iterable): - return functools.reduce(lambda x, y: x * y, xs, 1) - - -def merge_dicts(a: dict, b: dict) -> dict: - """Merges two dictionaries recursively.""" - - merged = a.copy() - for key, value in b.items(): - if key in merged and isinstance(merged[key], dict): - # Make sure we don't overwrite a dict with a non-dict - assert isinstance(value, dict) - merged[key] = merge_dicts(merged[key], value) - else: - if isinstance(value, dict): - # Make sure we don't overwrite a non-dict with a dict - assert key not in merged - merged[key] = value - - return merged - - -T = TypeVar("T") - - -def mutable_field(default: T = None) -> T: - return dataclasses.field(default_factory=lambda: copy.deepcopy(default)) - - -def list_field(): - return dataclasses.field(default_factory=list) - - -def dict_field(): 
- return dataclasses.field(default_factory=dict) - - -def get_object(path: str): - """Get an object from a string. - - Args: - path: A string of the form "module.submodule.object_name". - - Returns: - The object named by `path`. - """ - module_name, object_name = path.rsplit(".", 1) - module = importlib.import_module(module_name) - return getattr(module, object_name) - - -def inputs_from_batch(batch): - # batch may contain labels or other info, if so we strip it out - if isinstance(batch, (tuple, list)): - return batch[0] - else: - return batch From f0dacc5123be402f3e258476f9dbffb69a1ad7d9 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 17:56:34 -0800 Subject: [PATCH 17/25] Minor improvements and remove TODOs --- src/cupbearer/detectors/anomaly_detector.py | 10 +++------- src/cupbearer/scripts/eval_detector.py | 3 +-- src/cupbearer/scripts/train_classifier.py | 2 +- src/cupbearer/tasks/backdoor_detection.py | 5 ----- 4 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/cupbearer/detectors/anomaly_detector.py b/src/cupbearer/detectors/anomaly_detector.py index 571d19ab..1b22a461 100644 --- a/src/cupbearer/detectors/anomaly_detector.py +++ b/src/cupbearer/detectors/anomaly_detector.py @@ -92,11 +92,7 @@ def eval( self, # Don't need train_dataset here, but e.g. adversarial abstractions need it, # and in general there's no reason to deny detectors access to it during eval. - # TODO: I think we can/should remove this and require detectors to handle - # anything involving training data during training (now that they get access - # to untrusted data then). - train_dataset: Dataset, - test_dataset: MixedData, + dataset: MixedData, batch_size: int = 1024, histogram_percentile: float = 95, save_path: Path | str | None = None, @@ -105,10 +101,10 @@ def eval( ): # Check this explicitly because otherwise things can break in weird ways # when we assume that anomaly labels are included. 
- assert isinstance(test_dataset, MixedData), type(test_dataset) + assert isinstance(dataset, MixedData), type(dataset) test_loader = DataLoader( - test_dataset, + dataset, batch_size=batch_size, # For some methods, such as adversarial abstractions, it might matter how # normal/anomalous data is distributed into batches. In that case, we want diff --git a/src/cupbearer/scripts/eval_detector.py b/src/cupbearer/scripts/eval_detector.py index ed9604f5..5bead65c 100644 --- a/src/cupbearer/scripts/eval_detector.py +++ b/src/cupbearer/scripts/eval_detector.py @@ -14,8 +14,7 @@ def main( detector.set_model(task.model) detector.eval( - train_dataset=task.trusted_data, - test_dataset=task.test_data, + dataset=task.test_data, pbar=pbar, save_path=save_path, batch_size=batch_size, diff --git a/src/cupbearer/scripts/train_classifier.py b/src/cupbearer/scripts/train_classifier.py index aaed8fc3..8689d802 100644 --- a/src/cupbearer/scripts/train_classifier.py +++ b/src/cupbearer/scripts/train_classifier.py @@ -62,7 +62,6 @@ def main( trainer_kwargs["callbacks"] = callbacks # Define metrics logger - # TODO: make adjustable and set config correctly if "logger" not in trainer_kwargs: if wandb: metrics_logger = loggers.WandbLogger(project="cupbearer") @@ -72,6 +71,7 @@ def main( "model": repr(model), "train_data": repr(train_loader.dataset), "batch_size": train_loader.batch_size, + "lr": lr, } ) elif path: diff --git a/src/cupbearer/tasks/backdoor_detection.py b/src/cupbearer/tasks/backdoor_detection.py index 51942285..b6e4c22d 100644 --- a/src/cupbearer/tasks/backdoor_detection.py +++ b/src/cupbearer/tasks/backdoor_detection.py @@ -20,11 +20,6 @@ def backdoor_detection( "this is probably unintentional." ) - # TODO: for WaNet, we currently expect the user to load the control grid. - # (Otherwise we'd have to always take in a path here, and also when working - # in a notebook it might just be easier to pass in the existing backdoor object.) 
- # But we should somehow check somewhere that it's loaded to avoid silent errors. - return Task.from_base_data( model=model, train_data=train_data, From 0267bd13809d0aa70dec4ba90615c8c2f786da4e Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 18:09:11 -0800 Subject: [PATCH 18/25] Fix demo notebook --- notebooks/simple_demo.ipynb | 248 +++++++++++++++----------------- src/cupbearer/utils/__init__.py | 9 ++ 2 files changed, 122 insertions(+), 135 deletions(-) diff --git a/notebooks/simple_demo.ipynb b/notebooks/simple_demo.ipynb index 3f6a2bc6..da86a010 100644 --- a/notebooks/simple_demo.ipynb +++ b/notebooks/simple_demo.ipynb @@ -17,26 +17,11 @@ "outputs": [], "source": [ "import json\n", - "from datetime import datetime\n", - "from pathlib import Path\n", + "from torch.utils.data import DataLoader\n", "\n", "from cupbearer import data, detectors, models, scripts, tasks, utils" ] }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "def get_path(base=\"logs\", time=True):\n", - " if time:\n", - " timestamp = datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")\n", - " else:\n", - " timestamp = datetime.now().strftime(\"%Y-%m-%d\")\n", - " return Path(base) / timestamp" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -47,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -94,7 +79,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "48219c8359284728a9ec6a2144927c0a", + "model_id": "f3fbd649f84545b79393518b1875ea71", "version_major": 2, "version_minor": 0 }, @@ -118,7 +103,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - 
"model_id": "24b6f01a53f14158821bf04d8d7ee377", + "model_id": "94e198d2efdb4899a5cc3829dc6a90b7", "version_major": 2, "version_minor": 0 }, @@ -132,7 +117,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b157b6824e0f468690e0bb219c6ca8c2", + "model_id": "6218ec07439243379c33377a00ade093", "version_major": 2, "version_minor": 0 }, @@ -146,7 +131,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1b7021c89c6e40ba97cb5147930bda1d", + "model_id": "c1d3eeecce294a499dca8d392f36b779", "version_major": 2, "version_minor": 0 }, @@ -160,35 +145,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c6252aadc20841a6b7e164a6fe30a204", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Validation: | | 0/? [00:00┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", "┃ Test metric DataLoader 0 ┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", - "│ test/acc_epoch 0.9520999789237976 │\n", - "│ test/acc_step 0.9520999789237976 │\n", - "│ test/loss 0.15424881875514984 │\n", + "│ test/acc_epoch 0.9467999935150146 │\n", + "│ test/acc_step 0.9467999935150146 │\n", + "│ test/loss 0.16958695650100708 │\n", "└───────────────────────────┴───────────────────────────┘\n", "\n" ], @@ -310,9 +266,9 @@ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", - "│\u001b[36m \u001b[0m\u001b[36m test/acc_epoch \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9520999789237976 \u001b[0m\u001b[35m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36m test/acc_step \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9520999789237976 \u001b[0m\u001b[35m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36m test/loss \u001b[0m\u001b[36m 
\u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.15424881875514984 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/acc_epoch \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9467999935150146 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/acc_step \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.9467999935150146 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test/loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.16958695650100708 \u001b[0m\u001b[35m \u001b[0m│\n", "└───────────────────────────┴───────────────────────────┘\n" ] }, @@ -322,7 +278,9 @@ ], "source": [ "scripts.eval_classifier(\n", - " scripts.EvalClassifierConfig(path=classifier_path, data=val_data, model=model)\n", + " data=val_data,\n", + " model=model,\n", + " path=classifier_path,\n", ")" ] }, @@ -330,19 +288,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "These results will also have been stored to `logs/demo/classifier/eval.json` if we want to process them further (e.g. to compare many runs):" + "These results will also have been stored to `/eval.json` if we want to process them further (e.g. 
to compare many runs):" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[{'test/loss': 0.15424881875514984, 'test/acc_step': 0.9520999789237976, 'test/acc_epoch': 0.9520999789237976}]\n" + "[{'test/loss': 0.16958695650100708, 'test/acc_step': 0.9467999935150146, 'test/acc_epoch': 0.9467999935150146}]\n" ] } ], @@ -361,31 +319,54 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ + "# Initialize a new model with the same architecture as before:\n", + "model = models.MLP(input_shape=(28, 28), hidden_dims=[128, 128], output_dim=10)\n", + "# Load the weights:\n", "models.load(model, classifier_path)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 15/15 [00:06<00:00, 2.30it/s]\n", - "\u001b[32m2024-02-29 22:14:34.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36msave_weights\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mSaving detector to logs/demo/detector/2024-02-29_22-14-27/detector\u001b[0m\n", - "\u001b[32m2024-02-29 22:14:35.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mAUC_ROC: 1.0000\u001b[0m\n", - "\u001b[32m2024-02-29 22:14:35.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mAP: 1.0000\u001b[0m\n" + "100%|██████████| 59/59 [00:06<00:00, 9.09it/s]\n", + "\u001b[32m2024-03-02 18:08:41.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36msave_weights\u001b[0m:\u001b[36m220\u001b[0m - \u001b[1mSaving detector to 
logs/demo/detector/2024-03-02_18-08-34/detector\u001b[0m\n" ] }, { "data": { - "image/png": "", + "application/vnd.jupyter.widget-view+json": { + "model_id": "ec6c3dcf6ee742048eeb88a37650d650", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Evaluating: 0%| | 0/10 [00:00" ] @@ -396,15 +377,12 @@ ], "source": [ "scripts.train_detector(\n", - " scripts.TrainDetectorConfig(\n", - " path=(detector_path := get_path(\"logs/demo/detector\")),\n", - " task=tasks.backdoor_detection(\n", - " model, train_data, val_data, data.CornerPixelBackdoor()\n", - " ),\n", - " detector=detectors.MahalanobisDetector(save_path=detector_path),\n", - " train=detectors.MahalanobisTrainConfig(),\n", - " num_classes=10,\n", - " )\n", + " save_path=(detector_path := utils.log_path(\"logs/demo/detector\")),\n", + " task=tasks.backdoor_detection(\n", + " model, train_data, val_data, data.CornerPixelBackdoor()\n", + " ),\n", + " detector=detectors.MahalanobisDetector(),\n", + " num_classes=10,\n", ")" ] }, @@ -412,24 +390,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As we can see, this was a trivial detection task. As an ablation, we can test whether the detector specifically flags backdoored inputs as anomalous, or just anything out of distribution:" + "As we can see, this was a trivial detection task. As an ablation, we can test whether the detector specifically flags backdoored inputs as anomalous, or just anything out of distribution. 
Let's again reload the detector just to show how that works:" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-02-29 22:14:35.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36mload_weights\u001b[0m:\u001b[36m232\u001b[0m - \u001b[1mLoading detector from logs/demo/detector/2024-02-29_22-14-27/detector\u001b[0m\n" + "\u001b[32m2024-03-02 18:08:42.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36mload_weights\u001b[0m:\u001b[36m224\u001b[0m - \u001b[1mLoading detector from logs/demo/detector/2024-03-02_18-08-34/detector\u001b[0m\n" ] } ], "source": [ - "detector = detectors.MahalanobisDetector(save_path=detector_path / \"ood_eval\")\n", + "detector = detectors.MahalanobisDetector()\n", "# TODO: The fact that weights are saved in \"detector\" is just a convention used by\n", "# the train_detector script, this is kind of weird.\n", "detector.load_weights(detector_path / \"detector\")" @@ -437,20 +415,20 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-02-29 22:14:36.725\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mAUC_ROC: 0.9934\u001b[0m\n", - "\u001b[32m2024-02-29 22:14:36.726\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mAP: 0.9779\u001b[0m\n" + "\u001b[32m2024-03-02 18:08:43.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m139\u001b[0m - \u001b[1mAUC_ROC: 0.9940\u001b[0m\n", + "\u001b[32m2024-03-02 
18:08:43.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcupbearer.detectors.anomaly_detector\u001b[0m:\u001b[36meval\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mAP: 0.9784\u001b[0m\n" ] }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -461,24 +439,24 @@ ], "source": [ "scripts.eval_detector(\n", - " scripts.EvalDetectorConfig(\n", - " detector=detector,\n", - " task=tasks.Task.from_separate_data(\n", - " model=model,\n", - " # TODO: this won't actually be used, plausibly Tasks should be split better\n", - " # into their training and test data.\n", - " trusted_data=train_data,\n", - " # Our anomalous data is the backdoor data from above, except we use the\n", - " # MNIST test split.\n", - " anomalous_test_data=data.BackdoorDataset(\n", - " original=val_data,\n", - " backdoor=data.CornerPixelBackdoor(),\n", - " ),\n", - " # Our normal data is MNIST with added noise, this makes the images OOD\n", - " # but they shouldn't be mechanistically anomalous.\n", - " clean_test_data=data.TransformDataset(val_data, data.GaussianNoise(0.3)),\n", + " detector=detector,\n", + " # We save to a different directory to avoid overwriting the existing default eval:\n", + " save_path=detector_path / \"ood_eval\",\n", + " task=tasks.Task.from_separate_data(\n", + " model=model,\n", + " # TODO: this won't actually be used, plausibly Tasks should be split better\n", + " # into their training and test data.\n", + " trusted_data=train_data,\n", + " # Our anomalous data is the backdoor data from above, except we use the\n", + " # MNIST test split.\n", + " anomalous_test_data=data.BackdoorDataset(\n", + " original=val_data,\n", + " backdoor=data.CornerPixelBackdoor(),\n", " ),\n", - " )\n", + " # Our normal data is MNIST with added noise, this makes the images OOD\n", + " # but they shouldn't be mechanistically anomalous.\n", + " clean_test_data=data.TransformDataset(val_data, data.GaussianNoise(0.3)),\n", + " ),\n", ")" ] }, diff --git a/src/cupbearer/utils/__init__.py b/src/cupbearer/utils/__init__.py index 61fcceaa..793052c8 100644 --- a/src/cupbearer/utils/__init__.py +++ b/src/cupbearer/utils/__init__.py @@ -1,6 +1,7 @@ import codecs import importlib import pickle +from datetime import datetime from pathlib 
import Path from typing import Union @@ -117,3 +118,11 @@ def inputs_from_batch(batch): return batch[0] else: return batch + + +def log_path(base="logs", time=True): + if time: + timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + else: + timestamp = datetime.now().strftime("%Y-%m-%d") + return Path(base) / timestamp From 975289ef83a928a2884a8a3a9a7d59462cc7dff6 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 18:12:42 -0800 Subject: [PATCH 19/25] Add WaNet warning --- src/cupbearer/data/backdoors.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cupbearer/data/backdoors.py b/src/cupbearer/data/backdoors.py index c4f1298b..13bf9baa 100644 --- a/src/cupbearer/data/backdoors.py +++ b/src/cupbearer/data/backdoors.py @@ -95,7 +95,16 @@ def inject_backdoor(self, img: torch.Tensor): @dataclass(kw_only=True) class WanetBackdoor(Backdoor): """Implements trigger transform from "Wanet - Imperceptible Warping-based - Backdoor Attack" by Anh Tuan Nguyen and Anh Tuan Tran, ICLR, 2021.""" + Backdoor Attack" by Anh Tuan Nguyen and Anh Tuan Tran, ICLR, 2021. + + WARNING: The backdoor trigger is a specific (randomly generated) warping pattern. + Networks are trained to only respond to this specific pattern, so evaluating + a network on a freshly initialized WanetBackdoor with a new trigger won't work. + Within a single process, just make sure you only initialize WanetBackdoor once + and then use that everywhere. + Between different processes, you need to store the trigger using the `store()` + method, and then later pass it in as the `path` argument to the new WanetBackdoor. + """ # Path to load control grid from, or None to generate a new one. # Deliberartely non-optional to avoid accidentally generating a new grid! From 1b82635c2ade7f204f7dbc8e89b596f2d1aa95fb Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 18:13:32 -0800 Subject: [PATCH 20/25] Update gitignore We also want to ignore log dirs in e.g. 
the notebook folder --- .gitignore | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 6304768c..6b4b6cd2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,10 +4,9 @@ *.egg-info .python-version __pycache__ -/data -/results -/slurm -/logs +data/ +logs/ +slurm/ .venv -/wandb -/dist +wandb/ +dist/ From 35220aabc9c65952ef20999d193f00947d1a6819 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 18:21:36 -0800 Subject: [PATCH 21/25] Update documentation somewhat --- README.md | 4 +- docs/adding_a_script.md | 139 ----------------------------------- docs/adding_a_task.md | 87 ---------------------- docs/configuration.md | 43 ----------- docs/high_level_structure.md | 70 ++++-------------- 5 files changed, 15 insertions(+), 328 deletions(-) delete mode 100644 docs/adding_a_script.md delete mode 100644 docs/configuration.md diff --git a/README.md b/README.md index eed2d3d3..ee4754e7 100644 --- a/README.md +++ b/README.md @@ -31,13 +31,13 @@ installing `cupbearer`, in particular if you want to control CUDA version etc. ## Running experiments We provide scripts in `cupbearer.scripts` for more easily running experiments. -See [demo.ipynb](demo.ipynb) for a quick example of how to use them---this is likely +See [the demo notebook](notebooks/simple_demo.ipynb) for a quick example of how to use them---this is likely also the best way to get an overview of how the components of `cupbearer` fit together. These "scripts" are Python functions and designed to be used from within Python, e.g. in a Jupyter notebook or via [submitit](https://github.com/facebookincubator/submitit/tree/main) if on Slurm. But of course you could also write a simple Python wrapper and then use -them from the CLI. Their configuration interface is designed to be very general, +them from the CLI. 
The scripts are designed to be pretty general, which sometimes comes at the cost of being a bit verbose---we recommend writing helper functions for your specific use case on top of the general script interface. Of course you can also use the components of `cupbearer` directly without going through diff --git a/docs/adding_a_script.md b/docs/adding_a_script.md deleted file mode 100644 index 4c8b6723..00000000 --- a/docs/adding_a_script.md +++ /dev/null @@ -1,139 +0,0 @@ -# Creating new scripts -You don't need to implement any scripts your new task or detector needs using the -interface described in this document. However, it's designed to work well with the -rest of `cupbearer` and probably makes sense for most cases. - -As an overview, here's how to create a new script: -1. Put a python file in `scripts` with some function `my_function`. -2. The only argument to `my_function` should be an object of a dataclass `MyConfig` - that inherits from `utils.scripts.ScriptConfig`. -3. The definition of `MyConfig` needs to be placed in its own file. -4. Use `utils.scripts.run(my_function, MyConfig)` in the python file to run the script. -5. Now you'll be able to run the script from the command line using `python -m cupbearer.scripts.my_file`. - -The rest of this doc goes into some background to understand how scripts work -in `cupbearer`. 
- -## Example walkthrough -Let's look at `eval_detector.py`: -```python -from cupbearer.scripts.conf.eval_detector_conf import Config -from cupbearer.utils.scripts import run -from torch.utils.data import Subset - - -def main(cfg: Config): - reference_data = cfg.task.build_reference_data() - anomalous_data = cfg.task.build_anomalous_data() - if cfg.max_size: - reference_data = Subset(reference_data, range(cfg.max_size)) - anomalous_data = Subset(anomalous_data, range(cfg.max_size)) - model = cfg.task.build_model() - params = cfg.task.build_params() - detector = cfg.detector.build(model=model, params=params, save_dir=cfg.dir.path) - - detector.eval( - normal_dataset=reference_data, - anomalous_datasets={"anomalous": anomalous_data}, - ) - - -if __name__ == "__main__": - run(main, Config, save_config=False) -``` -There are two key things to note here: -- We have a function `main` that takes a single argument of type `Config`. (The name of `main` doesn't matter.) -- If the script is run as the main file, we call `run(main, Config)`. - -Actually, in this case, we also have `save_config=False` in the call to `run`. By default, -`run` will save the config as a yaml file, which this flag disables. - -Here is the definition of `Config`, in `conf/eval_detector_conf.py`: -```python -@dataclass(kw_only=True) -class Config(ScriptConfig): - task: TaskConfigBase = config_group(TaskConfigBase) - detector: DetectorConfig = config_group(DetectorConfig) - max_size: Optional[int] = None - - def _set_debug(self): - super()._set_debug() - self.max_size = 2 -``` -A few things to note: -- `Config` inherits from `ScriptConfig`. All script configurations should do this. -- `Config` is a dataclass, as all configs should be. -- For the fields that are themselves dataclasses, we use `config_group` as a default. - This lets users set these fields from the command line (where you otherwise couldn't - pass dataclasses as values). 
A config group is basically a dictionary mapping from - names (that users use on the CLI) to subclasses of some base class. For example, - `config_group(DetectorConfig)` means that users can choose any of the registered - detectors. If this detector has config options, these can also be set. - Config groups are discussed in more detail in [configuration.md](configuration.md). -- There's a `_set_debug` method. This is a special method that's called when the - `--debug` flag is passed to the script. It should set all values where this makes - sense to values that lead to a fast run. (For example, this flag is always used - in unit tests.) The `super()._set_debug()` call is important, since it ensures - that `_set_debug` is called recusively on all fields that support it. - Again see [configuration.md](configuration.md) for more details. - -## The `Config` definition needs to be in its own file -There is currently a technical limitation: the definition of the `Config` class -mustn't be in the same file as the script that users will call from the CLI. That's -why all the configs are in the `conf` folder. - -The reason for this is that serializing a configuration dataclass to yaml stores -the full path of the dataclass (in order to reliably deserialize it later). If the -dataclass is defined in the main script, that path will be `__main__.Config`, which -can then not be restored from a different script. - -## `ScriptConfig` -As mentioned above, all configs for scripts should inherit from `ScriptConfig`. -Let's look at `ScriptConfig` to understand the effects of that: -```python -@dataclass(kw_only=True) -class ScriptConfig(BaseConfig): - seed: int = 0 - dir: DirConfig = mutable_field(DirConfig) - debug: bool = field(action="store_true") - debug_with_logging: bool = field(action="store_true") - - ... -``` -(See [configuration.md](configuration.md) for more details on `BaseConfig`.) 
- -This is where the `debug` flag mentioned above is defined (the `field` here -is from `simple_parsing` and extends `dataclasses.field`). Apart from that, there's -a `seed` field, since basically every script will need that. - -Perhaps most interesting is the `dir` field. This is a `DirConfig`, which has three -fields: -```python -@dataclass(kw_only=True) -class DirConfig(BaseConfig): - base: Optional[str] = None - run: str = field( - default_factory=lambda: datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - ) - full: Optional[str] = None -``` -By default, `base` and `full` are `None`. This means that nothing will be logged to disk. -If `full` is set, then the path will always be `full`, no matter what `base` and `run` -are. Otherwise, if `base` is set, the path will be `base/run`. This can be useful if you -want to automatically generate new directories for each run without naming them all. -For example, -```bash -python -m cupbearer.scripts.train_detector --dir.base logs/train_detector ... -``` -would create a new directory `logs/train_detector//` for each run. - -While typically, `ScriptConfig.dir` is meant to be a newly created logging directory, -it can also sometimes take on the role of an input directory. For example, -```bash -python -m cupbearer.scripts.eval_detector --dir.full logs/train_detector/... --detector from_run ... -``` -would load a detector from the directory `logs/train_detector/...` and evaluate it -(since the `from_run` option for the detector config group uses the `dir` argument). - -How directories are handled is one of the places that seems most likely to change, -so try not to rely too much on the current version. 
diff --git a/docs/adding_a_task.md b/docs/adding_a_task.md index c7995771..b1e18ac2 100644 --- a/docs/adding_a_task.md +++ b/docs/adding_a_task.md @@ -1,88 +1 @@ # Adding a new task - -The only component that a task absolutely needs is an implementation of the -`TaskConfigBase` abstract class: -```python -class TaskConfigBase(BaseConfig, ABC): - @abstractmethod - def build_reference_data(self) -> Dataset: - pass - - @abstractmethod - def build_model(self) -> Model: - pass - - def build_params(self): - return None - - @abstractmethod - def build_anomalous_data(self) -> Dataset: - pass -``` -If your config has any parameters, you should use a dataclass to set them. E.g. -```python -@dataclass -class MyTaskConfig(TaskConfigBase): - my_required_param: str - my_optional_param: int = 42 - - ... -``` -This will automagically let you override these parameters from the command line -(and any parameters without default values will be required). - -`build_reference_data` and `build_anomalous_data` both need to return `pytorch` `Dataset`s. -`build_model` needs to return a `models.Model`, which is a special type of `flax.linen.Module`. -`build_params` can return a parameter dict for the returned `Model` (if `None`, the model -will be randomly initialized, which is usually not what you want). - -In practice, the datasets and the model will have to come from somewhere, so you'll -often implement a few things in addition to the task config class. There are predefined -interfaces for datasets and models, and if possible I suggest using those (either -using their existing implementations, or adding your own). 
For example, consider -the adversarial example task: -```python -@dataclass -class AdversarialExampleTask(TaskConfigBase): - run_path: Path - - def __post_init__(self): - self._reference_data = TrainDataFromRun(path=self.run_path) - self._anomalous_data = AdversarialExampleConfig(run_path=self.run_path) - self._model = StoredModel(path=self.run_path) - - def build_anomalous_data(self) -> Dataset: - return self._anomalous_data.build_dataset() - - def build_model(self) -> Model: - return self._model.build_model() - - def build_params(self) -> Model: - return self._model.build_params() - - def build_reference_data(self) -> Dataset: - return self._reference_data.build_dataset() -``` -This task only has one parameter, the path to the training run of a base model. -It then uses the training data of that run as reference data, and an adversarial -version of it as anomalous data. The model is just the trained base model, loaded -from disk. - -You can also add new scripts in the `scripts` directory, to generate the datasets -and/or train the model. For example, the adversarial examples task has an -associated script `make_adversarial_examples.py`. (To get the model, we can simply -use the existing `train_classifier.py` script.) - -There's no formal connection between scripts and the rest of the library---you can -leave it up to users to run the necessary preparatory scripts before using your new -task. But if feasible, you may want to automate this. For example, the `AdversarialExampleDataset` -automatically runs `make_adversarial_examples.py` if the necessary files are not found. - -Finally, you need to register your task to make it accessible from the command line -in the existing scripts. Simply add the task config class to the `TASKS` dict in `tasks/__init__.py` -(with an arbitrary name as the key). 
- -Then you should be able to run commands like -```bash -python -m cupbearer.scripts.train_detector --task my_task --detector my_detector --task.my_required_param foo -``` diff --git a/docs/configuration.md b/docs/configuration.md deleted file mode 100644 index 4d6db583..00000000 --- a/docs/configuration.md +++ /dev/null @@ -1,43 +0,0 @@ -# Configuration dataclasses -As briefly discussed in [high_level_structure.md](high_level_structure.md), `cupbearer` -makes heavy use of dataclasses for configuration. For the most part, these are just -normal dataclasses, but there are a few special things to keep in mind. - -## Inherit from `BaseConfig` -All configuration dataclasses should inherit from `cupbearer.utils.utils.BaseConfig`. -That ensures that storing configs to disk and loading them again will work correctly, -as well as a debug feature described below. - -Most dataclasses will not inherit from `BaseConfig` directly, but instead from a more -specialized class like `ScriptConfig` or `DatasetConfig`. - -## `kw_only=True` -Sometimes a parent dataclass will have some optional fields, and then a child class will -add required fields. This would usually lead to problems because required fields can't -come after optional ones. To deal with that, many dataclasses in `cupbearer` use -`@dataclass(kw_only=True)`, which makes all fields keyword-only arguments to `__init__`. - -## `_set_debug()` -It can be convenient to run a script with the fastest possible settings for debugging -error messages or for automated testing (e.g. just train for a single batch with -a single sample, use a small model, ...). In `cupbearer`, every configuration dataclass -should "know" how to set itself to such a debug mode: it should have a `_set_debug()` -method that sets all its fields to the debug values that lead to fast runs. Of course -if a config has no such values, it doesn't need to implement `_set_debug()`. - -Importantly, `_set_debug()` should also call `super()._set_debug()`. 
This ensures that -fields from the parent class are set to their debug values. It also recursively calls -`_set_debug()` on all fields that are themselves configuration dataclasses, so there's -no need to do that manually. - -## Special CLI fields -You can use `simple_parsing.helpers.field` instead of the builtin `dataclasses.field` -to get some additional functionality, most notably specifying how options can be changed -from the CLI. This will mostly be unnecessary, but can be nice for boolean flags. - -For example, the debug option described above is implemented using -```python -debug: bool = field(action="store_true") -``` -in `ScriptConfig`, which means you can call scripts using simply `--debug` instead of -`--debug True`. diff --git a/docs/high_level_structure.md b/docs/high_level_structure.md index 253d25ac..371cb4d4 100644 --- a/docs/high_level_structure.md +++ b/docs/high_level_structure.md @@ -3,32 +3,13 @@ In this document, we'll go over all the subpackages of `cupbearer` to see what r they play and how to extend them. For more details of extending `cupbearer`, see the other documentation files on specific subpackages. -## Configuration -Different parts of `cupbearer` interface with each other through many configuration -dataclasses. Each dataset, model, task, detector, script, etc. should expose all its -hyperparameters and configuration options through such a dataclass. That way, -all options will automatically be configurable from the command line. - -Many of the configuration dataclass ABCs have one or several `build()` methods that -create the actual object of interest based on the configuration. For example, -the `DetectorConfig` ABC has an abstract `build()` method that must return an -`AnomalyDetector` instance. - -See [configuration.md](configuration.md) for more details on the configuration -dataclasses and what to keep in mind when writing your own. 
- ## Helper subpackages ### `cupbearer.data` The `data` package contains implementations of basic datasets, transforms, and specialized datasets (e.g. datasets consisting only of adversarial examples). -The key interface is the `DatasetConfig` class. It has a `build()` method that -needs to return a pytorch `Dataset` instance. -In principle, you don't need to use the `DatasetConfig` interface (or anything -from the `data` package) to implement new tasks or detectors. Tasks and detectors -just pass `Dataset` instances between each other. But unless you have a good reason -to avoid the `DatasetConfig` interface, it's best to use it since it already works -with the scripts and you get some features such as configuring transforms for free. +Using this subpackage is optional; you can define tasks directly using standard +pytorch `Dataset`s. ### `cupbearer.models` Unlike the `data` package, you have to use the `models` package at the moment. @@ -37,53 +18,32 @@ to the model's activations. Using the implementations from the `models` package ensures a consistent way to get activations from models. As long as you don't want to add new model architectures, most of the details of this package won't matter. -For now, only linear computational graphs are supported, i.e. each model needs to -be a fixed sequence of computational steps performed one after the other -(like a `Sequential` module in many deep learning frameworks). A `Computation` -is just a type alias for such as sequence of steps. The `Model` class takes such a -`Computation` and is itself a `flax.linen.Module` that implements the computation. -The main thing it does on top of `flax.linen.Sequential` is that it can also return -all the activations of the model. It also has a function for plotting the architecture -of the model. - -Similar to the `DataConfig` interface, there's a `ModelConfig` with a `build()` -method that returns a `Model` instance. 
+In the future, we'll likely deprecate the `HookedModel` interface and just support +standard `torch.nn.Module`s via pytorch hooks. ### `cupbearer.utils` -The `utils` package contains many miscallaneous helper functions. You probably won't -interact with these too much, but here are a few that it may be good to know about: -- `utils.trainer` contains a `Trainer` class that's a very simple version of pytorch - lightning for flax. You certainly don't need to use this in any scripts you add, - but it may save you some boilerplate. NOTE: we might deprecate this in the future - and replace it with something like `elegy`. -- `utils.utils.save` and `utils.utils.load` can save and store pytrees. They use the - `orbax` checkpointer under the hood, but add some hacky support for saving/loading - types. - -We'll cover a few more functions from the `utils` package when we talk about scripts. +The `utils` package contains some miscellaneous helper functions. Most of these are +mainly for internal usage, but see the example notebooks for helpful ones. ## Tasks -The `tasks` package contains the `TaskConfigBase` ABC, which is the interface any -task needs to implement, as well as all the existing tasks. To add a new task: -1. Create a new module or subpackage in `tasks`, where you implement a new class - that inherits `TaskConfigBase`. -2. Add your new class to the `TASKS` dictionary in `tasks/__init__.py`. +The `tasks` package contains the `Task` class, which is the interface any +task needs to implement, as well as all the existing tasks. To add a new task, +you can either inherit `Task` or simply write a function that returns a `Task` instance. -Often, you'll also need to implement a new type of dataset or model. +Often, you'll also need to implement a new type of dataset or model for your task. That code probably belongs in the `data` and `model` packages, though sometimes it's a judgement call. See [adding_a_task.md](adding_a_task.md) for more details. 
## Detectors -The `detectors` package is similar to `tasks`, but for anomaly detectors. In addition -to the `DetectorConfig` interface, it also contains an `AnomalyDetector` ABC, which -any detection method needs to subclass for its actual implementation. +The `detectors` package is similar to `tasks`, but for anomaly detectors. The key +interface is `AnomalyDetector`. See [adding_a_detector.md](adding_a_detector.md) for more details. ## Scripts -The `scripts` package contains command line scripts and their configurations. +The `scripts` package contains Python functions for running common workflows. Two scripts are meant to be used by all detectors/tasks: - `train_detector` trains a detector on a task and saves the trained detector to disk. - `eval_detector` evaluates a stored (or otherwise specified) detector and evaluates @@ -92,7 +52,3 @@ Two scripts are meant to be used by all detectors/tasks: All other scripts are helper scripts for specific tasks or detectors. For example, most tasks will need a script to train the model to be analyzed, and perhaps to prepare the dataset. - -There's a lot more to be said about scripts, see the [README](../README.md) for a brief -overview of *running* scripts, and [adding_a_script.md](adding_a_script.md) for details -on writing new scripts. 
From f9ab02b47e3d09b10c3dccc998f948550cc31d6e Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sat, 2 Mar 2024 18:29:52 -0800 Subject: [PATCH 22/25] Remove simple_parsing dependency --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 58026bf3..77edeb37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,6 @@ dependencies = [ "torch~=2.0.0", "torchvision~=0.15.1", "torchattacks~=3.5.1", - "simple_parsing~=0.1.3", "lightning~=2.1.0", "torchmetrics~=1.2.0", "tensorboard", From d61c6762a15848c548cdf5a003ae26e5ed5c3218 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Sun, 3 Mar 2024 18:09:18 -0800 Subject: [PATCH 23/25] Adjust tampering/LM code to no-config style --- src/cupbearer/data/__init__.py | 4 +- src/cupbearer/data/huggingface.py | 22 +++-------- src/cupbearer/data/tampering.py | 61 +++++++++---------------------- src/cupbearer/models/__init__.py | 3 +- tests/test_tampering.py | 55 +++++++++++++++++++--------- 5 files changed, 62 insertions(+), 83 deletions(-) diff --git a/src/cupbearer/data/__init__.py b/src/cupbearer/data/__init__.py index fca090c3..08879ef3 100644 --- a/src/cupbearer/data/__init__.py +++ b/src/cupbearer/data/__init__.py @@ -8,9 +8,9 @@ NoiseBackdoor, WanetBackdoor, ) -from .huggingface import IMDBDatasetConfig +from .huggingface import IMDBDataset from .pytorch import CIFAR10, GTSRB, MNIST, PytorchDataset -from .tampering import TamperingDataConfig +from .tampering import TamperingDataset from .toy_ambiguous_features import ToyDataset from .transforms import ( GaussianNoise, diff --git a/src/cupbearer/data/huggingface.py b/src/cupbearer/data/huggingface.py index 25b06646..18499f12 100644 --- a/src/cupbearer/data/huggingface.py +++ b/src/cupbearer/data/huggingface.py @@ -1,27 +1,15 @@ -from dataclasses import dataclass - import datasets import torch -from . 
import DatasetConfig - class IMDBDataset(torch.utils.data.Dataset): - def __init__(self): - self.dataset = datasets.load_dataset("imdb", split="train") + def __init__(self, train: bool = True): + split = "train" if train else "validation" + self.dataset = datasets.load_dataset("imdb", split=split) def __len__(self): return len(self.dataset) def __getitem__(self, idx): - return self.dataset[idx]["text"], self.dataset[idx]["label"] - - -@dataclass -class IMDBDatasetConfig(DatasetConfig): - @property - def num_classes(self): - return 2 - - def _build(self): - return IMDBDataset() + sample = self.dataset[idx] + return sample["text"], sample["label"] diff --git a/src/cupbearer/data/tampering.py b/src/cupbearer/data/tampering.py index e51a6447..a9d3a9c2 100644 --- a/src/cupbearer/data/tampering.py +++ b/src/cupbearer/data/tampering.py @@ -1,16 +1,27 @@ -from dataclasses import dataclass -from typing import ClassVar - import torch from datasets import load_dataset -from . import DatasetConfig +TAMPERING_DATSETS = { + "diamonds": "redwoodresearch/diamonds-seed0", + "text_props": "redwoodresearch/text_properties", + "gen_stories": "redwoodresearch/generated_stories", +} class TamperingDataset(torch.utils.data.Dataset): - def __init__(self, dataset): + def __init__(self, name: str, train: bool = True): + # TODO: allow for local loading / saving super().__init__() - self.dataset = dataset + self.train = train + self.name = name + + hf_name = ( + TAMPERING_DATSETS[self.name] + if self.name in TAMPERING_DATSETS + else self.name + ) + split = "train" if self.train else "validation" + self.dataset = load_dataset(hf_name, split=split) def __getitem__(self, idx): sample = self.dataset[idx] @@ -22,41 +33,3 @@ def __getitem__(self, idx): def __len__(self): return len(self.dataset) - - -TAMPERING_DATSETS = { - "diamonds": "redwoodresearch/diamonds-seed0", - "text_props": "redwoodresearch/text_properties", - "gen_stories": "redwoodresearch/generated_stories", -} - - -@dataclass -class 
TamperingDataConfig(DatasetConfig): - n_sensors: ClassVar[int] = 3 # not configurable - train: bool = True # TODO: how does cupbearer use this? - name: str = None - - def __post_init__(self): - assert self.name, "must pass name argument" - return super().__post_init__() - - @property - def num_classes(self): - # only used for multi-class classification - return None - - @property - def num_labels(self): - # n sensors + all(sensors) - return self.n_sensors + 1 - - def _build(self) -> TamperingDataset: # TODO: allow for local loading / saving - name = ( - TAMPERING_DATSETS[self.name] - if self.name in TAMPERING_DATSETS - else self.name - ) - split = "train" if self.train else "validation" - dataset = load_dataset(name, split=split) - return TamperingDataset(dataset) diff --git a/src/cupbearer/models/__init__.py b/src/cupbearer/models/__init__.py index 7dd0f470..185e256a 100644 --- a/src/cupbearer/models/__init__.py +++ b/src/cupbearer/models/__init__.py @@ -2,11 +2,10 @@ from pathlib import Path import torch -from transformers.modeling_utils import PreTrainedModel -from transformers.tokenization_utils_base import PreTrainedTokenizerBase from .hooked_model import HookedModel from .models import CNN, MLP, PreActResNet +from .transformers_hf import TamperingPredictionTransformer def load(model: HookedModel, path: Path | str): diff --git a/tests/test_tampering.py b/tests/test_tampering.py index d3db1c14..f54fdbf9 100644 --- a/tests/test_tampering.py +++ b/tests/test_tampering.py @@ -1,41 +1,60 @@ import pytest +import torch from cupbearer import data, models from cupbearer.scripts import ( eval_classifier, train_classifier, ) -from cupbearer.scripts.conf import ( - eval_classifier_conf, - train_classifier_conf, -) @pytest.fixture(scope="module") -def measurement_predictor_path(module_tmp_path): - cfg = train_classifier_conf.DebugConfig( - model=models.TamperTransformerConfig(name="pythia-14m"), - train_data=data.TamperingDataConfig(name="redwoodresearch/diamonds-seed0"), 
+def pythia(): + transformer, tokenizer, emb_dim, max_len = models.transformers_hf.load_transformer( + "pythia-14m" + ) + return models.TamperingPredictionTransformer( + model=transformer, + tokenizer=tokenizer, + embed_dim=emb_dim, + max_length=max_len, + n_sensors=3, + ) + + +@pytest.fixture(scope="module") +def diamond(): + return torch.utils.data.Subset(data.TamperingDataset("diamonds"), range(10)) + + +@pytest.fixture(scope="module") +def measurement_predictor_path(pythia, diamond, module_tmp_path): + train_loader = torch.utils.data.DataLoader(diamond, batch_size=2) + + train_classifier( + train_loader=train_loader, + model=pythia, + num_labels=4, task="multilabel", path=module_tmp_path, + max_steps=1, + logger=False, ) - train_classifier(cfg) - assert (module_tmp_path / "config.yaml").is_file() assert (module_tmp_path / "checkpoints" / "last.ckpt").is_file() - assert (module_tmp_path / "tensorboard").is_dir() return module_tmp_path @pytest.mark.slow -def test_eval_classifier(measurement_predictor_path): - cfg = eval_classifier_conf.DebugConfig( +def test_eval_classifier(pythia, diamond, measurement_predictor_path): + models.load(pythia, measurement_predictor_path) + + eval_classifier( + data=diamond, + model=pythia, path=measurement_predictor_path, - data=data.TamperingDataConfig( - name="redwoodresearch/diamonds-seed0", train=False - ), + max_batches=1, + batch_size=2, ) - eval_classifier(cfg) - assert (measurement_predictor_path / "eval.json").is_file() From 565f45623debb07caa1a7a629b3feb981de37b6d Mon Sep 17 00:00:00 2001 From: Viktor Rehnberg Date: Mon, 4 Mar 2024 11:56:26 +0100 Subject: [PATCH 24/25] Add convenience method to clone WanetBackdoor instance --- src/cupbearer/data/backdoors.py | 45 ++++++++++++++++++++++++++++++++- tests/test_data.py | 28 ++++++++++++-------- 2 files changed, 61 insertions(+), 12 deletions(-) diff --git a/src/cupbearer/data/backdoors.py b/src/cupbearer/data/backdoors.py index 13bf9baa..2e34d9d3 100644 --- 
a/src/cupbearer/data/backdoors.py +++ b/src/cupbearer/data/backdoors.py @@ -1,8 +1,10 @@ +from __future__ import annotations + import os from abc import ABC from dataclasses import dataclass from pathlib import Path -from typing import Tuple +from typing import Optional, Tuple import torch import torch.nn.functional as F @@ -165,6 +167,47 @@ def control_grid(self, control_grid: torch.Tensor): self._control_grid = control_grid + def clone( + self, + *, + target_class: Optional[int] = None, + path: Optional[Path | str] = None, + p_backdoor: Optional[float] = None, + p_noise: Optional[float] = None, + warping_strength: Optional[float] = None, + grid_rescale: Optional[float] = None, + ) -> WanetBackdoor: + """Create a new instance but with the same control_grid as current instance.""" + other = type(self)( + path=(path if path is not None else self.path), + p_backdoor=(p_backdoor if p_backdoor is not None else self.p_backdoor), + p_noise=(p_noise if p_noise is not None else self.p_noise), + target_class=( + target_class if target_class is not None else self.target_class + ), + control_grid_width=self.control_grid_width, + warping_strength=( + warping_strength + if warping_strength is not None + else self.warping_strength + ), + grid_rescale=( + grid_rescale if grid_rescale is not None else self.grid_rescale + ), + ) + logger.debug("Setting control grid of clone from instance.") + assert self._warping_field is None + other.control_grid = ( + self.control_grid * other.warping_strength / self.warping_strength + ) + return other + + path: Path | str | None + p_noise: float = 0.0 # Probability of non-backdoor warping + control_grid_width: int = 4 # Side length of unscaled warping field + warping_strength: float = 0.5 # Strength of warping effect + grid_rescale: float = 1.0 # Factor to rescale grid from warping effect + @property def warping_field(self) -> torch.Tensor: if self._warping_field is None: diff --git a/tests/test_data.py b/tests/test_data.py index 
b323b89d..9e66ba04 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -1,4 +1,5 @@ import functools +import itertools from dataclasses import dataclass import numpy as np @@ -154,29 +155,26 @@ def test_wanet_backdoor(clean_image_dataset): # Pick a target class outside the actual range so we can later tell whether it # was set correctly. target_class = 10_000 + backdoor = data.backdoors.WanetBackdoor( + path=None, + p_backdoor=0.0, + target_class=target_class, + ) clean_data = data.BackdoorDataset( original=clean_image_dataset, - backdoor=data.backdoors.WanetBackdoor( - path=None, - p_backdoor=0.0, - target_class=target_class, - ), + backdoor=backdoor, ) anomalous_data = data.BackdoorDataset( original=clean_image_dataset, - backdoor=data.backdoors.WanetBackdoor( - path=None, + backdoor=backdoor.clone( p_backdoor=1.0, - target_class=target_class, ), ) noise_data = data.BackdoorDataset( original=clean_image_dataset, - backdoor=data.backdoors.WanetBackdoor( - path=None, + backdoor=backdoor.clone( p_backdoor=0.0, p_noise=1.0, - target_class=target_class, ), ) for ( @@ -202,6 +200,14 @@ def test_wanet_backdoor(clean_image_dataset): assert torch.max(clean_img) <= 1 assert torch.max(anoma_img) <= 1 assert torch.max(noise_img) <= 1 + for ds1, ds2 in itertools.combinations( + [clean_data, anomalous_data, noise_data], + r=2, + ): + assert torch.allclose( + ds1.backdoor.warping_field, + ds2.backdoor.warping_field, + ) def test_wanet_backdoor_on_multiple_workers( From 2c1b38cbfdb7e0f7c2e8b22edc9b7eed4f11ebb4 Mon Sep 17 00:00:00 2001 From: Erik Jenner Date: Mon, 4 Mar 2024 12:18:29 -0800 Subject: [PATCH 25/25] Minor changes to WaNet cloning --- src/cupbearer/data/backdoors.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/cupbearer/data/backdoors.py b/src/cupbearer/data/backdoors.py index 2e34d9d3..9f3b9b16 100644 --- a/src/cupbearer/data/backdoors.py +++ b/src/cupbearer/data/backdoors.py @@ -171,7 +171,6 @@ def clone( self, *, 
target_class: Optional[int] = None, - path: Optional[Path | str] = None, p_backdoor: Optional[float] = None, p_noise: Optional[float] = None, warping_strength: Optional[float] = None, @@ -179,7 +178,7 @@ def clone( ) -> WanetBackdoor: """Create a new instance but with the same control_grid as current instance.""" other = type(self)( - path=(path if path is not None else self.path), + path=self.path, p_backdoor=(p_backdoor if p_backdoor is not None else self.p_backdoor), p_noise=(p_noise if p_noise is not None else self.p_noise), target_class=( @@ -202,12 +201,6 @@ def clone( ) return other - path: Path | str | None - p_noise: float = 0.0 # Probability of non-backdoor warping - control_grid_width: int = 4 # Side length of unscaled warping field - warping_strength: float = 0.5 # Strength of warping effect - grid_rescale: float = 1.0 # Factor to rescale grid from warping effect - @property def warping_field(self) -> torch.Tensor: if self._warping_field is None: