diff --git a/botorch/acquisition/thompson_sampling.py b/botorch/acquisition/thompson_sampling.py
index ad596bc2d5..04c0d8f934 100644
--- a/botorch/acquisition/thompson_sampling.py
+++ b/botorch/acquisition/thompson_sampling.py
@@ -6,10 +6,16 @@
 import torch
 from botorch.acquisition.analytic import AcquisitionFunction
-from botorch.acquisition.objective import PosteriorTransform
+from botorch.acquisition.objective import (
+    IdentityMCObjective,
+    MCAcquisitionObjective,
+    PosteriorTransform,
+)
+from botorch.exceptions.errors import UnsupportedError
+from botorch.models.deterministic import GenericDeterministicModel
 from botorch.models.model import Model
 from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
-from botorch.utils.transforms import t_batch_mode_transform
+from botorch.utils.transforms import is_ensemble, t_batch_mode_transform
 from torch import Tensor
@@ -32,55 +38,151 @@ class PathwiseThompsonSampling(AcquisitionFunction):
     def __init__(
         self,
         model: Model,
+        objective: MCAcquisitionObjective | None = None,
         posterior_transform: PosteriorTransform | None = None,
     ) -> None:
         r"""Single-outcome TS.

+        If using a multi-output `model`, the acquisition function requires either an
+        `objective` or a `posterior_transform` that transforms the multi-output
+        posterior samples to single-output posterior samples.
+
         Args:
             model: A fitted GP model.
-            posterior_transform: A PosteriorTransform. If using a multi-output model,
-                a PosteriorTransform that transforms the multi-output posterior into a
-                single-output posterior is required.
+            objective: The MCAcquisitionObjective under which the samples are
+                evaluated. Defaults to `IdentityMCObjective()`.
+            posterior_transform: An optional PosteriorTransform.
         """
-        if model._is_fully_bayesian:
-            raise NotImplementedError(
-                "PathwiseThompsonSampling is not supported for fully Bayesian models",
-            )
         super().__init__(model=model)
         self.batch_size: int | None = None
-
-    def redraw(self) -> None:
+        self.samples: GenericDeterministicModel | None = None
+        self.ensemble_indices: Tensor | None = None
+
+        # NOTE: This conditional block is copied from MCAcquisitionFunction; we should
+        # consider inheriting from it to get, e.g., the X_pending logic as well.
+        if objective is None and model.num_outputs != 1:
+            if posterior_transform is None:
+                raise UnsupportedError(
+                    "Must specify an objective or a posterior transform when using "
+                    "a multi-output model."
+                )
+            elif not posterior_transform.scalarize:
+                raise UnsupportedError(
+                    "If using a multi-output model without an objective, "
+                    "posterior_transform must scalarize the output."
+                )
+        if objective is None:
+            objective = IdentityMCObjective()
+        self.objective = objective
+        self.posterior_transform = posterior_transform
+
+    def redraw(self, batch_size: int) -> None:
+        sample_shape = (batch_size,)
         self.samples = get_matheron_path_model(
-            model=self.model, sample_shape=torch.Size([self.batch_size])
+            model=self.model, sample_shape=torch.Size(sample_shape)
         )
+        if is_ensemble(self.model):
+            # the ensembling dimension is assumed to be part of the batch shape
+            model_batch_shape = self.model.batch_shape
+            if len(model_batch_shape) > 1:
+                raise NotImplementedError(
+                    "Ensemble models with more than one ensemble dimension are not "
+                    "yet supported."
+                )
+            num_ensemble = model_batch_shape[0]
+            # ensemble_indices is cached here to ensure that the acquisition function
+            # becomes deterministic for the same input and can be optimized with L-BFGS.
+            # ensemble_indices is used in select_from_ensemble_models.
+            self.ensemble_indices = torch.randint(
+                0,
+                num_ensemble,
+                (*sample_shape, 1, self.model.num_outputs),
+            )

     @t_batch_mode_transform()
     def forward(self, X: Tensor) -> Tensor:
         r"""Evaluate the pathwise posterior sample draws on the candidate set X.

         Args:
-            X: A `(b1 x ... bk) x 1 x d`-dim batched tensor of `d`-dim design points.
+            X: A `batch_shape x q x d`-dim batched tensor of `d`-dim design points.

         Returns:
-            A `(b1 x ... bk) x [num_models for fully bayesian]`-dim tensor of
-            evaluations on the posterior sample draws.
+            A `batch_shape`-dim tensor of evaluations on the posterior sample draws,
+            where the samples are summed over the q-batch dimension.
         """
-        batch_size = X.shape[-2]
-        q_dim = -2
+        objective_values = self._pathwise_forward(X)  # batch_shape x q
+        # NOTE: The current implementation sums over the q-batch dimension, which means
+        # that we are optimizing the sum of independent Thompson samples. In the future,
+        # we can leverage *batched* L-BFGS optimization, rather than summing over the q
+        # dimension, which will guarantee descent steps for all members of the batch
+        # through batch-member-specific learning rate selection.
+        return objective_values.sum(-1)  # batch_shape
+
+    def _pathwise_forward(self, X: Tensor) -> Tensor:
+        """Evaluate the pathwise posterior sample draws on the candidate set X.
+
+        Args:
+            X: A `batch_shape x q x d`-dim batched tensor of `d`-dim design points.
+
+        Returns:
+            A `batch_shape x q`-dim tensor of evaluations on the posterior sample draws.
+        """
+        batch_size = X.shape[-2]
         # batch_shape x q x 1 x d
         X = X.unsqueeze(-2)
-        if self.batch_size is None:
+        if self.samples is None:
             self.batch_size = batch_size
-            self.redraw()
-        elif self.batch_size != batch_size:
+            self.redraw(batch_size=batch_size)
+
+        if self.batch_size != batch_size:
             raise ValueError(
                 BATCH_SIZE_CHANGE_ERROR.format(self.batch_size, batch_size)
             )
+        # batch_shape x q [x num_ensembles] x 1 x m
+        posterior_values = self.samples(X)
+        # batch_shape x q [x num_ensembles] x m
+        posterior_values = posterior_values.squeeze(-2)

-        # posterior_values.shape post-squeeze:
-        # batch_shape x q x m
-        posterior_values = self.samples(X).squeeze(-2)
-        # sum over batch dim and squeeze num_objectives dim (-1)
-        return posterior_values.sum(q_dim).squeeze(-1)
+        posterior_values = self.select_from_ensemble_models(values=posterior_values)
+
+        if self.posterior_transform:
+            posterior_values = self.posterior_transform.evaluate(posterior_values)
+        # objective removes the `m` dimension
+        objective_values = self.objective(posterior_values)  # batch_shape x q
+        return objective_values
+
+    def select_from_ensemble_models(self, values: Tensor) -> Tensor:
+        """Subselects the value associated with a single sample of the ensemble for
+        each element of `values`, removing the ensemble dimension from the result.
+
+        NOTE: 1) uses `self.model` and `is_ensemble` to determine whether or not an
+        ensembling dimension is present. 2) uses `self.ensemble_indices` to select the
+        value associated with a single sample in the ensemble. `ensemble_indices`
+        contains uniformly sampled random indices for each element of the ensemble, and
+        is cached to make the evaluation of the acquisition function deterministic.
+
+        Args:
+            values: A `batch_shape x num_draws x q [x num_ensemble] x m`-dim Tensor.
+
+        Returns:
+            A `batch_shape x num_draws x q x m`-dim Tensor where each element contains
+            a single sample from the ensemble, selected with `self.ensemble_indices`.
+        """
+        if not is_ensemble(self.model):
+            return values
+
+        ensemble_dim = -2
+        # `ensemble_indices` are fixed so that the acquisition function becomes
+        # deterministic for the same input and can be optimized with L-BFGS.
+        # ensemble indices have shape num_paths x 1 x m
+        self.ensemble_indices = self.ensemble_indices.to(device=values.device)
+        index = self.ensemble_indices
+        input_batch_shape = values.shape[:-3]
+        index = index.expand(*input_batch_shape, *index.shape)
+        # values is batch_shape x q x num_ensemble x m
+        values_wo_ensemble = torch.gather(values, dim=ensemble_dim, index=index)
+        return values_wo_ensemble.squeeze(
+            ensemble_dim
+        )  # removing the ensemble dimension
diff --git a/botorch/acquisition/utils.py b/botorch/acquisition/utils.py
index a930488680..00fbe35291 100644
--- a/botorch/acquisition/utils.py
+++ b/botorch/acquisition/utils.py
@@ -575,7 +575,11 @@ def get_optimal_samples(
     else:
         sample_transform = None

-    paths = get_matheron_path_model(model=model, sample_shape=torch.Size([num_optima]))
+    paths = get_matheron_path_model(
+        model=model,
+        sample_shape=torch.Size([num_optima]),
+        ensemble_as_batch=True,
+    )
     optimal_inputs, optimal_outputs = optimize_posterior_samples(
         paths=paths,
         bounds=bounds,
diff --git a/botorch/models/deterministic.py b/botorch/models/deterministic.py
index 7ec568caed..4be43664d6 100644
--- a/botorch/models/deterministic.py
+++ b/botorch/models/deterministic.py
@@ -64,7 +64,12 @@ class GenericDeterministicModel(DeterministicModel):
         >>> model = GenericDeterministicModel(f)
     """

-    def __init__(self, f: Callable[[Tensor], Tensor], num_outputs: int = 1) -> None:
+    def __init__(
+        self,
+        f: Callable[[Tensor], Tensor],
+        num_outputs: int = 1,
+        batch_shape: torch.Size | None = None,
+    ) -> None:
         r"""
         Args:
             f: A callable mapping a `batch_shape x n x d`-dim input tensor `X`
@@ -75,6 +80,12 @@ def __init__(self, f: Callable[[Tensor], Tensor], num_outputs: int = 1) -> None:
         super().__init__()
         self._f = f
         self._num_outputs = num_outputs
+        self._batch_shape = batch_shape
+
+    @property
+    def batch_shape(self) -> torch.Size | None:
+        r"""The batch shape of the model."""
+        return self._batch_shape

     def subset_output(self, idcs: list[int]) -> GenericDeterministicModel:
         r"""Subset the model along the output dimension.
@@ -100,7 +111,19 @@ def forward(self, X: Tensor) -> Tensor:
         Returns:
             A `batch_shape x n x m`-dimensional output tensor.
         """
-        return self._f(X)
+        Y = self._f(X)
+        batch_shape = Y.shape[:-2]
+        # batch_shape is optional, to preserve the old behavior of not specifying it
+        if self.batch_shape is not None:
+            try:
+                torch.broadcast_shapes(self.batch_shape, batch_shape)
+            except RuntimeError:
+                raise ValueError(
+                    "GenericDeterministicModel was initialized with batch_shape="
+                    f"{self.batch_shape} but the output of f has a batch_shape="
+                    f"{batch_shape} that is not broadcastable with it."
+ ) + return Y class AffineDeterministicModel(DeterministicModel): diff --git a/botorch/sampling/pathwise/paths.py b/botorch/sampling/pathwise/paths.py index 0b64792502..175739112a 100644 --- a/botorch/sampling/pathwise/paths.py +++ b/botorch/sampling/pathwise/paths.py @@ -6,7 +6,7 @@ from __future__ import annotations -from abc import ABC +from abc import ABC, abstractmethod from collections.abc import Callable, Iterable, Iterator, Mapping from typing import Any @@ -24,6 +24,16 @@ class SamplePath(ABC, TransformedModuleMixin, Module): r"""Abstract base class for Botorch sample paths.""" + @abstractmethod + def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None: + """Sets whether the ensemble dimension is considered as a batch dimension. + + Args: + ensemble_as_batch: Whether the ensemble dimension is considered as a batch + dimension or not. + """ + pass # pragma: no cover + class PathDict(SamplePath): r"""A dictionary of SamplePaths.""" @@ -84,6 +94,16 @@ def __getitem__(self, key: str) -> SamplePath: def __setitem__(self, key: str, val: SamplePath) -> None: self.paths[key] = val + def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None: + """Sets whether the ensemble dimension is considered as a batch dimension. + + Args: + ensemble_as_batch: Whether the ensemble dimension is considered as a batch + dimension or not. + """ + for path in self.paths.values(): + path.set_ensemble_as_batch(ensemble_as_batch) + class PathList(SamplePath): r"""A list of SamplePaths.""" @@ -136,6 +156,16 @@ def __getitem__(self, key: int) -> SamplePath: def __setitem__(self, key: int, val: SamplePath) -> None: self.paths[key] = val + def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None: + """Sets whether the ensemble dimension is considered as a batch dimension. + + Args: + ensemble_as_batch: Whether the ensemble dimension is considered as a batch + dimension or not. + """ + for path in self.paths: + path.set_ensemble_as_batch(ensemble_as_batch) + class GeneralizedLinearPath(SamplePath): r"""A sample path in the form of a generalized linear model.""" @@ -147,6 +177,8 @@ def __init__( bias_module: Module | None = None, input_transform: TInputTransform | None = None, output_transform: TOutputTransform | None = None, + is_ensemble: bool = False, + ensemble_as_batch: bool = False, ): r"""Initializes a GeneralizedLinearPath instance. @@ -157,10 +189,17 @@ def __init__( Args: feature_map: A map used to featurize the module's inputs. - weight: A tensor of weights used to combine input features. + weight: A tensor of weights used to combine input features. When generated + with `draw_kernel_feature_paths`, `weight` is a Tensor with the shape + `sample_shape x batch_shape x num_outputs`. bias_module: An optional module used to define additive offsets. input_transform: An optional input transform for the module. output_transform: An optional output transform for the module. + is_ensemble: Whether the associated model is an ensemble model or not. + ensemble_as_batch: Whether the ensemble dimension is added as a batch + dimension or not. If `True`, the ensemble dimension is treated as a + batch dimension, which allows for the joint optimization of all members + of the ensemble. 
""" super().__init__() self.feature_map = feature_map @@ -170,8 +209,36 @@ def __init__( self.bias_module = bias_module self.input_transform = input_transform self.output_transform = output_transform + self.is_ensemble = is_ensemble + self.ensemble_as_batch = ensemble_as_batch def forward(self, x: Tensor, **kwargs) -> Tensor: + """Evaluates the path. + + Args: + x: The input tensor of shape `batch_shape x [num_ensemble x] q x d`, where + `num_ensemble` is the number of ensemble members and is required to + *only* be included if `is_ensemble=True` and `ensemble_as_batch=True`. + kwargs: Additional keyword arguments passed to the feature map. + + Returns: + A tensor of shape `batch_shape x [num_ensemble x] q x m`, where `m` is the + number of outputs, where `num_ensemble` is only included if `is_ensemble` + is `True`, and regardless of whether `ensemble_as_batch` is `True` or not. + """ + if self.is_ensemble and not self.ensemble_as_batch: + # assuming that the ensembling dimension is added after (n, d), but + # before the other batch dimensions, starting from the left. + x = x.unsqueeze(-3) feat = self.feature_map(x, **kwargs) out = (feat @ self.weight.unsqueeze(-1)).squeeze(-1) return out if self.bias_module is None else out + self.bias_module(x) + + def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None: + """Sets whether the ensemble dimension is considered as a batch dimension. + + Args: + ensemble_as_batch: Whether the ensemble dimension is considered as a batch + dimension or not. + """ + self.ensemble_as_batch = ensemble_as_batch diff --git a/botorch/sampling/pathwise/posterior_samplers.py b/botorch/sampling/pathwise/posterior_samplers.py index 33c8d5e029..09d14f114a 100644 --- a/botorch/sampling/pathwise/posterior_samplers.py +++ b/botorch/sampling/pathwise/posterior_samplers.py @@ -87,7 +87,7 @@ def __init__( def get_matheron_path_model( - model: GP, sample_shape: Size | None = None + model: GP, sample_shape: Size | None = None, ensemble_as_batch: bool = False ) -> GenericDeterministicModel: r"""Generates a deterministic model using a single Matheron path drawn from the model's posterior. @@ -102,12 +102,19 @@ def get_matheron_path_model( deterministic model will behave as if the `sample_shape` is prepended to the `batch_shape` of the model. The inputs used to evaluate the model must be adjusted to match. + ensemble_as_batch: If True, and model is an ensemble model, the resuling path + model will treat the ensemble dimension as a batch dimension, which means + that its inputs have to contain the ensemble dimension in the -3 position, + i.e. `batch_shape x ensemble_size x q x d`. This is used when optimizing the + paths of all members of an ensemble jointly, with distinct optima for each + member of the ensemble. Returns: A deterministic model that evaluates the Matheron path. """ sample_shape = Size() if sample_shape is None else sample_shape path = draw_matheron_paths(model, sample_shape=sample_shape) + path.set_ensemble_as_batch(ensemble_as_batch) num_outputs = model.num_outputs if isinstance(model, ModelList) and len(model.models) != num_outputs: raise UnsupportedError("A model-list of multi-output models is not supported.") @@ -121,7 +128,7 @@ def f(X: Tensor) -> Tensor: the model batch shape. Returns: - The output tensor of shape `batch_shape x q x m`. + The output tensor of shape `[sample_shape x] batch_shape x q x m`. """ if num_outputs == 1: # For single-output, we lack the output dimension. Add one. 
@@ -137,7 +144,11 @@ def f(X: Tensor) -> Tensor:
             res = path(X.unsqueeze(-3)).transpose(-1, -2)
         return res

-    path_model = GenericDeterministicModel(f=f, num_outputs=num_outputs)
+    path_model = GenericDeterministicModel(
+        f=f,
+        num_outputs=num_outputs,
+        batch_shape=sample_shape + model.batch_shape,
+    )
     path_model._is_ensemble = is_ensemble(model) or len(sample_shape) > 0
     return path_model
diff --git a/botorch/sampling/pathwise/prior_samplers.py b/botorch/sampling/pathwise/prior_samplers.py
index 9fe7bb46ba..37e152567c 100644
--- a/botorch/sampling/pathwise/prior_samplers.py
+++ b/botorch/sampling/pathwise/prior_samplers.py
@@ -24,6 +24,7 @@
 )
 from botorch.utils.dispatcher import Dispatcher
 from botorch.utils.sampling import draw_sobol_normal_samples
+from botorch.utils.transforms import is_ensemble
 from gpytorch.kernels import Kernel
 from gpytorch.models import ApproximateGP, ExactGP, GP
 from gpytorch.variational import _VariationalStrategy
@@ -61,6 +62,7 @@ def _draw_kernel_feature_paths_fallback(
     input_transform: TInputTransform | None = None,
     output_transform: TOutputTransform | None = None,
     weight_generator: Callable[[Size], Tensor] | None = None,
+    is_ensemble: bool = False,
 ) -> GeneralizedLinearPath:
     # Generate a kernel feature map
     feature_map = map_generator(
@@ -71,6 +73,7 @@ def _draw_kernel_feature_paths_fallback(

     # Sample random weights with which to combine kernel features
     if weight_generator is None:
+        # weight is sample_shape x batch_shape x num_outputs
         weight = draw_sobol_normal_samples(
             n=sample_shape.numel() * covar_module.batch_shape.numel(),
             d=feature_map.num_outputs,
@@ -89,6 +92,7 @@ def _draw_kernel_feature_paths_fallback(
         bias_module=mean_module,
         input_transform=input_transform,
         output_transform=output_transform,
+        is_ensemble=is_ensemble,
     )


@@ -103,6 +107,7 @@ def _draw_kernel_feature_paths_ExactGP(
         covar_module=model.covar_module,
         input_transform=get_input_transform(model),
         output_transform=get_output_transform(model),
+        is_ensemble=is_ensemble(model),
         **kwargs,
     )

@@ -150,5 +155,6 @@ def _draw_kernel_feature_paths_ApproximateGP_fallback(
         num_inputs=num_inputs,
         mean_module=model.mean_module,
         covar_module=model.covar_module,
+        is_ensemble=is_ensemble(model),
         **kwargs,
     )
diff --git a/botorch/sampling/pathwise/update_strategies.py b/botorch/sampling/pathwise/update_strategies.py
index 7d92e04a1a..f78cb5535f 100644
--- a/botorch/sampling/pathwise/update_strategies.py
+++ b/botorch/sampling/pathwise/update_strategies.py
@@ -13,6 +13,7 @@
 from typing import Any

 import torch
+
 from botorch.models.approximate_gp import ApproximateGPyTorchModel
 from botorch.models.transforms.input import InputTransform
 from botorch.sampling.pathwise.features import KernelEvaluationMap
@@ -24,6 +25,7 @@
     TInputTransform,
 )
 from botorch.utils.dispatcher import Dispatcher
+from botorch.utils.transforms import is_ensemble
 from botorch.utils.types import DEFAULT
 from gpytorch.kernels.kernel import Kernel
 from gpytorch.likelihoods import _GaussianLikelihoodBase, Likelihood
@@ -79,6 +81,7 @@ def _gaussian_update_exact(
     noise_covariance: Tensor | LinearOperator | None = None,
     scale_tril: Tensor | LinearOperator | None = None,
     input_transform: TInputTransform | None = None,
+    is_ensemble: bool = False,
 ) -> GeneralizedLinearPath:
     # Prepare Cholesky factor of `Cov(y, y)` and noise sample values as needed
     if isinstance(noise_covariance, (NoneType, ZeroLinearOperator)):
@@ -103,7 +106,9 @@ def _gaussian_update_exact(
         points=points,
         input_transform=input_transform,
     )
-    return GeneralizedLinearPath(feature_map=feature_map, weight=weight.squeeze(-1))
+    return GeneralizedLinearPath(
+        feature_map=feature_map, weight=weight.squeeze(-1), is_ensemble=is_ensemble
+    )


 @GaussianUpdate.register(ExactGP, _GaussianLikelihoodBase)
@@ -134,6 +139,7 @@ def _gaussian_update_ExactGP(
         noise_covariance=noise_covariance,
         scale_tril=scale_tril,
         input_transform=get_input_transform(model),
+        is_ensemble=is_ensemble(model),
     )


@@ -194,4 +200,5 @@ def _gaussian_update_ApproximateGP_VariationalStrategy(
         sample_values=sample_values,
         scale_tril=L,
         input_transform=input_transform,
+        is_ensemble=is_ensemble(model),
     )
diff --git a/botorch/utils/sampling.py b/botorch/utils/sampling.py
index 7066578b9d..80d85250b1 100644
--- a/botorch/utils/sampling.py
+++ b/botorch/utils/sampling.py
@@ -996,6 +996,11 @@ def sparse_to_dense_constraints(
     return A, b


+# This is only used in get_optimal_samples, which in turn is only used in the input
+# constructors of
+# 1) qJointEntropySearch,
+# 2) qSelfCorrectingBayesianOptimization, and
+# 3) qTestSetInformationGain.
 def optimize_posterior_samples(
     paths: GenericDeterministicModel,
     bounds: Tensor,
@@ -1037,7 +1042,7 @@ def path_func(x) -> Tensor:
         bounds=bounds,
     )
     # queries all samples on all candidates - output shape
-    # raw_samples * num_optima * num_models
+    # raw_samples x num_optima x num_models
     candidate_queries = path_func(candidate_set)
     argtop_k = torch.topk(candidate_queries, num_restarts, dim=-1).indices
     X_top_k = candidate_set[argtop_k, :]
@@ -1056,8 +1061,8 @@ def path_func(x) -> Tensor:
     # For each sample (and possibly for every model in the batch of models), this
     # retrieves the argmax. We flatten, pick out the indices and then reshape to
     # the original batch shapes (so instead of picking out the argmax of a
-    # (3, 7, num_restarts, D)) along the num_restarts dim, we pick it out of a
-    # (21, num_restarts, D)
+    # (num_optima, num_models, num_restarts, D)-shaped Tensor along the num_restarts
+    # dim, we pick it out of (num_optima * num_models, num_restarts, D)
     final_shape = candidate_queries.shape[:-1]
     X_opt = X_top_k.reshape(final_shape.numel(), num_restarts, -1)[
         torch.arange(final_shape.numel()), arg_opt.flatten()
diff --git a/botorch/utils/test_helpers.py b/botorch/utils/test_helpers.py
index 1d7ef928b6..c6d4be6e62 100644
--- a/botorch/utils/test_helpers.py
+++ b/botorch/utils/test_helpers.py
@@ -39,19 +39,6 @@
 from torch.nn.functional import pad


-def _get_mcmc_samples(num_samples: int, dim: int, infer_noise: bool, **tkwargs):
-    mcmc_samples = {
-        "lengthscale": 1 + torch.rand(num_samples, 1, dim, **tkwargs),
-        "outputscale": 1 + torch.rand(num_samples, **tkwargs),
-        "mean": torch.randn(num_samples, **tkwargs),
-    }
-    if infer_noise:
-        mcmc_samples["noise"] = torch.rand(num_samples, 1, **tkwargs)
-        mcmc_samples["lengthscale"] = mcmc_samples["lengthscale"]
-
-    return mcmc_samples
-
-
 def get_model(
     train_X: Tensor,
     train_Y: Tensor,
@@ -93,8 +80,8 @@ def get_fully_bayesian_model(
     train_X: Tensor,
     train_Y: Tensor,
     num_models: int,
-    standardize_model: bool,
-    infer_noise: bool,
+    standardize_model: bool = False,
+    infer_noise: bool = True,
     **tkwargs: Any,
 ) -> SaasFullyBayesianSingleTaskGP:
     num_objectives = train_Y.shape[-1]
@@ -122,6 +109,20 @@ def get_fully_bayesian_model(
     return model


+def _get_mcmc_samples(
+    num_samples: int, dim: int, infer_noise: bool, **tkwargs
+) -> dict[str, Tensor]:
+    mcmc_samples = {
+        "lengthscale": 1 + torch.rand(num_samples, 1, dim, **tkwargs),
+        "outputscale": 1 + torch.rand(num_samples, **tkwargs),
+        "mean": torch.randn(num_samples, **tkwargs),
+    }
+    if infer_noise:
+        mcmc_samples["noise"] = torch.rand(num_samples, 1, **tkwargs)
+
+    return mcmc_samples
+
+
 def get_fully_bayesian_model_list(
     train_X: Tensor,
     train_Y: Tensor,
diff --git a/test/acquisition/test_thompson_sampling.py b/test/acquisition/test_thompson_sampling.py
index 92ee6063e3..54619ce5b2 100644
--- a/test/acquisition/test_thompson_sampling.py
+++ b/test/acquisition/test_thompson_sampling.py
@@ -6,47 +6,23 @@

 from itertools import product

+from unittest import mock
+from unittest.mock import PropertyMock
+
 import torch
+from botorch.acquisition.objective import (
+    IdentityMCObjective,
+    ScalarizedPosteriorTransform,
+)
 from botorch.acquisition.thompson_sampling import PathwiseThompsonSampling
-from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP
+from botorch.exceptions.errors import UnsupportedError
 from botorch.models.model import Model
-from botorch.utils.test_helpers import get_model
+from botorch.utils.test_helpers import get_fully_bayesian_model, get_model
 from botorch.utils.testing import BotorchTestCase


-def _get_mcmc_samples(num_samples: int, dim: int, infer_noise: bool, **tkwargs):
-    mcmc_samples = {
-        "lengthscale": torch.rand(num_samples, 1, dim, **tkwargs),
-        "outputscale": torch.rand(num_samples, **tkwargs),
-        "mean": torch.randn(num_samples, **tkwargs),
-    }
-    if infer_noise:
-        mcmc_samples["noise"] = torch.rand(num_samples, 1, **tkwargs)
-    return mcmc_samples
-
-
-def get_fully_bayesian_model(
-    train_X,
-    train_Y,
-    num_models,
-    **tkwargs,
-):
-    model = SaasFullyBayesianSingleTaskGP(
-        train_X=train_X,
-        train_Y=train_Y,
-    )
-    mcmc_samples = _get_mcmc_samples(
-        num_samples=num_models,
-        dim=train_X.shape[-1],
-        infer_noise=True,
-        **tkwargs,
-    )
-    model.load_mcmc_samples(mcmc_samples)
-    return model
-
-
 class TestPathwiseThompsonSampling(BotorchTestCase):
-    def _test_thompson_sampling_base(self, model: Model):
+    def _test_thompson_sampling_base(self, model: Model) -> None:
         acq = PathwiseThompsonSampling(
             model=model,
         )
@@ -59,11 +35,43 @@ def _test_thompson_sampling_base(self, model: Model):
         acq_pass1 = acq(test_X)
         self.assertAllClose(acq_pass1, acq(test_X))

-        acq.redraw()
+        acq.redraw(batch_size=acq.batch_size)
         acq_pass2 = acq(test_X)
         self.assertFalse(torch.allclose(acq_pass1, acq_pass2))

-    def _test_thompson_sampling_batch(self, model: Model):
+    def _test_thompson_sampling_multi_output(self, model: Model) -> None:
+        # a multi-output model requires an objective or a posterior transform
+        with self.assertRaisesRegex(
+            UnsupportedError,
+            "Must specify an objective or a posterior transform when using ",
+        ):
+            PathwiseThompsonSampling(model=model)
+
+        X_observed = model.train_inputs[0]
+        input_dim = X_observed.shape[-1]
+        tkwargs = {"device": self.device, "dtype": X_observed.dtype}
+        test_X = torch.rand(4, 1, input_dim, **tkwargs)
+        weights = torch.ones(2, **tkwargs)
+        posterior_transform = ScalarizedPosteriorTransform(weights=weights)
+        acqf = PathwiseThompsonSampling(
+            model=model, posterior_transform=posterior_transform
+        )
+        self.assertIsInstance(acqf.objective, IdentityMCObjective)
+        # testing that the acquisition function is deterministic and executes
+        # with the posterior transform
+        acq_val = acqf(test_X)
+        acq_val_2 = acqf(test_X)
+        self.assertAllClose(acq_val, acq_val_2)
+
+        posterior_transform.scalarize = False
+        with self.assertRaisesRegex(
+            UnsupportedError, "posterior_transform must scalarize the output"
+        ):
+            PathwiseThompsonSampling(
+                model=model, posterior_transform=posterior_transform
            )
+
+    def _test_thompson_sampling_batch(self, model: Model) -> None:
         X_observed = model.train_inputs[0]
         input_dim = X_observed.shape[-1]
         batch_acq = PathwiseThompsonSampling(
             model=model,
         )
@@ -92,27 +100,48 @@ def _test_thompson_sampling_batch(self, model: Model):

     def test_thompson_sampling_single_task(self):
         input_dim = 2
-        num_objectives = 1
         for dtype, standardize_model in product(
             (torch.float32, torch.float64), (True, False)
         ):
             tkwargs = {"device": self.device, "dtype": dtype}
             train_X = torch.rand(4, input_dim, **tkwargs)
+            num_objectives = 1
             train_Y = 10 * torch.rand(4, num_objectives, **tkwargs)
             model = get_model(train_X, train_Y, standardize_model=standardize_model)
             self._test_thompson_sampling_base(model)
             self._test_thompson_sampling_batch(model)

+            # multi-output model
+            num_objectives = 2
+            train_Y = 10 * torch.rand(4, num_objectives, **tkwargs)
+            model = get_model(train_X, train_Y, standardize_model=standardize_model)
+            self._test_thompson_sampling_multi_output(model)
+
     def test_thompson_sampling_fully_bayesian(self):
         input_dim = 2
         num_objectives = 1
         tkwargs = {"device": self.device, "dtype": torch.float64}
         train_X = torch.rand(4, input_dim, **tkwargs)
         train_Y = 10 * torch.rand(4, num_objectives, **tkwargs)
         fb_model = get_fully_bayesian_model(train_X, train_Y, num_models=3, **tkwargs)
-        with self.assertRaisesRegex(
-            NotImplementedError,
-            "PathwiseThompsonSampling is not supported for fully Bayesian models",
-        ):
-            PathwiseThompsonSampling(model=fb_model)
+        acqf = PathwiseThompsonSampling(model=fb_model)
+        acqf_vals = acqf(train_X)
+        acqf_vals_2 = acqf(train_X)
+        self.assertAllClose(acqf_vals, acqf_vals_2)
+
+        batch_shape = (2, 5)
+        test_X = torch.randn(*batch_shape, *train_X.shape, **tkwargs)
+        batched_output = acqf(test_X)
+        self.assertEqual(batched_output.shape, batch_shape)
+        batched_output_2 = acqf(test_X)
+        self.assertAllClose(batched_output, batched_output_2)
+
+        with mock.patch.object(
+            type(acqf.model), "batch_shape", new_callable=PropertyMock
+        ) as mock_batch_shape:
+            mock_batch_shape.return_value = (2, 3)
+            with self.assertRaisesRegex(
+                NotImplementedError,
+                "Ensemble models with more than one ensemble dimension",
+            ):
+                acqf.redraw(batch_size=2)
diff --git a/test/models/test_deterministic.py b/test/models/test_deterministic.py
index 47e440e9d6..30c8986a3f 100644
--- a/test/models/test_deterministic.py
+++ b/test/models/test_deterministic.py
@@ -58,7 +58,8 @@ def f(X):

         model = GenericDeterministicModel(f)
         self.assertEqual(model.num_outputs, 1)
-        X = torch.rand(3, 2)
+        d = 2
+        X = torch.rand(3, d)
         # basic test
         p = model.posterior(X)
         self.assertIsInstance(p, EnsemblePosterior)
@@ -81,6 +82,25 @@ def f(X):
         p_sub = subset_model.posterior(X)
         self.assertTrue(torch.equal(p_sub.mean, X[..., [0]]))

+        # testing batched model
+        batch_shape = torch.Size([2, 4])
+        batch_coefficients = torch.rand(*batch_shape, 1, d)
+
+        def batched_f(X):
+            return (X * batch_coefficients).sum(dim=-1, keepdim=True)
+
+        model = GenericDeterministicModel(batched_f, batch_shape=batch_shape)
+        Y = model(X)
+        self.assertEqual(Y.shape, torch.Size([2, 4, 3, 1]))
+
+        # testing with wrong batch shape
+        model = GenericDeterministicModel(batched_f, batch_shape=torch.Size([2]))
+
+        with self.assertRaisesRegex(
+            ValueError, "GenericDeterministicModel was initialized with batch_shape="
+        ):
+            model(X)
+
     def test_AffineDeterministicModel(self):
         # test error on bad shape of a
         with self.assertRaises(ValueError):
diff --git a/test/sampling/pathwise/test_paths.py b/test/sampling/pathwise/test_paths.py
index 3b24430f53..207502ae04 100644
--- a/test/sampling/pathwise/test_paths.py
+++ b/test/sampling/pathwise/test_paths.py
@@ -14,9 +14,14 @@


 class IdentityPath(SamplePath):
+    ensemble_as_batch: bool = False
+
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x

+    def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None:
+        self.ensemble_as_batch = ensemble_as_batch
+

 class TestGenericPaths(BotorchTestCase):
     def test_path_dict(self):
@@ -48,6 +53,12 @@ def test_path_dict(self):
         self.assertEqual(output.shape, (2,) + x.shape)
         self.assertTrue(output.eq(x).all())

+        A.set_ensemble_as_batch(True)
+        self.assertTrue(A.ensemble_as_batch)
+
+        A.set_ensemble_as_batch(False)
+        self.assertFalse(A.ensemble_as_batch)
+
         # Test `dict` methods
         self.assertEqual(len(path_dict), 2)
         for key, val, (key_0, val_0), (key_1, val_1), key_2 in zip(
diff --git a/test/sampling/pathwise/test_posterior_samplers.py b/test/sampling/pathwise/test_posterior_samplers.py
index f0ff1a79ed..0fe8acf79c 100644
--- a/test/sampling/pathwise/test_posterior_samplers.py
+++ b/test/sampling/pathwise/test_posterior_samplers.py
@@ -18,8 +18,14 @@
 from botorch.sampling.pathwise import draw_matheron_paths, MatheronPath, PathList
 from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
 from botorch.sampling.pathwise.utils import get_train_inputs
-from botorch.utils.test_helpers import get_sample_moments, standardize_moments
+from botorch.utils.test_helpers import (
+    get_fully_bayesian_model,
+    get_sample_moments,
+    standardize_moments,
+)
+
 from botorch.utils.testing import BotorchTestCase
+from botorch.utils.transforms import is_ensemble
 from gpytorch.kernels import MaternKernel, ScaleKernel
 from torch import Size
 from torch.nn.functional import pad
@@ -131,15 +137,16 @@ def _test_draw_matheron_paths(self, model, paths, sample_shape, atol=3):

     def test_get_matheron_path_model(self) -> None:
         model_list = ModelListGP(self.inferred_noise_gp, self.observed_noise_gp)
+        n, d, m = 5, 2, 3
         moo_model = SingleTaskGP(
-            train_X=torch.rand(5, 2, **self.tkwargs),
-            train_Y=torch.rand(5, 2, **self.tkwargs),
+            train_X=torch.rand(n, d, **self.tkwargs),
+            train_Y=torch.rand(n, m, **self.tkwargs),
         )

-        test_X = torch.rand(5, 2, **self.tkwargs)
-        batch_test_X = torch.rand(3, 5, 2, **self.tkwargs)
+        test_X = torch.rand(n, d, **self.tkwargs)
+        batch_test_X = torch.rand(3, n, d, **self.tkwargs)
         sample_shape = Size([2])
-        sample_shape_X = torch.rand(3, 2, 5, 2, **self.tkwargs)
+        sample_shape_X = torch.rand(3, 2, n, d, **self.tkwargs)
         for model in (self.inferred_noise_gp, moo_model, model_list):
             path_model = get_matheron_path_model(model=model)
             self.assertFalse(path_model._is_ensemble)
@@ -163,25 +170,45 @@ def test_get_matheron_path_model(self) -> None:
         )

     def test_get_matheron_path_model_batched(self) -> None:
+        n, d, m = 5, 2, 3
         model = SingleTaskGP(
-            train_X=torch.rand(4, 5, 2, **self.tkwargs),
-            train_Y=torch.rand(4, 5, 2, **self.tkwargs),
+            train_X=torch.rand(4, n, d, **self.tkwargs),
+            train_Y=torch.rand(4, n, m, **self.tkwargs),
         )
-        model._is_ensemble = True
         path_model = get_matheron_path_model(model=model)
-        self.assertTrue(path_model._is_ensemble)
-        test_X = torch.rand(5, 2, **self.tkwargs)
+        test_X = torch.rand(n, d, **self.tkwargs)
         # This mimics the behavior of the acquisition functions unsqueezing the
         # model batch dimension for ensemble models.
-        batch_test_X = torch.rand(3, 1, 5, 2, **self.tkwargs)
+        batch_test_X = torch.rand(3, 1, n, d, **self.tkwargs)
         # Explicitly matching X for completeness.
-        complete_test_X = torch.rand(3, 4, 5, 2, **self.tkwargs)
+        complete_test_X = torch.rand(3, 4, n, d, **self.tkwargs)
         for X in (test_X, batch_test_X, complete_test_X):
+            # shapes in each iteration of the loop are, respectively:
+            # torch.Size([4, 5, 2])
+            # torch.Size([3, 4, 5, 2])
+            # torch.Size([3, 4, 5, 2])
+            # irrespective of whether `is_ensemble` is true or false.
             self.assertEqual(
                 model.posterior(X).mean.shape, path_model.posterior(X).mean.shape
             )

         # Test with sample_shape.
         path_model = get_matheron_path_model(model=model, sample_shape=Size([2, 6]))
-        test_X = torch.rand(3, 2, 6, 4, 5, 2, **self.tkwargs)
-        self.assertEqual(path_model.posterior(test_X).mean.shape, test_X.shape)
+        test_X = torch.rand(3, 2, 6, 4, n, d, **self.tkwargs)
+        self.assertEqual(
+            path_model.posterior(test_X).mean.shape, torch.Size([*test_X.shape[:-1], m])
+        )
+        m = 1  # required by fully Bayesian model
+        fully_bayesian_model = get_fully_bayesian_model(
+            train_X=torch.randn(n, d, **self.tkwargs),
+            train_Y=torch.randn(n, m, **self.tkwargs),
+            num_models=3,
+            **self.tkwargs,
+        )
+        fully_bayesian_path_model = get_matheron_path_model(model=fully_bayesian_model)
+        self.assertTrue(is_ensemble(fully_bayesian_path_model))
+        for X in (test_X, batch_test_X, complete_test_X):
+            self.assertEqual(
+                fully_bayesian_model.posterior(X).mean.shape,
+                fully_bayesian_path_model.posterior(X).mean.shape,
+            )
diff --git a/test_community/acquisition/test_input_constructors.py b/test_community/acquisition/test_input_constructors.py
index 30a62703b3..92e523fba6 100644
--- a/test_community/acquisition/test_input_constructors.py
+++ b/test_community/acquisition/test_input_constructors.py
@@ -62,7 +62,10 @@ class TestFullyBayesianAcquisitionFunctionInputConstructors(
 ):
     def test_construct_inputs_scorebo(self) -> None:
         func = get_acqf_input_constructor(qSelfCorrectingBayesianOptimization)
-        num_samples, num_optima = 3, 7
+        # num_ensemble controls the ensemble size of the SAAS model
+        # num_optima controls the number of Thompson samples used to infer the
+        # distribution of optima
+        num_ensemble, num_optima = 4, 7
         model = SaasFullyBayesianSingleTaskGP(
             self.blockX_blockY[0].X, self.blockX_blockY[0].Y
         )
@@ -70,14 +73,14 @@ def test_construct_inputs_scorebo(self) -> None:
         model.load_mcmc_samples(
             {
                 "lengthscale": torch.rand(
-                    num_samples,
+                    num_ensemble,
                     1,
                     self.blockX_blockY[0].X.shape[-1],
                     dtype=torch.double,
                 ),
-                "outputscale": torch.rand(num_samples, dtype=torch.double),
-                "mean": torch.randn(num_samples, dtype=torch.double),
-                "noise": torch.rand(num_samples, 1, dtype=torch.double),
+                "outputscale": torch.rand(num_ensemble, dtype=torch.double),
+                "mean": torch.randn(num_ensemble, dtype=torch.double),
+                "noise": torch.rand(num_ensemble, 1, dtype=torch.double),
             }
         )

@@ -88,13 +91,15 @@ def test_construct_inputs_scorebo(self) -> None:
             num_optima=num_optima,
             distance_metric="kl_divergence",
         )
-        self.assertEqual(self.blockX_blockY[0].X.dtype, kwargs["optimal_inputs"].dtype)
-        self.assertEqual(len(kwargs["optimal_inputs"]), num_optima)
-        self.assertEqual(len(kwargs["optimal_outputs"]), num_optima)
+        optimal_inputs = kwargs["optimal_inputs"]
+        optimal_outputs = kwargs["optimal_outputs"]
+        self.assertEqual(self.blockX_blockY[0].X.dtype, optimal_inputs.dtype)
+        d = self.blockX_blockY[0].X.shape[-1]
+        self.assertEqual(optimal_inputs.shape, (num_optima, num_ensemble, d))
+        self.assertEqual(optimal_outputs.shape, (num_optima, num_ensemble, 1))
+
+        # asserting that, for the non-batch case, the optimal inputs are of shape
+        # num_optima x num_ensemble x d and outputs are num_optima x num_ensemble x 1
-        self.assertEqual(len(kwargs["optimal_inputs"].shape), 3)
-        self.assertEqual(len(kwargs["optimal_outputs"].shape), 3)
         self.assertEqual(kwargs["distance_metric"], "kl_divergence")
         qSelfCorrectingBayesianOptimization(**kwargs)
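Taken together, these changes let `PathwiseThompsonSampling` run on ensemble (fully Bayesian) models and on multi-output models. A condensed usage sketch, mirroring the tests in this diff (illustrative only; it reuses the `get_fully_bayesian_model` helper from `botorch.utils.test_helpers`, and the shapes in the comments follow the docstrings above):

```python
import torch

from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.acquisition.thompson_sampling import PathwiseThompsonSampling
from botorch.models import SingleTaskGP
from botorch.utils.test_helpers import get_fully_bayesian_model

tkwargs = {"dtype": torch.float64}
train_X = torch.rand(8, 2, **tkwargs)
test_X = torch.rand(5, 1, 2, **tkwargs)  # b=5 x q=1 x d=2

# Fully Bayesian (ensemble) model: previously raised NotImplementedError; now
# the cached ensemble_indices keep repeated evaluations deterministic.
fb_model = get_fully_bayesian_model(
    train_X, torch.rand(8, 1, **tkwargs), num_models=3, **tkwargs
)
acqf = PathwiseThompsonSampling(model=fb_model)
assert torch.allclose(acqf(test_X), acqf(test_X))

# Multi-output model: an objective or a scalarizing posterior transform is required.
moo_model = SingleTaskGP(train_X, torch.rand(8, 2, **tkwargs))
acqf = PathwiseThompsonSampling(
    model=moo_model,
    posterior_transform=ScalarizedPosteriorTransform(weights=torch.ones(2, **tkwargs)),
)
print(acqf(test_X).shape)  # expected: torch.Size([5])
```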