From 7f29dda08aa3e0ef4b9cda48c91196650a5ac6f6 Mon Sep 17 00:00:00 2001 From: Max Balandat Date: Fri, 13 Sep 2024 22:19:16 -0700 Subject: [PATCH] Use Standardize outcome transform by default in more models (#2532) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/2532 Makes models which had their priors updated in https://github.com/pytorch/botorch/pull/2507 use the `Standardize` outcome transform by default, mimicking https://github.com/pytorch/botorch/pull/2458 Also removes some deprecated functionality in the process, namely the `data_fidelity` argument to `SingleTaskMultiFidelityGP` as well as the `FixedNoiseMultiFidelityGP` and `FixedNoiseLCEMGP` models. Reviewed By: saitcakmak, esantorella Differential Revision: D62552307 --- botorch/models/approximate_gp.py | 40 +++++- botorch/models/contextual_multioutput.py | 73 ++-------- botorch/models/fully_bayesian_multitask.py | 8 +- botorch/models/gp_regression_fidelity.py | 80 +++-------- botorch/models/gp_regression_mixed.py | 9 +- botorch/models/multitask.py | 29 ++-- botorch/utils/test_helpers.py | 57 ++++++-- test/models/test_approximate_gp.py | 21 +++ test/models/test_contextual_multioutput.py | 22 +-- test/models/test_converter.py | 30 +++- test/models/test_fully_bayesian_multitask.py | 142 ++++++++++++++----- test/models/test_gp_regression.py | 42 ++---- test/models/test_gp_regression_fidelity.py | 52 ++----- test/models/test_gp_regression_mixed.py | 17 ++- test/models/test_model_list_gp_regression.py | 25 +++- test/utils/test_transforms.py | 4 +- 16 files changed, 342 insertions(+), 309 deletions(-) diff --git a/botorch/models/approximate_gp.py b/botorch/models/approximate_gp.py index 5bd4beecb6..df55632dc3 100644 --- a/botorch/models/approximate_gp.py +++ b/botorch/models/approximate_gp.py @@ -35,6 +35,7 @@ from typing import Optional, TypeVar, Union import torch +from botorch.exceptions.warnings import UserInputWarning from botorch.models.gpytorch import GPyTorchModel from botorch.models.transforms.input import InputTransform from botorch.models.transforms.outcome import OutcomeTransform @@ -70,6 +71,14 @@ TApproxModel = TypeVar("TApproxModel", bound="ApproximateGPyTorchModel") +TRANSFORM_WARNING = ( + "Using an {ttype} transform with `SingleTaskVariationalGP`. If this " + "model is trained in minibatches, a {ttype} transform with learnable " + "parameters would update its parameters for each minibatch, which is " + "undesirable. If you do intend to train in minibatches, we recommend " + "you not use a {ttype} transform and instead pre-transform your whole " + "data set before fitting the model." +) class ApproximateGPyTorchModel(GPyTorchModel): @@ -325,9 +334,9 @@ def __init__( variational_distribution: Optional[_VariationalDistribution] = None, variational_strategy: type[_VariationalStrategy] = VariationalStrategy, inducing_points: Optional[Union[Tensor, int]] = None, - outcome_transform: Optional[OutcomeTransform] = None, - input_transform: Optional[InputTransform] = None, inducing_point_allocator: Optional[InducingPointAllocator] = None, + outcome_transform: OutcomeTransform | None = None, + input_transform: InputTransform | None = None, ) -> None: r""" Args: @@ -338,6 +347,8 @@ def __init__( either a `GaussianLikelihood` (if `num_outputs=1`) or a `MultitaskGaussianLikelihood`(if `num_outputs>1`). num_outputs: Number of output responses per input (default: 1). + learn_inducing_points: If True, the inducing point locations are learned + jointly with the other model parameters. covar_module: Kernel function. If omitted, uses an `RBFKernel`. mean_module: Mean of GP model. If omitted, uses a `ConstantMean`. variational_distribution: Type of variational distribution to use @@ -351,6 +362,20 @@ def __init__( inducing_point_allocator: The `InducingPointAllocator` used to initialize the inducing point locations. If omitted, uses `GreedyVarianceReduction`. + outcome_transform: An outcome transform that is applied to the training + data during instantiation and to the posterior during inference. + NOTE: If this model is trained in minibatches, an outcome transform + with learnable parameters (such as `Standardize`) would update its + parameters for each minibatch, which is undesirable. If you do intend + to train in minibatches, we recommend you not use an outcome transform + and instead pre-transform your whole data set before fitting the model. + input_transform: An input transform that is applied in the model's + forward pass. + NOTE: If this model is trained in minibatches, an input transform + with learnable parameters (such as `Normalize`) would update its + parameters for each minibatch, which is undesirable. If you do intend + to train in minibatches, we recommend you not use an input transform + and instead pre-transform your whole data set before fitting the model. """ with torch.no_grad(): transformed_X = self.transform_inputs( @@ -358,6 +383,11 @@ def __init__( ) if train_Y is not None: if outcome_transform is not None: + warnings.warn( + TRANSFORM_WARNING.format(ttype="outcome"), + UserInputWarning, + stacklevel=3, + ) train_Y, _ = outcome_transform(train_Y) self._validate_tensor_args(X=transformed_X, Y=train_Y) validate_input_scaling(train_X=transformed_X, train_Y=train_Y) @@ -388,6 +418,7 @@ def __init__( "being further optimized during the model fit. If so " "then set `learn_inducing_points` to False.", UserWarning, + stacklevel=3, ) if inducing_point_allocator is None: @@ -412,6 +443,11 @@ def __init__( if outcome_transform is not None: self.outcome_transform = outcome_transform if input_transform is not None: + warnings.warn( + TRANSFORM_WARNING.format(ttype="input"), + UserInputWarning, + stacklevel=3, + ) self.input_transform = input_transform # for model fitting utilities diff --git a/botorch/models/contextual_multioutput.py b/botorch/models/contextual_multioutput.py index dc954a4df3..e303315d7b 100644 --- a/botorch/models/contextual_multioutput.py +++ b/botorch/models/contextual_multioutput.py @@ -13,7 +13,6 @@ Advances in Neural Information Processing Systems 33, NeurIPS 2020. """ -import warnings from typing import Any, Optional, Union import torch @@ -21,6 +20,7 @@ from botorch.models.transforms.input import InputTransform from botorch.models.transforms.outcome import OutcomeTransform from botorch.utils.datasets import MultiTaskDataset, SupervisedDataset +from botorch.utils.types import _DefaultType, DEFAULT from gpytorch.constraints import Interval from gpytorch.kernels.rbf_kernel import RBFKernel from gpytorch.likelihoods.likelihood import Likelihood @@ -51,8 +51,8 @@ def __init__( embs_dim_list: Optional[list[int]] = None, output_tasks: Optional[list[int]] = None, all_tasks: Optional[list[int]] = None, + outcome_transform: OutcomeTransform | _DefaultType | None = DEFAULT, input_transform: Optional[InputTransform] = None, - outcome_transform: Optional[OutcomeTransform] = None, ) -> None: r""" Args: @@ -85,12 +85,14 @@ def __init__( training data. Note that when a task is not observed, the corresponding task covariance will heavily depend on random initialization and may behave unexpectedly. - input_transform: An input transform that is applied in the model's - forward pass. outcome_transform: An outcome transform that is applied to the training data during instantiation and to the posterior during inference (that is, the `Posterior` obtained by calling - `.posterior` on the model will be on the original scale). + `.posterior` on the model will be on the original scale). We use a + `Standardize` transform if no `outcome_transform` is specified. + Pass down `None` to use no outcome transform. + input_transform: An input transform that is applied in the model's + forward pass. """ super().__init__( train_X=train_X, @@ -102,8 +104,8 @@ def __init__( likelihood=likelihood, output_tasks=output_tasks, all_tasks=all_tasks, - input_transform=input_transform, outcome_transform=outcome_transform, + input_transform=input_transform, ) self.device = train_X.device if all_tasks is None: @@ -247,62 +249,3 @@ def construct_inputs( if embs_dim_list is not None: base_inputs["embs_dim_list"] = embs_dim_list return base_inputs - - -class FixedNoiseLCEMGP(LCEMGP): - r"""The Multi-Task GP the latent context embedding multioutput - (LCE-M) kernel, with known observation noise. - - DEPRECATED: Please use `LCEMGP` with `train_Yvar` instead. - Will be removed in a future release (~v0.11). - """ - - def __init__( - self, - train_X: Tensor, - train_Y: Tensor, - train_Yvar: Tensor, - task_feature: int, - context_cat_feature: Optional[Tensor] = None, - context_emb_feature: Optional[Tensor] = None, - embs_dim_list: Optional[list[int]] = None, - output_tasks: Optional[list[int]] = None, - ) -> None: - r""" - Args: - train_X: (n x d) X training data. - train_Y: (n x 1) Y training data. - train_Yvar: (n x 1) Observed variances of each training Y. - task_feature: Column index of train_X to get context indices. - context_cat_feature: (n_contexts x k) one-hot encoded context - features. Rows are ordered by context indices, where k is the - number of categorical variables. If None, task indices will - be used and k = 1. - context_emb_feature: (n_contexts x m) pre-given continuous - embedding features. Rows are ordered by context indices. - embs_dim_list: Embedding dimension for each categorical variable. - The length equals to k. If None, the embedding dimension is set to - 1 for each categorical variable. - output_tasks: A list of task indices for which to compute model - outputs for. If omitted, return outputs for all task indices. - - """ - warnings.warn( - "`FixedNoiseLCEMGP` has been deprecated and will be removed in a " - "future release. Please use the `LCEMGP` model instead. " - "When `train_Yvar` is specified, `LCEMGP` behaves the same " - "as the `FixedNoiseLCEMGP`.", - DeprecationWarning, - stacklevel=2, - ) - - super().__init__( - train_X=train_X, - train_Y=train_Y, - task_feature=task_feature, - train_Yvar=train_Yvar, - context_cat_feature=context_cat_feature, - context_emb_feature=context_emb_feature, - embs_dim_list=embs_dim_list, - output_tasks=output_tasks, - ) diff --git a/botorch/models/fully_bayesian_multitask.py b/botorch/models/fully_bayesian_multitask.py index 94c3f30d94..44a74f5e89 100644 --- a/botorch/models/fully_bayesian_multitask.py +++ b/botorch/models/fully_bayesian_multitask.py @@ -167,7 +167,7 @@ class SaasFullyBayesianMultiTaskGP(MultiTaskGP): This model assumes that the inputs have been normalized to [0, 1]^d and that the output has been stratified standardized to have zero mean and unit variance for - each task.The SAAS model [Eriksson2021saasbo]_ with a Matern-5/2 is used as data + each task. The SAAS model [Eriksson2021saasbo]_ with a Matern-5/2 is used as data kernel by default. You are expected to use `fit_fully_bayesian_model_nuts` to fit this model as it @@ -243,6 +243,7 @@ def __init__( X=train_X, input_transform=input_transform ) if outcome_transform is not None: + outcome_transform.train() # Ensure we learn parameters here on init train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar) if train_Yvar is not None: # Clamp after transforming train_Yvar = train_Yvar.clamp(MIN_INFERRED_NOISE_LEVEL) @@ -254,6 +255,11 @@ def __init__( task_feature=task_feature, output_tasks=output_tasks, rank=rank, + # We already transformed the data above, this avoids applying the + # default `Standardize` transform twice. As outcome_transform is + # set on `self` below, it will be applied to the posterior in the + # `posterior` method of `MultiTaskGP`. + outcome_transform=None, ) if all_tasks is not None and self._expected_task_values != set(all_tasks): raise NotImplementedError( diff --git a/botorch/models/gp_regression_fidelity.py b/botorch/models/gp_regression_fidelity.py index 99f7f02acc..29532b86ff 100644 --- a/botorch/models/gp_regression_fidelity.py +++ b/botorch/models/gp_regression_fidelity.py @@ -25,8 +25,7 @@ from __future__ import annotations -import warnings -from typing import Any, Optional, Union +from typing import Any, Sequence import torch from botorch.exceptions.errors import UnsupportedError @@ -40,6 +39,7 @@ from botorch.models.transforms.outcome import OutcomeTransform from botorch.models.utils.gpytorch_modules import get_covar_module_with_dim_scaled_prior from botorch.utils.datasets import SupervisedDataset +from botorch.utils.types import _DefaultType, DEFAULT from gpytorch.kernels.kernel import ProductKernel from gpytorch.kernels.scale_kernel import ScaleKernel from gpytorch.likelihoods.likelihood import Likelihood @@ -66,15 +66,14 @@ def __init__( self, train_X: Tensor, train_Y: Tensor, - train_Yvar: Optional[Tensor] = None, - iteration_fidelity: Optional[int] = None, - data_fidelities: Optional[Union[list[int], tuple[int]]] = None, - data_fidelity: Optional[int] = None, + train_Yvar: Tensor | None = None, + iteration_fidelity: int | None = None, + data_fidelities: Sequence[int] | None = None, linear_truncated: bool = True, nu: float = 2.5, - likelihood: Optional[Likelihood] = None, - outcome_transform: Optional[OutcomeTransform] = None, - input_transform: Optional[InputTransform] = None, + likelihood: Likelihood | None = None, + outcome_transform: OutcomeTransform | _DefaultType | None = DEFAULT, + input_transform: InputTransform | None = None, ) -> None: r""" Args: @@ -89,8 +88,6 @@ def __init__( data_fidelities: The column indices for the downsampling fidelity parameter. If a list/tuple of indices is provided, a kernel will be constructed for each index (optional). - data_fidelity: The column index for the downsampling fidelity parameter - (optional). Deprecated in favor of `data_fidelities`. linear_truncated: If True, use a `LinearTruncatedFidelityKernel` instead of the default kernel. nu: The smoothness parameter for the Matern kernel: either 1/2, 3/2, or @@ -98,24 +95,14 @@ def __init__( likelihood: A likelihood. If omitted, use a standard GaussianLikelihood with inferred noise level. outcome_transform: An outcome transform that is applied to the - training data during instantiation and to the posterior during - inference (that is, the `Posterior` obtained by calling - `.posterior` on the model will be on the original scale). + training data during instantiation and to the posterior during + inference (that is, the `Posterior` obtained by calling + `.posterior` on the model will be on the original scale). We use a + `Standardize` transform if no `outcome_transform` is specified. + Pass down `None` to use no outcome transform. input_transform: An input transform that is applied in the model's forward pass. """ - if data_fidelity is not None: - warnings.warn( - "The `data_fidelity` argument is deprecated and will be removed in " - "a future release. Please use `data_fidelities` instead.", - DeprecationWarning, - ) - if data_fidelities is not None: - raise ValueError( - "Cannot specify both `data_fidelity` and `data_fidelities`." - ) - data_fidelities = [data_fidelity] - self._init_args = { "iteration_fidelity": iteration_fidelity, "data_fidelities": data_fidelities, @@ -179,47 +166,11 @@ def construct_inputs( return inputs -class FixedNoiseMultiFidelityGP(SingleTaskMultiFidelityGP): - def __init__( - self, - train_X: Tensor, - train_Y: Tensor, - train_Yvar: Tensor, - iteration_fidelity: Optional[int] = None, - data_fidelities: Optional[Union[list[int], tuple[int]]] = None, - data_fidelity: Optional[int] = None, - linear_truncated: bool = True, - nu: float = 2.5, - outcome_transform: Optional[OutcomeTransform] = None, - input_transform: Optional[InputTransform] = None, - ) -> None: - r"""DEPRECATED: Use `SingleTaskMultiFidelityGP` instead. - Will be removed in a future release (~v0.11). - """ - warnings.warn( - "`FixedNoiseMultiFidelityGP` has been deprecated. " - "Use `SingleTaskMultiFidelityGP` with `train_Yvar` instead.", - DeprecationWarning, - ) - super().__init__( - train_X=train_X, - train_Y=train_Y, - train_Yvar=train_Yvar, - iteration_fidelity=iteration_fidelity, - data_fidelities=data_fidelities, - data_fidelity=data_fidelity, - linear_truncated=linear_truncated, - nu=nu, - outcome_transform=outcome_transform, - input_transform=input_transform, - ) - - def _setup_multifidelity_covar_module( dim: int, aug_batch_shape: torch.Size, - iteration_fidelity: Optional[int], - data_fidelities: Optional[list[int]], + iteration_fidelity: int | None, + data_fidelities: Sequence[int] | None, linear_truncated: bool, nu: float, ) -> tuple[ScaleKernel, dict]: @@ -246,6 +197,7 @@ def _setup_multifidelity_covar_module( if iteration_fidelity is not None and iteration_fidelity < 0: iteration_fidelity = dim + iteration_fidelity if data_fidelities is not None: + data_fidelities = list(data_fidelities) for i in range(len(data_fidelities)): if data_fidelities[i] < 0: data_fidelities[i] = dim + data_fidelities[i] diff --git a/botorch/models/gp_regression_mixed.py b/botorch/models/gp_regression_mixed.py index 5dc4697f04..30941d27e3 100644 --- a/botorch/models/gp_regression_mixed.py +++ b/botorch/models/gp_regression_mixed.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import Any, Callable, Optional +from typing import Any, Callable, Optional, Union import torch from botorch.models.gp_regression import SingleTaskGP @@ -16,6 +16,7 @@ from botorch.models.utils.gpytorch_modules import get_covar_module_with_dim_scaled_prior from botorch.utils.datasets import SupervisedDataset from botorch.utils.transforms import normalize_indices +from botorch.utils.types import _DefaultType, DEFAULT from gpytorch.constraints import GreaterThan from gpytorch.kernels.kernel import Kernel from gpytorch.kernels.scale_kernel import ScaleKernel @@ -65,7 +66,7 @@ def __init__( Callable[[torch.Size, int, list[int]], Kernel] ] = None, likelihood: Optional[Likelihood] = None, - outcome_transform: Optional[OutcomeTransform] = None, # TODO + outcome_transform: Optional[Union[OutcomeTransform, _DefaultType]] = DEFAULT, input_transform: Optional[InputTransform] = None, # TODO ) -> None: r"""A single-task exact GP model supporting categorical parameters. @@ -87,7 +88,9 @@ def __init__( outcome_transform: An outcome transform that is applied to the training data during instantiation and to the posterior during inference (that is, the `Posterior` obtained by calling - `.posterior` on the model will be on the original scale). + `.posterior` on the model will be on the original scale). We use a + `Standardize` transform if no `outcome_transform` is specified. + Pass down `None` to use no outcome transform. input_transform: An input transform that is applied in the model's forward pass. Only input transforms are allowed which do not transform the categorical dimensions. If you want to use it diff --git a/botorch/models/multitask.py b/botorch/models/multitask.py index 807b4b55c9..092639f250 100644 --- a/botorch/models/multitask.py +++ b/botorch/models/multitask.py @@ -38,7 +38,7 @@ from botorch.models.gpytorch import GPyTorchModel, MultiTaskGPyTorchModel from botorch.models.model import FantasizeMixin from botorch.models.transforms.input import InputTransform -from botorch.models.transforms.outcome import OutcomeTransform +from botorch.models.transforms.outcome import OutcomeTransform, Standardize from botorch.models.utils.gpytorch_modules import ( get_covar_module_with_dim_scaled_prior, get_gaussian_likelihood_with_lognormal_prior, @@ -46,6 +46,7 @@ ) from botorch.posteriors.multitask import MultitaskGPPosterior from botorch.utils.datasets import MultiTaskDataset, SupervisedDataset +from botorch.utils.types import _DefaultType, DEFAULT from gpytorch.constraints import GreaterThan from gpytorch.distributions.multitask_multivariate_normal import ( MultitaskMultivariateNormal, @@ -65,7 +66,7 @@ from gpytorch.priors.lkj_prior import LKJCovariancePrior from gpytorch.priors.prior import Prior from gpytorch.priors.smoothed_box_prior import SmoothedBoxPrior -from gpytorch.priors.torch_priors import GammaPrior +from gpytorch.priors.torch_priors import GammaPrior, LogNormalPrior from gpytorch.settings import detach_test_caches from gpytorch.utils.errors import CachingError from gpytorch.utils.memoize import cached, pop_from_cache @@ -108,7 +109,7 @@ def get_task_value_remapping( # Create a tensor that maps task values to new task values. # The number of tasks should be small, so this should be quite efficient. mapper = torch.full( - (task_values.max().item() + 1,), + (int(task_values.max().item()) + 1,), float("nan"), dtype=dtype, device=task_values.device, @@ -122,11 +123,11 @@ class MultiTaskGP(ExactGP, MultiTaskGPyTorchModel, FantasizeMixin): kernel. See [Bonilla2007MTGP]_ and [Swersky2013MTBO]_ for a reference on the model and its use in Bayesian optimization. - The model can be single-output or multi-output, determined by the `output_tasks`. This model uses relatively strong priors on the base Kernel hyperparameters, which work best when covariates are normalized to the unit cube and outcomes are - standardized (zero mean, unit variance). + standardized (zero mean, unit variance) - this standardization should be applied in + a stratified fashion at the level of the tasks, rather than across all data points. If the `train_Yvar` is None, this model infers the noise level. If you have known observation noise, you can set `train_Yvar` to a tensor containing @@ -147,8 +148,8 @@ def __init__( output_tasks: Optional[list[int]] = None, rank: Optional[int] = None, all_tasks: Optional[list[int]] = None, + outcome_transform: Optional[Union[OutcomeTransform, _DefaultType]] = DEFAULT, input_transform: Optional[InputTransform] = None, - outcome_transform: Optional[OutcomeTransform] = None, ) -> None: r"""Multi-Task GP model using an ICM kernel. @@ -180,12 +181,15 @@ def __init__( training data. Note that when a task is not observed, the corresponding task covariance will heavily depend on random initialization and may behave unexpectedly. - input_transform: An input transform that is applied in the model's - forward pass. outcome_transform: An outcome transform that is applied to the training data during instantiation and to the posterior during inference (that is, the `Posterior` obtained by calling - `.posterior` on the model will be on the original scale). + `.posterior` on the model will be on the original scale). We use a + `Standardize` transform if no `outcome_transform` is specified. + Pass down `None` to use no outcome transform. NOTE: Standardization + should be applied in a stratified fashion, separately for each task. + input_transform: An input transform that is applied in the model's + forward pass. Example: >>> X1, X2 = torch.rand(10, 2), torch.rand(20, 2) @@ -214,6 +218,8 @@ def __init__( ) all_tasks = all_tasks or all_tasks_inferred self.num_tasks = len(all_tasks) + if outcome_transform == DEFAULT: + outcome_transform = Standardize(m=1, batch_shape=train_X.shape[:-2]) if outcome_transform is not None: train_Y, train_Yvar = outcome_transform(Y=train_Y, Yvar=train_Yvar) @@ -470,8 +476,7 @@ def __init__( if rank is None: rank = num_tasks if likelihood is None: - noise_prior = GammaPrior(1.1, 0.05) - noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate + noise_prior = LogNormalPrior(loc=-4.0, scale=1.0) likelihood = MultitaskGaussianLikelihood( num_tasks=num_tasks, batch_shape=batch_shape, @@ -479,7 +484,7 @@ def __init__( noise_constraint=GreaterThan( MIN_INFERRED_NOISE_LEVEL, transform=None, - initial_value=noise_prior_mode, + initial_value=noise_prior.mode, ), rank=kwargs.get("likelihood_rank", 0), ) diff --git a/botorch/utils/test_helpers.py b/botorch/utils/test_helpers.py index e9977ae9db..6d99758c7b 100644 --- a/botorch/utils/test_helpers.py +++ b/botorch/utils/test_helpers.py @@ -19,13 +19,13 @@ from botorch.exceptions.errors import UnsupportedError from botorch.models import SingleTaskGP from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP -from botorch.models.gpytorch import GPyTorchModel +from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel, GPyTorchModel from botorch.models.model import FantasizeMixin, Model from botorch.models.model_list_gp_regression import ModelListGP from botorch.models.transforms.outcome import Standardize from botorch.models.utils import add_output_dim from botorch.models.utils.assorted import fantasize -from botorch.posteriors.posterior import Posterior +from botorch.posteriors.torch import TorchPosterior from botorch.utils.datasets import MultiTaskDataset, SupervisedDataset from gpytorch.distributions.multivariate_normal import MultivariateNormal from gpytorch.kernels import RBFKernel, ScaleKernel @@ -244,21 +244,58 @@ def gen_multi_task_dataset( return dataset, (train_X, train_Y, train_Yvar) -def get_pvar_expected(posterior: Posterior, model: Model, X: Tensor, m: int) -> Tensor: +def get_pvar_expected( + posterior: TorchPosterior, model: Model, X: Tensor, m: int +) -> Tensor: """Computes the expected variance of a posterior after adding the predictive noise from the likelihood. + + Args: + posterior: The posterior to compute the variance of. Must be a + `TorchPosterior` object. + model: The model that generated the posterior. If `m > 1`, this must be + a `BatchedMultiOutputGPyTorchModel`. + X: The test inputs. + m: The number of outputs. + + Returns: + The expected variance of the posterior after adding the observation + noise from the likelihood. """ X = model.transform_inputs(X) lh_kwargs = {} + odim = -1 # this is the output dimension index + + if m > 1: + if not isinstance(model, BatchedMultiOutputGPyTorchModel): + raise UnsupportedError( + "`get_pvar_expected` only supports `BatchedMultiOutputGPyTorchModel`s." + ) + # We need to add a batch dimension to the input to be compatible with the + # augmented batch shape of the model. This also changes the output dimension + # index. + X, odim = add_output_dim(X=X, original_batch_shape=model._input_batch_shape) + if isinstance(model.likelihood, FixedNoiseGaussianLikelihood): - lh_kwargs["noise"] = model.likelihood.noise.mean().expand(X.shape[:-1]) + noise = model.likelihood.noise.mean(dim=-1, keepdim=True) + broadcasted_shape = torch.broadcast_shapes(noise.shape, X.shape[:-1]) + lh_kwargs["noise"] = noise.expand(broadcasted_shape) + + pvar_exp = model.likelihood(model(X), X, **lh_kwargs).variance if m == 1: - return model.likelihood( - posterior.distribution, X, **lh_kwargs - ).variance.unsqueeze(-1) - X_, odi = add_output_dim(X=X, original_batch_shape=model._input_batch_shape) - pvar_exp = model.likelihood(model(X_), X_, **lh_kwargs).variance - return torch.stack([pvar_exp.select(dim=odi, index=i) for i in range(m)], dim=-1) + pvar_exp = pvar_exp.unsqueeze(-1) + pvar_exp = torch.stack( + [pvar_exp.select(dim=odim, index=i) for i in range(m)], dim=-1 + ) + + # If the model has an outcome transform, we need to untransform the + # variance according to that transform. + if hasattr(model, "outcome_transform"): + _, pvar_exp = model.outcome_transform.untransform( + Y=torch.zeros_like(pvar_exp), Yvar=pvar_exp + ) + + return pvar_exp class DummyNonScalarizingPosteriorTransform(PosteriorTransform): diff --git a/test/models/test_approximate_gp.py b/test/models/test_approximate_gp.py index ed7cba18cf..2a05e689cf 100644 --- a/test/models/test_approximate_gp.py +++ b/test/models/test_approximate_gp.py @@ -8,6 +8,7 @@ import warnings import torch +from botorch.exceptions.warnings import UserInputWarning from botorch.fit import fit_gpytorch_mll from botorch.models.approximate_gp import ( _SingleTaskVariationalGP, @@ -190,6 +191,26 @@ def test_initializations(self): else: self.assertFalse(hasattr(model, "outcome_transform")) + # test user warnings when using transforms + with self.assertWarnsRegex( + UserInputWarning, + "Using an input transform with `SingleTaskVariationalGP`", + ): + SingleTaskVariationalGP( + train_X=train_X, + train_Y=train_Y, + input_transform=Normalize(d=1), + ) + with self.assertWarnsRegex( + UserInputWarning, + "Using an outcome transform with `SingleTaskVariationalGP`", + ): + SingleTaskVariationalGP( + train_X=train_X, + train_Y=train_Y, + outcome_transform=Log(), + ) + # test default inducing point allocator self.assertIsInstance(model._inducing_point_allocator, GreedyVarianceReduction) diff --git a/test/models/test_contextual_multioutput.py b/test/models/test_contextual_multioutput.py index 4b94d2a6b0..040c08be91 100644 --- a/test/models/test_contextual_multioutput.py +++ b/test/models/test_contextual_multioutput.py @@ -7,7 +7,7 @@ import torch from botorch.fit import fit_gpytorch_mll -from botorch.models.contextual_multioutput import FixedNoiseLCEMGP, LCEMGP +from botorch.models.contextual_multioutput import LCEMGP from botorch.models.multitask import MultiTaskGP from botorch.posteriors import GPyTorchPosterior from botorch.utils.test_helpers import gen_multi_task_dataset @@ -102,26 +102,6 @@ def test_LCEMGP(self): ).to_dense() self.assertAllClose(previous_covar, model.task_covar_module(task_idcs)) - def test_FixedNoiseLCEMGP(self): - for dtype in (torch.float, torch.double): - _, (train_x, train_y, train_yvar) = gen_multi_task_dataset( - yvar=0.01, dtype=dtype, device=self.device - ) - - with self.assertWarnsRegex(DeprecationWarning, "FixedNoiseLCEMGP"): - model = FixedNoiseLCEMGP( - train_X=train_x, - train_Y=train_y, - train_Yvar=train_yvar, - task_feature=0, - ) - mll = ExactMarginalLogLikelihood(model.likelihood, model) - fit_gpytorch_mll(mll, optimizer_kwargs={"options": {"maxiter": 1}}) - self.assertIsInstance(model, FixedNoiseLCEMGP) - - test_x = train_x[:5] - self.assertIsInstance(model(test_x), MultivariateNormal) - def test_construct_inputs(self) -> None: for with_embedding_inputs, yvar, skip_task_features_in_datasets in zip( (True, False), (None, 0.01), (True, False), strict=True diff --git a/test/models/test_converter.py b/test/models/test_converter.py index ef6fad033b..db23e06479 100644 --- a/test/models/test_converter.py +++ b/test/models/test_converter.py @@ -211,8 +211,18 @@ def test_model_list_to_batched(self): batch_gp = model_list_to_batched(list_gp) self.assertIsInstance(batch_gp.likelihood, FixedNoiseGaussianLikelihood) # test SingleTaskMultiFidelityGP - gp1_ = SingleTaskMultiFidelityGP(train_X, train_Y1, iteration_fidelity=1) - gp2_ = SingleTaskMultiFidelityGP(train_X, train_Y2, iteration_fidelity=1) + gp1_ = SingleTaskMultiFidelityGP( + train_X, + train_Y1, + iteration_fidelity=1, + outcome_transform=None, + ) + gp2_ = SingleTaskMultiFidelityGP( + train_X, + train_Y2, + iteration_fidelity=1, + outcome_transform=None, + ) list_gp = ModelListGP(gp1_, gp2_) batch_gp = model_list_to_batched(list_gp) gp2_ = SingleTaskMultiFidelityGP(train_X, train_Y2, iteration_fidelity=2) @@ -372,7 +382,11 @@ def test_roundtrip(self): # SingleTaskMultiFidelityGP for lin_trunc in (False, True): batch_gp = SingleTaskMultiFidelityGP( - train_X, train_Y, iteration_fidelity=1, linear_truncated=lin_trunc + train_X=train_X, + train_Y=train_Y, + iteration_fidelity=1, + linear_truncated=lin_trunc, + outcome_transform=None, ) list_gp = batched_to_model_list(batch_gp) batch_gp_recov = model_list_to_batched(list_gp) @@ -429,7 +443,10 @@ def test_batched_multi_output_to_single_output(self): self.assertEqual(batched_so_model.num_outputs, 1) # test SingleTaskMultiFidelityGP batched_mo_model = SingleTaskMultiFidelityGP( - train_X, train_Y, iteration_fidelity=1 + train_X, + train_Y, + iteration_fidelity=1, + outcome_transform=None, ) batched_so_model = batched_multi_output_to_single_output(batched_mo_model) self.assertIsInstance(batched_so_model, SingleTaskMultiFidelityGP) @@ -478,5 +495,8 @@ def test_batched_multi_output_to_single_output(self): batched_mo_model = SingleTaskGP( train_X, train_Y, outcome_transform=Standardize(m=2) ) - with self.assertRaises(NotImplementedError): + with self.assertRaisesRegex( + NotImplementedError, + "Converting batched multi-output models with outcome transforms", + ): batched_multi_output_to_single_output(batched_mo_model) diff --git a/test/models/test_fully_bayesian_multitask.py b/test/models/test_fully_bayesian_multitask.py index 9cd9e33009..e1c924b62b 100644 --- a/test/models/test_fully_bayesian_multitask.py +++ b/test/models/test_fully_bayesian_multitask.py @@ -11,21 +11,24 @@ import torch from botorch import fit_fully_bayesian_model_nuts from botorch.acquisition.analytic import ( - ExpectedImprovement, + LogExpectedImprovement, PosteriorMean, ProbabilityOfImprovement, UpperConfidenceBound, ) +from botorch.acquisition.logei import ( + qLogExpectedImprovement, + qLogNoisyExpectedImprovement, +) from botorch.acquisition.monte_carlo import ( - qExpectedImprovement, - qNoisyExpectedImprovement, qProbabilityOfImprovement, qSimpleRegret, qUpperConfidenceBound, ) -from botorch.acquisition.multi_objective import ( - qExpectedHypervolumeImprovement, - qNoisyExpectedHypervolumeImprovement, + +from botorch.acquisition.multi_objective.logei import ( + qLogExpectedHypervolumeImprovement, + qLogNoisyExpectedHypervolumeImprovement, ) from botorch.models import ModelList, ModelListGP from botorch.models.deterministic import GenericDeterministicModel @@ -70,31 +73,38 @@ class TestFullyBayesianMultiTaskGP(BotorchTestCase): + def _get_data_and_model( self, task_rank: Optional[int] = None, output_tasks: Optional[list[int]] = None, infer_noise: bool = False, - **tkwargs + use_outcome_transform: bool = True, + **tkwargs, ): with torch.random.fork_rng(): torch.manual_seed(0) train_X = torch.rand(10, 4, **tkwargs) - task_indices = torch.cat( - [torch.zeros(5, 1, **tkwargs), torch.ones(5, 1, **tkwargs)], dim=0 - ) - self.num_tasks = 2 - train_X = torch.cat([train_X, task_indices], dim=1) - train_Y = torch.sin(train_X[:, :1]) - train_Yvar = 0.5 * torch.arange(10, **tkwargs).unsqueeze(-1) - model = SaasFullyBayesianMultiTaskGP( - train_X=train_X, - train_Y=train_Y, - train_Yvar=None if infer_noise else train_Yvar, - task_feature=4, - output_tasks=output_tasks, - rank=task_rank, - ) + task_indices = torch.cat( + [torch.zeros(5, 1, **tkwargs), torch.ones(5, 1, **tkwargs)], dim=0 + ) + self.num_tasks = 2 + train_X = torch.cat([train_X, task_indices], dim=1) + train_Y = torch.sin(train_X[:, :1]) + train_Yvar = 0.5 * torch.arange(10, **tkwargs).unsqueeze(-1) + model = SaasFullyBayesianMultiTaskGP( + train_X=train_X, + train_Y=train_Y, + train_Yvar=None if infer_noise else train_Yvar, + task_feature=4, + output_tasks=output_tasks, + rank=task_rank, + outcome_transform=( + Standardize(m=1, batch_shape=train_X.shape[:-2]) + if use_outcome_transform + else None + ), + ) return train_X, train_Y, train_Yvar, model def _get_unnormalized_data(self, **tkwargs): @@ -205,26 +215,37 @@ def test_fit_model( dtype: torch.dtype = torch.double, infer_noise: bool = False, task_rank: int = 1, + use_outcome_transform: bool = False, ): tkwargs = {"device": self.device, "dtype": dtype} train_X, train_Y, train_Yvar, model = self._get_data_and_model( - infer_noise=infer_noise, task_rank=task_rank, **tkwargs + infer_noise=infer_noise, + task_rank=task_rank, + use_outcome_transform=use_outcome_transform, + **tkwargs, ) n = train_X.shape[0] d = train_X.shape[1] - 1 + # Handle outcome transforms (if used) + train_Y_tf, train_Yvar_tf = train_Y, train_Yvar + if use_outcome_transform: + train_Y_tf, train_Yvar_tf = model.outcome_transform( + Y=train_Y, Yvar=train_Yvar + ) + # Test init self.assertIsNone(model.mean_module) self.assertIsNone(model.covar_module) self.assertIsNone(model.likelihood) self.assertIsInstance(model.pyro_model, MultitaskSaasPyroModel) self.assertAllClose(train_X, model.pyro_model.train_X) - self.assertAllClose(train_Y, model.pyro_model.train_Y) + self.assertAllClose(train_Y_tf, model.pyro_model.train_Y) if infer_noise: self.assertIsNone(model.pyro_model.train_Yvar) else: self.assertAllClose( - train_Yvar.clamp(MIN_INFERRED_NOISE_LEVEL), + train_Yvar_tf.clamp(MIN_INFERRED_NOISE_LEVEL), model.pyro_model.train_Yvar, ) @@ -345,14 +366,32 @@ def test_fit_model( # Check the keys in the state dict true_keys = EXPECTED_KEYS_NOISE if infer_noise else EXPECTED_KEYS + if use_outcome_transform: + true_keys = true_keys + [ + "outcome_transform.stdvs", + "outcome_transform._is_trained", + "outcome_transform._stdvs_sq", + "outcome_transform.means", + ] self.assertEqual(set(model.state_dict().keys()), set(true_keys)) # Check that we can load the state dict. state_dict = model.state_dict() _, _, _, m_new = self._get_data_and_model( - infer_noise=infer_noise, task_rank=task_rank, **tkwargs + infer_noise=infer_noise, + task_rank=task_rank, + use_outcome_transform=use_outcome_transform, + **tkwargs, ) - self.assertEqual(m_new.state_dict(), {}) + expected_state_dict = {} + if use_outcome_transform: + expected_state_dict.update( + { + "outcome_transform." + k: v + for k, v in model.outcome_transform.state_dict().items() + } + ) + self.assertEqual(m_new.state_dict(), expected_state_dict) m_new.load_state_dict(state_dict) self.assertEqual(model.state_dict().keys(), m_new.state_dict().keys()) for k in model.state_dict().keys(): @@ -377,12 +416,15 @@ def test_fit_model_float(self): def test_fit_model_infer_noise(self): self.test_fit_model(infer_noise=True, task_rank=2) + def test_fit_model_with_outcome_transform(self): + self.test_fit_model(use_outcome_transform=True) + def test_transforms(self, infer_noise: bool = False): tkwargs = {"device": self.device, "dtype": torch.double} train_X, train_Y, train_Yvar, test_X = self._get_unnormalized_data(**tkwargs) n, d = train_X.shape normalize_indices = torch.tensor( - list(range(train_X.shape[-1] - 1)), **{"device": self.device} + list(range(train_X.shape[-1] - 1)), device=self.device ) lb, ub = ( @@ -466,14 +508,14 @@ def test_acquisition_functions(self): posterior=mixed_list.posterior(test_X), sample_shape=torch.Size([2]) ) acquisition_functions = [ - ExpectedImprovement(model=model, best_f=train_Y.max()), + LogExpectedImprovement(model=model, best_f=train_Y.max()), ProbabilityOfImprovement(model=model, best_f=train_Y.max()), PosteriorMean(model=model), UpperConfidenceBound(model=model, beta=4), - qExpectedImprovement( + qLogExpectedImprovement( model=model, best_f=train_Y.max(), sampler=simple_sampler ), - qNoisyExpectedImprovement( + qLogNoisyExpectedImprovement( model=model, X_baseline=test_X, sampler=simple_sampler ), qProbabilityOfImprovement( @@ -481,13 +523,13 @@ def test_acquisition_functions(self): ), qSimpleRegret(model=model, sampler=simple_sampler), qUpperConfidenceBound(model=model, beta=4, sampler=simple_sampler), - qNoisyExpectedHypervolumeImprovement( + qLogNoisyExpectedHypervolumeImprovement( model=list_gp, X_baseline=test_X, ref_point=torch.zeros(2, **tkwargs), sampler=list_gp_sampler, ), - qExpectedHypervolumeImprovement( + qLogExpectedHypervolumeImprovement( model=list_gp, ref_point=torch.zeros(2, **tkwargs), sampler=list_gp_sampler, @@ -496,13 +538,13 @@ def test_acquisition_functions(self): ), ), # qEHVI/qNEHVI with mixed models - qNoisyExpectedHypervolumeImprovement( + qLogNoisyExpectedHypervolumeImprovement( model=mixed_list, X_baseline=test_X, ref_point=torch.zeros(2, **tkwargs), sampler=mixed_list_sampler, ), - qExpectedHypervolumeImprovement( + qLogExpectedHypervolumeImprovement( model=mixed_list, ref_point=torch.zeros(2, **tkwargs), sampler=mixed_list_sampler, @@ -522,14 +564,22 @@ def test_acquisition_functions(self): self.assertEqual(acqf(test_X).shape, torch.Size(batch_shape)) def test_load_samples(self): - for task_rank, dtype in itertools.product([1, 2], [torch.float, torch.double]): + for task_rank, dtype, use_outcome_transform in itertools.product( + [1, 2], [torch.float, torch.double], (False, True) + ): tkwargs = {"device": self.device, "dtype": dtype} train_X, train_Y, train_Yvar, model = self._get_data_and_model( - task_rank=task_rank, **tkwargs + task_rank=task_rank, + use_outcome_transform=use_outcome_transform, + **tkwargs, ) + d = train_X.shape[1] - 1 mcmc_samples = self._get_mcmc_samples( - num_samples=3, dim=d, task_rank=task_rank, **tkwargs + num_samples=3, + dim=d, + task_rank=task_rank, + **tkwargs, ) model.load_mcmc_samples(mcmc_samples) @@ -551,10 +601,24 @@ def test_load_samples(self): mcmc_samples["mean"], ) ) + + # Handle outcome transforms (if used) + train_Y_tf, train_Yvar_tf = train_Y, train_Yvar + if use_outcome_transform: + train_Y_tf, train_Yvar_tf = model.outcome_transform( + Y=train_Y, Yvar=train_Yvar + ) + + self.assertTrue( + torch.allclose( + model.pyro_model.train_Y, + train_Y_tf, + ) + ) self.assertTrue( torch.allclose( model.pyro_model.train_Yvar, - train_Yvar.clamp(MIN_INFERRED_NOISE_LEVEL), + train_Yvar_tf.clamp(MIN_INFERRED_NOISE_LEVEL), ) ) self.assertTrue( diff --git a/test/models/test_gp_regression.py b/test/models/test_gp_regression.py index e2e3979f05..c26cd98f5b 100644 --- a/test/models/test_gp_regression.py +++ b/test/models/test_gp_regression.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import itertools +import math import warnings import torch @@ -115,9 +116,7 @@ def test_gp(self, double_only: bool = False): # test param sizes params = dict(model.named_parameters()) for p in params: - self.assertEqual( - params[p].numel(), m * torch.tensor(batch_shape).prod().item() - ) + self.assertEqual(params[p].numel(), m * math.prod(batch_shape)) # test posterior # test non batch evaluation @@ -133,18 +132,9 @@ def test_gp(self, double_only: bool = False): self.assertIsInstance(posterior_pred, GPyTorchPosterior) self.assertEqual(posterior_pred.mean.shape, expected_shape) self.assertEqual(posterior_pred.variance.shape, expected_shape) - if use_octf: - # ensure un-transformation is applied - tmp_tf = model.outcome_transform - del model.outcome_transform - pp_tf = model.posterior(X, observation_noise=True) - model.outcome_transform = tmp_tf - expected_var = tmp_tf.untransform_posterior(pp_tf).variance - self.assertAllClose(posterior_pred.variance, expected_var) - else: - pvar = posterior_pred.variance - pvar_exp = get_pvar_expected(posterior, model, X, m) - self.assertAllClose(pvar, pvar_exp, rtol=1e-4, atol=1e-5) + pvar = posterior_pred.variance + pvar_exp = get_pvar_expected(posterior=posterior, model=model, X=X, m=m) + self.assertAllClose(pvar, pvar_exp, rtol=1e-4, atol=1e-5) # Tensor valued observation noise. obs_noise = torch.rand(X.shape, **tkwargs) @@ -167,18 +157,9 @@ def test_gp(self, double_only: bool = False): posterior_pred = model.posterior(X, observation_noise=True) self.assertIsInstance(posterior_pred, GPyTorchPosterior) self.assertEqual(posterior_pred.mean.shape, expected_shape) - if use_octf: - # ensure un-transformation is applied - tmp_tf = model.outcome_transform - del model.outcome_transform - pp_tf = model.posterior(X, observation_noise=True) - model.outcome_transform = tmp_tf - expected_var = tmp_tf.untransform_posterior(pp_tf).variance - self.assertAllClose(posterior_pred.variance, expected_var) - else: - pvar = posterior_pred.variance - pvar_exp = get_pvar_expected(posterior, model, X, m) - self.assertAllClose(pvar, pvar_exp, rtol=1e-4, atol=1e-5) + pvar = posterior_pred.variance + pvar_exp = get_pvar_expected(posterior=posterior, model=model, X=X, m=m) + self.assertAllClose(pvar, pvar_exp, rtol=1e-4, atol=1e-5) # test batch evaluation with broadcasting for input_batch_shape in ([], [3], [1]): @@ -186,11 +167,10 @@ def test_gp(self, double_only: bool = False): if input_batch_shape == [3] and len(batch_shape) > 0: msg = ( - "Shape mismatch: objects cannot be broadcast to a" - " single shape" + "Shape mismatch: objects cannot be broadcast to a single shape" if m == 1 - else "The trailing batch dimensions of X must match" - " the trailing batch dimensions of the training inputs." + else "The trailing batch dimensions of X must match " + "the trailing batch dimensions of the training inputs." ) with self.assertRaisesRegex(RuntimeError, msg): model.posterior(X, observation_noise=True) diff --git a/test/models/test_gp_regression_fidelity.py b/test/models/test_gp_regression_fidelity.py index c44c403757..a297ef174b 100644 --- a/test/models/test_gp_regression_fidelity.py +++ b/test/models/test_gp_regression_fidelity.py @@ -11,10 +11,7 @@ from botorch.exceptions.errors import UnsupportedError from botorch.exceptions.warnings import OptimizationWarning from botorch.fit import fit_gpytorch_mll -from botorch.models.gp_regression_fidelity import ( - FixedNoiseMultiFidelityGP, - SingleTaskMultiFidelityGP, -) +from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP from botorch.models.transforms import Normalize, Standardize from botorch.posteriors import GPyTorchPosterior from botorch.sampling import SobolQMCNormalSampler @@ -45,6 +42,7 @@ def _get_random_data_with_fidelity( class TestSingleTaskMultiFidelityGP(BotorchTestCase): FIDELITY_TEST_PAIRS = ( + # (iteration_fidelity, data_fidelities) (None, [1]), (1, None), (None, [-1]), @@ -80,13 +78,10 @@ def _get_model_and_data( "train_Y": train_Y, "iteration_fidelity": iteration_fidelity, "linear_truncated": lin_truncated, + "outcome_transform": outcome_transform, + "input_transform": input_transform, } ) - - if outcome_transform is not None: - model_kwargs["outcome_transform"] = outcome_transform - if input_transform is not None: - model_kwargs["input_transform"] = input_transform model = SingleTaskMultiFidelityGP(**model_kwargs) return model, model_kwargs @@ -105,14 +100,6 @@ def test_init_error(self) -> None: SingleTaskMultiFidelityGP( train_X, train_Y, linear_truncated=lin_truncated, data_fidelities=[] ) - with self.assertRaises(ValueError): - SingleTaskMultiFidelityGP( - train_X, train_Y, data_fidelities=[1], data_fidelity=2 - ) - with self.assertWarnsRegex(DeprecationWarning, "data_fidelity"): - SingleTaskMultiFidelityGP( - train_X, train_Y, data_fidelity=1, linear_truncated=False - ) def test_gp(self) -> None: for iteration_fidelity, data_fidelities in self.FIDELITY_TEST_PAIRS: @@ -299,6 +286,7 @@ def test_condition_on_observations(self): "iteration_fidelity", "data_fidelities", "linear_truncated", + "outcome_transform", "input_transform", ): model_kwargs_non_batch[k] = v @@ -441,8 +429,7 @@ def test_construct_inputs(self): self.assertTrue(kwargs["train_Y"].equal(data_dict["train_Y"])) -class TestFixedNoiseMultiFidelityGP(TestSingleTaskMultiFidelityGP): - model_class = FixedNoiseMultiFidelityGP +class TestFixedNoiseSingleTaskMultiFidelityGP(TestSingleTaskMultiFidelityGP): def _get_model_and_data( self, @@ -471,17 +458,11 @@ def _get_model_and_data( "train_Yvar": train_Yvar, "iteration_fidelity": iteration_fidelity, "linear_truncated": lin_truncated, + "outcome_transform": outcome_transform, + "input_transform": input_transform, } ) - if outcome_transform is not None: - model_kwargs["outcome_transform"] = outcome_transform - if input_transform is not None: - model_kwargs["input_transform"] = input_transform - if self.model_class is FixedNoiseMultiFidelityGP: - with self.assertWarnsRegex(DeprecationWarning, "SingleTaskMultiFidelityGP"): - model = FixedNoiseMultiFidelityGP(**model_kwargs) - else: - model = self.model_class(**model_kwargs) + model = SingleTaskMultiFidelityGP(**model_kwargs) return model, model_kwargs def test_init_error(self): @@ -490,17 +471,9 @@ def test_init_error(self): train_Yvar = torch.full_like(train_Y, 0.01) for lin_truncated in (True, False): with self.assertRaises(UnsupportedError): - FixedNoiseMultiFidelityGP( + SingleTaskMultiFidelityGP( train_X, train_Y, train_Yvar, linear_truncated=lin_truncated ) - with self.assertRaises(ValueError): - FixedNoiseMultiFidelityGP( - train_X, train_Y, train_Yvar, data_fidelities=[1], data_fidelity=2 - ) - with self.assertWarnsRegex(DeprecationWarning, "data_fidelity"): - FixedNoiseMultiFidelityGP( - train_X, train_Y, train_Yvar, data_fidelity=1, linear_truncated=False - ) def test_fixed_noise_likelihood(self): for iteration_fidelity, data_fidelities in self.FIDELITY_TEST_PAIRS: @@ -571,8 +544,3 @@ def test_construct_inputs(self): self.assertEqual(data_dict.get("data_fidelities", None), [1]) self.assertTrue(kwargs["train_X"].equal(data_dict["train_X"])) self.assertTrue(kwargs["train_Y"].equal(data_dict["train_Y"])) - - -class TestFixedNoiseSingleTaskMultiFidelityGP(TestFixedNoiseMultiFidelityGP): - # Test SingleTaskMultiFidelityGP with observed noise. - model_class = SingleTaskMultiFidelityGP diff --git a/test/models/test_gp_regression_mixed.py b/test/models/test_gp_regression_mixed.py index ee2534622a..9429b47091 100644 --- a/test/models/test_gp_regression_mixed.py +++ b/test/models/test_gp_regression_mixed.py @@ -34,12 +34,16 @@ class TestMixedSingleTaskGP(BotorchTestCase): def test_gp(self): d = 3 bounds = torch.tensor([[-1.0] * d, [1.0] * d]) - for batch_shape, m, ncat, dtype, observed_noise in ( - (torch.Size(), 1, 0, torch.float, False), - (torch.Size(), 2, 1, torch.double, True), - (torch.Size([2]), 2, 3, torch.double, False), + for batch_shape, m, ncat, dtype, observed_noise, use_octf in ( + (torch.Size(), 1, 0, torch.float, False, False), + (torch.Size(), 2, 1, torch.double, True, True), + (torch.Size([2]), 2, 3, torch.double, False, True), ): tkwargs = {"device": self.device, "dtype": dtype} + # The model by default uses a `Standardize` outcome transform, so + # to test without that transform we need to explicitly pass in `None`. + outcome_transform_kwargs = {} if use_octf else {"outcome_transform": None} + train_X, train_Y = _get_random_data( batch_shape=batch_shape, m=m, d=d, **tkwargs ) @@ -70,6 +74,7 @@ def test_gp(self): train_Y=train_Y, cat_dims=cat_dims, train_Yvar=train_Yvar, + **outcome_transform_kwargs, ) self.assertEqual(model._ignore_X_dims_scaling_check, cat_dims) mll = ExactMarginalLogLikelihood(model.likelihood, model).to(**tkwargs) @@ -118,7 +123,7 @@ def test_gp(self): self.assertEqual(posterior_pred.mean.shape, expected_shape) self.assertEqual(posterior_pred.variance.shape, expected_shape) pvar = posterior_pred.variance - pvar_exp = get_pvar_expected(posterior, model, X, m) + pvar_exp = get_pvar_expected(posterior=posterior, model=model, X=X, m=m) self.assertAllClose(pvar, pvar_exp, rtol=1e-4, atol=1e-5) # test batch evaluation @@ -132,7 +137,7 @@ def test_gp(self): self.assertIsInstance(posterior_pred, GPyTorchPosterior) self.assertEqual(posterior_pred.mean.shape, expected_shape) pvar = posterior_pred.variance - pvar_exp = get_pvar_expected(posterior, model, X, m) + pvar_exp = get_pvar_expected(posterior=posterior, model=model, X=X, m=m) self.assertAllClose(pvar, pvar_exp, rtol=1e-4, atol=1e-5) # test that model converter throws an exception diff --git a/test/models/test_model_list_gp_regression.py b/test/models/test_model_list_gp_regression.py index 2c901e47c7..f27080dafb 100644 --- a/test/models/test_model_list_gp_regression.py +++ b/test/models/test_model_list_gp_regression.py @@ -292,8 +292,11 @@ def test_ModelListGP_single(self): self.assertIsInstance(posterior, GPyTorchPosterior) self.assertIsInstance(posterior.distribution, MultivariateNormal) - def test_ModelListGP_multi_task(self): + def test_ModelListGP_multi_task(self, use_outcome_transform: bool = False): tkwargs = {"device": self.device, "dtype": torch.float} + outcome_transform_kwargs = ( + {} if use_outcome_transform else {"outcome_transform": None} + ) train_x_raw, train_y = _get_random_data( batch_shape=torch.Size(), m=1, n=10, **tkwargs ) @@ -306,6 +309,7 @@ def test_ModelListGP_multi_task(self): train_Y=train_y, task_feature=-1, output_tasks=[0], + **outcome_transform_kwargs, ) # Wrap a single single-output MTGP. model_list_gp = ModelListGP(model) @@ -326,6 +330,7 @@ def test_ModelListGP_multi_task(self): train_X=train_x, train_Y=train_y, task_feature=-1, + **outcome_transform_kwargs, ) model_list_gp = ModelListGP(model2) self.assertEqual(model_list_gp.num_outputs, 2) @@ -360,9 +365,7 @@ def test_ModelListGP_multi_task(self): self.assertEqual(len(subset_model.models), 2) # Test condition on observations model_s1 = SingleTaskGP( - train_X=train_x_raw, - train_Y=train_y, - outcome_transform=None, + train_X=train_x_raw, train_Y=train_y, **outcome_transform_kwargs ) model_list_gp = ModelListGP(model_s1, model2, deepcopy(model_s1)) model_list_gp.posterior(train_x_raw) @@ -375,11 +378,21 @@ def test_ModelListGP_multi_task(self): self.assertIsInstance(cm, ModelListGP) self.assertEqual(cm.num_outputs, 4) self.assertEqual(len(cm.models), 3) + # TODO: Figure out why the outcome transform changes the input shape... + exp_shape_stgp = ( + torch.Size([1, 15, 1]) if use_outcome_transform else torch.Size([15, 1]) + ) + exp_shape_mtgp = ( + torch.Size([1, 20, 2]) if use_outcome_transform else torch.Size([20, 2]) + ) for i in [0, 2]: self.assertIsInstance(cm.models[i], SingleTaskGP) - self.assertEqual(cm.models[i].train_inputs[0].shape, torch.Size([15, 1])) + self.assertEqual(cm.models[i].train_inputs[0].shape, exp_shape_stgp) self.assertIsInstance(cm.models[1], MultiTaskGP) - self.assertEqual(cm.models[1].train_inputs[0].shape, torch.Size([20, 2])) + self.assertEqual(cm.models[1].train_inputs[0].shape, exp_shape_mtgp) + + def test_ModelListGP_multi_task_outcome_transform(self): + self.test_ModelListGP_multi_task(use_outcome_transform=True) def test_transform_revert_train_inputs(self): tkwargs = {"device": self.device, "dtype": torch.float} diff --git a/test/utils/test_transforms.py b/test/utils/test_transforms.py index 8b95ecd971..07bdbc2791 100644 --- a/test/utils/test_transforms.py +++ b/test/utils/test_transforms.py @@ -340,7 +340,7 @@ def test_is_fully_bayesian(self): SingleTaskGP(train_X=X, train_Y=Y), MultiTaskGP(train_X=X, train_Y=Y, task_feature=-1), HigherOrderGP(train_X=X, train_Y=Y), - SingleTaskMultiFidelityGP(train_X=X, train_Y=Y, data_fidelity=3), + SingleTaskMultiFidelityGP(train_X=X, train_Y=Y, data_fidelities=[3]), MixedSingleTaskGP(train_X=X, train_Y=Y, cat_dims=[1]), PairwiseGP(datapoints=X, comparisons=None), ) @@ -382,7 +382,7 @@ def test_is_ensemble(self): SingleTaskGP(train_X=X, train_Y=Y), MultiTaskGP(train_X=X, train_Y=Y, task_feature=-1), HigherOrderGP(train_X=X, train_Y=Y), - SingleTaskMultiFidelityGP(train_X=X, train_Y=Y, data_fidelity=3), + SingleTaskMultiFidelityGP(train_X=X, train_Y=Y, data_fidelities=[3]), MixedSingleTaskGP(train_X=X, train_Y=Y, cat_dims=[1]), PairwiseGP(datapoints=X, comparisons=None), )