Use Standardize outcome transform by default in more models (#2532)
Summary:
Pull Request resolved: #2532

Makes the models whose priors were updated in #2507 use the `Standardize` outcome transform by default, mimicking #2458

Also removes some deprecated functionality in the process: the `data_fidelity` argument to `SingleTaskMultiFidelityGP`, as well as the `FixedNoiseMultiFidelityGP` and `FixedNoiseLCEMGP` models.
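
As a rough usage sketch of the new default (not code from this diff; the tensor shapes and fidelity column index are illustrative, and the `outcome_transform` attribute check assumes the usual `SingleTaskGP` behavior of storing the transform on the model):

import torch
from botorch.models import SingleTaskMultiFidelityGP
from botorch.models.transforms.outcome import Standardize

train_X = torch.rand(20, 3)  # column 2 is the downsampling fidelity, in [0, 1]
train_Y = torch.rand(20, 1)

# With this change, train_Y is standardized internally by default.
model = SingleTaskMultiFidelityGP(train_X, train_Y, data_fidelities=[2])
assert isinstance(model.outcome_transform, Standardize)

# Passing `outcome_transform=None` opts out of the default.
model_raw = SingleTaskMultiFidelityGP(
    train_X, train_Y, data_fidelities=[2], outcome_transform=None
)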

Reviewed By: saitcakmak, esantorella

Differential Revision: D62552307
Balandat authored and facebook-github-bot committed Sep 14, 2024
1 parent c895a8d commit 7f29dda
Showing 16 changed files with 342 additions and 309 deletions.
40 changes: 38 additions & 2 deletions botorch/models/approximate_gp.py
@@ -35,6 +35,7 @@
from typing import Optional, TypeVar, Union

import torch
from botorch.exceptions.warnings import UserInputWarning
from botorch.models.gpytorch import GPyTorchModel
from botorch.models.transforms.input import InputTransform
from botorch.models.transforms.outcome import OutcomeTransform
@@ -70,6 +71,14 @@


TApproxModel = TypeVar("TApproxModel", bound="ApproximateGPyTorchModel")
TRANSFORM_WARNING = (
"Using an {ttype} transform with `SingleTaskVariationalGP`. If this "
"model is trained in minibatches, a {ttype} transform with learnable "
"parameters would update its parameters for each minibatch, which is "
"undesirable. If you do intend to train in minibatches, we recommend "
"you not use a {ttype} transform and instead pre-transform your whole "
"data set before fitting the model."
)


class ApproximateGPyTorchModel(GPyTorchModel):
@@ -325,9 +334,9 @@ def __init__(
variational_distribution: Optional[_VariationalDistribution] = None,
variational_strategy: type[_VariationalStrategy] = VariationalStrategy,
inducing_points: Optional[Union[Tensor, int]] = None,
outcome_transform: Optional[OutcomeTransform] = None,
input_transform: Optional[InputTransform] = None,
inducing_point_allocator: Optional[InducingPointAllocator] = None,
outcome_transform: OutcomeTransform | None = None,
input_transform: InputTransform | None = None,
) -> None:
r"""
Args:
@@ -338,6 +347,8 @@ def __init__(
either a `GaussianLikelihood` (if `num_outputs=1`) or a
`MultitaskGaussianLikelihood`(if `num_outputs>1`).
num_outputs: Number of output responses per input (default: 1).
learn_inducing_points: If True, the inducing point locations are learned
jointly with the other model parameters.
covar_module: Kernel function. If omitted, uses an `RBFKernel`.
mean_module: Mean of GP model. If omitted, uses a `ConstantMean`.
variational_distribution: Type of variational distribution to use
@@ -351,13 +362,32 @@ def __init__(
inducing_point_allocator: The `InducingPointAllocator` used to
initialize the inducing point locations. If omitted,
uses `GreedyVarianceReduction`.
outcome_transform: An outcome transform that is applied to the training
data during instantiation and to the posterior during inference.
NOTE: If this model is trained in minibatches, an outcome transform
with learnable parameters (such as `Standardize`) would update its
parameters for each minibatch, which is undesirable. If you do intend
to train in minibatches, we recommend you not use an outcome transform
and instead pre-transform your whole data set before fitting the model.
input_transform: An input transform that is applied in the model's
forward pass.
NOTE: If this model is trained in minibatches, an input transform
with learnable parameters (such as `Normalize`) would update its
parameters for each minibatch, which is undesirable. If you do intend
to train in minibatches, we recommend you not use an input transform
and instead pre-transform your whole data set before fitting the model.
"""
with torch.no_grad():
transformed_X = self.transform_inputs(
X=train_X, input_transform=input_transform
)
if train_Y is not None:
if outcome_transform is not None:
warnings.warn(
TRANSFORM_WARNING.format(ttype="outcome"),
UserInputWarning,
stacklevel=3,
)
train_Y, _ = outcome_transform(train_Y)
self._validate_tensor_args(X=transformed_X, Y=train_Y)
validate_input_scaling(train_X=transformed_X, train_Y=train_Y)
@@ -388,6 +418,7 @@ def __init__(
"being further optimized during the model fit. If so "
"then set `learn_inducing_points` to False.",
UserWarning,
stacklevel=3,
)

if inducing_point_allocator is None:
@@ -412,6 +443,11 @@
if outcome_transform is not None:
self.outcome_transform = outcome_transform
if input_transform is not None:
warnings.warn(
TRANSFORM_WARNING.format(ttype="input"),
UserInputWarning,
stacklevel=3,
)
self.input_transform = input_transform

# for model fitting utilities
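
The warning added above recommends pre-transforming the whole data set when `SingleTaskVariationalGP` is trained in minibatches. A minimal sketch of that workflow, assuming simple min-max scaling of the inputs and z-scoring of the targets (shapes and the inducing-point count are made up for illustration):

import torch
from botorch.models import SingleTaskVariationalGP

train_X = torch.rand(1000, 4)
train_Y = torch.randn(1000, 1)

# Transform the full data set once up front, rather than attaching learnable
# transforms whose parameters would shift with every minibatch.
X_min = train_X.min(dim=0).values
X_range = train_X.max(dim=0).values - X_min
X_scaled = (train_X - X_min) / X_range
Y_scaled = (train_Y - train_Y.mean(dim=0)) / train_Y.std(dim=0)

# No outcome or input transform is passed, so no UserInputWarning is raised and
# nothing about the scaling changes across minibatches.
model = SingleTaskVariationalGP(train_X=X_scaled, train_Y=Y_scaled, inducing_points=50)
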
73 changes: 8 additions & 65 deletions botorch/models/contextual_multioutput.py
@@ -13,14 +13,14 @@
Advances in Neural Information Processing Systems 33, NeurIPS 2020.
"""

import warnings
from typing import Any, Optional, Union

import torch
from botorch.models.multitask import MultiTaskGP
from botorch.models.transforms.input import InputTransform
from botorch.models.transforms.outcome import OutcomeTransform
from botorch.utils.datasets import MultiTaskDataset, SupervisedDataset
from botorch.utils.types import _DefaultType, DEFAULT
from gpytorch.constraints import Interval
from gpytorch.kernels.rbf_kernel import RBFKernel
from gpytorch.likelihoods.likelihood import Likelihood
@@ -51,8 +51,8 @@ def __init__(
embs_dim_list: Optional[list[int]] = None,
output_tasks: Optional[list[int]] = None,
all_tasks: Optional[list[int]] = None,
outcome_transform: OutcomeTransform | _DefaultType | None = DEFAULT,
input_transform: Optional[InputTransform] = None,
outcome_transform: Optional[OutcomeTransform] = None,
) -> None:
r"""
Args:
@@ -85,12 +85,14 @@ def __init__(
training data. Note that when a task is not observed, the corresponding
task covariance will heavily depend on random initialization and may
behave unexpectedly.
input_transform: An input transform that is applied in the model's
forward pass.
outcome_transform: An outcome transform that is applied to the
training data during instantiation and to the posterior during
inference (that is, the `Posterior` obtained by calling
`.posterior` on the model will be on the original scale).
`.posterior` on the model will be on the original scale). We use a
`Standardize` transform if no `outcome_transform` is specified.
Pass down `None` to use no outcome transform.
input_transform: An input transform that is applied in the model's
forward pass.
"""
super().__init__(
train_X=train_X,
@@ -102,8 +104,8 @@
likelihood=likelihood,
output_tasks=output_tasks,
all_tasks=all_tasks,
input_transform=input_transform,
outcome_transform=outcome_transform,
input_transform=input_transform,
)
self.device = train_X.device
if all_tasks is None:
@@ -247,62 +249,3 @@ def construct_inputs(
if embs_dim_list is not None:
base_inputs["embs_dim_list"] = embs_dim_list
return base_inputs


class FixedNoiseLCEMGP(LCEMGP):
r"""The Multi-Task GP the latent context embedding multioutput
(LCE-M) kernel, with known observation noise.
DEPRECATED: Please use `LCEMGP` with `train_Yvar` instead.
Will be removed in a future release (~v0.11).
"""

def __init__(
self,
train_X: Tensor,
train_Y: Tensor,
train_Yvar: Tensor,
task_feature: int,
context_cat_feature: Optional[Tensor] = None,
context_emb_feature: Optional[Tensor] = None,
embs_dim_list: Optional[list[int]] = None,
output_tasks: Optional[list[int]] = None,
) -> None:
r"""
Args:
train_X: (n x d) X training data.
train_Y: (n x 1) Y training data.
train_Yvar: (n x 1) Observed variances of each training Y.
task_feature: Column index of train_X to get context indices.
context_cat_feature: (n_contexts x k) one-hot encoded context
features. Rows are ordered by context indices, where k is the
number of categorical variables. If None, task indices will
be used and k = 1.
context_emb_feature: (n_contexts x m) pre-given continuous
embedding features. Rows are ordered by context indices.
embs_dim_list: Embedding dimension for each categorical variable.
The length equals to k. If None, the embedding dimension is set to
1 for each categorical variable.
output_tasks: A list of task indices for which to compute model
outputs for. If omitted, return outputs for all task indices.
"""
warnings.warn(
"`FixedNoiseLCEMGP` has been deprecated and will be removed in a "
"future release. Please use the `LCEMGP` model instead. "
"When `train_Yvar` is specified, `LCEMGP` behaves the same "
"as the `FixedNoiseLCEMGP`.",
DeprecationWarning,
stacklevel=2,
)

super().__init__(
train_X=train_X,
train_Y=train_Y,
task_feature=task_feature,
train_Yvar=train_Yvar,
context_cat_feature=context_cat_feature,
context_emb_feature=context_emb_feature,
embs_dim_list=embs_dim_list,
output_tasks=output_tasks,
)
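
With `FixedNoiseLCEMGP` removed above, fixed-noise use cases go through `LCEMGP` with `train_Yvar`, and outcomes are now standardized by default as well. A rough sketch of the replacement call (the toy data, task column index, and noise level are illustrative only):

import torch
from botorch.models.contextual_multioutput import LCEMGP

# Toy multi-task data; column 3 of train_X holds the task index.
X = torch.rand(30, 3)
task = (torch.arange(30) % 2).unsqueeze(-1).to(X)
train_X = torch.cat([X, task], dim=-1)
train_Y = torch.rand(30, 1)
train_Yvar = torch.full_like(train_Y, 1e-2)

# Previously: FixedNoiseLCEMGP(train_X, train_Y, train_Yvar, task_feature=3)
model = LCEMGP(
    train_X=train_X,
    train_Y=train_Y,
    task_feature=3,
    train_Yvar=train_Yvar,  # known observation noise
)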
8 changes: 7 additions & 1 deletion botorch/models/fully_bayesian_multitask.py
@@ -167,7 +167,7 @@ class SaasFullyBayesianMultiTaskGP(MultiTaskGP):
This model assumes that the inputs have been normalized to [0, 1]^d and that the
output has been stratified standardized to have zero mean and unit variance for
each task.The SAAS model [Eriksson2021saasbo]_ with a Matern-5/2 is used as data
each task. The SAAS model [Eriksson2021saasbo]_ with a Matern-5/2 is used as data
kernel by default.
You are expected to use `fit_fully_bayesian_model_nuts` to fit this model as it
@@ -243,6 +243,7 @@ def __init__(
X=train_X, input_transform=input_transform
)
if outcome_transform is not None:
outcome_transform.train() # Ensure we learn parameters here on init
train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar)
if train_Yvar is not None: # Clamp after transforming
train_Yvar = train_Yvar.clamp(MIN_INFERRED_NOISE_LEVEL)
@@ -254,6 +255,11 @@
task_feature=task_feature,
output_tasks=output_tasks,
rank=rank,
# We already transformed the data above, this avoids applying the
# default `Standardize` transform twice. As outcome_transform is
# set on `self` below, it will be applied to the posterior in the
# `posterior` method of `MultiTaskGP`.
outcome_transform=None,
)
if all_tasks is not None and self._expected_task_values != set(all_tasks):
raise NotImplementedError(
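
For context on the hunk above: the outcome transform is applied to the training data once at construction, and the transform stored on the model maps the posterior back to the original scale. A rough usage sketch (the data, the task column index, and the deliberately tiny NUTS settings are illustrative, not part of this diff):

import torch
from botorch.fit import fit_fully_bayesian_model_nuts
from botorch.models.fully_bayesian_multitask import SaasFullyBayesianMultiTaskGP
from botorch.models.transforms.outcome import Standardize

X = torch.rand(20, 3)
task = (torch.arange(20) % 2).unsqueeze(-1).to(X)
train_X = torch.cat([X, task], dim=-1)  # column 3 is the task index
train_Y = torch.rand(20, 1)

model = SaasFullyBayesianMultiTaskGP(
    train_X=train_X,
    train_Y=train_Y,
    task_feature=3,
    outcome_transform=Standardize(m=1),  # applied to train_Y once, here
)
fit_fully_bayesian_model_nuts(
    model, warmup_steps=32, num_samples=16, thinning=4, disable_progbar=True
)
# The posterior is mapped back to the original scale of train_Y; the test
# inputs exclude the task column.
posterior = model.posterior(torch.rand(5, 3))
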
80 changes: 16 additions & 64 deletions botorch/models/gp_regression_fidelity.py
@@ -25,8 +25,7 @@

from __future__ import annotations

import warnings
from typing import Any, Optional, Union
from typing import Any, Sequence

import torch
from botorch.exceptions.errors import UnsupportedError
@@ -40,6 +39,7 @@
from botorch.models.transforms.outcome import OutcomeTransform
from botorch.models.utils.gpytorch_modules import get_covar_module_with_dim_scaled_prior
from botorch.utils.datasets import SupervisedDataset
from botorch.utils.types import _DefaultType, DEFAULT
from gpytorch.kernels.kernel import ProductKernel
from gpytorch.kernels.scale_kernel import ScaleKernel
from gpytorch.likelihoods.likelihood import Likelihood
@@ -66,15 +66,14 @@ def __init__(
self,
train_X: Tensor,
train_Y: Tensor,
train_Yvar: Optional[Tensor] = None,
iteration_fidelity: Optional[int] = None,
data_fidelities: Optional[Union[list[int], tuple[int]]] = None,
data_fidelity: Optional[int] = None,
train_Yvar: Tensor | None = None,
iteration_fidelity: int | None = None,
data_fidelities: Sequence[int] | None = None,
linear_truncated: bool = True,
nu: float = 2.5,
likelihood: Optional[Likelihood] = None,
outcome_transform: Optional[OutcomeTransform] = None,
input_transform: Optional[InputTransform] = None,
likelihood: Likelihood | None = None,
outcome_transform: OutcomeTransform | _DefaultType | None = DEFAULT,
input_transform: InputTransform | None = None,
) -> None:
r"""
Args:
@@ -89,33 +88,21 @@ def __init__(
data_fidelities: The column indices for the downsampling fidelity parameter.
If a list/tuple of indices is provided, a kernel will be constructed for
each index (optional).
data_fidelity: The column index for the downsampling fidelity parameter
(optional). Deprecated in favor of `data_fidelities`.
linear_truncated: If True, use a `LinearTruncatedFidelityKernel` instead
of the default kernel.
nu: The smoothness parameter for the Matern kernel: either 1/2, 3/2, or
5/2. Only used when `linear_truncated=True`.
likelihood: A likelihood. If omitted, use a standard GaussianLikelihood
with inferred noise level.
outcome_transform: An outcome transform that is applied to the
training data during instantiation and to the posterior during
inference (that is, the `Posterior` obtained by calling
`.posterior` on the model will be on the original scale).
training data during instantiation and to the posterior during
inference (that is, the `Posterior` obtained by calling
`.posterior` on the model will be on the original scale). We use a
`Standardize` transform if no `outcome_transform` is specified.
Pass down `None` to use no outcome transform.
input_transform: An input transform that is applied in the model's
forward pass.
"""
if data_fidelity is not None:
warnings.warn(
"The `data_fidelity` argument is deprecated and will be removed in "
"a future release. Please use `data_fidelities` instead.",
DeprecationWarning,
)
if data_fidelities is not None:
raise ValueError(
"Cannot specify both `data_fidelity` and `data_fidelities`."
)
data_fidelities = [data_fidelity]

self._init_args = {
"iteration_fidelity": iteration_fidelity,
"data_fidelities": data_fidelities,
@@ -179,47 +166,11 @@ def construct_inputs(
return inputs


class FixedNoiseMultiFidelityGP(SingleTaskMultiFidelityGP):
def __init__(
self,
train_X: Tensor,
train_Y: Tensor,
train_Yvar: Tensor,
iteration_fidelity: Optional[int] = None,
data_fidelities: Optional[Union[list[int], tuple[int]]] = None,
data_fidelity: Optional[int] = None,
linear_truncated: bool = True,
nu: float = 2.5,
outcome_transform: Optional[OutcomeTransform] = None,
input_transform: Optional[InputTransform] = None,
) -> None:
r"""DEPRECATED: Use `SingleTaskMultiFidelityGP` instead.
Will be removed in a future release (~v0.11).
"""
warnings.warn(
"`FixedNoiseMultiFidelityGP` has been deprecated. "
"Use `SingleTaskMultiFidelityGP` with `train_Yvar` instead.",
DeprecationWarning,
)
super().__init__(
train_X=train_X,
train_Y=train_Y,
train_Yvar=train_Yvar,
iteration_fidelity=iteration_fidelity,
data_fidelities=data_fidelities,
data_fidelity=data_fidelity,
linear_truncated=linear_truncated,
nu=nu,
outcome_transform=outcome_transform,
input_transform=input_transform,
)


def _setup_multifidelity_covar_module(
dim: int,
aug_batch_shape: torch.Size,
iteration_fidelity: Optional[int],
data_fidelities: Optional[list[int]],
iteration_fidelity: int | None,
data_fidelities: Sequence[int] | None,
linear_truncated: bool,
nu: float,
) -> tuple[ScaleKernel, dict]:
@@ -246,6 +197,7 @@
if iteration_fidelity is not None and iteration_fidelity < 0:
iteration_fidelity = dim + iteration_fidelity
if data_fidelities is not None:
data_fidelities = list(data_fidelities)
for i in range(len(data_fidelities)):
if data_fidelities[i] < 0:
data_fidelities[i] = dim + data_fidelities[i]
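
Migration for the removals above: `FixedNoiseMultiFidelityGP` is replaced by `SingleTaskMultiFidelityGP` with `train_Yvar`, and the singular `data_fidelity` argument by the plural `data_fidelities`. A hedged before/after sketch (shapes, fidelity column, and noise level are illustrative):

import torch
from botorch.models import SingleTaskMultiFidelityGP

train_X = torch.rand(20, 3)  # column 2 is the downsampling fidelity
train_Y = torch.rand(20, 1)
train_Yvar = torch.full_like(train_Y, 1e-3)

# Previously:
#   FixedNoiseMultiFidelityGP(train_X, train_Y, train_Yvar, data_fidelity=2)
# Now: pass train_Yvar directly and use the plural argument.
model = SingleTaskMultiFidelityGP(
    train_X, train_Y, train_Yvar=train_Yvar, data_fidelities=[2]
)
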
(Diffs for the remaining 12 changed files are not shown.)
