Merge branch 'main' into fix_mo_posterior
SaiAakash authored Oct 24, 2024
2 parents 4f2af4f + ccf278a commit 4a1de1b
Showing 14 changed files with 443 additions and 136 deletions.
8 changes: 5 additions & 3 deletions botorch/acquisition/input_constructors.py
@@ -80,6 +80,7 @@
     LearnedObjective,
     MCAcquisitionObjective,
     PosteriorTransform,
+    ScalarizedPosteriorTransform,
 )
 from botorch.acquisition.preference import (
     AnalyticExpectedUtilityOfBestOption,
@@ -1800,8 +1801,8 @@ def construct_inputs_qJES(
     model: Model,
     bounds: list[tuple[float, float]],
     num_optima: int = 64,
-    maximize: bool = True,
     condition_noiseless: bool = True,
+    posterior_transform: ScalarizedPosteriorTransform | None = None,
     X_pending: Tensor | None = None,
     estimation_type: str = "LB",
     num_samples: int = 64,
@@ -1811,15 +1812,16 @@
         model=model,
         bounds=torch.as_tensor(bounds, dtype=dtype).T,
         num_optima=num_optima,
-        maximize=maximize,
+        posterior_transform=posterior_transform,
+        return_transformed=True,
     )

     inputs = {
         "model": model,
         "optimal_inputs": optimal_inputs,
         "optimal_outputs": optimal_outputs,
         "condition_noiseless": condition_noiseless,
-        "maximize": maximize,
+        "posterior_transform": posterior_transform,
         "X_pending": X_pending,
         "estimation_type": estimation_type,
         "num_samples": num_samples,
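For orientation, here is a minimal sketch (ours, not from the diff) of how the reworked constructor is meant to be fed; the model, data, and weights are made up. Negating the weights of the `ScalarizedPosteriorTransform` is now the way to express minimization, replacing the removed `maximize` flag.

```python
# Hypothetical usage of the updated input constructor (made-up data).
import torch

from botorch.acquisition.input_constructors import construct_inputs_qJES
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.models import SingleTaskGP

train_X = torch.rand(10, 2, dtype=torch.float64)
train_Y = torch.rand(10, 2, dtype=torch.float64)  # two outputs
model = SingleTaskGP(train_X, train_Y)

# Weighted-sum scalarization of the two outputs.
transform = ScalarizedPosteriorTransform(
    weights=torch.tensor([0.7, 0.3], dtype=torch.float64)
)

inputs = construct_inputs_qJES(
    model=model,
    bounds=[(0.0, 1.0), (0.0, 1.0)],
    num_optima=8,
    posterior_transform=transform,
)
# `inputs` can then be unpacked into qJointEntropySearch(**inputs).
```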
48 changes: 26 additions & 22 deletions botorch/acquisition/joint_entropy_search.py
@@ -74,7 +74,6 @@ def __init__(
        posterior_transform: PosteriorTransform | None = None,
        X_pending: Tensor | None = None,
        estimation_type: str = "LB",
-        maximize: bool = True,
        num_samples: int = 64,
    ) -> None:
        r"""Joint entropy search acquisition function.
@@ -91,11 +90,11 @@
            [Tu2022joint]_. These are sampled identically, so this only controls
            the fashion in which the GP is reshaped as a result of conditioning
            on the optimum.
+            posterior_transform: PosteriorTransform to negate or scalarize the output.
            estimation_type: A string to determine which entropy estimate is
                computed: "Lower Bound" ("LB") or "Monte Carlo" ("MC"). Lower
                Bound is recommended due to the relatively high variance of the
                MC estimator.
-            maximize: If true, we consider a maximization problem.
            X_pending: A `m x d`-dim Tensor of `m` design points that have been
                submitted for function evaluation, but have not yet been evaluated.
            num_samples: The number of Monte Carlo samples used for the Monte Carlo
@@ -112,16 +111,13 @@
        # and three-dimensional otherwise.
        self.optimal_inputs = optimal_inputs.unsqueeze(-2)
        self.optimal_outputs = optimal_outputs.unsqueeze(-2)
+        self.optimal_output_values = (
+            posterior_transform.evaluate(self.optimal_outputs).unsqueeze(-1)
+            if posterior_transform
+            else self.optimal_outputs
+        )
+        self.posterior_transform = posterior_transform
-        self.maximize = maximize
-
-        # The optima (can be maxima, can be minima) come in as the largest
-        # values if we optimize, or the smallest (likely substantially negative)
-        # if we minimize. Inside the acquisition function, however, we always
-        # want to consider MAX-values. As such, we need to flip them if
-        # we want to minimize.
-        if not self.maximize:
-            optimal_outputs = -optimal_outputs

        self.num_samples = optimal_inputs.shape[0]
        self.condition_noiseless = condition_noiseless
        self.initial_model = model
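As a side note (our illustration, not part of the commit), the shape bookkeeping behind the new `optimal_output_values` attribute can be sketched as follows; the sizes are hypothetical:

```python
# Hypothetical shape walk-through of `optimal_output_values` (assumed sizes).
import torch
from botorch.acquisition.objective import ScalarizedPosteriorTransform

num_optima, m = 8, 2
# Mirrors self.optimal_outputs after .unsqueeze(-2): one optimum per row.
optimal_outputs = torch.rand(num_optima, 1, m)
transform = ScalarizedPosteriorTransform(weights=torch.tensor([1.0, -1.0]))

# evaluate() maps (..., m) -> (...); unsqueeze(-1) restores a trailing
# singleton so the values broadcast against posterior means downstream.
optimal_output_values = transform.evaluate(optimal_outputs).unsqueeze(-1)
assert optimal_output_values.shape == (num_optima, 1, 1)
```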
@@ -203,7 +199,9 @@ def _compute_lower_bound_information_gain(
            A `batch_shape`-dim Tensor of acquisition values at the given design
            points `X`.
        """
-        initial_posterior = self.initial_model.posterior(X, observation_noise=True)
+        initial_posterior = self.initial_model.posterior(
+            X, observation_noise=True, posterior_transform=self.posterior_transform
+        )
        # need to check if there is a two-dimensional batch shape -
        # the sampled optima appear in the dimension right after
        batch_shape = X.shape[:-2]
@@ -221,15 +219,17 @@

        # Compute the mixture mean and variance
        posterior_m = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=True
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
        )
        noiseless_var = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=False
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=False,
+            posterior_transform=self.posterior_transform,
        ).variance

        mean_m = posterior_m.mean
-        if not self.maximize:
-            mean_m = -mean_m
        variance_m = posterior_m.variance

        check_no_nans(variance_m)
@@ -240,7 +240,7 @@
            torch.zeros(1, device=X.device, dtype=X.dtype),
            torch.ones(1, device=X.device, dtype=X.dtype),
        )
-        normalized_mvs = (self.optimal_outputs - mean_m) / stdv
+        normalized_mvs = (self.optimal_output_values - mean_m) / stdv
        cdf_mvs = normal.cdf(normalized_mvs).clamp_min(CLAMP_LB)
        pdf_mvs = torch.exp(normal.log_prob(normalized_mvs))
@@ -294,7 +294,9 @@ def _compute_monte_carlo_information_gain(
            A `batch_shape`-dim Tensor of acquisition values at the given design
            points `X`.
        """
-        initial_posterior = self.initial_model.posterior(X, observation_noise=True)
+        initial_posterior = self.initial_model.posterior(
+            X, observation_noise=True, posterior_transform=self.posterior_transform
+        )

        batch_shape = X.shape[:-2]
        sample_dim = len(batch_shape)
@@ -311,15 +313,17 @@

        # Compute the mixture mean and variance
        posterior_m = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=True
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
        )
        noiseless_var = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=False
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=False,
+            posterior_transform=self.posterior_transform,
        ).variance

        mean_m = posterior_m.mean
-        if not self.maximize:
-            mean_m = -mean_m
        variance_m = posterior_m.variance.clamp_min(CLAMP_LB)
        conditional_samples, conditional_logprobs = self._compute_monte_carlo_variables(
            posterior_m
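To see the new plumbing end to end, a hedged sketch (our construction, not from the diff) that draws untransformed sample-path optima and lets the acquisition function compute the scalarized `optimal_output_values` internally; data, bounds, and weights are made up, and we rely on the default `return_transformed=False`:

```python
# Sketch (assumed data/weights): qJES on a two-output model, scalarized
# internally via the posterior transform.
import torch

from botorch.acquisition.joint_entropy_search import qJointEntropySearch
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.acquisition.utils import get_optimal_samples
from botorch.models import SingleTaskGP

train_X = torch.rand(20, 3, dtype=torch.float64)
train_Y = torch.stack([train_X.sum(-1), -train_X.prod(-1)], dim=-1)
model = SingleTaskGP(train_X, train_Y)
bounds = torch.tensor([[0.0] * 3, [1.0] * 3], dtype=torch.float64)
transform = ScalarizedPosteriorTransform(
    weights=torch.tensor([1.0, -1.0], dtype=torch.float64)
)

# Default return_transformed=False: f* keeps all m outputs, and __init__
# derives the scalarized optimal_output_values itself.
optimal_inputs, optimal_outputs = get_optimal_samples(
    model=model,
    bounds=bounds,
    num_optima=8,
    posterior_transform=transform,
)
acqf = qJointEntropySearch(
    model=model,
    optimal_inputs=optimal_inputs,
    optimal_outputs=optimal_outputs,
    posterior_transform=transform,
)
values = acqf(torch.rand(4, 1, 3, dtype=torch.float64))  # 4 batches of q=1
```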
56 changes: 42 additions & 14 deletions botorch/acquisition/utils.py
@@ -18,6 +18,7 @@
     IdentityMCObjective,
     MCAcquisitionObjective,
     PosteriorTransform,
+    ScalarizedPosteriorTransform,
 )
 from botorch.exceptions.errors import (
     BotorchTensorDimensionError,
@@ -28,10 +29,11 @@
 from botorch.models.model import Model
 from botorch.sampling.base import MCSampler
 from botorch.sampling.get_sampler import get_sampler
-from botorch.sampling.pathwise import draw_matheron_paths
+from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
 from botorch.utils.objective import compute_feasibility_indicator
 from botorch.utils.sampling import optimize_posterior_samples
 from botorch.utils.transforms import is_ensemble, normalize_indices
+from gpytorch.models import GP
 from torch import Tensor

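One consequence of the import swap worth noting: `draw_matheron_paths` returns raw path callables, whereas `get_matheron_path_model` wraps the drawn paths in a deterministic model, so downstream code can interact with the samples through the usual `Model` API. A small illustration (ours, with made-up data):

```python
# Illustration of the swapped-in helper (made-up data; not from the diff).
import torch
from botorch.models import SingleTaskGP
from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model

model = SingleTaskGP(
    torch.rand(12, 2, dtype=torch.float64),
    torch.rand(12, 1, dtype=torch.float64),
)
# A deterministic model whose "posterior" evaluates 4 fixed sample paths.
path_model = get_matheron_path_model(model=model, sample_shape=torch.Size([4]))
# Evaluates each of the 4 drawn paths at 5 test points.
samples_at_X = path_model.posterior(torch.rand(5, 2, dtype=torch.float64)).mean
```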

@@ -486,36 +488,62 @@ def project_to_sample_points(X: Tensor, sample_points: Tensor) -> Tensor:


 def get_optimal_samples(
-    model: Model,
+    model: GP,
     bounds: Tensor,
     num_optima: int,
     raw_samples: int = 1024,
     num_restarts: int = 20,
-    maximize: bool = True,
+    posterior_transform: ScalarizedPosteriorTransform | None = None,
+    objective: MCAcquisitionObjective | None = None,
+    return_transformed: bool = False,
 ) -> tuple[Tensor, Tensor]:
     """Draws sample paths from the posterior and maximizes the samples using GD.

     Args:
-        model (Model): The model from which samples are drawn.
-        bounds: (Tensor): Bounds of the search space. If the model inputs are
+        model: The model from which samples are drawn.
+        bounds: Bounds of the search space. If the model inputs are
             normalized, the bounds should be normalized as well.
-        num_optima (int): The number of paths to be drawn and optimized.
-        raw_samples (int, optional): The number of candidates randomly sample.
+        num_optima: The number of paths to be drawn and optimized.
+        raw_samples: The number of candidates randomly sampled.
             Defaults to 1024.
-        num_restarts (int, optional): The number of candidates to do gradient-based
+        num_restarts: The number of candidates to do gradient-based
             optimization on. Defaults to 20.
-        maximize: Whether to maximize or minimize the samples.
+        posterior_transform: A ScalarizedPosteriorTransform (may e.g. be used to
+            scalarize multi-output models or negate the objective).
+        objective: An MCAcquisitionObjective, used to negate the objective or
+            otherwise transform sample outputs. Cannot be combined with
+            `posterior_transform`.
+        return_transformed: If True, return the transformed samples.

     Returns:
-        Tuple[Tensor, Tensor]: The optimal input locations and corresponding
-            outputs, x* and f*.
+        The optimal input locations and corresponding outputs, x* and f*.
     """
-    paths = draw_matheron_paths(model, sample_shape=torch.Size([num_optima]))
+    if posterior_transform and not isinstance(
+        posterior_transform, ScalarizedPosteriorTransform
+    ):
+        raise ValueError(
+            "Only the ScalarizedPosteriorTransform is supported for "
+            "get_optimal_samples."
+        )
+    if posterior_transform and objective:
+        raise ValueError(
+            "Only one of `posterior_transform` and `objective` can be specified."
+        )
+
+    if posterior_transform:
+        sample_transform = posterior_transform.evaluate
+    elif objective:
+        sample_transform = objective
+    else:
+        sample_transform = None
+
+    paths = get_matheron_path_model(model=model, sample_shape=torch.Size([num_optima]))
     optimal_inputs, optimal_outputs = optimize_posterior_samples(
-        paths,
+        paths=paths,
         bounds=bounds,
         raw_samples=raw_samples,
         num_restarts=num_restarts,
-        maximize=maximize,
+        sample_transform=sample_transform,
+        return_transformed=return_transformed,
     )
     return optimal_inputs, optimal_outputs
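Since `maximize` is gone, minimization is now expressed through the transform or the objective. A hedged sketch of the two (mutually exclusive) options, with made-up data:

```python
# Hedged sketch: negating instead of maximize=False (made-up data).
import torch

from botorch.acquisition.objective import (
    LinearMCObjective,
    ScalarizedPosteriorTransform,
)
from botorch.acquisition.utils import get_optimal_samples
from botorch.models import SingleTaskGP

model = SingleTaskGP(
    torch.rand(16, 2, dtype=torch.float64),
    torch.rand(16, 1, dtype=torch.float64),
)
bounds = torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.float64)
neg = torch.tensor([-1.0], dtype=torch.float64)

# Option 1: a negating ScalarizedPosteriorTransform.
X_min, f_min = get_optimal_samples(
    model, bounds=bounds, num_optima=4,
    posterior_transform=ScalarizedPosteriorTransform(weights=neg),
)

# Option 2: an MCAcquisitionObjective; passing both raises a ValueError.
X_min2, f_min2 = get_optimal_samples(
    model, bounds=bounds, num_optima=4,
    objective=LinearMCObjective(weights=neg),
)
```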
41 changes: 36 additions & 5 deletions botorch/optim/optimize.py
@@ -1000,6 +1000,8 @@ def optimize_acqf_discrete(
     choices: Tensor,
     max_batch_size: int = 2048,
     unique: bool = True,
+    X_avoid: Tensor | None = None,
+    inequality_constraints: list[tuple[Tensor, Tensor, float]] | None = None,
 ) -> tuple[Tensor, Tensor]:
     r"""Optimize over a discrete set of points using batch evaluation.
@@ -1017,6 +1019,12 @@
            a large training set.
        unique: If True return unique choices, o/w choices may be repeated
            (only relevant if `q > 1`).
+        X_avoid: An `n x d` tensor of candidates that we aren't allowed to pick.
+            These will be removed from the set of choices.
+        inequality_constraints: A list of tuples (indices, coefficients, rhs),
+            with each tuple encoding an inequality constraint of the form
+            `\sum_i (X[indices[i]] * coefficients[i]) >= rhs`.
+            Infeasible points will be removed from the set of choices.

    Returns:
        A two-element tuple containing
@@ -1029,8 +1037,31 @@
            "Discrete optimization is not supported for"
            "one-shot acquisition functions."
        )
-    if choices.numel() == 0:
-        raise InputDataError("`choices` must be non-emtpy.")
+    if X_avoid is not None and unique:
+        choices = _filter_invalid(X=choices, X_avoid=X_avoid)
+    if inequality_constraints is not None:
+        choices = _filter_infeasible(
+            X=choices, inequality_constraints=inequality_constraints
+        )
+    len_choices = len(choices)
+    if len_choices == 0:
+        message = "`choices` must be non-empty."
+        if X_avoid is not None or inequality_constraints is not None:
+            message += (
+                " No feasible points remain after removing `X_avoid` and "
+                "filtering out infeasible points."
+            )
+        raise InputDataError(message)
+    elif len_choices < q and unique:
+        warnings.warn(
+            (
+                f"Requested {q=} candidates from fully discrete search "
+                f"space, but only {len_choices} possible choices remain. "
+            ),
+            OptimizationWarning,
+            stacklevel=2,
+        )
+        q = len_choices
    choices_batched = choices.unsqueeze(-2)
    if q > 1:
        candidate_list, acq_value_list = [], []
@@ -1081,7 +1112,7 @@ def _generate_neighbors(
    discrete_choices: list[Tensor],
    X_avoid: Tensor,
    inequality_constraints: list[tuple[Tensor, Tensor, float]],
-):
+) -> Tensor:
    # generate all 1D perturbations
    npts = sum([len(c) for c in discrete_choices])
    X_loc = x.repeat(npts, 1)
@@ -1097,15 +1128,15 @@

def _filter_infeasible(
    X: Tensor, inequality_constraints: list[tuple[Tensor, Tensor, float]]
-):
+) -> Tensor:
    """Remove all points from `X` that don't satisfy the constraints."""
    is_feasible = torch.ones(X.shape[0], dtype=torch.bool, device=X.device)
    for inds, weights, bound in inequality_constraints:
        is_feasible &= (X[..., inds] * weights).sum(dim=-1) >= bound
    return X[is_feasible]


-def _filter_invalid(X: Tensor, X_avoid: Tensor):
+def _filter_invalid(X: Tensor, X_avoid: Tensor) -> Tensor:
    """Remove all occurrences of `X_avoid` from `X`."""
    return X[~(X == X_avoid.unsqueeze(-2)).all(dim=-1).any(dim=-2)]
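Finally, a hedged end-to-end sketch of the new `optimize_acqf_discrete` knobs (made-up model and choice set; the parameter names come from the diff above). If filtering empties the choice set, the new code raises an `InputDataError`; if fewer than `q` feasible choices remain (with `unique=True`), it warns and shrinks `q` accordingly.

```python
# Sketch: discrete optimization with avoidance and feasibility filtering.
import torch

from botorch.acquisition import ExpectedImprovement
from botorch.models import SingleTaskGP
from botorch.optim.optimize import optimize_acqf_discrete

train_X = torch.rand(8, 2, dtype=torch.float64)
train_Y = train_X.sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)
acqf = ExpectedImprovement(model, best_f=train_Y.max())

choices = torch.rand(32, 2, dtype=torch.float64)
candidate, acq_value = optimize_acqf_discrete(
    acq_function=acqf,
    q=1,
    choices=choices,
    # Never re-propose training points (filtered when unique=True).
    X_avoid=train_X,
    # Keep only choices with x_0 + x_1 >= 0.5; infeasible rows are dropped.
    inequality_constraints=[
        (torch.tensor([0, 1]), torch.ones(2, dtype=torch.float64), 0.5)
    ],
)
```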
(The remaining 10 of the 14 changed files are not shown.)
