From dc5fb3cb62b05391189dfc80f591b7b04895f78c Mon Sep 17 00:00:00 2001
From: Carl Hvarfner
Date: Mon, 21 Oct 2024 19:07:54 -0700
Subject: [PATCH] Remove posterior_transform from info-theoretic acquisition functions (#2590)

Summary:
Pull Request resolved: https://github.com/pytorch/botorch/pull/2590

Differential Revision: D64698976
---
 botorch/acquisition/input_constructors.py   | 15 +++++++--
 botorch/acquisition/joint_entropy_search.py | 35 ++++++++++---------
 .../acquisition/input_constructors.py       | 10 ++++--
 3 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py
index deb8433321..04227216ea 100644
--- a/botorch/acquisition/input_constructors.py
+++ b/botorch/acquisition/input_constructors.py
@@ -1800,18 +1800,29 @@ def construct_inputs_qJES(
     model: Model,
     bounds: list[tuple[float, float]],
     num_optima: int = 64,
-    maximize: bool = True,
     condition_noiseless: bool = True,
+    maximize: bool = True,
     X_pending: Tensor | None = None,
     estimation_type: str = "LB",
     num_samples: int = 64,
 ):
     dtype = model.train_targets.dtype
+    if not maximize:
+        from botorch.acquisition.objective import LinearMCObjective
+
+        # Negate the outputs so that the problem is always a maximization.
+        negating_objective = LinearMCObjective(
+            weights=-torch.ones(model.num_outputs, dtype=dtype)
+        )
+    else:
+        negating_objective = None
+
     optimal_inputs, optimal_outputs = get_optimal_samples(
         model=model,
         bounds=torch.as_tensor(bounds, dtype=dtype).T,
         num_optima=num_optima,
-        maximize=maximize,
+        objective=negating_objective,
+        return_transformed=True,
     )
 
     inputs = {
diff --git a/botorch/acquisition/joint_entropy_search.py b/botorch/acquisition/joint_entropy_search.py
index afd8165f3b..e6c9773467 100644
--- a/botorch/acquisition/joint_entropy_search.py
+++ b/botorch/acquisition/joint_entropy_search.py
@@ -28,7 +28,6 @@
 import torch
 from botorch import settings
 from botorch.acquisition.acquisition import AcquisitionFunction, MCSamplerMixin
-from botorch.acquisition.objective import PosteriorTransform
 from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP
 from botorch.models.gp_regression import MIN_INFERRED_NOISE_LEVEL
 from botorch.models.model import Model
@@ -71,7 +70,6 @@ def __init__(
         optimal_inputs: Tensor,
         optimal_outputs: Tensor,
         condition_noiseless: bool = True,
-        posterior_transform: PosteriorTransform | None = None,
         X_pending: Tensor | None = None,
         estimation_type: str = "LB",
         maximize: bool = True,
@@ -95,10 +93,10 @@ def __init__(
                 estimate is computed: "Lower Bound" ("LB") or "Monte Carlo"
                 ("MC"). Lower Bound is recommended due to the relatively high
                 variance of the MC estimator.
-            maximize: If true, we consider a maximization problem.
             X_pending: A `m x d`-dim Tensor of `m` design points that have
                 been submitted for function evaluation, but have not yet been
                 evaluated.
+            maximize: If True, we consider a maximization problem.
             num_samples: The number of Monte Carlo samples used for the Monte
                 Carlo estimate.
         """
@@ -110,18 +108,17 @@ def __init__(
         # inputs come as num_optima_per_model x (num_models) x d
         # but we want it four-dimensional in the fully Bayesian case,
         # and three-dimensional otherwise.
-        self.optimal_inputs = optimal_inputs.unsqueeze(-2)
-        self.optimal_outputs = optimal_outputs.unsqueeze(-2)
-        self.posterior_transform = posterior_transform
-        self.maximize = maximize
-
         # The optima (can be maxima, can be minima) come in as the largest
         # values if we optimize, or the smallest (likely substantially negative)
         # if we minimize. Inside the acquisition function, however, we always
         # want to consider MAX-values. As such, we need to flip them if
         # we want to minimize.
+        self.maximize = maximize
         if not self.maximize:
             optimal_outputs = -optimal_outputs
+        self.optimal_inputs = optimal_inputs.unsqueeze(-2)
+        self.optimal_outputs = optimal_outputs.unsqueeze(-2)
+
         self.num_samples = optimal_inputs.shape[0]
         self.condition_noiseless = condition_noiseless
         self.initial_model = model
@@ -138,7 +135,8 @@ def __init__(
         with settings.propagate_grads(False):
             # We must do a forward pass once before conditioning.
             self.initial_model.posterior(
-                self.optimal_inputs[:1], observation_noise=False
+                self.optimal_inputs[:1],
+                observation_noise=False,
             )
 
         # This equates to the JES version proposed by Hvarfner et al.
@@ -221,15 +219,15 @@ def _compute_lower_bound_information_gain(
 
         # Compute the mixture mean and variance
         posterior_m = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=True
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=True,
         )
         noiseless_var = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=False
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=False,
         ).variance
 
         mean_m = posterior_m.mean
-        if not self.maximize:
-            mean_m = -mean_m
         variance_m = posterior_m.variance
 
         check_no_nans(variance_m)
@@ -294,7 +292,10 @@ def _compute_monte_carlo_information_gain(
             A `batch_shape`-dim Tensor of acquisition values at the given
                 design points `X`.
         """
-        initial_posterior = self.initial_model.posterior(X, observation_noise=True)
+        initial_posterior = self.initial_model.posterior(
+            X,
+            observation_noise=True,
+        )
 
         batch_shape = X.shape[:-2]
         sample_dim = len(batch_shape)
@@ -311,10 +312,12 @@ def _compute_monte_carlo_information_gain(
 
         # Compute the mixture mean and variance
         posterior_m = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=True
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=True,
         )
         noiseless_var = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=False
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=False,
         ).variance
 
         mean_m = posterior_m.mean
diff --git a/botorch_community/acquisition/input_constructors.py b/botorch_community/acquisition/input_constructors.py
index 9b10721698..2da8fe10bb 100644
--- a/botorch_community/acquisition/input_constructors.py
+++ b/botorch_community/acquisition/input_constructors.py
@@ -17,6 +17,7 @@
 
 import torch
 from botorch.acquisition.input_constructors import acqf_input_constructor
+from botorch.acquisition.objective import LinearMCObjective
 from botorch.acquisition.utils import get_optimal_samples
 from botorch.models.model import Model
 from botorch_community.acquisition.bayesian_active_learning import (
@@ -68,13 +69,18 @@ def construct_inputs_SCoreBO(
 ):
     dtype = model.train_targets.dtype
     # the number of optima is per model
+    negating_objective = (
+        None
+        if maximize
+        else LinearMCObjective(weights=-torch.ones(model.num_outputs, dtype=dtype))
+    )
     optimal_inputs, optimal_outputs = get_optimal_samples(
         model=model,
         bounds=torch.as_tensor(bounds, dtype=dtype).T,
         num_optima=num_optima,
-        maximize=maximize,
+        objective=negating_objective,
+        return_transformed=True,
     )
-
     inputs = {
         "model": model,
         "optimal_inputs": optimal_inputs,
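
For illustration only (not part of the patch): a minimal sketch of how the changed
constructor is exercised after this change. The model, training data, and tensor
shapes below are hypothetical; `construct_inputs_qJES` and `qJointEntropySearch`
are the functions touched above. With `maximize=False`, minimization now flows
through a negating `LinearMCObjective` handed to
`get_optimal_samples(..., return_transformed=True)` instead of a
`posterior_transform`.

    import torch
    from botorch.acquisition.input_constructors import construct_inputs_qJES
    from botorch.acquisition.joint_entropy_search import qJointEntropySearch
    from botorch.models import SingleTaskGP

    # Hypothetical training data on [0, 1]^2; double precision matches
    # model.train_targets.dtype as read by the constructor.
    train_X = torch.rand(10, 2, dtype=torch.float64)
    train_Y = torch.randn(10, 1, dtype=torch.float64)
    model = SingleTaskGP(train_X, train_Y)

    # maximize=False builds a LinearMCObjective with weights of -1, so the
    # sampled optima come back already in max-space (return_transformed=True).
    inputs = construct_inputs_qJES(
        model=model,
        bounds=[(0.0, 1.0), (0.0, 1.0)],
        num_optima=8,
        maximize=False,
    )
    acqf = qJointEntropySearch(**inputs)
    values = acqf(torch.rand(5, 1, 2, dtype=torch.float64))  # 5 t-batches, q=1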