From 02f1b1a1f420fd2891a0ed7af8ee9d2d3f78f271 Mon Sep 17 00:00:00 2001 From: Carl Hvarfner Date: Tue, 22 Oct 2024 16:33:18 -0700 Subject: [PATCH] Remove `maximize` from info-theoretic acquisition functions (#2590) Summary: Removes `maximize` from some info-theoretic acquisition functions (those that use `get_optimal_samples`). Differential Revision: D64698976 --- botorch/acquisition/input_constructors.py | 5 ++ botorch/acquisition/joint_entropy_search.py | 48 ++++++++++--------- .../acquisition/input_constructors.py | 8 ++-- botorch_community/acquisition/scorebo.py | 34 ++++++++----- test/acquisition/test_input_constructors.py | 3 ++ test/acquisition/test_joint_entropy_search.py | 19 ++++++-- .../acquisition/test_input_constructors.py | 2 - test_community/acquisition/test_scorebo.py | 26 ++++++---- 8 files changed, 93 insertions(+), 52 deletions(-) diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py index d98fe13642..9fb1558afb 100644 --- a/botorch/acquisition/input_constructors.py +++ b/botorch/acquisition/input_constructors.py @@ -80,6 +80,7 @@ LearnedObjective, MCAcquisitionObjective, PosteriorTransform, + ScalarizedPosteriorTransform, ) from botorch.acquisition.preference import ( AnalyticExpectedUtilityOfBestOption, @@ -1801,6 +1802,7 @@ def construct_inputs_qJES( bounds: list[tuple[float, float]], num_optima: int = 64, condition_noiseless: bool = True, + posterior_transform: ScalarizedPosteriorTransform | None = None, X_pending: Tensor | None = None, estimation_type: str = "LB", num_samples: int = 64, @@ -1810,6 +1812,8 @@ def construct_inputs_qJES( model=model, bounds=torch.as_tensor(bounds, dtype=dtype).T, num_optima=num_optima, + posterior_transform=posterior_transform, + return_transformed=True, ) inputs = { @@ -1817,6 +1821,7 @@ def construct_inputs_qJES( "optimal_inputs": optimal_inputs, "optimal_outputs": optimal_outputs, "condition_noiseless": condition_noiseless, + "posterior_transform": posterior_transform, "X_pending": X_pending, "estimation_type": estimation_type, "num_samples": num_samples, diff --git a/botorch/acquisition/joint_entropy_search.py b/botorch/acquisition/joint_entropy_search.py index afd8165f3b..eed1828a1c 100644 --- a/botorch/acquisition/joint_entropy_search.py +++ b/botorch/acquisition/joint_entropy_search.py @@ -74,7 +74,6 @@ def __init__( posterior_transform: PosteriorTransform | None = None, X_pending: Tensor | None = None, estimation_type: str = "LB", - maximize: bool = True, num_samples: int = 64, ) -> None: r"""Joint entropy search acquisition function. @@ -91,11 +90,11 @@ def __init__( [Tu2022joint]_. These are sampled identically, so this only controls the fashion in which the GP is reshaped as a result of conditioning on the optimum. + posterior_transform: PosteriorTransform to negate or scalarize the output. estimation_type: estimation_type: A string to determine which entropy estimate is computed: Lower bound" ("LB") or "Monte Carlo" ("MC"). Lower Bound is recommended due to the relatively high variance of the MC estimator. - maximize: If true, we consider a maximization problem. X_pending: A `m x d`-dim Tensor of `m` design points that have been submitted for function evaluation, but have not yet been evaluated. num_samples: The number of Monte Carlo samples used for the Monte Carlo @@ -112,16 +111,13 @@ def __init__( # and three-dimensional otherwise. 
self.optimal_inputs = optimal_inputs.unsqueeze(-2) self.optimal_outputs = optimal_outputs.unsqueeze(-2) + self.optimal_output_values = ( + posterior_transform.evaluate(self.optimal_outputs).unsqueeze(-1) + if posterior_transform + else self.optimal_outputs + ) self.posterior_transform = posterior_transform - self.maximize = maximize - - # The optima (can be maxima, can be minima) come in as the largest - # values if we optimize, or the smallest (likely substantially negative) - # if we minimize. Inside the acquisition function, however, we always - # want to consider MAX-values. As such, we need to flip them if - # we want to minimize. - if not self.maximize: - optimal_outputs = -optimal_outputs + self.num_samples = optimal_inputs.shape[0] self.condition_noiseless = condition_noiseless self.initial_model = model @@ -203,7 +199,9 @@ def _compute_lower_bound_information_gain( A `batch_shape`-dim Tensor of acquisition values at the given design points `X`. """ - initial_posterior = self.initial_model.posterior(X, observation_noise=True) + initial_posterior = self.initial_model.posterior( + X, observation_noise=True, posterior_transform=self.posterior_transform + ) # need to check if there is a two-dimensional batch shape - # the sampled optima appear in the dimension right after batch_shape = X.shape[:-2] @@ -221,15 +219,17 @@ def _compute_lower_bound_information_gain( # Compute the mixture mean and variance posterior_m = self.conditional_model.posterior( - X.unsqueeze(MCMC_DIM), observation_noise=True + X.unsqueeze(MCMC_DIM), + observation_noise=True, + posterior_transform=self.posterior_transform, ) noiseless_var = self.conditional_model.posterior( - X.unsqueeze(MCMC_DIM), observation_noise=False + X.unsqueeze(MCMC_DIM), + observation_noise=False, + posterior_transform=self.posterior_transform, ).variance mean_m = posterior_m.mean - if not self.maximize: - mean_m = -mean_m variance_m = posterior_m.variance check_no_nans(variance_m) @@ -240,7 +240,7 @@ def _compute_lower_bound_information_gain( torch.zeros(1, device=X.device, dtype=X.dtype), torch.ones(1, device=X.device, dtype=X.dtype), ) - normalized_mvs = (self.optimal_outputs - mean_m) / stdv + normalized_mvs = (self.optimal_output_values - mean_m) / stdv cdf_mvs = normal.cdf(normalized_mvs).clamp_min(CLAMP_LB) pdf_mvs = torch.exp(normal.log_prob(normalized_mvs)) @@ -294,7 +294,9 @@ def _compute_monte_carlo_information_gain( A `batch_shape`-dim Tensor of acquisition values at the given design points `X`. 
""" - initial_posterior = self.initial_model.posterior(X, observation_noise=True) + initial_posterior = self.initial_model.posterior( + X, observation_noise=True, posterior_transform=self.posterior_transform + ) batch_shape = X.shape[:-2] sample_dim = len(batch_shape) @@ -311,15 +313,17 @@ def _compute_monte_carlo_information_gain( # Compute the mixture mean and variance posterior_m = self.conditional_model.posterior( - X.unsqueeze(MCMC_DIM), observation_noise=True + X.unsqueeze(MCMC_DIM), + observation_noise=True, + posterior_transform=self.posterior_transform, ) noiseless_var = self.conditional_model.posterior( - X.unsqueeze(MCMC_DIM), observation_noise=False + X.unsqueeze(MCMC_DIM), + observation_noise=False, + posterior_transform=self.posterior_transform, ).variance mean_m = posterior_m.mean - if not self.maximize: - mean_m = -mean_m variance_m = posterior_m.variance.clamp_min(CLAMP_LB) conditional_samples, conditional_logprobs = self._compute_monte_carlo_variables( posterior_m diff --git a/botorch_community/acquisition/input_constructors.py b/botorch_community/acquisition/input_constructors.py index dbb40fc65a..a310483170 100644 --- a/botorch_community/acquisition/input_constructors.py +++ b/botorch_community/acquisition/input_constructors.py @@ -17,6 +17,7 @@ import torch from botorch.acquisition.input_constructors import acqf_input_constructor +from botorch.acquisition.objective import ScalarizedPosteriorTransform from botorch.acquisition.utils import get_optimal_samples from botorch.models.model import Model from botorch_community.acquisition.bayesian_active_learning import ( @@ -62,7 +63,7 @@ def construct_inputs_SCoreBO( model: Model, bounds: List[Tuple[float, float]], num_optima: int = 8, - maximize: bool = True, + posterior_transform: Optional[ScalarizedPosteriorTransform] = None, distance_metric: str = "hellinger", X_pending: Optional[Tensor] = None, ): @@ -72,14 +73,15 @@ def construct_inputs_SCoreBO( model=model, bounds=torch.as_tensor(bounds, dtype=dtype).T, num_optima=num_optima, + posterior_transform=posterior_transform, + return_transformed=True, ) - inputs = { "model": model, "optimal_inputs": optimal_inputs, "optimal_outputs": optimal_outputs, "distance_metric": distance_metric, - "maximize": maximize, + "posterior_transform": posterior_transform, "X_pending": X_pending, } return inputs diff --git a/botorch_community/acquisition/scorebo.py b/botorch_community/acquisition/scorebo.py index a09d4bdef9..b27a0eedc0 100644 --- a/botorch_community/acquisition/scorebo.py +++ b/botorch_community/acquisition/scorebo.py @@ -29,6 +29,7 @@ from botorch.acquisition.bayesian_active_learning import ( FullyBayesianAcquisitionFunction, ) +from botorch.acquisition.objective import ScalarizedPosteriorTransform from botorch.models.fully_bayesian import MCMC_DIM, SaasFullyBayesianSingleTaskGP from botorch.models.gp_regression import MIN_INFERRED_NOISE_LEVEL from botorch.models.utils import fantasize as fantasize_flag @@ -50,7 +51,7 @@ def __init__( optimal_inputs: Optional[Tensor] = None, X_pending: Optional[Tensor] = None, distance_metric: Optional[str] = "hellinger", - maximize: bool = True, + posterior_transform: Optional[ScalarizedPosteriorTransform] = None, ) -> None: r"""Self-correcting Bayesian optimization [hvarfner2023scorebo]_ acquisition function. 
SCoreBO seeks to find accurate hyperparameters during the course @@ -71,14 +72,15 @@ def __init__( super().__init__(model=model) # To enable fully bayesian GP conditioning, we need to unsqueeze # to get num_optima x num_gps unique GPs - self.maximize = maximize - if not self.maximize: - optimal_outputs = -optimal_outputs - - # inputs come as num_optima_per_model x num_models x d - # but we want it four-dimensional to condition one per model. - + self.optimal_inputs = optimal_inputs.unsqueeze(-2) self.optimal_outputs = optimal_outputs.unsqueeze(-2) + self.optimal_output_values = ( + posterior_transform.evaluate(self.optimal_outputs).unsqueeze(-1) + if posterior_transform + else self.optimal_outputs + ) + self.posterior_transform = posterior_transform + # JES-like version of SCoreBO if optimal inputs are provided if optimal_inputs is not None: with warnings.catch_warnings(): @@ -122,13 +124,19 @@ def forward(self, X: Tensor) -> Tensor: # since we have two MC dims (over models and optima), we need to # unsqueeze a second dim to accomodate the posterior pass prev_posterior = self.model.posterior( - X.unsqueeze(MCMC_DIM), observation_noise=True + X.unsqueeze(MCMC_DIM), + observation_noise=True, + posterior_transform=self.posterior_transform, ) noiseless_posterior = self.conditional_model.posterior( - X.unsqueeze(MCMC_DIM), observation_noise=False + X.unsqueeze(MCMC_DIM), + observation_noise=False, + posterior_transform=self.posterior_transform, ) posterior = self.conditional_model.posterior( - X.unsqueeze(MCMC_DIM), observation_noise=True + X.unsqueeze(MCMC_DIM), + observation_noise=True, + posterior_transform=self.posterior_transform, ) marg_mean = prev_posterior.mean.mean(dim=MCMC_DIM, keepdim=True) @@ -139,7 +147,9 @@ def forward(self, X: Tensor) -> Tensor: # the mixture variance is squeezed, need it unsqueezed marg_covar = prev_posterior.mixture_covariance_matrix.unsqueeze(MCMC_DIM) noiseless_var = noiseless_posterior.variance - normalized_mvs = (self.optimal_outputs - cond_means) / noiseless_var.sqrt() + normalized_mvs = ( + self.optimal_output_values - cond_means + ) / noiseless_var.sqrt() cdf_mvs = self.normal.cdf(normalized_mvs).clamp_min(CLAMP_LB) pdf_mvs = torch.exp(self.normal.log_prob(normalized_mvs)) mean_truncated = cond_means - noiseless_var.sqrt() * pdf_mvs / cdf_mvs diff --git a/test/acquisition/test_input_constructors.py b/test/acquisition/test_input_constructors.py index 968da03aab..90dc93bc65 100644 --- a/test/acquisition/test_input_constructors.py +++ b/test/acquisition/test_input_constructors.py @@ -1620,6 +1620,9 @@ def test_construct_inputs_jes(self) -> None: training_data=self.blockX_blockY, bounds=self.bounds, num_optima=17, + posterior_transform=ScalarizedPosteriorTransform( + torch.rand(1, dtype=self.blockX_blockY[0].Y.dtype) + ), ) self.assertEqual(self.blockX_blockY[0].X.dtype, kwargs["optimal_inputs"].dtype) diff --git a/test/acquisition/test_joint_entropy_search.py b/test/acquisition/test_joint_entropy_search.py index 6e8f8a0d98..9509769d20 100644 --- a/test/acquisition/test_joint_entropy_search.py +++ b/test/acquisition/test_joint_entropy_search.py @@ -8,6 +8,7 @@ import torch from botorch.acquisition.joint_entropy_search import qJointEntropySearch +from botorch.acquisition.objective import ScalarizedPosteriorTransform from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP from botorch.sampling.normal import SobolQMCNormalSampler from botorch.utils.test_helpers import get_model @@ -15,7 +16,7 @@ class TestQJointEntropySearch(BotorchTestCase): - 
def test_joint_entropy_search(self): + def test_singleobj_joint_entropy_search(self): torch.manual_seed(1) tkwargs = {"device": self.device} estimation_types = ("LB", "MC") @@ -26,7 +27,6 @@ def test_joint_entropy_search(self): estimation_type, use_model_list, standardize_model, - maximize, condition_noiseless, ) in product( (torch.float, torch.double), @@ -34,7 +34,6 @@ def test_joint_entropy_search(self): (False, True), (False, True), (False, True), - (False, True), ): tkwargs["dtype"] = dtype input_dim = 2 @@ -61,7 +60,6 @@ def test_joint_entropy_search(self): num_samples=64, X_pending=X_pending, condition_noiseless=condition_noiseless, - maximize=maximize, ) self.assertIsInstance(acq.sampler, SobolQMCNormalSampler) @@ -77,6 +75,18 @@ def test_joint_entropy_search(self): # assess shape self.assertTrue(acq_X.shape == test_Xs[j].shape[:-2]) + acq = qJointEntropySearch( + model=model, + optimal_inputs=optimal_inputs, + optimal_outputs=optimal_outputs, + posterior_transform=ScalarizedPosteriorTransform( + weights=-torch.ones(1, **tkwargs) + ), + ) + self.assertTrue(torch.all(acq.optimal_output_values == -acq.optimal_outputs)) + acq_X = acq(test_Xs[j]) + self.assertTrue(acq_X.shape == test_Xs[j].shape[:-2]) + with self.assertRaises(ValueError): acq = qJointEntropySearch( model=model, @@ -86,7 +96,6 @@ def test_joint_entropy_search(self): num_samples=64, X_pending=X_pending, condition_noiseless=condition_noiseless, - maximize=maximize, ) acq_X = acq(test_Xs[j]) diff --git a/test_community/acquisition/test_input_constructors.py b/test_community/acquisition/test_input_constructors.py index f77c4588be..30a62703b3 100644 --- a/test_community/acquisition/test_input_constructors.py +++ b/test_community/acquisition/test_input_constructors.py @@ -86,10 +86,8 @@ def test_construct_inputs_scorebo(self) -> None: training_data=self.blockX_blockY, bounds=self.bounds, num_optima=num_optima, - maximize=False, distance_metric="kl_divergence", ) - self.assertFalse(kwargs["maximize"]) self.assertEqual(self.blockX_blockY[0].X.dtype, kwargs["optimal_inputs"].dtype) self.assertEqual(len(kwargs["optimal_inputs"]), num_optima) self.assertEqual(len(kwargs["optimal_outputs"]), num_optima) diff --git a/test_community/acquisition/test_scorebo.py b/test_community/acquisition/test_scorebo.py index 2c1afeaf5f..135d34512f 100644 --- a/test_community/acquisition/test_scorebo.py +++ b/test_community/acquisition/test_scorebo.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. 
import torch +from botorch.acquisition.objective import ScalarizedPosteriorTransform from botorch.utils.test_helpers import get_fully_bayesian_model from botorch.utils.testing import BotorchTestCase from botorch_community.acquisition.scorebo import qSelfCorrectingBayesianOptimization @@ -21,13 +22,12 @@ def test_q_self_correcting_bayesian_optimization(self): distance_metric, only_maxval, standardize_model, - maximize, ) in [ - (torch.float, "hellinger", False, True, True), - (torch.double, "hellinger", True, False, False), - (torch.float, "kl_divergence", False, True, True), - (torch.double, "kl_divergence", True, False, False), - (torch.double, "kl_divergence", True, True, False), + (torch.float, "hellinger", True, True), + (torch.double, "hellinger", False, False), + (torch.float, "kl_divergence", True, True), + (torch.double, "kl_divergence", False, False), + (torch.double, "kl_divergence", True, False), ]: tkwargs["dtype"] = dtype input_dim = 2 @@ -66,7 +66,6 @@ def test_q_self_correcting_bayesian_optimization(self): optimal_outputs=optimal_outputs, distance_metric=distance_metric, X_pending=X_pending, - maximize=maximize, ) test_Xs = [ @@ -81,6 +80,18 @@ def test_q_self_correcting_bayesian_optimization(self): # assess shape self.assertTrue(acq_X.shape == test_Xs[j].shape[:-2]) + acq = qSelfCorrectingBayesianOptimization( + model=model, + optimal_inputs=optimal_inputs, + optimal_outputs=optimal_outputs, + posterior_transform=ScalarizedPosteriorTransform( + weights=-torch.ones(1, **tkwargs) + ), + ) + self.assertTrue(torch.all(acq.optimal_output_values == -acq.optimal_outputs)) + acq_X = acq(test_Xs[j]) + self.assertTrue(acq_X.shape == test_Xs[j].shape[:-2]) + with self.assertRaises(ValueError): acq = qSelfCorrectingBayesianOptimization( model=model, @@ -88,5 +99,4 @@ def test_q_self_correcting_bayesian_optimization(self): optimal_outputs=optimal_outputs, distance_metric="NOT_A_DISTANCE", X_pending=X_pending, - maximize=maximize, )
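Usage note: with `maximize` removed, callers that previously passed `maximize=False` express minimization through a negatively weighted `ScalarizedPosteriorTransform`, which the acquisition function now uses both when sampling optima and when evaluating posteriors. The sketch below illustrates this for `qJointEntropySearch`; the toy model and data are hypothetical, and it assumes `get_optimal_samples` accepts the `posterior_transform` and `return_transformed` arguments that the updated input constructors pass.

    import torch
    from botorch.acquisition.joint_entropy_search import qJointEntropySearch
    from botorch.acquisition.objective import ScalarizedPosteriorTransform
    from botorch.acquisition.utils import get_optimal_samples
    from botorch.models import SingleTaskGP

    # Hypothetical toy model and data, for illustration only.
    train_X = torch.rand(10, 2, dtype=torch.double)
    train_Y = torch.rand(10, 1, dtype=torch.double)
    model = SingleTaskGP(train_X, train_Y)

    # Minimization: negate the single output instead of passing maximize=False.
    posterior_transform = ScalarizedPosteriorTransform(
        weights=-torch.ones(1, dtype=torch.double)
    )

    # Sample optima in the transformed (negated) space, mirroring the
    # return_transformed=True call in the updated input constructors.
    optimal_inputs, optimal_outputs = get_optimal_samples(
        model=model,
        bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double),
        num_optima=8,
        posterior_transform=posterior_transform,
        return_transformed=True,
    )

    acqf = qJointEntropySearch(
        model=model,
        optimal_inputs=optimal_inputs,
        optimal_outputs=optimal_outputs,
        posterior_transform=posterior_transform,
    )

    candidate_X = torch.rand(5, 1, 2, dtype=torch.double)  # 5 batches, q = 1
    acq_values = acqf(candidate_X)  # shape: (5,)

With a negating transform, `optimal_output_values` equals `-optimal_outputs` inside the acquisition function (as the updated tests assert), so the previous `maximize=False` behavior falls out of the transform rather than a separate flag.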