Remove maximize from info-theoretic acquisition functions (pytorch#2590)

Summary:
Removes `maximize` from the info-theoretic acquisition functions that use `get_optimal_samples`. Minimization problems are instead expressed through a `ScalarizedPosteriorTransform` with negative weights, which is now passed through to `get_optimal_samples` and to the conditional posteriors.
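For downstream code, the practical migration is to express minimization through a negatively weighted posterior transform rather than `maximize=False`. A minimal sketch, assuming a plain `SingleTaskGP` on synthetic data and a BoTorch build that includes this change; the data, bounds, and sample counts below are illustrative, not part of the commit:

```python
import torch

from botorch.acquisition.joint_entropy_search import qJointEntropySearch
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.acquisition.utils import get_optimal_samples
from botorch.models import SingleTaskGP

# Illustrative single-output model on synthetic data.
train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = torch.rand(10, 1, dtype=torch.double)
model = SingleTaskGP(train_X, train_Y)

# For a minimization problem, negate the output via the posterior transform
# instead of passing the removed `maximize=False` flag.
minimize = ScalarizedPosteriorTransform(weights=-torch.ones(1, dtype=torch.double))

optimal_inputs, optimal_outputs = get_optimal_samples(
    model=model,
    bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double),  # 2 x d
    num_optima=8,
    posterior_transform=minimize,
    return_transformed=True,
)

acqf = qJointEntropySearch(
    model=model,
    optimal_inputs=optimal_inputs,
    optimal_outputs=optimal_outputs,
    posterior_transform=minimize,
)
values = acqf(torch.rand(5, 1, 2, dtype=torch.double))  # `batch x q x d` candidates
```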



Differential Revision: D64698976
Carl Hvarfner authored and facebook-github-bot committed Oct 22, 2024
1 parent 30e19a8 commit 02f1b1a
Showing 8 changed files with 93 additions and 52 deletions.
5 changes: 5 additions & 0 deletions botorch/acquisition/input_constructors.py
@@ -80,6 +80,7 @@
LearnedObjective,
MCAcquisitionObjective,
PosteriorTransform,
ScalarizedPosteriorTransform,
)
from botorch.acquisition.preference import (
AnalyticExpectedUtilityOfBestOption,
@@ -1801,6 +1802,7 @@ def construct_inputs_qJES(
bounds: list[tuple[float, float]],
num_optima: int = 64,
condition_noiseless: bool = True,
posterior_transform: ScalarizedPosteriorTransform | None = None,
X_pending: Tensor | None = None,
estimation_type: str = "LB",
num_samples: int = 64,
@@ -1810,13 +1812,16 @@
model=model,
bounds=torch.as_tensor(bounds, dtype=dtype).T,
num_optima=num_optima,
posterior_transform=posterior_transform,
return_transformed=True,
)

inputs = {
"model": model,
"optimal_inputs": optimal_inputs,
"optimal_outputs": optimal_outputs,
"condition_noiseless": condition_noiseless,
"posterior_transform": posterior_transform,
"X_pending": X_pending,
"estimation_type": estimation_type,
"num_samples": num_samples,
48 changes: 26 additions & 22 deletions botorch/acquisition/joint_entropy_search.py
@@ -74,7 +74,6 @@ def __init__(
posterior_transform: PosteriorTransform | None = None,
X_pending: Tensor | None = None,
estimation_type: str = "LB",
maximize: bool = True,
num_samples: int = 64,
) -> None:
r"""Joint entropy search acquisition function.
@@ -91,11 +90,11 @@ def __init__(
[Tu2022joint]_. These are sampled identically, so this only controls
the fashion in which the GP is reshaped as a result of conditioning
on the optimum.
posterior_transform: PosteriorTransform to negate or scalarize the output.
estimation_type: A string to determine which entropy
estimate is computed: "Lower Bound" ("LB") or "Monte Carlo" ("MC").
Lower Bound is recommended due to the relatively high variance
of the MC estimator.
maximize: If true, we consider a maximization problem.
X_pending: A `m x d`-dim Tensor of `m` design points that have been
submitted for function evaluation, but have not yet been evaluated.
num_samples: The number of Monte Carlo samples used for the Monte Carlo
@@ -112,16 +111,13 @@ def __init__(
# and three-dimensional otherwise.
self.optimal_inputs = optimal_inputs.unsqueeze(-2)
self.optimal_outputs = optimal_outputs.unsqueeze(-2)
self.optimal_output_values = (
posterior_transform.evaluate(self.optimal_outputs).unsqueeze(-1)
if posterior_transform
else self.optimal_outputs
)
self.posterior_transform = posterior_transform
self.maximize = maximize

# The optima (can be maxima, can be minima) come in as the largest
# values if we optimize, or the smallest (likely substantially negative)
# if we minimize. Inside the acquisition function, however, we always
# want to consider MAX-values. As such, we need to flip them if
# we want to minimize.
if not self.maximize:
optimal_outputs = -optimal_outputs

self.num_samples = optimal_inputs.shape[0]
self.condition_noiseless = condition_noiseless
self.initial_model = model
@@ -203,7 +199,9 @@ def _compute_lower_bound_information_gain(
A `batch_shape`-dim Tensor of acquisition values at the given design
points `X`.
"""
initial_posterior = self.initial_model.posterior(X, observation_noise=True)
initial_posterior = self.initial_model.posterior(
X, observation_noise=True, posterior_transform=self.posterior_transform
)
# need to check if there is a two-dimensional batch shape -
# the sampled optima appear in the dimension right after
batch_shape = X.shape[:-2]
@@ -221,15 +219,17 @@

# Compute the mixture mean and variance
posterior_m = self.conditional_model.posterior(
X.unsqueeze(MCMC_DIM), observation_noise=True
X.unsqueeze(MCMC_DIM),
observation_noise=True,
posterior_transform=self.posterior_transform,
)
noiseless_var = self.conditional_model.posterior(
X.unsqueeze(MCMC_DIM), observation_noise=False
X.unsqueeze(MCMC_DIM),
observation_noise=False,
posterior_transform=self.posterior_transform,
).variance

mean_m = posterior_m.mean
if not self.maximize:
mean_m = -mean_m
variance_m = posterior_m.variance

check_no_nans(variance_m)
Expand All @@ -240,7 +240,7 @@ def _compute_lower_bound_information_gain(
torch.zeros(1, device=X.device, dtype=X.dtype),
torch.ones(1, device=X.device, dtype=X.dtype),
)
normalized_mvs = (self.optimal_outputs - mean_m) / stdv
normalized_mvs = (self.optimal_output_values - mean_m) / stdv
cdf_mvs = normal.cdf(normalized_mvs).clamp_min(CLAMP_LB)
pdf_mvs = torch.exp(normal.log_prob(normalized_mvs))

@@ -294,7 +294,9 @@ def _compute_monte_carlo_information_gain(
A `batch_shape`-dim Tensor of acquisition values at the given design
points `X`.
"""
initial_posterior = self.initial_model.posterior(X, observation_noise=True)
initial_posterior = self.initial_model.posterior(
X, observation_noise=True, posterior_transform=self.posterior_transform
)

batch_shape = X.shape[:-2]
sample_dim = len(batch_shape)
@@ -311,15 +313,17 @@

# Compute the mixture mean and variance
posterior_m = self.conditional_model.posterior(
X.unsqueeze(MCMC_DIM), observation_noise=True
X.unsqueeze(MCMC_DIM),
observation_noise=True,
posterior_transform=self.posterior_transform,
)
noiseless_var = self.conditional_model.posterior(
X.unsqueeze(MCMC_DIM), observation_noise=False
X.unsqueeze(MCMC_DIM),
observation_noise=False,
posterior_transform=self.posterior_transform,
).variance

mean_m = posterior_m.mean
if not self.maximize:
mean_m = -mean_m
variance_m = posterior_m.variance.clamp_min(CLAMP_LB)
conditional_samples, conditional_logprobs = self._compute_monte_carlo_variables(
posterior_m
8 changes: 5 additions & 3 deletions botorch_community/acquisition/input_constructors.py
@@ -17,6 +17,7 @@

import torch
from botorch.acquisition.input_constructors import acqf_input_constructor
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.acquisition.utils import get_optimal_samples
from botorch.models.model import Model
from botorch_community.acquisition.bayesian_active_learning import (
@@ -62,7 +63,7 @@ def construct_inputs_SCoreBO(
model: Model,
bounds: List[Tuple[float, float]],
num_optima: int = 8,
maximize: bool = True,
posterior_transform: Optional[ScalarizedPosteriorTransform] = None,
distance_metric: str = "hellinger",
X_pending: Optional[Tensor] = None,
):
@@ -72,14 +73,15 @@
model=model,
bounds=torch.as_tensor(bounds, dtype=dtype).T,
num_optima=num_optima,
posterior_transform=posterior_transform,
return_transformed=True,
)

inputs = {
"model": model,
"optimal_inputs": optimal_inputs,
"optimal_outputs": optimal_outputs,
"distance_metric": distance_metric,
"maximize": maximize,
"posterior_transform": posterior_transform,
"X_pending": X_pending,
}
return inputs
34 changes: 22 additions & 12 deletions botorch_community/acquisition/scorebo.py
@@ -29,6 +29,7 @@
from botorch.acquisition.bayesian_active_learning import (
FullyBayesianAcquisitionFunction,
)
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.models.fully_bayesian import MCMC_DIM, SaasFullyBayesianSingleTaskGP
from botorch.models.gp_regression import MIN_INFERRED_NOISE_LEVEL
from botorch.models.utils import fantasize as fantasize_flag
@@ -50,7 +51,7 @@ def __init__(
optimal_inputs: Optional[Tensor] = None,
X_pending: Optional[Tensor] = None,
distance_metric: Optional[str] = "hellinger",
maximize: bool = True,
posterior_transform: Optional[ScalarizedPosteriorTransform] = None,
) -> None:
r"""Self-correcting Bayesian optimization [hvarfner2023scorebo]_ acquisition
function. SCoreBO seeks to find accurate hyperparameters during the course
@@ -71,14 +72,15 @@
super().__init__(model=model)
# To enable fully bayesian GP conditioning, we need to unsqueeze
# to get num_optima x num_gps unique GPs
self.maximize = maximize
if not self.maximize:
optimal_outputs = -optimal_outputs

# inputs come as num_optima_per_model x num_models x d
# but we want it four-dimensional to condition one per model.

self.optimal_inputs = optimal_inputs.unsqueeze(-2)
self.optimal_outputs = optimal_outputs.unsqueeze(-2)
self.optimal_output_values = (
posterior_transform.evaluate(self.optimal_outputs).unsqueeze(-1)
if posterior_transform
else self.optimal_outputs
)
self.posterior_transform = posterior_transform

# JES-like version of SCoreBO if optimal inputs are provided
if optimal_inputs is not None:
with warnings.catch_warnings():
@@ -122,13 +124,19 @@ def forward(self, X: Tensor) -> Tensor:
# since we have two MC dims (over models and optima), we need to
# unsqueeze a second dim to accommodate the posterior pass
prev_posterior = self.model.posterior(
X.unsqueeze(MCMC_DIM), observation_noise=True
X.unsqueeze(MCMC_DIM),
observation_noise=True,
posterior_transform=self.posterior_transform,
)
noiseless_posterior = self.conditional_model.posterior(
X.unsqueeze(MCMC_DIM), observation_noise=False
X.unsqueeze(MCMC_DIM),
observation_noise=False,
posterior_transform=self.posterior_transform,
)
posterior = self.conditional_model.posterior(
X.unsqueeze(MCMC_DIM), observation_noise=True
X.unsqueeze(MCMC_DIM),
observation_noise=True,
posterior_transform=self.posterior_transform,
)

marg_mean = prev_posterior.mean.mean(dim=MCMC_DIM, keepdim=True)
@@ -139,7 +147,9 @@ def forward(self, X: Tensor) -> Tensor:
# the mixture variance is squeezed, need it unsqueezed
marg_covar = prev_posterior.mixture_covariance_matrix.unsqueeze(MCMC_DIM)
noiseless_var = noiseless_posterior.variance
normalized_mvs = (self.optimal_outputs - cond_means) / noiseless_var.sqrt()
normalized_mvs = (
self.optimal_output_values - cond_means
) / noiseless_var.sqrt()
cdf_mvs = self.normal.cdf(normalized_mvs).clamp_min(CLAMP_LB)
pdf_mvs = torch.exp(self.normal.log_prob(normalized_mvs))
mean_truncated = cond_means - noiseless_var.sqrt() * pdf_mvs / cdf_mvs
3 changes: 3 additions & 0 deletions test/acquisition/test_input_constructors.py
@@ -1620,6 +1620,9 @@ def test_construct_inputs_jes(self) -> None:
training_data=self.blockX_blockY,
bounds=self.bounds,
num_optima=17,
posterior_transform=ScalarizedPosteriorTransform(
torch.rand(1, dtype=self.blockX_blockY[0].Y.dtype)
),
)

self.assertEqual(self.blockX_blockY[0].X.dtype, kwargs["optimal_inputs"].dtype)
19 changes: 14 additions & 5 deletions test/acquisition/test_joint_entropy_search.py
@@ -8,14 +8,15 @@

import torch
from botorch.acquisition.joint_entropy_search import qJointEntropySearch
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP
from botorch.sampling.normal import SobolQMCNormalSampler
from botorch.utils.test_helpers import get_model
from botorch.utils.testing import BotorchTestCase


class TestQJointEntropySearch(BotorchTestCase):
def test_joint_entropy_search(self):
def test_singleobj_joint_entropy_search(self):
torch.manual_seed(1)
tkwargs = {"device": self.device}
estimation_types = ("LB", "MC")
@@ -26,15 +27,13 @@ def test_joint_entropy_search(self):
estimation_type,
use_model_list,
standardize_model,
maximize,
condition_noiseless,
) in product(
(torch.float, torch.double),
estimation_types,
(False, True),
(False, True),
(False, True),
(False, True),
):
tkwargs["dtype"] = dtype
input_dim = 2
@@ -61,7 +60,6 @@ def test_joint_entropy_search(self):
num_samples=64,
X_pending=X_pending,
condition_noiseless=condition_noiseless,
maximize=maximize,
)
self.assertIsInstance(acq.sampler, SobolQMCNormalSampler)

@@ -77,6 +75,18 @@ def test_joint_entropy_search(self):
# assess shape
self.assertTrue(acq_X.shape == test_Xs[j].shape[:-2])

acq = qJointEntropySearch(
model=model,
optimal_inputs=optimal_inputs,
optimal_outputs=optimal_outputs,
posterior_transform=ScalarizedPosteriorTransform(
weights=-torch.ones(1, **tkwargs)
),
)
self.assertTrue(torch.all(acq.optimal_output_values == -acq.optimal_outputs))
acq_X = acq(test_Xs[j])
self.assertTrue(acq_X.shape == test_Xs[j].shape[:-2])

with self.assertRaises(ValueError):
acq = qJointEntropySearch(
model=model,
@@ -86,7 +96,6 @@ def test_joint_entropy_search(self):
num_samples=64,
X_pending=X_pending,
condition_noiseless=condition_noiseless,
maximize=maximize,
)
acq_X = acq(test_Xs[j])

2 changes: 0 additions & 2 deletions test_community/acquisition/test_input_constructors.py
@@ -86,10 +86,8 @@ def test_construct_inputs_scorebo(self) -> None:
training_data=self.blockX_blockY,
bounds=self.bounds,
num_optima=num_optima,
maximize=False,
distance_metric="kl_divergence",
)
self.assertFalse(kwargs["maximize"])
self.assertEqual(self.blockX_blockY[0].X.dtype, kwargs["optimal_inputs"].dtype)
self.assertEqual(len(kwargs["optimal_inputs"]), num_optima)
self.assertEqual(len(kwargs["optimal_outputs"]), num_optima)