diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py
index deb8433321..d98fe13642 100644
--- a/botorch/acquisition/input_constructors.py
+++ b/botorch/acquisition/input_constructors.py
@@ -1800,7 +1800,6 @@ def construct_inputs_qJES(
     model: Model,
     bounds: list[tuple[float, float]],
     num_optima: int = 64,
-    maximize: bool = True,
     condition_noiseless: bool = True,
     X_pending: Tensor | None = None,
     estimation_type: str = "LB",
@@ -1811,7 +1810,6 @@ def construct_inputs_qJES(
         model=model,
         bounds=torch.as_tensor(bounds, dtype=dtype).T,
         num_optima=num_optima,
-        maximize=maximize,
     )
 
     inputs = {
@@ -1819,7 +1817,6 @@ def construct_inputs_qJES(
         "optimal_inputs": optimal_inputs,
         "optimal_outputs": optimal_outputs,
         "condition_noiseless": condition_noiseless,
-        "maximize": maximize,
         "X_pending": X_pending,
         "estimation_type": estimation_type,
         "num_samples": num_samples,
diff --git a/botorch/acquisition/utils.py b/botorch/acquisition/utils.py
index e53bc0a7d0..d486629b76 100644
--- a/botorch/acquisition/utils.py
+++ b/botorch/acquisition/utils.py
@@ -18,6 +18,7 @@
     IdentityMCObjective,
     MCAcquisitionObjective,
     PosteriorTransform,
+    ScalarizedPosteriorTransform,
 )
 from botorch.exceptions.errors import (
     BotorchTensorDimensionError,
@@ -28,10 +29,11 @@
 from botorch.models.model import Model
 from botorch.sampling.base import MCSampler
 from botorch.sampling.get_sampler import get_sampler
-from botorch.sampling.pathwise import draw_matheron_paths
+from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
 from botorch.utils.objective import compute_feasibility_indicator
 from botorch.utils.sampling import optimize_posterior_samples
 from botorch.utils.transforms import is_ensemble, normalize_indices
+from gpytorch.models import GP
 from torch import Tensor
 
 
@@ -486,36 +488,62 @@ def project_to_sample_points(X: Tensor, sample_points: Tensor) -> Tensor:
 
 
 def get_optimal_samples(
-    model: Model,
+    model: GP,
     bounds: Tensor,
     num_optima: int,
     raw_samples: int = 1024,
     num_restarts: int = 20,
-    maximize: bool = True,
+    posterior_transform: ScalarizedPosteriorTransform | None = None,
+    objective: MCAcquisitionObjective | None = None,
+    return_transformed: bool = False,
 ) -> tuple[Tensor, Tensor]:
     """Draws sample paths from the posterior and maximizes the samples using GD.
 
     Args:
-        model (Model): The model from which samples are drawn.
-        bounds: (Tensor): Bounds of the search space. If the model inputs are
+        model: The model from which samples are drawn.
+        bounds: Bounds of the search space. If the model inputs are
             normalized, the bounds should be normalized as well.
-        num_optima (int): The number of paths to be drawn and optimized.
-        raw_samples (int, optional): The number of candidates randomly sample.
+        num_optima: The number of paths to be drawn and optimized.
+        raw_samples: The number of candidates to sample randomly.
             Defaults to 1024.
-        num_restarts (int, optional): The number of candidates to do gradient-based
+        num_restarts: The number of candidates to do gradient-based
             optimization on. Defaults to 20.
-        maximize: Whether to maximize or minimize the samples.
+        posterior_transform: A ScalarizedPosteriorTransform (may e.g. be used to
+            scalarize multi-output models or negate the objective).
+        objective: An MCAcquisitionObjective, used to negate the objective or otherwise
+            transform sample outputs. Cannot be combined with `posterior_transform`.
+        return_transformed: If True, return the transformed samples.
+
     Returns:
-        Tuple[Tensor, Tensor]: The optimal input locations and corresponding
-        outputs, x* and f*.
+        The optimal input locations and corresponding outputs, x* and f*.
     """
-    paths = draw_matheron_paths(model, sample_shape=torch.Size([num_optima]))
+    if posterior_transform and not isinstance(
+        posterior_transform, ScalarizedPosteriorTransform
+    ):
+        raise ValueError(
+            "Only the ScalarizedPosteriorTransform is supported for "
+            "get_optimal_samples."
+        )
+    if posterior_transform and objective:
+        raise ValueError(
+            "Only one of `posterior_transform` and `objective` can be specified."
+        )
+
+    if posterior_transform:
+        sample_transform = posterior_transform.evaluate
+    elif objective:
+        sample_transform = objective
+    else:
+        sample_transform = None
+
+    paths = get_matheron_path_model(model=model, sample_shape=torch.Size([num_optima]))
     optimal_inputs, optimal_outputs = optimize_posterior_samples(
-        paths,
+        paths=paths,
         bounds=bounds,
         raw_samples=raw_samples,
         num_restarts=num_restarts,
-        maximize=maximize,
+        sample_transform=sample_transform,
+        return_transformed=return_transformed,
     )
     return optimal_inputs, optimal_outputs
 
diff --git a/botorch/utils/sampling.py b/botorch/utils/sampling.py
index da4df94849..52fe54fbb2 100644
--- a/botorch/utils/sampling.py
+++ b/botorch/utils/sampling.py
@@ -19,9 +19,9 @@
 
 import warnings
 from abc import ABC, abstractmethod
-from collections.abc import Generator, Iterable
+from collections.abc import Callable, Generator, Iterable
 from contextlib import contextmanager
-from typing import Any, TYPE_CHECKING
+from typing import TYPE_CHECKING
 
 import numpy as np
 import numpy.typing as npt
@@ -37,7 +37,9 @@
 
 
 if TYPE_CHECKING:
-    from botorch.sampling.pathwise.paths import SamplePath  # pragma: no cover
+    from botorch.models.deterministic import (  # pragma: no cover
+        GenericDeterministicModel,
+    )
 
 
 @contextmanager
@@ -989,45 +991,45 @@ def sparse_to_dense_constraints(
 
 
 def optimize_posterior_samples(
-    paths: SamplePath,
+    paths: GenericDeterministicModel,
     bounds: Tensor,
-    candidates: Tensor | None = None,
-    raw_samples: int | None = 1024,
+    raw_samples: int = 1024,
     num_restarts: int = 20,
-    maximize: bool = True,
-    **kwargs: Any,
+    sample_transform: Callable[[Tensor], Tensor] | None = None,
+    return_transformed: bool = False,
 ) -> tuple[Tensor, Tensor]:
-    r"""Cheaply maximizes posterior samples by random querying followed by vanilla
-    gradient descent on the best num_restarts points.
+    r"""Cheaply maximizes posterior samples by random querying followed by
+    gradient-based optimization using SciPy's L-BFGS-B routine.
 
     Args:
         paths: Random Fourier Feature-based sample paths from the GP
         bounds: The bounds on the search space.
-        candidates: A priori good candidates (typically previous design points)
-            which acts as extra initial guesses for the optimization routine.
         raw_samples: The number of samples with which to query the samples initially.
         num_restarts: The number of points selected for gradient-based optimization.
-        maximize: Boolean indicating whether to maimize or minimize
+        sample_transform: A callable transform of the sample outputs (e.g.
+            MCAcquisitionObjective or ScalarizedPosteriorTransform.evaluate) used to
+            negate the objective or otherwise transform the output.
+        return_transformed: A boolean indicating whether to return the transformed
+            or non-transformed samples.
 
     Returns:
         A two-element tuple containing:
        - X_opt: A `num_optima x [batch_size] x d`-dim tensor of optimal inputs x*.
-        - f_opt: A `num_optima x [batch_size] x 1`-dim tensor of optimal outputs f*.
+        - f_opt: A `num_optima x [batch_size] x m`-dim, optionally
+          `num_optima x [batch_size] x 1`-dim, tensor of optimal outputs f*.
     """
-    if maximize:
 
-        def path_func(x):
-            return paths(x)
-
-    else:
+    def path_func(x) -> Tensor:
+        res = paths(x)
+        if sample_transform:
+            res = sample_transform(res)
 
-        def path_func(x):
-            return -paths(x)
+        return res.squeeze(-1)
 
     candidate_set = unnormalize(
-        SobolEngine(dimension=bounds.shape[1], scramble=True).draw(raw_samples), bounds
+        SobolEngine(dimension=bounds.shape[1], scramble=True).draw(n=raw_samples),
+        bounds=bounds,
     )
-
     # queries all samples on all candidates - output shape
     # raw_samples * num_optima * num_models
     candidate_queries = path_func(candidate_set)
@@ -1035,10 +1037,13 @@ def path_func(x):
     X_top_k = candidate_set[argtop_k, :]
 
     # to avoid circular import, the import occurs here
-    from botorch.generation.gen import gen_candidates_torch
+    from botorch.generation.gen import gen_candidates_scipy
 
-    X_top_k, f_top_k = gen_candidates_torch(
-        X_top_k, path_func, lower_bounds=bounds[0], upper_bounds=bounds[1], **kwargs
+    X_top_k, f_top_k = gen_candidates_scipy(
+        X_top_k,
+        path_func,
+        lower_bounds=bounds[0],
+        upper_bounds=bounds[1],
     )
     f_opt, arg_opt = f_top_k.max(dim=-1, keepdim=True)
 
@@ -1046,11 +1051,16 @@ def path_func(x):
     # retrieves the argmax. We flatten, pick out the indices and then reshape to
     # the original batch shapes (so instead of pickig out the argmax of a
     # (3, 7, num_restarts, D)) along the num_restarts dim, we pick it out of a
-    # (21 , num_restarts, D)
+    # (21, num_restarts, D)
     final_shape = candidate_queries.shape[:-1]
     X_opt = X_top_k.reshape(final_shape.numel(), num_restarts, -1)[
         torch.arange(final_shape.numel()), arg_opt.flatten()
     ].reshape(*final_shape, -1)
-    if not maximize:
-        f_opt = -f_opt
+
+    # if we return transformed, we do not need to pass the samples through
+    # paths a second time but rather just return the transformed optimal values
+    if return_transformed:
+        return X_opt, f_opt
+
+    f_opt = paths(X_opt.unsqueeze(-2)).squeeze(-2)
     return X_opt, f_opt
diff --git a/test/acquisition/test_input_constructors.py b/test/acquisition/test_input_constructors.py
index 208637083b..968da03aab 100644
--- a/test/acquisition/test_input_constructors.py
+++ b/test/acquisition/test_input_constructors.py
@@ -1620,10 +1620,8 @@ def test_construct_inputs_jes(self) -> None:
             training_data=self.blockX_blockY,
             bounds=self.bounds,
             num_optima=17,
-            maximize=False,
         )
 
-        self.assertFalse(kwargs["maximize"])
         self.assertEqual(self.blockX_blockY[0].X.dtype, kwargs["optimal_inputs"].dtype)
         self.assertEqual(len(kwargs["optimal_inputs"]), 17)
         self.assertEqual(len(kwargs["optimal_outputs"]), 17)
diff --git a/test/acquisition/test_utils.py b/test/acquisition/test_utils.py
index 61845a387a..7e4718150e 100644
--- a/test/acquisition/test_utils.py
+++ b/test/acquisition/test_utils.py
@@ -10,7 +10,13 @@
 
 import torch
 
-from botorch.acquisition.objective import GenericMCObjective, LearnedObjective
+from botorch.acquisition.objective import (
+    ExpectationPosteriorTransform,
+    GenericMCObjective,
+    LearnedObjective,
+    LinearMCObjective,
+    ScalarizedPosteriorTransform,
+)
 from botorch.acquisition.utils import (
     compute_best_feasible_objective,
     expand_trace_observations,
@@ -412,32 +418,108 @@ class TestGetOptimalSamples(BotorchTestCase):
     def test_get_optimal_samples(self):
         dims = 3
         dtype = torch.float64
-        for_testing_speed_kwargs = {"raw_samples": 50, "num_restarts": 3}
+        for_testing_speed_kwargs = {"raw_samples": 20, "num_restarts": 2}
         num_optima = 7
         batch_shape = (3,)
 
         bounds = torch.tensor([[0, 1]] * dims, dtype=dtype).T
         X = torch.rand(*batch_shape, 4, dims, dtype=dtype)
-        Y = torch.sin(X).sum(dim=-1, keepdim=True).to(dtype)
-        model = SingleTaskGP(X, Y)
-        X_opt, f_opt = get_optimal_samples(
-            model, bounds, num_optima=num_optima, **for_testing_speed_kwargs
+        Y = torch.sin(2 * 3.1415 * X).sum(dim=-1, keepdim=True).to(dtype)
+        model = SingleTaskGP(train_X=X, train_Y=Y)
+        posterior_transform = ScalarizedPosteriorTransform(
+            weights=torch.ones(1, dtype=dtype)
         )
-        X_opt, f_opt_min = get_optimal_samples(
-            model,
-            bounds,
-            num_optima=num_optima,
-            maximize=False,
-            **for_testing_speed_kwargs,
+        posterior_transform_neg = ScalarizedPosteriorTransform(
+            weights=-torch.ones(1, dtype=dtype)
         )
-
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            X_opt_def, f_opt_def = get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                **for_testing_speed_kwargs,
+            )
         correct_X_shape = (num_optima,) + batch_shape + (dims,)
         correct_f_shape = (num_optima,) + batch_shape + (1,)
-        self.assertEqual(X_opt.shape, correct_X_shape)
-        self.assertEqual(f_opt.shape, correct_f_shape)
-        # asserting that the solutions found by minimization the samples are smaller
-        # than those found by maximization
-        self.assertTrue(torch.all(f_opt_min < f_opt))
+        self.assertEqual(X_opt_def.shape, correct_X_shape)
+        self.assertEqual(f_opt_def.shape, correct_f_shape)
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            X_opt_ps, f_opt_ps = get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                posterior_transform=posterior_transform,
+                **for_testing_speed_kwargs,
+            )
+        self.assertAllClose(X_opt_def, X_opt_ps)
+
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            X_opt_ps_neg, f_opt_ps_neg = get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                posterior_transform=posterior_transform_neg,
+                **for_testing_speed_kwargs,
+            )
+        # maxima larger than minima when the seed is fixed
+        self.assertTrue(torch.all(f_opt_ps_neg < f_opt_ps))
+
+        obj = LinearMCObjective(weights=-torch.ones(1, dtype=dtype))
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            X_opt_obj_neg, f_opt_obj_neg = get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                objective=obj,
+                **for_testing_speed_kwargs,
+            )
+        # check that the minimum is the same for negative objective and
+        # negative posterior transform
+        self.assertAllClose(X_opt_ps_neg, X_opt_obj_neg)
+
+        obj = LinearMCObjective(weights=-torch.ones(1, dtype=dtype))
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            _, f_opt_obj_pos = get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                objective=obj,
+                return_transformed=True,
+                **for_testing_speed_kwargs,
+            )
+        # check that the transformed return value is the negation of the
+        # non-transformed return value
+        self.assertAllClose(f_opt_obj_pos, -f_opt_obj_neg)
+
+        with self.assertRaisesRegex(
+            ValueError,
+            "Only the ScalarizedPosteriorTransform is supported for "
+            "get_optimal_samples.",
+        ):
+            get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                posterior_transform=ExpectationPosteriorTransform(n_w=5),
+                **for_testing_speed_kwargs,
+            )
+        with self.assertRaisesRegex(
+            ValueError,
+            "Only one of `posterior_transform` and `objective` can be specified.",
+        ):
+            get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                posterior_transform=posterior_transform,
+                objective=obj,
+                **for_testing_speed_kwargs,
+            )
 
 
 class TestPreferenceUtils(BotorchTestCase):
diff --git a/test/utils/test_sampling.py b/test/utils/test_sampling.py
index 78418d2879..84e4f54a28 100644
--- a/test/utils/test_sampling.py
+++ b/test/utils/test_sampling.py
@@ -14,10 +14,14 @@
 
 import numpy as np
 import torch
+from botorch.acquisition.objective import (
+    LinearMCObjective,
+    ScalarizedPosteriorTransform,
+)
 from botorch.exceptions.errors import BotorchError
 from botorch.exceptions.warnings import UserInputWarning
 from botorch.models.gp_regression import SingleTaskGP
-from botorch.sampling.pathwise import draw_matheron_paths
+from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
 from botorch.utils.sampling import (
     _convert_bounds_to_inequality_constraints,
     batched_multinomial,
@@ -552,23 +556,35 @@ def test_optimize_posterior_samples(self):
         torch.manual_seed(1)
         dims = 2
         dtype = torch.float64
-        eps = 1e-6
-        for_testing_speed_kwargs = {"raw_samples": 512, "num_restarts": 10}
+        eps = 1e-4
+        for_testing_speed_kwargs = {"raw_samples": 128, "num_restarts": 4}
         nums_optima = (1, 7)
-        batch_shapes = ((), (3,), (5, 2))
-        for num_optima, batch_shape in itertools.product(nums_optima, batch_shapes):
+        batch_shapes = ((), (2,), (3, 2))
+        posterior_transforms = (
+            None,
+            ScalarizedPosteriorTransform(weights=-torch.ones(1, dtype=dtype)),
+        )
+        for num_optima, batch_shape, posterior_transform in itertools.product(
+            nums_optima, batch_shapes, posterior_transforms
+        ):
             bounds = torch.tensor([[0, 1]] * dims, dtype=dtype).T
-            X = torch.rand(*batch_shape, 13, dims, dtype=dtype)
+            X = torch.rand(*batch_shape, 4, dims, dtype=dtype)
             Y = torch.pow(X - 0.5, 2).sum(dim=-1, keepdim=True)
 
             # having a noiseless model all but guarantees that the found optima
             # will be better than the observations
             model = SingleTaskGP(X, Y, torch.full_like(Y, eps))
-            paths = draw_matheron_paths(
+            model.covar_module.lengthscale = 0.5
+            paths = get_matheron_path_model(
                 model=model, sample_shape=torch.Size([num_optima])
             )
             X_opt, f_opt = optimize_posterior_samples(
-                paths, bounds, **for_testing_speed_kwargs
+                paths=paths,
+                bounds=bounds,
+                sample_transform=(
+                    posterior_transform.evaluate if posterior_transform else None
+                ),
+                **for_testing_speed_kwargs,
             )
 
             correct_X_shape = (num_optima,) + batch_shape + (dims,)
@@ -581,4 +597,72 @@ def test_optimize_posterior_samples(self):
 
             # Check that the all found optima are larger than the observations
             # This is not 100% deterministic, but just about.
-            self.assertTrue(torch.all(f_opt > Y.max(dim=-2).values))
+            Y_queries = paths(X)
+            # this is when we negate, so the values should be smaller
+            if posterior_transform:
+                self.assertTrue(torch.all(f_opt < Y_queries.min(dim=-2).values))
+
+            # otherwise, larger
+            else:
+                self.assertTrue(torch.all(f_opt > Y_queries.max(dim=-2).values))
+
+        obj = LinearMCObjective(weights=-torch.ones(1, dtype=dtype))
+        X_opt, f_opt = optimize_posterior_samples(
+            paths=paths,
+            bounds=bounds,
+            sample_transform=obj,
+            **for_testing_speed_kwargs,
+        )
+        self.assertTrue(torch.all(f_opt < Y_queries.max(dim=-2).values))
+
+    def test_optimize_posterior_samples_multi_objective(self):
+        # Fix the random seed to prevent flaky failures.
+        torch.manual_seed(1)
+        dims = 2
+        dtype = torch.float64
+        eps = 1e-4
+        for_testing_speed_kwargs = {"raw_samples": 128, "num_restarts": 4}
+        num_optima = 5
+        batch_shape = (3,)
+
+        # test that multi-output models are supported if there is an appropriate
+        # scalarization
+        bounds = torch.tensor([[0, 1]] * dims, dtype=dtype).T
+        X = torch.rand(*batch_shape, 4, dims, dtype=dtype)
+        Y1 = torch.pow(X - 0.5, 2).sum(dim=-1, keepdim=True)
+        Y2 = torch.cos(X * 3).sum(dim=-1, keepdim=True)
+        Y = torch.cat([Y1, Y2], dim=-1)
+        # having a noiseless model all but guarantees that the found optima
+        # will be better than the observations
+        model = SingleTaskGP(X, Y, torch.full_like(Y, eps))
+        model.covar_module.lengthscale = 0.5
+        posterior_transform = ScalarizedPosteriorTransform(
+            weights=torch.ones(2, dtype=dtype)
+        )
+        paths = get_matheron_path_model(
+            model=model,
+            sample_shape=torch.Size([num_optima]),
+        )
+        X_opt, f_opt = optimize_posterior_samples(
+            paths=paths,
+            bounds=bounds,
+            sample_transform=posterior_transform.evaluate,
+            **for_testing_speed_kwargs,
+        )
+
+        correct_X_shape = (num_optima,) + batch_shape + (dims,)
+        correct_f_shape = (num_optima,) + batch_shape + (2,)
+        self.assertEqual(X_opt.shape, correct_X_shape)
+        self.assertEqual(f_opt.shape, correct_f_shape)
+        self.assertTrue(torch.all(X_opt >= bounds[0]))
+        self.assertTrue(torch.all(X_opt <= bounds[1]))
+
+        X_opt, f_opt = optimize_posterior_samples(
+            paths=paths,
+            bounds=bounds,
+            sample_transform=posterior_transform.evaluate,
+            return_transformed=True,
+            **for_testing_speed_kwargs,
+        )
+        correct_f_shape = (num_optima,) + batch_shape + (1,)
+        self.assertEqual(f_opt.shape, correct_f_shape)
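
Usage sketch (illustrative, not part of the patch): with the `maximize` flag removed, minimization is now expressed by negating the output through a `ScalarizedPosteriorTransform` (or a `LinearMCObjective`), as the updated tests do. The toy model, data, and variable names below are hypothetical placeholders; only the `get_optimal_samples` signature, `return_transformed` semantics, and the transform classes come from this diff.

import torch
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.acquisition.utils import get_optimal_samples
from botorch.models import SingleTaskGP

# Hypothetical toy data: a single-output GP on the unit cube.
train_X = torch.rand(20, 3, dtype=torch.float64)
train_Y = torch.sin(train_X).sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X=train_X, train_Y=train_Y)
bounds = torch.tensor([[0.0] * 3, [1.0] * 3], dtype=torch.float64)

# Maximization (the default, equivalent to the old maximize=True).
X_max, f_max = get_optimal_samples(model=model, bounds=bounds, num_optima=8)

# Minimization: negate the output instead of passing maximize=False.
neg = ScalarizedPosteriorTransform(weights=-torch.ones(1, dtype=torch.float64))
X_min, f_min = get_optimal_samples(
    model=model, bounds=bounds, num_optima=8, posterior_transform=neg
)
# f_min holds untransformed model outputs at the minimizers; pass
# return_transformed=True to get the negated (transformed) values instead.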