diff --git a/botorch/acquisition/__init__.py b/botorch/acquisition/__init__.py index 7ff09b30c5..5bd208cd81 100644 --- a/botorch/acquisition/__init__.py +++ b/botorch/acquisition/__init__.py @@ -16,6 +16,8 @@ AnalyticAcquisitionFunction, ConstrainedExpectedImprovement, ExpectedImprovement, + LogExpectedImprovement, + LogNoisyExpectedImprovement, NoisyExpectedImprovement, PosteriorMean, ProbabilityOfImprovement, @@ -32,6 +34,10 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) +from botorch.acquisition.logei import ( + LogImprovementMCAcquisitionFunction, + qLogExpectedImprovement, +) from botorch.acquisition.max_value_entropy_search import ( MaxValueBase, qLowerBoundMaxValueEntropy, @@ -46,6 +52,7 @@ qProbabilityOfImprovement, qSimpleRegret, qUpperConfidenceBound, + SampleReducingMCAcquisitionFunction, ) from botorch.acquisition.multi_step_lookahead import qMultiStepLookahead from botorch.acquisition.objective import ( @@ -71,6 +78,8 @@ "AnalyticExpectedUtilityOfBestOption", "ConstrainedExpectedImprovement", "ExpectedImprovement", + "LogExpectedImprovement", + "LogNoisyExpectedImprovement", "FixedFeatureAcquisitionFunction", "GenericCostAwareUtility", "InverseCostWeightedUtility", @@ -85,6 +94,8 @@ "UpperConfidenceBound", "qAnalyticProbabilityOfImprovement", "qExpectedImprovement", + "LogImprovementMCAcquisitionFunction", + "qLogExpectedImprovement", "qKnowledgeGradient", "MaxValueBase", "qMultiFidelityKnowledgeGradient", @@ -104,6 +115,7 @@ "LearnedObjective", "LinearMCObjective", "MCAcquisitionFunction", + "SampleReducingMCAcquisitionFunction", "MCAcquisitionObjective", "ScalarizedPosteriorTransform", "get_acquisition_function", diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py index 49deb81512..cff41d46e1 100644 --- a/botorch/acquisition/input_constructors.py +++ b/botorch/acquisition/input_constructors.py @@ -47,6 +47,7 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) +from botorch.acquisition.logei import qLogExpectedImprovement from botorch.acquisition.max_value_entropy_search import ( qMaxValueEntropy, qMultiFidelityMaxValueEntropy, @@ -449,7 +450,7 @@ def construct_inputs_qSimpleRegret( ) -@acqf_input_constructor(qExpectedImprovement) +@acqf_input_constructor(qExpectedImprovement, qLogExpectedImprovement) def construct_inputs_qEI( model: Model, training_data: MaybeDict[SupervisedDataset], diff --git a/botorch/acquisition/logei.py b/botorch/acquisition/logei.py new file mode 100644 index 0000000000..d9cf0b7368 --- /dev/null +++ b/botorch/acquisition/logei.py @@ -0,0 +1,261 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +r""" +Batch implementations of the LogEI family of improvements-based acquisition functions. 
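+
+The members of this family return the logarithm of smoothed improvement utilities.
+Working in log space keeps the gradients of the acquisition function from vanishing
+numerically when the expected improvement is small, which otherwise can cause the
+optimization of the acquisition function to stagnate (see the NOTE on the default
+temperature parameters below).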
+""" + + +from __future__ import annotations + +from functools import partial + +from typing import Callable, List, Optional, TypeVar, Union + +import torch +from botorch.acquisition.monte_carlo import SampleReducingMCAcquisitionFunction +from botorch.acquisition.objective import ( + ConstrainedMCObjective, + MCAcquisitionObjective, + PosteriorTransform, +) +from botorch.exceptions.errors import BotorchError +from botorch.models.model import Model +from botorch.sampling.base import MCSampler +from botorch.utils.safe_math import ( + fatmax, + log_fatplus, + log_softplus, + logmeanexp, + smooth_amax, +) +from torch import Tensor + +""" +NOTE: On the default temperature parameters: + +tau_relu: It is generally important to set `tau_relu` to be very small, in particular, +smaller than the expected improvement value. Otherwise, the optimization can stagnate. +By setting `tau_relu=1e-6` by default, stagnation is exceedingly unlikely to occur due +to the smooth ReLU approximation for practical applications of BO. +IDEA: We could consider shrinking `tau_relu` with the progression of the optimization. + +tau_max: This is only relevant for the batch (`q > 1`) case, and `tau_max=1e-2` is +sufficient to get a good approximation to the maximum improvement in the batch of +candidates. If `fatten=False`, the smooth approximation to the maximum can saturate +numerically. It is therefore recommended to use `fatten=True` when optimizing batches +of `q > 1` points. +""" +TAU_RELU = 1e-6 +TAU_MAX = 1e-2 +FloatOrTensor = TypeVar("FloatOrTensor", float, Tensor) + + +class LogImprovementMCAcquisitionFunction(SampleReducingMCAcquisitionFunction): + r""" + Abstract base class for Monte-Carlo-based batch LogEI acquisition functions. + + :meta private: + """ + + _log: bool = True + + def __init__( + self, + model: Model, + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fatten: bool = True, + tau_max: float = TAU_MAX, + ) -> None: + r""" + Args: + model: A fitted model. + sampler: The sampler used to draw base samples. If not given, + a sampler is generated using `get_sampler`. + NOTE: For posteriors that do not support base samples, + a sampler compatible with intended use case must be provided. + See `ForkedRNGSampler` and `StochasticSampler` as examples. + objective: The MCAcquisitionObjective under which the samples are + evaluated. Defaults to `IdentityMCObjective()`. + posterior_transform: A PosteriorTransform (optional). + X_pending: A `batch_shape, m x d`-dim Tensor of `m` design points + that have points that have been submitted for function evaluation + but have not yet been evaluated. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are satisfied if `constraint(samples) < 0`. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. See the docs of + `compute_(log_)constraint_indicator` for more details on this parameter. + fatten: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. 
+ tau_max: Temperature parameter controlling the sharpness of the + approximation to the `max` operator over the `q` candidate points. + """ + if isinstance(objective, ConstrainedMCObjective): + raise BotorchError( + "Log-Improvement should not be used with `ConstrainedMCObjective`." + "Please pass the `constraints` directly to the constructor of the " + "acquisition function." + ) + q_reduction = partial(fatmax if fatten else smooth_amax, tau=tau_max) + super().__init__( + model=model, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + sample_reduction=logmeanexp, + q_reduction=q_reduction, + constraints=constraints, + eta=eta, + fatten=fatten, + ) + self.tau_max = tau_max + + +class qLogExpectedImprovement(LogImprovementMCAcquisitionFunction): + r"""MC-based batch Log Expected Improvement. + + This computes qLogEI by + (1) sampling the joint posterior over q points, + (2) evaluating the smoothed log improvement over the current best for each sample, + (3) smoothly maximizing over q, and + (4) averaging over the samples in log space. + + `qLogEI(X) ~ log(qEI(X)) = log(E(max(max Y - best_f, 0)))`, + + where `Y ~ f(X)`, and `X = (x_1,...,x_q)`. + + Example: + >>> model = SingleTaskGP(train_X, train_Y) + >>> best_f = train_Y.max()[0] + >>> sampler = SobolQMCNormalSampler(1024) + >>> qLogEI = qLogExpectedImprovement(model, best_f, sampler) + >>> qei = qLogEI(test_X) + """ + + def __init__( + self, + model: Model, + best_f: Union[float, Tensor], + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fatten: bool = True, + tau_max: float = TAU_MAX, + tau_relu: float = TAU_RELU, + ) -> None: + r"""q-Log Expected Improvement. + + Args: + model: A fitted model. + best_f: The best objective value observed so far (assumed noiseless). Can be + a `batch_shape`-shaped tensor, which in case of a batched model + specifies potentially different values for each element of the batch. + sampler: The sampler used to draw base samples. See `MCAcquisitionFunction` + more details. + objective: The MCAcquisitionObjective under which the samples are evaluated. + Defaults to `IdentityMCObjective()`. + posterior_transform: A PosteriorTransform (optional). + X_pending: A `m x d`-dim Tensor of `m` design points that have been + submitted for function evaluation but have not yet been evaluated. + Concatenated into `X` upon forward call. Copied and set to have no + gradient. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are satisfied if `constraint(samples) < 0`. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. See the docs of + `compute_(log_)smoothed_constraint_indicator` for details. + fatten: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + tau_max: Temperature parameter controlling the sharpness of the smooth + approximations to max. + tau_relu: Temperature parameter controlling the sharpness of the smooth + approximations to ReLU. 
+ """ + super().__init__( + model=model, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + constraints=constraints, + eta=eta, + tau_max=check_tau(tau_max, name="tau_max"), + fatten=fatten, + ) + self.register_buffer("best_f", torch.as_tensor(best_f)) + self.tau_relu = check_tau(tau_relu, name="tau_relu") + + def _sample_forward(self, obj: Tensor) -> Tensor: + r"""Evaluate qLogExpectedImprovement on the candidate set `X`. + + Args: + obj: `mc_shape x batch_shape x q`-dim Tensor of MC objective values. + + Returns: + A `mc_shape x batch_shape x q`-dim Tensor of expected improvement values. + """ + li = _log_improvement( + Y=obj, + best_f=self.best_f, + tau=self.tau_relu, + fatten=self._fatten, + ) + return li + + +""" +###################################### utils ########################################## +""" + + +def _log_improvement( + Y: Tensor, + best_f: Tensor, + tau: Union[float, Tensor], + fatten: bool, +) -> Tensor: + """Computes the logarithm of the softplus-smoothed improvement, i.e. + `log_softplus(Y - best_f, beta=(1 / tau))`. + Note that softplus is an approximation to the regular ReLU objective whose maximum + pointwise approximation error is linear with respect to tau as tau goes to zero. + + Args: + obj: `mc_samples x batch_shape x q`-dim Tensor of output samples. + best_f: Best previously observed objective value(s), broadcastable with `obj`. + tau: Temperature parameter for smooth approximation of ReLU. + as `tau -> 0`, maximum pointwise approximation error is linear w.r.t. `tau`. + fatten: Toggles the logarithmic / linear asymptotic behavior of the + smooth approximation to ReLU. + + Returns: + A `mc_samples x batch_shape x q`-dim Tensor of improvement values. + """ + log_soft_clamp = log_fatplus if fatten else log_softplus + Z = Y - best_f.to(Y) + return log_soft_clamp(Z, tau=tau) # ~ ((Y - best_f) / Y_std).clamp(0) + + +def check_tau(tau: FloatOrTensor, name: str) -> FloatOrTensor: + """Checks the validity of the tau arguments of the functions below, and returns + `tau` if it is valid.""" + if isinstance(tau, Tensor) and tau.numel() != 1: + raise ValueError(name + f" is not a scalar: {tau.numel() = }.") + if not (tau > 0): + raise ValueError(name + f" is non-positive: {tau = }.") + return tau diff --git a/botorch/acquisition/monte_carlo.py b/botorch/acquisition/monte_carlo.py index 0b07c4d852..fcc7c8d944 100644 --- a/botorch/acquisition/monte_carlo.py +++ b/botorch/acquisition/monte_carlo.py @@ -170,6 +170,8 @@ class SampleReducingMCAcquisitionFunction(MCAcquisitionFunction): forward pass. These problems are circumvented by the design of this class. """ + _log: bool = False # whether the acquisition utilities are in log-space + def __init__( self, model: Model, @@ -181,6 +183,7 @@ def __init__( q_reduction: SampleReductionProtocol = torch.amax, constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, eta: Union[Tensor, float] = 1e-3, + fatten: bool = False, ): r"""Constructor of SampleReducingMCAcquisitionFunction. @@ -216,6 +219,8 @@ def __init__( eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this parameter, see the docs of `compute_smoothed_feasibility_indicator`. + fatten: Wether to apply a fat-tailed smooth approximation to the feasibility + indicator or the canonical sigmoid approximation. 
""" if constraints is not None and isinstance(objective, ConstrainedMCObjective): raise ValueError( @@ -236,6 +241,7 @@ def __init__( self._q_reduction = partial(q_reduction, dim=-1) self._constraints = constraints self._eta = eta + self._fatten = fatten @concatenate_pending_points @t_batch_mode_transform() @@ -300,14 +306,19 @@ def _apply_constraints(self, acqval: Tensor, samples: Tensor) -> Tensor: multiplied by a smoothed constraint indicator per sample. """ if self._constraints is not None: - if (acqval < 0).any(): + if not self._log and (acqval < 0).any(): raise ValueError( "Constraint-weighting requires unconstrained " "acquisition values to be non-negative." ) - acqval = acqval * compute_smoothed_feasibility_indicator( - constraints=self._constraints, samples=samples, eta=self._eta + ind = compute_smoothed_feasibility_indicator( + constraints=self._constraints, + samples=samples, + eta=self._eta, + log=self._log, + fatten=self._fatten, ) + acqval = acqval.add(ind) if self._log else acqval.mul(ind) return acqval diff --git a/botorch/utils/objective.py b/botorch/utils/objective.py index d8034d49eb..c751ea5837 100644 --- a/botorch/utils/objective.py +++ b/botorch/utils/objective.py @@ -13,6 +13,7 @@ from typing import Callable, List, Optional, Union import torch +from botorch.utils.safe_math import log_fatmoid, logexpit from torch import Tensor @@ -120,12 +121,17 @@ def compute_smoothed_feasibility_indicator( constraints: List[Callable[[Tensor], Tensor]], samples: Tensor, eta: Union[Tensor, float], + log: bool = False, + fatten: bool = False, ) -> Tensor: r"""Computes the smoothed feasibility indicator of a list of constraints. Given posterior samples, using a sigmoid to smoothly approximate the feasibility indicator of each individual constraint to ensure differentiability and high - gradient signal. + gradient signal. The `fatten` and `log` options improve the numerical behavior of + the smooth approximation. + + NOTE: *Negative* constraint values are associated with feasibility. Args: constraints: A list of callables, each mapping a Tensor of size `b x q x m` @@ -138,6 +144,8 @@ def compute_smoothed_feasibility_indicator( constraint in constraints. In case of a tensor the length of the tensor must match the number of provided constraints. The i-th constraint is then estimated with the i-th eta value. + log: Toggles the computation of the log-feasibility indicator. + fatten: Toggles the computation of the fat-tailed feasibility indicator. Returns: A `n_samples x b x q`-dim tensor of feasibility indicator values. @@ -148,12 +156,14 @@ def compute_smoothed_feasibility_indicator( raise ValueError( "Number of provided constraints and number of provided etas do not match." ) - is_feasible = torch.ones_like(samples[..., 0]) + if not (eta > 0).all(): + raise ValueError("eta must be positive.") + is_feasible = torch.zeros_like(samples[..., 0]) + log_sigmoid = log_fatmoid if fatten else logexpit for constraint, e in zip(constraints, eta): - w = soft_eval_constraint(constraint(samples), eta=e) - is_feasible = is_feasible.mul(w) # TODO: add log version. + is_feasible = is_feasible + log_sigmoid(-constraint(samples) / e) - return is_feasible + return is_feasible if log else is_feasible.exp() def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: @@ -172,7 +182,7 @@ def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: `value(x) -> 1` as x becomes negative. 
""" if eta <= 0: - raise ValueError("eta must be positive") + raise ValueError("eta must be positive.") return torch.sigmoid(-lhs / eta) diff --git a/botorch/utils/safe_math.py b/botorch/utils/safe_math.py index 83a2155a68..a0c79020cc 100644 --- a/botorch/utils/safe_math.py +++ b/botorch/utils/safe_math.py @@ -20,10 +20,13 @@ from typing import Tuple, Union import torch +from botorch.exceptions import UnsupportedError from botorch.utils.constants import get_constants_like from torch import finfo, Tensor +from torch.nn.functional import softplus _log2 = math.log(2) +_inv_sqrt_3 = math.sqrt(1 / 3) # Unary ops @@ -76,6 +79,23 @@ def log1mexp(x: Tensor) -> Tensor: ) +def log1pexp(x: Tensor) -> Tensor: + """Numerically accurate evaluation of log(1 + exp(x)). + See [Maechler2012accurate]_ for details. + """ + mask = x <= 18 + return torch.where( + mask, + (lambda z: z.exp().log1p())(x.masked_fill(~mask, 0)), + (lambda z: z + (-z).exp())(x.masked_fill(mask, 0)), + ) + + +def logexpit(X: Tensor) -> Tensor: + """Computes the logarithm of the expit (a.k.a. sigmoid) function.""" + return -log1pexp(-X) + + def logdiffexp(log_a: Tensor, log_b: Tensor) -> Tensor: """Computes log(b - a) accurately given log(a) and log(b). Assumes, log_b > log_a, i.e. b > a > 0. @@ -93,7 +113,7 @@ def logdiffexp(log_a: Tensor, log_b: Tensor) -> Tensor: def logmeanexp( X: Tensor, dim: Union[int, Tuple[int, ...]], keepdim: bool = False ) -> Tensor: - """Computes log(mean(exp(X), dim=dim, keepdim=keepdim)). + """Computes `log(mean(exp(X), dim=dim, keepdim=keepdim))`. Args: X: Values of which to compute the logmeanexp. @@ -101,7 +121,139 @@ def logmeanexp( keepdim: If True, keeps the reduced dimensions. Returns: - A Tensor of values corresponding to log(mean(exp(X), dim=dim)). + A Tensor of values corresponding to `log(mean(exp(X), dim=dim))`. """ n = X.shape[dim] if isinstance(dim, int) else math.prod(X.shape[i] for i in dim) return torch.logsumexp(X, dim=dim, keepdim=keepdim) - math.log(n) + + +def log_softplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the softplus function with high numerical accuracy. + + Args: + x: Input tensor, should have single or double precision floats. + tau: Decreasing tau increases the tightness of the + approximation to ReLU. Non-negative and defaults to 1.0. + + Returns: + Tensor corresponding to `log(softplus(x))`. + """ + check_dtype_float32_or_float64(x) + tau = torch.as_tensor(tau, dtype=x.dtype, device=x.device) + # cutoff chosen to achieve accuracy to machine epsilon + upper = 16 if x.dtype == torch.float32 else 32 + lower = -15 if x.dtype == torch.float32 else -35 + mask = x / tau > lower + return torch.where( + mask, + softplus(x.masked_fill(~mask, lower), beta=(1 / tau), threshold=upper).log(), + x / tau + tau.log(), + ) + + +def smooth_amax(X: Tensor, tau: Union[float, Tensor] = 1e-3, dim: int = -1) -> Tensor: + """Computes a smooth approximation to `max(X, dim=dim)`, i.e the maximum value of + `X` over dimension `dim`, using the logarithm of the `l_(1/tau)` norm of `exp(X)`. + Note that when `X = log(U)` is the *logarithm* of an acquisition utility `U`, + + `logsumexp(log(U) / tau) * tau = log(sum(U^(1/tau))^tau) = log(norm(U, ord=(1/tau))` + + Args: + X: A Tensor from which to compute the smoothed amax. + tau: Temperature parameter controlling the smooth approximation + to max operator, becomes tighter as tau goes to 0. Needs to be positive. + + Returns: + A Tensor of smooth approximations to `max(X, dim=dim)`. 
+ """ + # consider normalizing by log_n = math.log(X.shape[dim]) to reduce error + return torch.logsumexp(X / tau, dim=dim) * tau # ~ X.amax(dim=dim) + + +def check_dtype_float32_or_float64(X: Tensor) -> None: + if X.dtype != torch.float32 and X.dtype != torch.float64: + raise UnsupportedError( + f"Only dtypes float32 and float64 are supported, but received {X.dtype}." + ) + + +def log_fatplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the fat-tailed softplus. + + NOTE: Separated out in case the complexity of the `log` implementation increases + in the future. + """ + return fatplus(x, tau=tau).log() + + +def fatplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a fat-tailed approximation to `ReLU(x) = max(x, 0)` by linearly + combining a regular softplus function and the density function of a Cauchy + distribution. The coefficient `alpha` of the Cauchy density is chosen to guarantee + monotonicity and convexity. + + Args: + x: A Tensor on whose values to compute the smoothed function. + + Returns: + A Tensor of values of the fat-tailed softplus. + """ + + def _fatplus(x: Tensor) -> Tensor: + alpha = 1e-1 # guarantees monotonicity and convexity (TODO: ref + Lemma 4) + return softplus(x) + alpha * cauchy(x) + + return tau * _fatplus(x / tau) + + +def fatmax(X: Tensor, dim: int, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a smooth approximation to amax(X, dim=dim) with a fat tail. + + Args: + X: A Tensor from which to compute the smoothed amax. + tau: Temperature parameter controlling the smooth approximation + to max operator, becomes tighter as tau goes to 0. Needs to be positive. + standardize: Toggles the temperature standardization of the smoothed function. + + Returns: + A Tensor of smooth approximations to `max(X, dim=dim)` with a fat tail. + """ + if X.shape[dim] == 1: + return X.squeeze(dim) + + M = X.amax(dim=dim, keepdim=True) + Y = (X - M) / tau # NOTE: this would cause NaNs when X has Infs. + M = M.squeeze(dim) + return M + tau * cauchy(Y).sum(dim=dim).log() # could change to mean + + +def log_fatmoid(X: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the fatmoid. Separated out in case the implementation + of the logarithm becomes more complex in the future to ensure numerical stability. + """ + return fatmoid(X, tau=tau).log() + + +def fatmoid(X: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a twice continuously differentiable approximation to the Heaviside + step function with a fat tail, i.e. `O(1 / x^2)` as `x` goes to -inf. + + Args: + X: A Tensor from which to compute the smoothed step function. + tau: Temperature parameter controlling the smoothness of the approximation. + + Returns: + A tensor of fat-tailed approximations to the Heaviside step function. + """ + X = X / tau + m = _inv_sqrt_3 # this defines the inflection point + return torch.where( + X < 0, + 2 / 3 * cauchy(X - m), + 1 - 2 / 3 * cauchy(X + m), + ) + + +def cauchy(x: Tensor) -> Tensor: + """Computes a Lorentzian, i.e. an un-normalized Cauchy density function.""" + return 1 / (1 + x.square()) diff --git a/sphinx/source/acquisition.rst b/sphinx/source/acquisition.rst index 3aae7e277b..c46c906a89 100644 --- a/sphinx/source/acquisition.rst +++ b/sphinx/source/acquisition.rst @@ -60,6 +60,9 @@ Monte-Carlo Acquisition Functions :members: :exclude-members: MCAcquisitionFunction +.. 
automodule:: botorch.acquisition.logei + :members: + Multi-Objective Analytic Acquisition Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: botorch.acquisition.multi_objective.analytic diff --git a/test/acquisition/test_logei.py b/test/acquisition/test_logei.py new file mode 100644 index 0000000000..672d7acbac --- /dev/null +++ b/test/acquisition/test_logei.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import warnings +from unittest import mock + +import torch +from botorch import settings +from botorch.acquisition import ( + LogImprovementMCAcquisitionFunction, + qLogExpectedImprovement, +) +from botorch.acquisition.input_constructors import ACQF_INPUT_CONSTRUCTOR_REGISTRY +from botorch.acquisition.monte_carlo import qExpectedImprovement +from botorch.acquisition.objective import ( + ConstrainedMCObjective, + IdentityMCObjective, + PosteriorTransform, +) +from botorch.exceptions import BotorchWarning, UnsupportedError +from botorch.exceptions.errors import BotorchError +from botorch.sampling.normal import IIDNormalSampler, SobolQMCNormalSampler +from botorch.utils.testing import BotorchTestCase, MockModel, MockPosterior +from torch import Tensor + + +def infeasible_con(samples: Tensor) -> Tensor: + return torch.ones_like(samples[..., 0]) + + +def feasible_con(samples: Tensor) -> Tensor: + return -torch.ones_like(samples[..., 0]) + + +class DummyLogImprovementAcquisitionFunction(LogImprovementMCAcquisitionFunction): + def _sample_forward(self, X): + pass + + +class DummyNonScalarizingPosteriorTransform(PosteriorTransform): + scalarize = False + + def evaluate(self, Y): + pass # pragma: no cover + + def forward(self, posterior): + pass # pragma: no cover + + +class TestLogImprovementAcquisitionFunction(BotorchTestCase): + def test_abstract_raises(self): + with self.assertRaises(TypeError): + LogImprovementMCAcquisitionFunction() + # raise if model is multi-output, but no outcome transform or objective + # are given + no = "botorch.utils.testing.MockModel.num_outputs" + with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs: + mock_num_outputs.return_value = 2 + mm = MockModel(MockPosterior()) + with self.assertRaises(UnsupportedError): + DummyLogImprovementAcquisitionFunction(model=mm) + # raise if model is multi-output, but outcome transform does not + # scalarize and no objetive is given + with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs: + mock_num_outputs.return_value = 2 + mm = MockModel(MockPosterior()) + ptf = DummyNonScalarizingPosteriorTransform() + with self.assertRaises(UnsupportedError): + DummyLogImprovementAcquisitionFunction( + model=mm, posterior_transform=ptf + ) + + mm = MockModel(MockPosterior()) + objective = ConstrainedMCObjective( + IdentityMCObjective(), + constraints=[lambda samples: torch.zeros_like(samples[..., 0])], + ) + with self.assertRaisesRegex( + BotorchError, + "Log-Improvement should not be used with `ConstrainedMCObjective`.", + ): + DummyLogImprovementAcquisitionFunction(model=mm, objective=objective) + + +class TestQLogExpectedImprovement(BotorchTestCase): + def test_q_log_expected_improvement(self): + self.assertIn(qLogExpectedImprovement, ACQF_INPUT_CONSTRUCTOR_REGISTRY.keys()) + for dtype in (torch.float, torch.double): + tkwargs = {"device": self.device, "dtype": dtype} + # the event shape is `b x q x 
t` = 1 x 1 x 1 + samples = torch.zeros(1, 1, 1, **tkwargs) + mm = MockModel(MockPosterior(samples=samples)) + # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking + X = torch.zeros(1, 1, **tkwargs) + + # basic test + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + self.assertFalse(acqf._fatten) # different default behavior + self.assertTrue(log_acqf._fatten) + # test initialization + for k in ["objective", "sampler"]: + self.assertIn(k, acqf._modules) + self.assertIn(k, log_acqf._modules) + + res = acqf(X).item() + self.assertEqual(res, 0.0) + exp_log_res = log_acqf(X).exp().item() + # Due to the smooth approximation, the value at zero should be close to, but + # not exactly zero, and upper-bounded by the tau hyperparameter. + self.assertTrue(0 < exp_log_res) + self.assertTrue(exp_log_res <= log_acqf.tau_relu) + + # test shifting best_f value downward to see non-zero improvement + best_f = -1 + acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + res, exp_log_res = acqf(X), log_acqf(X).exp() + expected_val = -best_f + + self.assertEqual(res.dtype, dtype) + self.assertEqual(res.device.type, self.device.type) + self.assertEqual(res.item(), expected_val) + # Further away from zero, the value is numerically indistinguishable with + # single precision arithmetic. + self.assertTrue(expected_val <= exp_log_res.item()) + self.assertTrue(exp_log_res.item() <= expected_val + log_acqf.tau_relu) + + # test shifting best_f value upward to see advantage of LogEI + best_f = 1 + acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + res, log_res = acqf(X), log_acqf(X) + exp_log_res = log_res.exp() + expected_val = 0 + self.assertEqual(res.item(), expected_val) + self.assertTrue(expected_val <= exp_log_res.item()) + self.assertTrue(exp_log_res.item() <= expected_val + log_acqf.tau_relu) + # However, the log value is large and negative with non-vanishing gradients + self.assertGreater(-1, log_res.item()) + self.assertGreater(log_res.item(), -100) + + # NOTE: The following tests are adapted from the qEI tests. 
+ # basic test, no resample + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + res = acqf(X) + self.assertTrue(0 < res.exp().item()) + self.assertTrue(res.exp().item() < acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])) + bs = acqf.sampler.base_samples.clone() + res = acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # basic test, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + res = acqf(X) + self.assertTrue(0 < res.exp().item()) + self.assertTrue(res.exp().item() < acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # basic test for X_pending and warning + acqf.set_X_pending() + self.assertIsNone(acqf.X_pending) + acqf.set_X_pending(None) + self.assertIsNone(acqf.X_pending) + acqf.set_X_pending(X) + self.assertEqual(acqf.X_pending, X) + mm._posterior._samples = torch.zeros(1, 2, 1, **tkwargs) + res = acqf(X) + X2 = torch.zeros(1, 1, 1, **tkwargs, requires_grad=True) + with warnings.catch_warnings(record=True) as ws, settings.debug(True): + acqf.set_X_pending(X2) + self.assertEqual(acqf.X_pending, X2) + self.assertEqual( + sum(issubclass(w.category, BotorchWarning) for w in ws), 1 + ) + + # testing with illegal taus + with self.assertRaisesRegex(ValueError, "tau_max is not a scalar:"): + qLogExpectedImprovement( + model=mm, best_f=0, tau_max=torch.tensor([1, 2]) + ) + with self.assertRaisesRegex(ValueError, "tau_relu is non-positive:"): + qLogExpectedImprovement(model=mm, best_f=0, tau_relu=-2) + + def test_q_log_expected_improvement_batch(self): + for dtype in (torch.float, torch.double): + # the event shape is `b x q x t` = 2 x 2 x 1 + samples = torch.zeros(2, 2, 1, device=self.device, dtype=dtype) + samples[0, 0, 0] = 1.0 + mm = MockModel(MockPosterior(samples=samples)) + + # X is a dummy and unused b/c of mocking + X = torch.zeros(2, 2, 1, device=self.device, dtype=dtype) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + # in the batch case, the values get adjusted toward + self.assertEqual(exp_log_res.dtype, dtype) + self.assertEqual(exp_log_res.device.type, self.device.type) + self.assertTrue(1.0 <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertAllClose(exp_log_res[0], torch.ones_like(exp_log_res[0]), ) + + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0 < exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= acqf.tau_relu) + + # test batch model, batched best_f values + sampler = IIDNormalSampler(sample_shape=torch.Size([3])) + acqf = qLogExpectedImprovement( + model=mm, best_f=torch.Tensor([0, 0]), sampler=sampler + ) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + self.assertTrue(1.0 <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0 < exp_log_res[1].item()) + 
self.assertTrue(exp_log_res[1].item() <= acqf.tau_relu) + + # test shifting best_f value + acqf = qLogExpectedImprovement(model=mm, best_f=-1, sampler=sampler) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 2.0) + # TODO: figure out numerically stable tests and principled tolerances + # With q > 1, maximum value can get moved down due to L_q-norm approximation + # of the maximum over the q-batch. + safe_upper_lower_bound = 1.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 2.0 + acqf.tau_relu + acqf.tau_max) + # with no approximations (qEI): self.assertEqual(res[1].item(), 1.0) + self.assertTrue(1.0 <= exp_log_res[1].item()) + # ocurring ~tau_max error when all candidates in a q-batch have the + # acquisition value + self.assertTrue(exp_log_res[1].item() <= 1.0 + acqf.tau_relu + acqf.tau_max) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + # res = acqf(X) # 1-dim batch + exp_log_res = acqf(X).exp() # 1-dim batch + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + exp_log_res = acqf(X.expand(2, 2, 1)).exp() # 2-dim batch + # self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + # the base samples should have the batch dim collapsed + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X.expand(2, 2, 1)) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # test batch mode, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + exp_log_res = acqf(X).exp() + # self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # # TODO: Test different objectives (incl. 
constraints) diff --git a/test/utils/test_objective.py b/test/utils/test_objective.py index c7000b712f..f3ac65d564 100644 --- a/test/utils/test_objective.py +++ b/test/utils/test_objective.py @@ -10,6 +10,7 @@ from botorch.utils.objective import ( compute_feasibility_indicator, compute_smoothed_feasibility_indicator, + soft_eval_constraint, ) from botorch.utils.testing import BotorchTestCase from torch import Tensor @@ -65,7 +66,7 @@ def test_apply_constraints(self): # nonnegative objective, one constraint, eta = 0 samples = torch.randn(1) obj = ones_f(samples) - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, "eta must be positive."): apply_constraints( obj=obj, constraints=[zeros_f], @@ -74,6 +75,11 @@ def test_apply_constraints(self): eta=0.0, ) + # soft_eval_constraint is not in the path of apply_constraints, adding this test + # for coverage. + with self.assertRaisesRegex(ValueError, "eta must be positive."): + soft_eval_constraint(lhs=obj, eta=0.0) + def test_apply_constraints_multi_output(self): # nonnegative objective, one constraint tkwargs = {"device": self.device} diff --git a/test/utils/test_safe_math.py b/test/utils/test_safe_math.py index 3167928f71..32135f6f61 100644 --- a/test/utils/test_safe_math.py +++ b/test/utils/test_safe_math.py @@ -12,15 +12,40 @@ from typing import Callable import torch +from botorch.exceptions import UnsupportedError from botorch.utils import safe_math from botorch.utils.constants import get_constants_like -from botorch.utils.safe_math import logmeanexp +from botorch.utils.objective import compute_smoothed_feasibility_indicator +from botorch.utils.safe_math import ( + cauchy, + fatmax, + fatmoid, + fatplus, + log_fatmoid, + log_fatplus, + log_softplus, + logmeanexp, + smooth_amax, +) from botorch.utils.testing import BotorchTestCase from torch import finfo, Tensor +from torch.nn.functional import softplus INF = float("inf") +def sum_constraint(samples: Tensor) -> Tensor: + """Represents the constraint `samples.sum(dim=-1) > 0`. + + Args: + samples: A `b x q x m`-dim Tensor. + + Returns: + A `b x q`-dim Tensor representing constraint feasibility. 
+ """ + return -samples.sum(dim=-1) + + class UnaryOpTestMixin: op: Callable[[Tensor], Tensor] safe_op: Callable[[Tensor], Tensor] @@ -233,3 +258,143 @@ def test_log_mean_exp(self): logmeanexp(X.log(), dim=(0, -1), keepdim=True).exp(), X.mean(dim=(0, -1), keepdim=True), ) + + +class TestSmoothNonLinearities(BotorchTestCase): + def test_smooth_non_linearities(self): + for dtype in (torch.float, torch.double): + n = 17 + X = torch.randn(n, dtype=dtype, device=self.device) + self.assertAllClose(cauchy(X), 1 / (X.square() + 1)) + + # testing softplus and fatplus + tau = 1e-2 + X = torch.randn(n, dtype=dtype, device=self.device) + fatplus_X = fatplus(X, tau=tau) + self.assertAllClose(fatplus_X, X.clamp(0), atol=tau) + self.assertTrue((fatplus_X > 0).all()) + self.assertAllClose(fatplus_X.log(), log_fatplus(X, tau=tau)) + self.assertAllClose( + softplus(X, beta=1 / tau), log_softplus(X, tau=tau).exp() + ) + + # testing fatplus differentiability + X = torch.randn(n, dtype=dtype, device=self.device) + X.requires_grad = True + log_fatplus(X, tau=tau).sum().backward() + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + # always increasing, could also test convexity (mathematically guaranteed) + self.assertTrue((X.grad > 0).all()) + + X_soft = X.detach().clone() + X_soft.requires_grad = True + log_softplus(X_soft, tau=tau).sum().backward() + + # for positive values away from zero, log_softplus and log_fatplus are close + is_positive = X > 100 * tau # i.e. 1 for tau = 1e-2 + self.assertAllClose(X.grad[is_positive], 1 / X[is_positive], atol=tau) + self.assertAllClose(X_soft.grad[is_positive], 1 / X[is_positive], atol=tau) + + is_negative = X < -100 * tau # i.e. -1 + # the softplus has very large gradients, which can saturate the smooth + # approximation to the maximum over the q-batch. + asym_val = torch.full_like(X_soft.grad[is_negative], 1 / tau) + self.assertAllClose(X_soft.grad[is_negative], asym_val, atol=tau, rtol=tau) + # the fatplus on the other hand has smaller, though non-vanishing gradients. 
+ self.assertTrue((X_soft.grad[is_negative] > X.grad[is_negative]).all()) + + # testing smoothmax and fatmax + d = 3 + X = torch.randn(n, d, dtype=dtype, device=self.device) + fatmax_X = fatmax(X, dim=-1, tau=tau) + true_max = X.amax(dim=-1) + self.assertAllClose(fatmax_X, true_max, atol=tau) + self.assertAllClose(smooth_amax(X, dim=-1, tau=tau), true_max, atol=tau) + + # special case for d = 1 + d = 1 + X = torch.randn(n, d, dtype=dtype, device=self.device) + fatmax_X = fatmax(X, dim=-1, tau=tau) + self.assertAllClose(fatmax_X, X[..., 0]) + + # testing fatmax differentiability + X = torch.randn(n, dtype=dtype, device=self.device) + X.requires_grad = True + fatmax(X, dim=-1, tau=tau).sum().backward() + + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + self.assertTrue(X.grad.min() > 0) + + # testing fatmoid + X = torch.randn(n, dtype=dtype, device=self.device) + fatmoid_X = fatmoid(X, tau=tau) + # output is in [0, 1] + self.assertTrue((fatmoid_X > 0).all()) + self.assertTrue((fatmoid_X < 1).all()) + # skew symmetry + atol = 1e-6 if dtype == torch.float32 else 1e-12 + self.assertAllClose(1 - fatmoid_X, fatmoid(-X, tau=tau), atol=atol) + zero = torch.tensor(0.0, dtype=dtype, device=self.device) + half = torch.tensor(0.5, dtype=dtype, device=self.device) + self.assertAllClose(fatmoid(zero), half, atol=atol) + + self.assertAllClose(fatmoid_X.log(), log_fatmoid(X, tau=tau)) + + is_center = X.abs() < 100 * tau + self.assertAllClose( + fatmoid_X[~is_center], (X[~is_center] > 0).to(fatmoid_X), atol=1e-3 + ) + + # testing differentiability + X.requires_grad = True + log_fatmoid(X, tau=tau).sum().backward() + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + self.assertTrue((X.grad > 0).all()) + + # testing constraint indicator + constraints = [sum_constraint] + b = 3 + q = 4 + m = 5 + samples = torch.randn(b, q, m) + eta = 1e-3 + fatten = True + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fatten=fatten, + ) + self.assertTrue(log_feas_vals.shape == torch.Size([b, q])) + expected_feas_vals = sum_constraint(samples) < 0 + hard_feas_vals = log_feas_vals.exp() > 1 / 2 + self.assertAllClose(hard_feas_vals, expected_feas_vals) + + # with deterministic inputs: + samples = torch.ones(1, 1, m) # sum is greater than 0 + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fatten=fatten, + ) + self.assertTrue((log_feas_vals.exp() > 1 / 2).item()) + + # with deterministic inputs: + samples = -torch.ones(1, 1, m) # sum is smaller than 0 + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fatten=fatten, + ) + self.assertFalse((log_feas_vals.exp() > 1 / 2).item()) + + with self.assertRaisesRegex(UnsupportedError, "Only dtypes"): + log_softplus(torch.randn(2, dtype=torch.float16))
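
Example usage of the new `qLogExpectedImprovement` (a minimal sketch, not taken from the patch: the toy data, bounds, and optimizer settings below are illustrative assumptions):

import torch
from botorch.acquisition.logei import qLogExpectedImprovement
from botorch.fit import fit_gpytorch_mll
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood

# Toy maximization problem on the unit square (illustrative only).
train_X = torch.rand(20, 2, dtype=torch.double)
train_Y = (1 - (train_X - 0.5).norm(dim=-1, keepdim=True)).to(train_X)

model = SingleTaskGP(train_X, train_Y)
fit_gpytorch_mll(ExactMarginalLogLikelihood(model.likelihood, model))

# qLogEI returns log-scale acquisition values; log is monotone, so maximizing
# qLogEI yields the same candidates as maximizing qEI, but with better-behaved
# gradients when the improvement is numerically small.
acqf = qLogExpectedImprovement(model=model, best_f=train_Y.max())
candidates, acq_value = optimize_acqf(
    acq_function=acqf,
    bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double),
    q=2,
    num_restarts=8,
    raw_samples=64,
)

With `q=2`, the defaults `fatten=True` and `tau_max=1e-2` govern the smooth maximum over the q-batch, and `tau_relu=1e-6` governs the smoothed improvement, as discussed in the module-level NOTE of `botorch/acquisition/logei.py`.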