qLogEI (pytorch#1936)

Summary: Pull Request resolved: pytorch#1936 This commit introduces `qLogExpectedImprovement` (`qLogEI`), which computes the logarithm of a smooth approximation to the regular EI utility. As EI is known to suffer from vanishing gradients, especially for challenging, constrained, or high-dimensional problems, using `qLogEI` can lead to significant optimization improvements. Differential Revision: D47439148 fbshipit-source-id: 6c6d23d0d102e91ed4d3607b69724dbc8a80595a
SebastianAment · Jul 14, 2023 · 00e7f65 · 00e7f65
1 parent d333163
commit 00e7f65
Show file tree

Hide file tree

Showing 9 changed files with 904 additions and 11 deletions.
diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py
@@ -47,6 +47,7 @@
     qKnowledgeGradient,
     qMultiFidelityKnowledgeGradient,
 )
+from botorch.acquisition.logei import qLogExpectedImprovement
 from botorch.acquisition.max_value_entropy_search import (
     qMaxValueEntropy,
     qMultiFidelityMaxValueEntropy,
@@ -449,7 +450,7 @@ def construct_inputs_qSimpleRegret(
     )
 
 
-@acqf_input_constructor(qExpectedImprovement)
+@acqf_input_constructor(qExpectedImprovement, qLogExpectedImprovement)
 def construct_inputs_qEI(
     model: Model,
     training_data: MaybeDict[SupervisedDataset],

diff --git a/botorch/acquisition/logei.py b/botorch/acquisition/logei.py
@@ -0,0 +1,248 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+r"""
+Batch implementations of the LogEI family of improvement-based acquisition functions.
+"""
+
+
+from __future__ import annotations
+
+from functools import partial
+
+from typing import Callable, List, Optional, TypeVar, Union
+
+import torch
+from botorch.acquisition.monte_carlo import SampleReducingMCAcquisitionFunction
+from botorch.acquisition.objective import (
+    ConstrainedMCObjective,
+    MCAcquisitionObjective,
+    PosteriorTransform,
+)
+from botorch.exceptions.errors import BotorchError
+from botorch.models.model import Model
+from botorch.sampling.base import MCSampler
+from botorch.utils.safe_math import (
+    fatmax,
+    log_fatplus,
+    log_softplus,
+    logmeanexp,
+    smooth_amax,
+)
+from torch import Tensor
+
+
+TAU_RELU = 1e-6
+TAU_MAX = 1e-2
+FloatOrTensor = TypeVar("FloatOrTensor", float, Tensor)
+
+
+class LogImprovementMCAcquisitionFunction(SampleReducingMCAcquisitionFunction):
+    r"""
+    Abstract base class for Monte-Carlo-based batch LogEI acquisition functions.
+
+    Configures the parent class with a smooth maximum as the `q`-reduction and
+    `logmeanexp` as the sample reduction.
+
+    :meta private:
+    """
+
+    _log: bool = True  # the acquisition utilities are in log-space
+
+    def __init__(
+        self,
+        model: Model,
+        sampler: Optional[MCSampler] = None,
+        objective: Optional[MCAcquisitionObjective] = None,
+        posterior_transform: Optional[PosteriorTransform] = None,
+        X_pending: Optional[Tensor] = None,
+        constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
+        eta: Union[Tensor, float] = 1e-3,
+        fatten: bool = True,
+        tau_max: float = TAU_MAX,
+    ) -> None:
+        r"""Constructor of LogImprovementMCAcquisitionFunction.
+
+        Args:
+            model: A fitted model.
+            sampler: The sampler used to draw base samples. If not given,
+                a sampler is generated using `get_sampler`.
+                NOTE: For posteriors that do not support base samples,
+                a sampler compatible with intended use case must be provided.
+                See `ForkedRNGSampler` and `StochasticSampler` as examples.
+            objective: The MCAcquisitionObjective under which the samples are
+                evaluated. Defaults to `IdentityMCObjective()`.
+            posterior_transform: A PosteriorTransform (optional).
+            X_pending: A `batch_shape, m x d`-dim Tensor of `m` design points that
+                have been submitted for function evaluation but have not yet been
+                evaluated.
+            constraints: A list of constraint callables which map a Tensor of posterior
+                samples of dimension `sample_shape x batch-shape x q x m`-dim to a
+                `sample_shape x batch-shape x q`-dim Tensor. The associated constraints
+                are satisfied if `constraint(samples) < 0`.
+            eta: Temperature parameter(s) governing the smoothness of the sigmoid
+                approximation to the constraint indicators. See the docs of
+                `compute_smoothed_feasibility_indicator` for more details.
+            fatten: If True, uses the fat-tailed smooth approximations (`fatmax` over
+                `q`, fat-tailed feasibility indicator) instead of the canonical ones.
+            tau_max: Temperature parameter controlling the sharpness of the
+                approximation to the `max` operator over the `q` candidate points.
+
+        Raises:
+            BotorchError: If `objective` is a `ConstrainedMCObjective`.
+        """
+        if isinstance(objective, ConstrainedMCObjective):
+            # NOTE: adjacent string literals are concatenated verbatim, so the
+            # separating space has to be part of the first literal.
+            raise BotorchError(
+                "Log-Improvement should not be used with `ConstrainedMCObjective`. "
+                "Please pass the `constraints` directly to the constructor of the "
+                "acquisition function."
+            )
+        # Smooth maximum over the `q` dimension; fat-tailed variant if `fatten`.
+        q_reduction = partial(fatmax if fatten else smooth_amax, tau=tau_max)
+        sample_reduction = logmeanexp
+        super().__init__(
+            model=model,
+            sampler=sampler,
+            objective=objective,
+            posterior_transform=posterior_transform,
+            X_pending=X_pending,
+            sample_reduction=sample_reduction,
+            q_reduction=q_reduction,
+            constraints=constraints,
+            eta=eta,
+            fatten=fatten,
+        )
+        self.tau_max = tau_max
+
+
+class qLogExpectedImprovement(LogImprovementMCAcquisitionFunction):
+    r"""MC-based batch logarithm of the expected smoothed improvement.
+
+    qLogEI is computed in four steps:
+    (1) draw samples from the joint posterior over the q points,
+    (2) evaluate the smoothed log improvement over the current best per sample,
+    (3) take a smooth maximum over q, and
+    (4) average over the samples in log space.
+
+    `qLogEI(X) ~ log(qEI(X)) = log(E(max(max Y - best_f, 0)))`,
+
+    where `Y ~ f(X)`, and `X = (x_1,...,x_q)`.
+
+    Example:
+        >>> model = SingleTaskGP(train_X, train_Y)
+        >>> best_f = train_Y.max()
+        >>> sampler = SobolQMCNormalSampler(1024)
+        >>> qLogEI = qLogExpectedImprovement(model, best_f, sampler)
+        >>> log_qei = qLogEI(test_X)
+    """
+
+    def __init__(
+        self,
+        model: Model,
+        best_f: Union[float, Tensor],
+        sampler: Optional[MCSampler] = None,
+        objective: Optional[MCAcquisitionObjective] = None,
+        posterior_transform: Optional[PosteriorTransform] = None,
+        X_pending: Optional[Tensor] = None,
+        constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
+        eta: Union[Tensor, float] = 1e-3,
+        fatten: bool = True,
+        tau_max: float = TAU_MAX,
+        tau_relu: float = TAU_RELU,
+    ) -> None:
+        r"""q-Log Expected Improvement.
+
+        Args:
+            model: A fitted model.
+            best_f: The best objective value observed so far (assumed noiseless). Can be
+                a `batch_shape`-shaped tensor, which in case of a batched model
+                specifies potentially different values for each element of the batch.
+            sampler: The sampler used to draw base samples. See `MCAcquisitionFunction`
+                for more details.
+            objective: The MCAcquisitionObjective under which the samples are evaluated.
+                Defaults to `IdentityMCObjective()`.
+            posterior_transform: A PosteriorTransform (optional).
+            X_pending: A `m x d`-dim Tensor of `m` design points that have been
+                submitted for function evaluation but have not yet been evaluated.
+                Concatenated into `X` upon forward call. Copied and set to have no
+                gradient.
+            constraints: A list of constraint callables which map a Tensor of posterior
+                samples of dimension `sample_shape x batch-shape x q x m`-dim to a
+                `sample_shape x batch-shape x q`-dim Tensor. The associated constraints
+                are satisfied if `constraint(samples) < 0`.
+            eta: Temperature parameter(s) governing the smoothness of the sigmoid
+                approximation to the constraint indicators. See the docs of
+                `compute_smoothed_feasibility_indicator` for details.
+            fatten: Toggles the logarithmic / linear asymptotic behavior of the smooth
+                approximation to the ReLU.
+            tau_max: Temperature parameter controlling the sharpness of the smooth
+                approximations to max. Must be a positive scalar.
+            tau_relu: Temperature parameter controlling the sharpness of the smooth
+                approximations to ReLU. Must be a positive scalar.
+
+        Raises:
+            ValueError: If `tau_max` or `tau_relu` is not a positive scalar.
+        """
+        # Validate `tau_max` up front, i.e. before the parent constructor runs.
+        validated_tau_max = check_tau(tau_max, name="tau_max")
+        super().__init__(
+            model=model,
+            sampler=sampler,
+            objective=objective,
+            posterior_transform=posterior_transform,
+            X_pending=X_pending,
+            constraints=constraints,
+            eta=eta,
+            fatten=fatten,
+            tau_max=validated_tau_max,
+        )
+        # Buffers can only be registered once the parent constructor has run.
+        best_f_buffer = torch.as_tensor(best_f, dtype=float)
+        self.register_buffer("best_f", best_f_buffer)
+        self.tau_relu = check_tau(tau_relu, name="tau_relu")
+
+    def _sample_forward(self, obj: Tensor) -> Tensor:
+        r"""Compute the smoothed log improvement of each MC objective sample.
+
+        Args:
+            obj: `mc_shape x batch_shape x q`-dim Tensor of MC objective values.
+
+        Returns:
+            A `mc_shape x batch_shape x q`-dim Tensor of log improvement values.
+        """
+        return _log_improvement(
+            Y=obj, best_f=self.best_f, tau=self.tau_relu, fatten=self._fatten
+        )
+
+
+"""
+###################################### utils ##########################################
+"""
+
+
+def _log_improvement(
+    Y: Tensor,
+    best_f: Tensor,
+    tau: Union[float, Tensor],
+    fatten: bool,
+) -> Tensor:
+    """Computes the logarithm of the smoothed improvement `Y - best_f`, i.e.
+    `log_fatplus(Y - best_f, tau=tau)` if `fatten` is True and
+    `log_softplus(Y - best_f, tau=tau)` otherwise.
+    Note that softplus is an approximation to the regular ReLU objective whose maximum
+    pointwise approximation error is linear with respect to tau as tau goes to zero.
+
+    Args:
+        Y: `mc_samples x batch_shape x q`-dim Tensor of output samples.
+        best_f: Best previously observed objective value(s), broadcastable with `Y`.
+        tau: Temperature parameter for smooth approximation of ReLU.
+            as `tau -> 0`, maximum pointwise approximation error is linear w.r.t. `tau`.
+        fatten: Toggles the logarithmic / linear asymptotic behavior of the
+            smooth approximation to ReLU.
+
+    Returns:
+        A `mc_samples x batch_shape x q`-dim Tensor of log improvement values.
+    """
+    improvement = Y - best_f.to(Y)  # `.to(Y)` matches the dtype and device of `Y`
+    if fatten:
+        return log_fatplus(improvement, tau=tau)
+    return log_softplus(improvement, tau=tau)
+
+
+def check_tau(tau: FloatOrTensor, name: str) -> FloatOrTensor:
+    """Validates a temperature parameter.
+
+    Returns `tau` unchanged if it is a positive scalar (a float or a one-element
+    Tensor), and raises a `ValueError` whose message starts with `name` otherwise.
+    """
+    if isinstance(tau, Tensor) and tau.numel() != 1:
+        raise ValueError(name + f" is not a scalar: {tau.numel() = }.")
+    if tau > 0:
+        return tau
+    # Reached for zero, negative, and NaN values alike.
+    raise ValueError(name + f" is non-positive: {tau = }.")
diff --git a/botorch/acquisition/monte_carlo.py b/botorch/acquisition/monte_carlo.py
@@ -170,6 +170,8 @@ class SampleReducingMCAcquisitionFunction(MCAcquisitionFunction):
     forward pass. These problems are circumvented by the design of this class.
     """
 
+    _log: bool = False  # whether the acquisition utilities are in log-space
+
     def __init__(
         self,
         model: Model,
@@ -181,6 +183,7 @@ def __init__(
         q_reduction: SampleReductionProtocol = torch.amax,
         constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
         eta: Union[Tensor, float] = 1e-3,
+        fatten: bool = False,
     ):
         r"""Constructor of SampleReducingMCAcquisitionFunction.
 
@@ -216,6 +219,8 @@ def __init__(
             eta: Temperature parameter(s) governing the smoothness of the sigmoid
                 approximation to the constraint indicators. For more details, on this
                 parameter, see the docs of `compute_smoothed_feasibility_indicator`.
+            fatten: Whether to apply a fat-tailed smooth approximation to the
+                feasibility indicator or the canonical sigmoid approximation.
         """
         if constraints is not None and isinstance(objective, ConstrainedMCObjective):
             raise ValueError(
@@ -236,6 +241,7 @@ def __init__(
         self._q_reduction = partial(q_reduction, dim=-1)
         self._constraints = constraints
         self._eta = eta
+        self._fatten = fatten
 
     @concatenate_pending_points
     @t_batch_mode_transform()
@@ -300,14 +306,19 @@ def _apply_constraints(self, acqval: Tensor, samples: Tensor) -> Tensor:
                 multiplied by a smoothed constraint indicator per sample.
         """
         if self._constraints is not None:
-            if (acqval < 0).any():
+            if not self._log and (acqval < 0).any():
                 raise ValueError(
                     "Constraint-weighting requires unconstrained "
                     "acquisition values to be non-negative."
                 )
-            acqval = acqval * compute_smoothed_feasibility_indicator(
-                constraints=self._constraints, samples=samples, eta=self._eta
+            ind = compute_smoothed_feasibility_indicator(
+                constraints=self._constraints,
+                samples=samples,
+                eta=self._eta,
+                log=self._log,
+                fatten=self._fatten,
             )
+            acqval = acqval.add(ind) if self._log else acqval.mul(ind)
         return acqval
 
 

diff --git a/botorch/utils/objective.py b/botorch/utils/objective.py
@@ -13,6 +13,7 @@
 from typing import Callable, List, Optional, Union
 
 import torch
+from botorch.utils.safe_math import log_fatmoid, logexpit
 from torch import Tensor
 
 
@@ -120,12 +121,17 @@ def compute_smoothed_feasibility_indicator(
     constraints: List[Callable[[Tensor], Tensor]],
     samples: Tensor,
     eta: Union[Tensor, float],
+    log: bool = False,
+    fatten: bool = False,
 ) -> Tensor:
     r"""Computes the smoothed feasibility indicator of a list of constraints.
 
     Given posterior samples, using a sigmoid to smoothly approximate the feasibility
     indicator of each individual constraint to ensure differentiability and high
-    gradient signal.
+    gradient signal. The `fatten` and `log` options improve the numerical behavior of
+    the smooth approximation.
+
+    NOTE: *Negative* constraint values are associated with feasibility.
 
     Args:
         constraints: A list of callables, each mapping a Tensor of size `b x q x m`
@@ -138,6 +144,8 @@ def compute_smoothed_feasibility_indicator(
             constraint in constraints. In case of a tensor the length of the tensor
             must match the number of provided constraints. The i-th constraint is
             then estimated with the i-th eta value.
+        log: Toggles the computation of the log-feasibility indicator.
+        fatten: Toggles the computation of the fat-tailed feasibility indicator.
 
     Returns:
         A `n_samples x b x q`-dim tensor of feasibility indicator values.
@@ -148,12 +156,14 @@ def compute_smoothed_feasibility_indicator(
         raise ValueError(
             "Number of provided constraints and number of provided etas do not match."
         )
-    is_feasible = torch.ones_like(samples[..., 0])
+    if not (eta > 0).all():
+        raise ValueError("eta must be positive.")
+    is_feasible = torch.zeros_like(samples[..., 0])
+    log_sigmoid = log_fatmoid if fatten else logexpit
     for constraint, e in zip(constraints, eta):
-        w = soft_eval_constraint(constraint(samples), eta=e)
-        is_feasible = is_feasible.mul(w)  # TODO: add log version.
+        is_feasible = is_feasible + log_sigmoid(-constraint(samples) / e)
 
-    return is_feasible
+    return is_feasible if log else is_feasible.exp()
 
 
 def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: