diff --git a/botorch/acquisition/__init__.py b/botorch/acquisition/__init__.py index 7ff09b30c5..5bd208cd81 100644 --- a/botorch/acquisition/__init__.py +++ b/botorch/acquisition/__init__.py @@ -16,6 +16,8 @@ AnalyticAcquisitionFunction, ConstrainedExpectedImprovement, ExpectedImprovement, + LogExpectedImprovement, + LogNoisyExpectedImprovement, NoisyExpectedImprovement, PosteriorMean, ProbabilityOfImprovement, @@ -32,6 +34,10 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) +from botorch.acquisition.logei import ( + LogImprovementMCAcquisitionFunction, + qLogExpectedImprovement, +) from botorch.acquisition.max_value_entropy_search import ( MaxValueBase, qLowerBoundMaxValueEntropy, @@ -46,6 +52,7 @@ qProbabilityOfImprovement, qSimpleRegret, qUpperConfidenceBound, + SampleReducingMCAcquisitionFunction, ) from botorch.acquisition.multi_step_lookahead import qMultiStepLookahead from botorch.acquisition.objective import ( @@ -71,6 +78,8 @@ "AnalyticExpectedUtilityOfBestOption", "ConstrainedExpectedImprovement", "ExpectedImprovement", + "LogExpectedImprovement", + "LogNoisyExpectedImprovement", "FixedFeatureAcquisitionFunction", "GenericCostAwareUtility", "InverseCostWeightedUtility", @@ -85,6 +94,8 @@ "UpperConfidenceBound", "qAnalyticProbabilityOfImprovement", "qExpectedImprovement", + "LogImprovementMCAcquisitionFunction", + "qLogExpectedImprovement", "qKnowledgeGradient", "MaxValueBase", "qMultiFidelityKnowledgeGradient", @@ -104,6 +115,7 @@ "LearnedObjective", "LinearMCObjective", "MCAcquisitionFunction", + "SampleReducingMCAcquisitionFunction", "MCAcquisitionObjective", "ScalarizedPosteriorTransform", "get_acquisition_function", diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py index 49deb81512..cff41d46e1 100644 --- a/botorch/acquisition/input_constructors.py +++ b/botorch/acquisition/input_constructors.py @@ -47,6 +47,7 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) +from botorch.acquisition.logei import qLogExpectedImprovement from botorch.acquisition.max_value_entropy_search import ( qMaxValueEntropy, qMultiFidelityMaxValueEntropy, @@ -449,7 +450,7 @@ def construct_inputs_qSimpleRegret( ) -@acqf_input_constructor(qExpectedImprovement) +@acqf_input_constructor(qExpectedImprovement, qLogExpectedImprovement) def construct_inputs_qEI( model: Model, training_data: MaybeDict[SupervisedDataset], diff --git a/botorch/acquisition/logei.py b/botorch/acquisition/logei.py new file mode 100644 index 0000000000..d9cf0b7368 --- /dev/null +++ b/botorch/acquisition/logei.py @@ -0,0 +1,261 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +r""" +Batch implementations of the LogEI family of improvements-based acquisition functions. 
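+
+The members of this family return the logarithm of smoothed improvement utilities.
+Working in log space keeps the gradients of the acquisition function from vanishing
+numerically when the expected improvement is small, which otherwise can cause the
+optimization of the acquisition function to stagnate (see the NOTE on the default
+temperature parameters below).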
+""" + + +from __future__ import annotations + +from functools import partial + +from typing import Callable, List, Optional, TypeVar, Union + +import torch +from botorch.acquisition.monte_carlo import SampleReducingMCAcquisitionFunction +from botorch.acquisition.objective import ( + ConstrainedMCObjective, + MCAcquisitionObjective, + PosteriorTransform, +) +from botorch.exceptions.errors import BotorchError +from botorch.models.model import Model +from botorch.sampling.base import MCSampler +from botorch.utils.safe_math import ( + fatmax, + log_fatplus, + log_softplus, + logmeanexp, + smooth_amax, +) +from torch import Tensor + +""" +NOTE: On the default temperature parameters: + +tau_relu: It is generally important to set `tau_relu` to be very small, in particular, +smaller than the expected improvement value. Otherwise, the optimization can stagnate. +By setting `tau_relu=1e-6` by default, stagnation is exceedingly unlikely to occur due +to the smooth ReLU approximation for practical applications of BO. +IDEA: We could consider shrinking `tau_relu` with the progression of the optimization. + +tau_max: This is only relevant for the batch (`q > 1`) case, and `tau_max=1e-2` is +sufficient to get a good approximation to the maximum improvement in the batch of +candidates. If `fatten=False`, the smooth approximation to the maximum can saturate +numerically. It is therefore recommended to use `fatten=True` when optimizing batches +of `q > 1` points. +""" +TAU_RELU = 1e-6 +TAU_MAX = 1e-2 +FloatOrTensor = TypeVar("FloatOrTensor", float, Tensor) + + +class LogImprovementMCAcquisitionFunction(SampleReducingMCAcquisitionFunction): + r""" + Abstract base class for Monte-Carlo-based batch LogEI acquisition functions. + + :meta private: + """ + + _log: bool = True + + def __init__( + self, + model: Model, + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fatten: bool = True, + tau_max: float = TAU_MAX, + ) -> None: + r""" + Args: + model: A fitted model. + sampler: The sampler used to draw base samples. If not given, + a sampler is generated using `get_sampler`. + NOTE: For posteriors that do not support base samples, + a sampler compatible with intended use case must be provided. + See `ForkedRNGSampler` and `StochasticSampler` as examples. + objective: The MCAcquisitionObjective under which the samples are + evaluated. Defaults to `IdentityMCObjective()`. + posterior_transform: A PosteriorTransform (optional). + X_pending: A `batch_shape, m x d`-dim Tensor of `m` design points + that have points that have been submitted for function evaluation + but have not yet been evaluated. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are satisfied if `constraint(samples) < 0`. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. See the docs of + `compute_(log_)constraint_indicator` for more details on this parameter. + fatten: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. 
+ tau_max: Temperature parameter controlling the sharpness of the + approximation to the `max` operator over the `q` candidate points. + """ + if isinstance(objective, ConstrainedMCObjective): + raise BotorchError( + "Log-Improvement should not be used with `ConstrainedMCObjective`." + "Please pass the `constraints` directly to the constructor of the " + "acquisition function." + ) + q_reduction = partial(fatmax if fatten else smooth_amax, tau=tau_max) + super().__init__( + model=model, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + sample_reduction=logmeanexp, + q_reduction=q_reduction, + constraints=constraints, + eta=eta, + fatten=fatten, + ) + self.tau_max = tau_max + + +class qLogExpectedImprovement(LogImprovementMCAcquisitionFunction): + r"""MC-based batch Log Expected Improvement. + + This computes qLogEI by + (1) sampling the joint posterior over q points, + (2) evaluating the smoothed log improvement over the current best for each sample, + (3) smoothly maximizing over q, and + (4) averaging over the samples in log space. + + `qLogEI(X) ~ log(qEI(X)) = log(E(max(max Y - best_f, 0)))`, + + where `Y ~ f(X)`, and `X = (x_1,...,x_q)`. + + Example: + >>> model = SingleTaskGP(train_X, train_Y) + >>> best_f = train_Y.max()[0] + >>> sampler = SobolQMCNormalSampler(1024) + >>> qLogEI = qLogExpectedImprovement(model, best_f, sampler) + >>> qei = qLogEI(test_X) + """ + + def __init__( + self, + model: Model, + best_f: Union[float, Tensor], + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fatten: bool = True, + tau_max: float = TAU_MAX, + tau_relu: float = TAU_RELU, + ) -> None: + r"""q-Log Expected Improvement. + + Args: + model: A fitted model. + best_f: The best objective value observed so far (assumed noiseless). Can be + a `batch_shape`-shaped tensor, which in case of a batched model + specifies potentially different values for each element of the batch. + sampler: The sampler used to draw base samples. See `MCAcquisitionFunction` + more details. + objective: The MCAcquisitionObjective under which the samples are evaluated. + Defaults to `IdentityMCObjective()`. + posterior_transform: A PosteriorTransform (optional). + X_pending: A `m x d`-dim Tensor of `m` design points that have been + submitted for function evaluation but have not yet been evaluated. + Concatenated into `X` upon forward call. Copied and set to have no + gradient. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are satisfied if `constraint(samples) < 0`. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. See the docs of + `compute_(log_)smoothed_constraint_indicator` for details. + fatten: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + tau_max: Temperature parameter controlling the sharpness of the smooth + approximations to max. + tau_relu: Temperature parameter controlling the sharpness of the smooth + approximations to ReLU. 
+ """ + super().__init__( + model=model, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + constraints=constraints, + eta=eta, + tau_max=check_tau(tau_max, name="tau_max"), + fatten=fatten, + ) + self.register_buffer("best_f", torch.as_tensor(best_f)) + self.tau_relu = check_tau(tau_relu, name="tau_relu") + + def _sample_forward(self, obj: Tensor) -> Tensor: + r"""Evaluate qLogExpectedImprovement on the candidate set `X`. + + Args: + obj: `mc_shape x batch_shape x q`-dim Tensor of MC objective values. + + Returns: + A `mc_shape x batch_shape x q`-dim Tensor of expected improvement values. + """ + li = _log_improvement( + Y=obj, + best_f=self.best_f, + tau=self.tau_relu, + fatten=self._fatten, + ) + return li + + +""" +###################################### utils ########################################## +""" + + +def _log_improvement( + Y: Tensor, + best_f: Tensor, + tau: Union[float, Tensor], + fatten: bool, +) -> Tensor: + """Computes the logarithm of the softplus-smoothed improvement, i.e. + `log_softplus(Y - best_f, beta=(1 / tau))`. + Note that softplus is an approximation to the regular ReLU objective whose maximum + pointwise approximation error is linear with respect to tau as tau goes to zero. + + Args: + obj: `mc_samples x batch_shape x q`-dim Tensor of output samples. + best_f: Best previously observed objective value(s), broadcastable with `obj`. + tau: Temperature parameter for smooth approximation of ReLU. + as `tau -> 0`, maximum pointwise approximation error is linear w.r.t. `tau`. + fatten: Toggles the logarithmic / linear asymptotic behavior of the + smooth approximation to ReLU. + + Returns: + A `mc_samples x batch_shape x q`-dim Tensor of improvement values. + """ + log_soft_clamp = log_fatplus if fatten else log_softplus + Z = Y - best_f.to(Y) + return log_soft_clamp(Z, tau=tau) # ~ ((Y - best_f) / Y_std).clamp(0) + + +def check_tau(tau: FloatOrTensor, name: str) -> FloatOrTensor: + """Checks the validity of the tau arguments of the functions below, and returns + `tau` if it is valid.""" + if isinstance(tau, Tensor) and tau.numel() != 1: + raise ValueError(name + f" is not a scalar: {tau.numel() = }.") + if not (tau > 0): + raise ValueError(name + f" is non-positive: {tau = }.") + return tau diff --git a/botorch/acquisition/monte_carlo.py b/botorch/acquisition/monte_carlo.py index 0b07c4d852..fcc7c8d944 100644 --- a/botorch/acquisition/monte_carlo.py +++ b/botorch/acquisition/monte_carlo.py @@ -170,6 +170,8 @@ class SampleReducingMCAcquisitionFunction(MCAcquisitionFunction): forward pass. These problems are circumvented by the design of this class. """ + _log: bool = False # whether the acquisition utilities are in log-space + def __init__( self, model: Model, @@ -181,6 +183,7 @@ def __init__( q_reduction: SampleReductionProtocol = torch.amax, constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, eta: Union[Tensor, float] = 1e-3, + fatten: bool = False, ): r"""Constructor of SampleReducingMCAcquisitionFunction. @@ -216,6 +219,8 @@ def __init__( eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this parameter, see the docs of `compute_smoothed_feasibility_indicator`. + fatten: Wether to apply a fat-tailed smooth approximation to the feasibility + indicator or the canonical sigmoid approximation. 
""" if constraints is not None and isinstance(objective, ConstrainedMCObjective): raise ValueError( @@ -236,6 +241,7 @@ def __init__( self._q_reduction = partial(q_reduction, dim=-1) self._constraints = constraints self._eta = eta + self._fatten = fatten @concatenate_pending_points @t_batch_mode_transform() @@ -300,14 +306,19 @@ def _apply_constraints(self, acqval: Tensor, samples: Tensor) -> Tensor: multiplied by a smoothed constraint indicator per sample. """ if self._constraints is not None: - if (acqval < 0).any(): + if not self._log and (acqval < 0).any(): raise ValueError( "Constraint-weighting requires unconstrained " "acquisition values to be non-negative." ) - acqval = acqval * compute_smoothed_feasibility_indicator( - constraints=self._constraints, samples=samples, eta=self._eta + ind = compute_smoothed_feasibility_indicator( + constraints=self._constraints, + samples=samples, + eta=self._eta, + log=self._log, + fatten=self._fatten, ) + acqval = acqval.add(ind) if self._log else acqval.mul(ind) return acqval diff --git a/botorch/utils/objective.py b/botorch/utils/objective.py index d8034d49eb..c751ea5837 100644 --- a/botorch/utils/objective.py +++ b/botorch/utils/objective.py @@ -13,6 +13,7 @@ from typing import Callable, List, Optional, Union import torch +from botorch.utils.safe_math import log_fatmoid, logexpit from torch import Tensor @@ -120,12 +121,17 @@ def compute_smoothed_feasibility_indicator( constraints: List[Callable[[Tensor], Tensor]], samples: Tensor, eta: Union[Tensor, float], + log: bool = False, + fatten: bool = False, ) -> Tensor: r"""Computes the smoothed feasibility indicator of a list of constraints. Given posterior samples, using a sigmoid to smoothly approximate the feasibility indicator of each individual constraint to ensure differentiability and high - gradient signal. + gradient signal. The `fatten` and `log` options improve the numerical behavior of + the smooth approximation. + + NOTE: *Negative* constraint values are associated with feasibility. Args: constraints: A list of callables, each mapping a Tensor of size `b x q x m` @@ -138,6 +144,8 @@ def compute_smoothed_feasibility_indicator( constraint in constraints. In case of a tensor the length of the tensor must match the number of provided constraints. The i-th constraint is then estimated with the i-th eta value. + log: Toggles the computation of the log-feasibility indicator. + fatten: Toggles the computation of the fat-tailed feasibility indicator. Returns: A `n_samples x b x q`-dim tensor of feasibility indicator values. @@ -148,12 +156,14 @@ def compute_smoothed_feasibility_indicator( raise ValueError( "Number of provided constraints and number of provided etas do not match." ) - is_feasible = torch.ones_like(samples[..., 0]) + if not (eta > 0).all(): + raise ValueError("eta must be positive.") + is_feasible = torch.zeros_like(samples[..., 0]) + log_sigmoid = log_fatmoid if fatten else logexpit for constraint, e in zip(constraints, eta): - w = soft_eval_constraint(constraint(samples), eta=e) - is_feasible = is_feasible.mul(w) # TODO: add log version. + is_feasible = is_feasible + log_sigmoid(-constraint(samples) / e) - return is_feasible + return is_feasible if log else is_feasible.exp() def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: @@ -172,7 +182,7 @@ def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: `value(x) -> 1` as x becomes negative. 
""" if eta <= 0: - raise ValueError("eta must be positive") + raise ValueError("eta must be positive.") return torch.sigmoid(-lhs / eta) diff --git a/botorch/utils/safe_math.py b/botorch/utils/safe_math.py index 83a2155a68..a0c79020cc 100644 --- a/botorch/utils/safe_math.py +++ b/botorch/utils/safe_math.py @@ -20,10 +20,13 @@ from typing import Tuple, Union import torch +from botorch.exceptions import UnsupportedError from botorch.utils.constants import get_constants_like from torch import finfo, Tensor +from torch.nn.functional import softplus _log2 = math.log(2) +_inv_sqrt_3 = math.sqrt(1 / 3) # Unary ops @@ -76,6 +79,23 @@ def log1mexp(x: Tensor) -> Tensor: ) +def log1pexp(x: Tensor) -> Tensor: + """Numerically accurate evaluation of log(1 + exp(x)). + See [Maechler2012accurate]_ for details. + """ + mask = x <= 18 + return torch.where( + mask, + (lambda z: z.exp().log1p())(x.masked_fill(~mask, 0)), + (lambda z: z + (-z).exp())(x.masked_fill(mask, 0)), + ) + + +def logexpit(X: Tensor) -> Tensor: + """Computes the logarithm of the expit (a.k.a. sigmoid) function.""" + return -log1pexp(-X) + + def logdiffexp(log_a: Tensor, log_b: Tensor) -> Tensor: """Computes log(b - a) accurately given log(a) and log(b). Assumes, log_b > log_a, i.e. b > a > 0. @@ -93,7 +113,7 @@ def logdiffexp(log_a: Tensor, log_b: Tensor) -> Tensor: def logmeanexp( X: Tensor, dim: Union[int, Tuple[int, ...]], keepdim: bool = False ) -> Tensor: - """Computes log(mean(exp(X), dim=dim, keepdim=keepdim)). + """Computes `log(mean(exp(X), dim=dim, keepdim=keepdim))`. Args: X: Values of which to compute the logmeanexp. @@ -101,7 +121,139 @@ def logmeanexp( keepdim: If True, keeps the reduced dimensions. Returns: - A Tensor of values corresponding to log(mean(exp(X), dim=dim)). + A Tensor of values corresponding to `log(mean(exp(X), dim=dim))`. """ n = X.shape[dim] if isinstance(dim, int) else math.prod(X.shape[i] for i in dim) return torch.logsumexp(X, dim=dim, keepdim=keepdim) - math.log(n) + + +def log_softplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the softplus function with high numerical accuracy. + + Args: + x: Input tensor, should have single or double precision floats. + tau: Decreasing tau increases the tightness of the + approximation to ReLU. Non-negative and defaults to 1.0. + + Returns: + Tensor corresponding to `log(softplus(x))`. + """ + check_dtype_float32_or_float64(x) + tau = torch.as_tensor(tau, dtype=x.dtype, device=x.device) + # cutoff chosen to achieve accuracy to machine epsilon + upper = 16 if x.dtype == torch.float32 else 32 + lower = -15 if x.dtype == torch.float32 else -35 + mask = x / tau > lower + return torch.where( + mask, + softplus(x.masked_fill(~mask, lower), beta=(1 / tau), threshold=upper).log(), + x / tau + tau.log(), + ) + + +def smooth_amax(X: Tensor, tau: Union[float, Tensor] = 1e-3, dim: int = -1) -> Tensor: + """Computes a smooth approximation to `max(X, dim=dim)`, i.e the maximum value of + `X` over dimension `dim`, using the logarithm of the `l_(1/tau)` norm of `exp(X)`. + Note that when `X = log(U)` is the *logarithm* of an acquisition utility `U`, + + `logsumexp(log(U) / tau) * tau = log(sum(U^(1/tau))^tau) = log(norm(U, ord=(1/tau))` + + Args: + X: A Tensor from which to compute the smoothed amax. + tau: Temperature parameter controlling the smooth approximation + to max operator, becomes tighter as tau goes to 0. Needs to be positive. + + Returns: + A Tensor of smooth approximations to `max(X, dim=dim)`. 
+ """ + # consider normalizing by log_n = math.log(X.shape[dim]) to reduce error + return torch.logsumexp(X / tau, dim=dim) * tau # ~ X.amax(dim=dim) + + +def check_dtype_float32_or_float64(X: Tensor) -> None: + if X.dtype != torch.float32 and X.dtype != torch.float64: + raise UnsupportedError( + f"Only dtypes float32 and float64 are supported, but received {X.dtype}." + ) + + +def log_fatplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the fat-tailed softplus. + + NOTE: Separated out in case the complexity of the `log` implementation increases + in the future. + """ + return fatplus(x, tau=tau).log() + + +def fatplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a fat-tailed approximation to `ReLU(x) = max(x, 0)` by linearly + combining a regular softplus function and the density function of a Cauchy + distribution. The coefficient `alpha` of the Cauchy density is chosen to guarantee + monotonicity and convexity. + + Args: + x: A Tensor on whose values to compute the smoothed function. + + Returns: + A Tensor of values of the fat-tailed softplus. + """ + + def _fatplus(x: Tensor) -> Tensor: + alpha = 1e-1 # guarantees monotonicity and convexity (TODO: ref + Lemma 4) + return softplus(x) + alpha * cauchy(x) + + return tau * _fatplus(x / tau) + + +def fatmax(X: Tensor, dim: int, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a smooth approximation to amax(X, dim=dim) with a fat tail. + + Args: + X: A Tensor from which to compute the smoothed amax. + tau: Temperature parameter controlling the smooth approximation + to max operator, becomes tighter as tau goes to 0. Needs to be positive. + standardize: Toggles the temperature standardization of the smoothed function. + + Returns: + A Tensor of smooth approximations to `max(X, dim=dim)` with a fat tail. + """ + if X.shape[dim] == 1: + return X.squeeze(dim) + + M = X.amax(dim=dim, keepdim=True) + Y = (X - M) / tau # NOTE: this would cause NaNs when X has Infs. + M = M.squeeze(dim) + return M + tau * cauchy(Y).sum(dim=dim).log() # could change to mean + + +def log_fatmoid(X: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the fatmoid. Separated out in case the implementation + of the logarithm becomes more complex in the future to ensure numerical stability. + """ + return fatmoid(X, tau=tau).log() + + +def fatmoid(X: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a twice continuously differentiable approximation to the Heaviside + step function with a fat tail, i.e. `O(1 / x^2)` as `x` goes to -inf. + + Args: + X: A Tensor from which to compute the smoothed step function. + tau: Temperature parameter controlling the smoothness of the approximation. + + Returns: + A tensor of fat-tailed approximations to the Heaviside step function. + """ + X = X / tau + m = _inv_sqrt_3 # this defines the inflection point + return torch.where( + X < 0, + 2 / 3 * cauchy(X - m), + 1 - 2 / 3 * cauchy(X + m), + ) + + +def cauchy(x: Tensor) -> Tensor: + """Computes a Lorentzian, i.e. an un-normalized Cauchy density function.""" + return 1 / (1 + x.square()) diff --git a/sphinx/source/acquisition.rst b/sphinx/source/acquisition.rst index 3aae7e277b..c46c906a89 100644 --- a/sphinx/source/acquisition.rst +++ b/sphinx/source/acquisition.rst @@ -60,6 +60,9 @@ Monte-Carlo Acquisition Functions :members: :exclude-members: MCAcquisitionFunction +.. 
automodule:: botorch.acquisition.logei + :members: + Multi-Objective Analytic Acquisition Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: botorch.acquisition.multi_objective.analytic diff --git a/test/acquisition/test_logei.py b/test/acquisition/test_logei.py new file mode 100644 index 0000000000..672d7acbac --- /dev/null +++ b/test/acquisition/test_logei.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import warnings +from unittest import mock + +import torch +from botorch import settings +from botorch.acquisition import ( + LogImprovementMCAcquisitionFunction, + qLogExpectedImprovement, +) +from botorch.acquisition.input_constructors import ACQF_INPUT_CONSTRUCTOR_REGISTRY +from botorch.acquisition.monte_carlo import qExpectedImprovement +from botorch.acquisition.objective import ( + ConstrainedMCObjective, + IdentityMCObjective, + PosteriorTransform, +) +from botorch.exceptions import BotorchWarning, UnsupportedError +from botorch.exceptions.errors import BotorchError +from botorch.sampling.normal import IIDNormalSampler, SobolQMCNormalSampler +from botorch.utils.testing import BotorchTestCase, MockModel, MockPosterior +from torch import Tensor + + +def infeasible_con(samples: Tensor) -> Tensor: + return torch.ones_like(samples[..., 0]) + + +def feasible_con(samples: Tensor) -> Tensor: + return -torch.ones_like(samples[..., 0]) + + +class DummyLogImprovementAcquisitionFunction(LogImprovementMCAcquisitionFunction): + def _sample_forward(self, X): + pass + + +class DummyNonScalarizingPosteriorTransform(PosteriorTransform): + scalarize = False + + def evaluate(self, Y): + pass # pragma: no cover + + def forward(self, posterior): + pass # pragma: no cover + + +class TestLogImprovementAcquisitionFunction(BotorchTestCase): + def test_abstract_raises(self): + with self.assertRaises(TypeError): + LogImprovementMCAcquisitionFunction() + # raise if model is multi-output, but no outcome transform or objective + # are given + no = "botorch.utils.testing.MockModel.num_outputs" + with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs: + mock_num_outputs.return_value = 2 + mm = MockModel(MockPosterior()) + with self.assertRaises(UnsupportedError): + DummyLogImprovementAcquisitionFunction(model=mm) + # raise if model is multi-output, but outcome transform does not + # scalarize and no objetive is given + with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs: + mock_num_outputs.return_value = 2 + mm = MockModel(MockPosterior()) + ptf = DummyNonScalarizingPosteriorTransform() + with self.assertRaises(UnsupportedError): + DummyLogImprovementAcquisitionFunction( + model=mm, posterior_transform=ptf + ) + + mm = MockModel(MockPosterior()) + objective = ConstrainedMCObjective( + IdentityMCObjective(), + constraints=[lambda samples: torch.zeros_like(samples[..., 0])], + ) + with self.assertRaisesRegex( + BotorchError, + "Log-Improvement should not be used with `ConstrainedMCObjective`.", + ): + DummyLogImprovementAcquisitionFunction(model=mm, objective=objective) + + +class TestQLogExpectedImprovement(BotorchTestCase): + def test_q_log_expected_improvement(self): + self.assertIn(qLogExpectedImprovement, ACQF_INPUT_CONSTRUCTOR_REGISTRY.keys()) + for dtype in (torch.float, torch.double): + tkwargs = {"device": self.device, "dtype": dtype} + # the event shape is `b x q x 
t` = 1 x 1 x 1 + samples = torch.zeros(1, 1, 1, **tkwargs) + mm = MockModel(MockPosterior(samples=samples)) + # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking + X = torch.zeros(1, 1, **tkwargs) + + # basic test + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + self.assertFalse(acqf._fatten) # different default behavior + self.assertTrue(log_acqf._fatten) + # test initialization + for k in ["objective", "sampler"]: + self.assertIn(k, acqf._modules) + self.assertIn(k, log_acqf._modules) + + res = acqf(X).item() + self.assertEqual(res, 0.0) + exp_log_res = log_acqf(X).exp().item() + # Due to the smooth approximation, the value at zero should be close to, but + # not exactly zero, and upper-bounded by the tau hyperparameter. + self.assertTrue(0 < exp_log_res) + self.assertTrue(exp_log_res <= log_acqf.tau_relu) + + # test shifting best_f value downward to see non-zero improvement + best_f = -1 + acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + res, exp_log_res = acqf(X), log_acqf(X).exp() + expected_val = -best_f + + self.assertEqual(res.dtype, dtype) + self.assertEqual(res.device.type, self.device.type) + self.assertEqual(res.item(), expected_val) + # Further away from zero, the value is numerically indistinguishable with + # single precision arithmetic. + self.assertTrue(expected_val <= exp_log_res.item()) + self.assertTrue(exp_log_res.item() <= expected_val + log_acqf.tau_relu) + + # test shifting best_f value upward to see advantage of LogEI + best_f = 1 + acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + res, log_res = acqf(X), log_acqf(X) + exp_log_res = log_res.exp() + expected_val = 0 + self.assertEqual(res.item(), expected_val) + self.assertTrue(expected_val <= exp_log_res.item()) + self.assertTrue(exp_log_res.item() <= expected_val + log_acqf.tau_relu) + # However, the log value is large and negative with non-vanishing gradients + self.assertGreater(-1, log_res.item()) + self.assertGreater(log_res.item(), -100) + + # NOTE: The following tests are adapted from the qEI tests. 
+ # basic test, no resample + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + res = acqf(X) + self.assertTrue(0 < res.exp().item()) + self.assertTrue(res.exp().item() < acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])) + bs = acqf.sampler.base_samples.clone() + res = acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # basic test, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + res = acqf(X) + self.assertTrue(0 < res.exp().item()) + self.assertTrue(res.exp().item() < acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # basic test for X_pending and warning + acqf.set_X_pending() + self.assertIsNone(acqf.X_pending) + acqf.set_X_pending(None) + self.assertIsNone(acqf.X_pending) + acqf.set_X_pending(X) + self.assertEqual(acqf.X_pending, X) + mm._posterior._samples = torch.zeros(1, 2, 1, **tkwargs) + res = acqf(X) + X2 = torch.zeros(1, 1, 1, **tkwargs, requires_grad=True) + with warnings.catch_warnings(record=True) as ws, settings.debug(True): + acqf.set_X_pending(X2) + self.assertEqual(acqf.X_pending, X2) + self.assertEqual( + sum(issubclass(w.category, BotorchWarning) for w in ws), 1 + ) + + # testing with illegal taus + with self.assertRaisesRegex(ValueError, "tau_max is not a scalar:"): + qLogExpectedImprovement( + model=mm, best_f=0, tau_max=torch.tensor([1, 2]) + ) + with self.assertRaisesRegex(ValueError, "tau_relu is non-positive:"): + qLogExpectedImprovement(model=mm, best_f=0, tau_relu=-2) + + def test_q_log_expected_improvement_batch(self): + for dtype in (torch.float, torch.double): + # the event shape is `b x q x t` = 2 x 2 x 1 + samples = torch.zeros(2, 2, 1, device=self.device, dtype=dtype) + samples[0, 0, 0] = 1.0 + mm = MockModel(MockPosterior(samples=samples)) + + # X is a dummy and unused b/c of mocking + X = torch.zeros(2, 2, 1, device=self.device, dtype=dtype) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + # in the batch case, the values get adjusted toward + self.assertEqual(exp_log_res.dtype, dtype) + self.assertEqual(exp_log_res.device.type, self.device.type) + self.assertTrue(1.0 <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertAllClose(exp_log_res[0], torch.ones_like(exp_log_res[0]), ) + + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0 < exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= acqf.tau_relu) + + # test batch model, batched best_f values + sampler = IIDNormalSampler(sample_shape=torch.Size([3])) + acqf = qLogExpectedImprovement( + model=mm, best_f=torch.Tensor([0, 0]), sampler=sampler + ) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + self.assertTrue(1.0 <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0 < exp_log_res[1].item()) + 
self.assertTrue(exp_log_res[1].item() <= acqf.tau_relu) + + # test shifting best_f value + acqf = qLogExpectedImprovement(model=mm, best_f=-1, sampler=sampler) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 2.0) + # TODO: figure out numerically stable tests and principled tolerances + # With q > 1, maximum value can get moved down due to L_q-norm approximation + # of the maximum over the q-batch. + safe_upper_lower_bound = 1.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 2.0 + acqf.tau_relu + acqf.tau_max) + # with no approximations (qEI): self.assertEqual(res[1].item(), 1.0) + self.assertTrue(1.0 <= exp_log_res[1].item()) + # ocurring ~tau_max error when all candidates in a q-batch have the + # acquisition value + self.assertTrue(exp_log_res[1].item() <= 1.0 + acqf.tau_relu + acqf.tau_max) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + # res = acqf(X) # 1-dim batch + exp_log_res = acqf(X).exp() # 1-dim batch + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + exp_log_res = acqf(X.expand(2, 2, 1)).exp() # 2-dim batch + # self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + # the base samples should have the batch dim collapsed + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X.expand(2, 2, 1)) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # test batch mode, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + exp_log_res = acqf(X).exp() + # self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # # TODO: Test different objectives (incl. 
constraints) diff --git a/test/utils/test_objective.py b/test/utils/test_objective.py index c7000b712f..f3ac65d564 100644 --- a/test/utils/test_objective.py +++ b/test/utils/test_objective.py @@ -10,6 +10,7 @@ from botorch.utils.objective import ( compute_feasibility_indicator, compute_smoothed_feasibility_indicator, + soft_eval_constraint, ) from botorch.utils.testing import BotorchTestCase from torch import Tensor @@ -65,7 +66,7 @@ def test_apply_constraints(self): # nonnegative objective, one constraint, eta = 0 samples = torch.randn(1) obj = ones_f(samples) - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, "eta must be positive."): apply_constraints( obj=obj, constraints=[zeros_f], @@ -74,6 +75,11 @@ def test_apply_constraints(self): eta=0.0, ) + # soft_eval_constraint is not in the path of apply_constraints, adding this test + # for coverage. + with self.assertRaisesRegex(ValueError, "eta must be positive."): + soft_eval_constraint(lhs=obj, eta=0.0) + def test_apply_constraints_multi_output(self): # nonnegative objective, one constraint tkwargs = {"device": self.device} diff --git a/test/utils/test_safe_math.py b/test/utils/test_safe_math.py index 3167928f71..32135f6f61 100644 --- a/test/utils/test_safe_math.py +++ b/test/utils/test_safe_math.py @@ -12,15 +12,40 @@ from typing import Callable import torch +from botorch.exceptions import UnsupportedError from botorch.utils import safe_math from botorch.utils.constants import get_constants_like -from botorch.utils.safe_math import logmeanexp +from botorch.utils.objective import compute_smoothed_feasibility_indicator +from botorch.utils.safe_math import ( + cauchy, + fatmax, + fatmoid, + fatplus, + log_fatmoid, + log_fatplus, + log_softplus, + logmeanexp, + smooth_amax, +) from botorch.utils.testing import BotorchTestCase from torch import finfo, Tensor +from torch.nn.functional import softplus INF = float("inf") +def sum_constraint(samples: Tensor) -> Tensor: + """Represents the constraint `samples.sum(dim=-1) > 0`. + + Args: + samples: A `b x q x m`-dim Tensor. + + Returns: + A `b x q`-dim Tensor representing constraint feasibility. 
+ """ + return -samples.sum(dim=-1) + + class UnaryOpTestMixin: op: Callable[[Tensor], Tensor] safe_op: Callable[[Tensor], Tensor] @@ -233,3 +258,143 @@ def test_log_mean_exp(self): logmeanexp(X.log(), dim=(0, -1), keepdim=True).exp(), X.mean(dim=(0, -1), keepdim=True), ) + + +class TestSmoothNonLinearities(BotorchTestCase): + def test_smooth_non_linearities(self): + for dtype in (torch.float, torch.double): + n = 17 + X = torch.randn(n, dtype=dtype, device=self.device) + self.assertAllClose(cauchy(X), 1 / (X.square() + 1)) + + # testing softplus and fatplus + tau = 1e-2 + X = torch.randn(n, dtype=dtype, device=self.device) + fatplus_X = fatplus(X, tau=tau) + self.assertAllClose(fatplus_X, X.clamp(0), atol=tau) + self.assertTrue((fatplus_X > 0).all()) + self.assertAllClose(fatplus_X.log(), log_fatplus(X, tau=tau)) + self.assertAllClose( + softplus(X, beta=1 / tau), log_softplus(X, tau=tau).exp() + ) + + # testing fatplus differentiability + X = torch.randn(n, dtype=dtype, device=self.device) + X.requires_grad = True + log_fatplus(X, tau=tau).sum().backward() + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + # always increasing, could also test convexity (mathematically guaranteed) + self.assertTrue((X.grad > 0).all()) + + X_soft = X.detach().clone() + X_soft.requires_grad = True + log_softplus(X_soft, tau=tau).sum().backward() + + # for positive values away from zero, log_softplus and log_fatplus are close + is_positive = X > 100 * tau # i.e. 1 for tau = 1e-2 + self.assertAllClose(X.grad[is_positive], 1 / X[is_positive], atol=tau) + self.assertAllClose(X_soft.grad[is_positive], 1 / X[is_positive], atol=tau) + + is_negative = X < -100 * tau # i.e. -1 + # the softplus has very large gradients, which can saturate the smooth + # approximation to the maximum over the q-batch. + asym_val = torch.full_like(X_soft.grad[is_negative], 1 / tau) + self.assertAllClose(X_soft.grad[is_negative], asym_val, atol=tau, rtol=tau) + # the fatplus on the other hand has smaller, though non-vanishing gradients. 
+ self.assertTrue((X_soft.grad[is_negative] > X.grad[is_negative]).all()) + + # testing smoothmax and fatmax + d = 3 + X = torch.randn(n, d, dtype=dtype, device=self.device) + fatmax_X = fatmax(X, dim=-1, tau=tau) + true_max = X.amax(dim=-1) + self.assertAllClose(fatmax_X, true_max, atol=tau) + self.assertAllClose(smooth_amax(X, dim=-1, tau=tau), true_max, atol=tau) + + # special case for d = 1 + d = 1 + X = torch.randn(n, d, dtype=dtype, device=self.device) + fatmax_X = fatmax(X, dim=-1, tau=tau) + self.assertAllClose(fatmax_X, X[..., 0]) + + # testing fatmax differentiability + X = torch.randn(n, dtype=dtype, device=self.device) + X.requires_grad = True + fatmax(X, dim=-1, tau=tau).sum().backward() + + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + self.assertTrue(X.grad.min() > 0) + + # testing fatmoid + X = torch.randn(n, dtype=dtype, device=self.device) + fatmoid_X = fatmoid(X, tau=tau) + # output is in [0, 1] + self.assertTrue((fatmoid_X > 0).all()) + self.assertTrue((fatmoid_X < 1).all()) + # skew symmetry + atol = 1e-6 if dtype == torch.float32 else 1e-12 + self.assertAllClose(1 - fatmoid_X, fatmoid(-X, tau=tau), atol=atol) + zero = torch.tensor(0.0, dtype=dtype, device=self.device) + half = torch.tensor(0.5, dtype=dtype, device=self.device) + self.assertAllClose(fatmoid(zero), half, atol=atol) + + self.assertAllClose(fatmoid_X.log(), log_fatmoid(X, tau=tau)) + + is_center = X.abs() < 100 * tau + self.assertAllClose( + fatmoid_X[~is_center], (X[~is_center] > 0).to(fatmoid_X), atol=1e-3 + ) + + # testing differentiability + X.requires_grad = True + log_fatmoid(X, tau=tau).sum().backward() + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + self.assertTrue((X.grad > 0).all()) + + # testing constraint indicator + constraints = [sum_constraint] + b = 3 + q = 4 + m = 5 + samples = torch.randn(b, q, m) + eta = 1e-3 + fatten = True + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fatten=fatten, + ) + self.assertTrue(log_feas_vals.shape == torch.Size([b, q])) + expected_feas_vals = sum_constraint(samples) < 0 + hard_feas_vals = log_feas_vals.exp() > 1 / 2 + self.assertAllClose(hard_feas_vals, expected_feas_vals) + + # with deterministic inputs: + samples = torch.ones(1, 1, m) # sum is greater than 0 + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fatten=fatten, + ) + self.assertTrue((log_feas_vals.exp() > 1 / 2).item()) + + # with deterministic inputs: + samples = -torch.ones(1, 1, m) # sum is smaller than 0 + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fatten=fatten, + ) + self.assertFalse((log_feas_vals.exp() > 1 / 2).item()) + + with self.assertRaisesRegex(UnsupportedError, "Only dtypes"): + log_softplus(torch.randn(2, dtype=torch.float16))
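
Example usage of the new `qLogExpectedImprovement` (a minimal sketch, not taken from the patch: the toy data, bounds, and optimizer settings below are illustrative assumptions):

import torch
from botorch.acquisition.logei import qLogExpectedImprovement
from botorch.fit import fit_gpytorch_mll
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood

# Toy maximization problem on the unit square (illustrative only).
train_X = torch.rand(20, 2, dtype=torch.double)
train_Y = (1 - (train_X - 0.5).norm(dim=-1, keepdim=True)).to(train_X)

model = SingleTaskGP(train_X, train_Y)
fit_gpytorch_mll(ExactMarginalLogLikelihood(model.likelihood, model))

# qLogEI returns log-scale acquisition values; log is monotone, so maximizing
# qLogEI yields the same candidates as maximizing qEI, but with better-behaved
# gradients when the improvement is numerically small.
acqf = qLogExpectedImprovement(model=model, best_f=train_Y.max())
candidates, acq_value = optimize_acqf(
    acq_function=acqf,
    bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double),
    q=2,
    num_restarts=8,
    raw_samples=64,
)

With `q=2`, the defaults `fatten=True` and `tau_max=1e-2` govern the smooth maximum over the q-batch, and `tau_relu=1e-6` governs the smoothed improvement, as discussed in the module-level NOTE of `botorch/acquisition/logei.py`.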