Merge branch 'main' into fix_mo_posterior
SaiAakash authored Oct 24, 2024
2 parents 4f2af4f + ccf278a commit 4a1de1b
Showing 14 changed files with 443 additions and 136 deletions.
8 changes: 5 additions & 3 deletions botorch/acquisition/input_constructors.py
@@ -80,6 +80,7 @@
     LearnedObjective,
     MCAcquisitionObjective,
     PosteriorTransform,
+    ScalarizedPosteriorTransform,
 )
 from botorch.acquisition.preference import (
     AnalyticExpectedUtilityOfBestOption,
@@ -1800,8 +1801,8 @@ def construct_inputs_qJES(
     model: Model,
     bounds: list[tuple[float, float]],
     num_optima: int = 64,
-    maximize: bool = True,
     condition_noiseless: bool = True,
+    posterior_transform: ScalarizedPosteriorTransform | None = None,
     X_pending: Tensor | None = None,
     estimation_type: str = "LB",
     num_samples: int = 64,
@@ -1811,15 +1812,16 @@
         model=model,
         bounds=torch.as_tensor(bounds, dtype=dtype).T,
         num_optima=num_optima,
-        maximize=maximize,
+        posterior_transform=posterior_transform,
+        return_transformed=True,
     )

     inputs = {
         "model": model,
         "optimal_inputs": optimal_inputs,
         "optimal_outputs": optimal_outputs,
         "condition_noiseless": condition_noiseless,
-        "maximize": maximize,
+        "posterior_transform": posterior_transform,
         "X_pending": X_pending,
         "estimation_type": estimation_type,
         "num_samples": num_samples,
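For orientation, here is a minimal sketch (ours, not from the diff) of how the reworked constructor is meant to be fed; the model, data, and weights are made up. Negating the weights of the `ScalarizedPosteriorTransform` is now the way to express minimization, replacing the removed `maximize` flag.

```python
# Hypothetical usage of the updated input constructor (made-up data).
import torch

from botorch.acquisition.input_constructors import construct_inputs_qJES
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.models import SingleTaskGP

train_X = torch.rand(10, 2, dtype=torch.float64)
train_Y = torch.rand(10, 2, dtype=torch.float64)  # two outputs
model = SingleTaskGP(train_X, train_Y)

# Weighted-sum scalarization of the two outputs.
transform = ScalarizedPosteriorTransform(
    weights=torch.tensor([0.7, 0.3], dtype=torch.float64)
)

inputs = construct_inputs_qJES(
    model=model,
    bounds=[(0.0, 1.0), (0.0, 1.0)],
    num_optima=8,
    posterior_transform=transform,
)
# `inputs` can then be unpacked into qJointEntropySearch(**inputs).
```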
48 changes: 26 additions & 22 deletions botorch/acquisition/joint_entropy_search.py
@@ -74,7 +74,6 @@ def __init__(
        posterior_transform: PosteriorTransform | None = None,
        X_pending: Tensor | None = None,
        estimation_type: str = "LB",
-        maximize: bool = True,
        num_samples: int = 64,
    ) -> None:
        r"""Joint entropy search acquisition function.
@@ -91,11 +90,11 @@
            [Tu2022joint]_. These are sampled identically, so this only controls
            the fashion in which the GP is reshaped as a result of conditioning
            on the optimum.
+            posterior_transform: PosteriorTransform to negate or scalarize the output.
            estimation_type: A string to determine which entropy estimate is
                computed: "Lower Bound" ("LB") or "Monte Carlo" ("MC"). Lower
                Bound is recommended due to the relatively high variance of the
                MC estimator.
-            maximize: If true, we consider a maximization problem.
            X_pending: A `m x d`-dim Tensor of `m` design points that have been
                submitted for function evaluation, but have not yet been evaluated.
            num_samples: The number of Monte Carlo samples used for the Monte Carlo
@@ -112,16 +111,13 @@
        # and three-dimensional otherwise.
        self.optimal_inputs = optimal_inputs.unsqueeze(-2)
        self.optimal_outputs = optimal_outputs.unsqueeze(-2)
+        self.optimal_output_values = (
+            posterior_transform.evaluate(self.optimal_outputs).unsqueeze(-1)
+            if posterior_transform
+            else self.optimal_outputs
+        )
+        self.posterior_transform = posterior_transform
-        self.maximize = maximize
-
-        # The optima (can be maxima, can be minima) come in as the largest
-        # values if we optimize, or the smallest (likely substantially negative)
-        # if we minimize. Inside the acquisition function, however, we always
-        # want to consider MAX-values. As such, we need to flip them if
-        # we want to minimize.
-        if not self.maximize:
-            optimal_outputs = -optimal_outputs

        self.num_samples = optimal_inputs.shape[0]
        self.condition_noiseless = condition_noiseless
        self.initial_model = model
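As a side note (our illustration, not part of the commit), the shape bookkeeping behind the new `optimal_output_values` attribute can be sketched as follows; the sizes are hypothetical:

```python
# Hypothetical shape walk-through of `optimal_output_values` (assumed sizes).
import torch
from botorch.acquisition.objective import ScalarizedPosteriorTransform

num_optima, m = 8, 2
# Mirrors self.optimal_outputs after .unsqueeze(-2): one optimum per row.
optimal_outputs = torch.rand(num_optima, 1, m)
transform = ScalarizedPosteriorTransform(weights=torch.tensor([1.0, -1.0]))

# evaluate() maps (..., m) -> (...); unsqueeze(-1) restores a trailing
# singleton so the values broadcast against posterior means downstream.
optimal_output_values = transform.evaluate(optimal_outputs).unsqueeze(-1)
assert optimal_output_values.shape == (num_optima, 1, 1)
```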
@@ -203,7 +199,9 @@ def _compute_lower_bound_information_gain(
            A `batch_shape`-dim Tensor of acquisition values at the given design
            points `X`.
        """
-        initial_posterior = self.initial_model.posterior(X, observation_noise=True)
+        initial_posterior = self.initial_model.posterior(
+            X, observation_noise=True, posterior_transform=self.posterior_transform
+        )
        # need to check if there is a two-dimensional batch shape -
        # the sampled optima appear in the dimension right after
        batch_shape = X.shape[:-2]
@@ -221,15 +219,17 @@

        # Compute the mixture mean and variance
        posterior_m = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=True
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
        )
        noiseless_var = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=False
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=False,
+            posterior_transform=self.posterior_transform,
        ).variance

        mean_m = posterior_m.mean
-        if not self.maximize:
-            mean_m = -mean_m
        variance_m = posterior_m.variance

        check_no_nans(variance_m)
@@ -240,7 +240,7 @@
            torch.zeros(1, device=X.device, dtype=X.dtype),
            torch.ones(1, device=X.device, dtype=X.dtype),
        )
-        normalized_mvs = (self.optimal_outputs - mean_m) / stdv
+        normalized_mvs = (self.optimal_output_values - mean_m) / stdv
        cdf_mvs = normal.cdf(normalized_mvs).clamp_min(CLAMP_LB)
        pdf_mvs = torch.exp(normal.log_prob(normalized_mvs))
@@ -294,7 +294,9 @@ def _compute_monte_carlo_information_gain(
            A `batch_shape`-dim Tensor of acquisition values at the given design
            points `X`.
        """
-        initial_posterior = self.initial_model.posterior(X, observation_noise=True)
+        initial_posterior = self.initial_model.posterior(
+            X, observation_noise=True, posterior_transform=self.posterior_transform
+        )

        batch_shape = X.shape[:-2]
        sample_dim = len(batch_shape)
@@ -311,15 +313,17 @@

        # Compute the mixture mean and variance
        posterior_m = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=True
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
        )
        noiseless_var = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=False
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=False,
+            posterior_transform=self.posterior_transform,
        ).variance

        mean_m = posterior_m.mean
-        if not self.maximize:
-            mean_m = -mean_m
        variance_m = posterior_m.variance.clamp_min(CLAMP_LB)
        conditional_samples, conditional_logprobs = self._compute_monte_carlo_variables(
            posterior_m
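To see the new plumbing end to end, a hedged sketch (our construction, not from the diff) that draws untransformed sample-path optima and lets the acquisition function compute the scalarized `optimal_output_values` internally; data, bounds, and weights are made up, and we rely on the default `return_transformed=False`:

```python
# Sketch (assumed data/weights): qJES on a two-output model, scalarized
# internally via the posterior transform.
import torch

from botorch.acquisition.joint_entropy_search import qJointEntropySearch
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.acquisition.utils import get_optimal_samples
from botorch.models import SingleTaskGP

train_X = torch.rand(20, 3, dtype=torch.float64)
train_Y = torch.stack([train_X.sum(-1), -train_X.prod(-1)], dim=-1)
model = SingleTaskGP(train_X, train_Y)
bounds = torch.tensor([[0.0] * 3, [1.0] * 3], dtype=torch.float64)
transform = ScalarizedPosteriorTransform(
    weights=torch.tensor([1.0, -1.0], dtype=torch.float64)
)

# Default return_transformed=False: f* keeps all m outputs, and __init__
# derives the scalarized optimal_output_values itself.
optimal_inputs, optimal_outputs = get_optimal_samples(
    model=model,
    bounds=bounds,
    num_optima=8,
    posterior_transform=transform,
)
acqf = qJointEntropySearch(
    model=model,
    optimal_inputs=optimal_inputs,
    optimal_outputs=optimal_outputs,
    posterior_transform=transform,
)
values = acqf(torch.rand(4, 1, 3, dtype=torch.float64))  # 4 batches of q=1
```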
56 changes: 42 additions & 14 deletions botorch/acquisition/utils.py
@@ -18,6 +18,7 @@
     IdentityMCObjective,
     MCAcquisitionObjective,
     PosteriorTransform,
+    ScalarizedPosteriorTransform,
 )
 from botorch.exceptions.errors import (
     BotorchTensorDimensionError,
@@ -28,10 +29,11 @@
 from botorch.models.model import Model
 from botorch.sampling.base import MCSampler
 from botorch.sampling.get_sampler import get_sampler
-from botorch.sampling.pathwise import draw_matheron_paths
+from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
 from botorch.utils.objective import compute_feasibility_indicator
 from botorch.utils.sampling import optimize_posterior_samples
 from botorch.utils.transforms import is_ensemble, normalize_indices
+from gpytorch.models import GP
 from torch import Tensor

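One consequence of the import swap worth noting: `draw_matheron_paths` returns raw path callables, whereas `get_matheron_path_model` wraps the drawn paths in a deterministic model, so downstream code can interact with the samples through the usual `Model` API. A small illustration (ours, with made-up data):

```python
# Illustration of the swapped-in helper (made-up data; not from the diff).
import torch
from botorch.models import SingleTaskGP
from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model

model = SingleTaskGP(
    torch.rand(12, 2, dtype=torch.float64),
    torch.rand(12, 1, dtype=torch.float64),
)
# A deterministic model whose "posterior" evaluates 4 fixed sample paths.
path_model = get_matheron_path_model(model=model, sample_shape=torch.Size([4]))
# Evaluates each of the 4 drawn paths at 5 test points.
samples_at_X = path_model.posterior(torch.rand(5, 2, dtype=torch.float64)).mean
```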

@@ -486,36 +488,62 @@ def project_to_sample_points(X: Tensor, sample_points: Tensor) -> Tensor:


 def get_optimal_samples(
-    model: Model,
+    model: GP,
     bounds: Tensor,
     num_optima: int,
     raw_samples: int = 1024,
     num_restarts: int = 20,
-    maximize: bool = True,
+    posterior_transform: ScalarizedPosteriorTransform | None = None,
+    objective: MCAcquisitionObjective | None = None,
+    return_transformed: bool = False,
 ) -> tuple[Tensor, Tensor]:
     """Draws sample paths from the posterior and maximizes the samples using GD.

     Args:
-        model (Model): The model from which samples are drawn.
-        bounds: (Tensor): Bounds of the search space. If the model inputs are
+        model: The model from which samples are drawn.
+        bounds: Bounds of the search space. If the model inputs are
             normalized, the bounds should be normalized as well.
-        num_optima (int): The number of paths to be drawn and optimized.
-        raw_samples (int, optional): The number of candidates randomly sample.
+        num_optima: The number of paths to be drawn and optimized.
+        raw_samples: The number of candidates randomly sampled.
             Defaults to 1024.
-        num_restarts (int, optional): The number of candidates to do gradient-based
+        num_restarts: The number of candidates to do gradient-based
             optimization on. Defaults to 20.
-        maximize: Whether to maximize or minimize the samples.
+        posterior_transform: A ScalarizedPosteriorTransform (may e.g. be used to
+            scalarize multi-output models or negate the objective).
+        objective: An MCAcquisitionObjective, used to negate the objective or
+            otherwise transform sample outputs. Cannot be combined with
+            `posterior_transform`.
+        return_transformed: If True, return the transformed samples.

     Returns:
-        Tuple[Tensor, Tensor]: The optimal input locations and corresponding
-            outputs, x* and f*.
+        The optimal input locations and corresponding outputs, x* and f*.
     """
-    paths = draw_matheron_paths(model, sample_shape=torch.Size([num_optima]))
+    if posterior_transform and not isinstance(
+        posterior_transform, ScalarizedPosteriorTransform
+    ):
+        raise ValueError(
+            "Only the ScalarizedPosteriorTransform is supported for "
+            "get_optimal_samples."
+        )
+    if posterior_transform and objective:
+        raise ValueError(
+            "Only one of `posterior_transform` and `objective` can be specified."
+        )
+
+    if posterior_transform:
+        sample_transform = posterior_transform.evaluate
+    elif objective:
+        sample_transform = objective
+    else:
+        sample_transform = None
+
+    paths = get_matheron_path_model(model=model, sample_shape=torch.Size([num_optima]))
     optimal_inputs, optimal_outputs = optimize_posterior_samples(
-        paths,
+        paths=paths,
         bounds=bounds,
         raw_samples=raw_samples,
         num_restarts=num_restarts,
-        maximize=maximize,
+        sample_transform=sample_transform,
+        return_transformed=return_transformed,
     )
     return optimal_inputs, optimal_outputs
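Since `maximize` is gone, minimization is now expressed through the transform or the objective. A hedged sketch of the two (mutually exclusive) options, with made-up data:

```python
# Hedged sketch: negating instead of maximize=False (made-up data).
import torch

from botorch.acquisition.objective import (
    LinearMCObjective,
    ScalarizedPosteriorTransform,
)
from botorch.acquisition.utils import get_optimal_samples
from botorch.models import SingleTaskGP

model = SingleTaskGP(
    torch.rand(16, 2, dtype=torch.float64),
    torch.rand(16, 1, dtype=torch.float64),
)
bounds = torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.float64)
neg = torch.tensor([-1.0], dtype=torch.float64)

# Option 1: a negating ScalarizedPosteriorTransform.
X_min, f_min = get_optimal_samples(
    model, bounds=bounds, num_optima=4,
    posterior_transform=ScalarizedPosteriorTransform(weights=neg),
)

# Option 2: an MCAcquisitionObjective; passing both raises a ValueError.
X_min2, f_min2 = get_optimal_samples(
    model, bounds=bounds, num_optima=4,
    objective=LinearMCObjective(weights=neg),
)
```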
41 changes: 36 additions & 5 deletions botorch/optim/optimize.py
@@ -1000,6 +1000,8 @@ def optimize_acqf_discrete(
     choices: Tensor,
     max_batch_size: int = 2048,
     unique: bool = True,
+    X_avoid: Tensor | None = None,
+    inequality_constraints: list[tuple[Tensor, Tensor, float]] | None = None,
 ) -> tuple[Tensor, Tensor]:
     r"""Optimize over a discrete set of points using batch evaluation.
@@ -1017,6 +1019,12 @@
            a large training set.
        unique: If True return unique choices, o/w choices may be repeated
            (only relevant if `q > 1`).
+        X_avoid: An `n x d` tensor of candidates that we aren't allowed to pick.
+            These will be removed from the set of choices.
+        inequality_constraints: A list of tuples (indices, coefficients, rhs),
+            with each tuple encoding an inequality constraint of the form
+            `\sum_i (X[indices[i]] * coefficients[i]) >= rhs`.
+            Infeasible points will be removed from the set of choices.

    Returns:
        A two-element tuple containing
@@ -1029,8 +1037,31 @@
            "Discrete optimization is not supported for"
            "one-shot acquisition functions."
        )
-    if choices.numel() == 0:
-        raise InputDataError("`choices` must be non-emtpy.")
+    if X_avoid is not None and unique:
+        choices = _filter_invalid(X=choices, X_avoid=X_avoid)
+    if inequality_constraints is not None:
+        choices = _filter_infeasible(
+            X=choices, inequality_constraints=inequality_constraints
+        )
+    len_choices = len(choices)
+    if len_choices == 0:
+        message = "`choices` must be non-empty."
+        if X_avoid is not None or inequality_constraints is not None:
+            message += (
+                " No feasible points remain after removing `X_avoid` and "
+                "filtering out infeasible points."
+            )
+        raise InputDataError(message)
+    elif len_choices < q and unique:
+        warnings.warn(
+            (
+                f"Requested {q=} candidates from fully discrete search "
+                f"space, but only {len_choices} possible choices remain. "
+            ),
+            OptimizationWarning,
+            stacklevel=2,
+        )
+        q = len_choices
    choices_batched = choices.unsqueeze(-2)
    if q > 1:
        candidate_list, acq_value_list = [], []
@@ -1081,7 +1112,7 @@ def _generate_neighbors(
    discrete_choices: list[Tensor],
    X_avoid: Tensor,
    inequality_constraints: list[tuple[Tensor, Tensor, float]],
-):
+) -> Tensor:
    # generate all 1D perturbations
    npts = sum([len(c) for c in discrete_choices])
    X_loc = x.repeat(npts, 1)
@@ -1097,15 +1128,15 @@

def _filter_infeasible(
    X: Tensor, inequality_constraints: list[tuple[Tensor, Tensor, float]]
-):
+) -> Tensor:
    """Remove all points from `X` that don't satisfy the constraints."""
    is_feasible = torch.ones(X.shape[0], dtype=torch.bool, device=X.device)
    for inds, weights, bound in inequality_constraints:
        is_feasible &= (X[..., inds] * weights).sum(dim=-1) >= bound
    return X[is_feasible]


-def _filter_invalid(X: Tensor, X_avoid: Tensor):
+def _filter_invalid(X: Tensor, X_avoid: Tensor) -> Tensor:
    """Remove all occurrences of `X_avoid` from `X`."""
    return X[~(X == X_avoid.unsqueeze(-2)).all(dim=-1).any(dim=-2)]
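Finally, a hedged end-to-end sketch of the new `optimize_acqf_discrete` knobs (made-up model and choice set; the parameter names come from the diff above). If filtering empties the choice set, the new code raises an `InputDataError`; if fewer than `q` feasible choices remain (with `unique=True`), it warns and shrinks `q` accordingly.

```python
# Sketch: discrete optimization with avoidance and feasibility filtering.
import torch

from botorch.acquisition import ExpectedImprovement
from botorch.models import SingleTaskGP
from botorch.optim.optimize import optimize_acqf_discrete

train_X = torch.rand(8, 2, dtype=torch.float64)
train_Y = train_X.sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)
acqf = ExpectedImprovement(model, best_f=train_Y.max())

choices = torch.rand(32, 2, dtype=torch.float64)
candidate, acq_value = optimize_acqf_discrete(
    acq_function=acqf,
    q=1,
    choices=choices,
    # Never re-propose training points (filtered when unique=True).
    X_avoid=train_X,
    # Keep only choices with x_0 + x_1 >= 0.5; infeasible rows are dropped.
    inequality_constraints=[
        (torch.tensor([0, 1]), torch.ones(2, dtype=torch.float64), 0.5)
    ],
)
```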
(The remaining 10 of the 14 changed files are not shown.)
