Trust region with mixed search spaces (#821)
* Add trust region with mixed search spaces

* Add unit tests for tr discrete and product (WIP)

* Fix string format

* Add remaining tr product unit tests

* Fix mock call_args for old python ver

* Add batch tr product unit tests

* Update (minor) comments and docstrings

* Add more explanations

* Remove general dtype detection, not really useful

* Add integ test for TR mixed space

* Add notes about mixed spaces dtype

* Fix cast for discrete space

* Add turbo-based integ tests that show arch issue

* Add input_active_dims mechanism

* Make select_in_region typing consistent

* Add a bit more explanation for active_dims

* Use new discrete steps to include the 3 minimizers
khurram-ghani authored Feb 23, 2024
1 parent eea50fd commit 9d9e71c
Showing 6 changed files with 1,059 additions and 35 deletions.
164 changes: 164 additions & 0 deletions docs/notebooks/mixed_search_spaces.pct.py
@@ -67,6 +67,8 @@
# We create our mixed search space by instantiating this class with a list containing the discrete
# and continuous spaces, without any explicit tags (hence using default tags).
# This can be easily extended to more than two search spaces by adding more elements to the
# list, as sketched after the next cell.
#
# Note: the dtype of all the component search spaces must be the same.

# %%
from trieste.space import Box, DiscreteSearchSpace, TaggedProductSearchSpace
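
# %% [markdown]
# As a minimal sketch of both points above (hypothetical spaces and names, assuming all
# components use `tf.float64`, the dtype trieste's `Box` defaults to for float bounds),
# a product of three matching-dtype spaces:

# %%
import tensorflow as tf

# All three component spaces share the float64 dtype, so the product is well defined.
three_way_space = TaggedProductSearchSpace(
    [
        DiscreteSearchSpace(tf.constant([[0.0], [0.5], [1.0]], dtype=tf.float64)),
        Box([0.0], [1.0]),
        Box([-1.0, -1.0], [1.0, 1.0]),
    ]
)
print(three_way_space.dimension)  # 1 + 1 + 2 = 4 dimensions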
@@ -217,6 +219,168 @@
        alpha=0.6,
    )

# %% [markdown]
# ## Trust region with mixed search spaces
#
# In this section, we demonstrate the use of trust region acquisition rules with mixed search
# spaces. We use the same mixed search space and observer as before, and the same initial data.
# See [trust region Bayesian optimization notebook](trust_region.ipynb) for an introduction to
# trust region acquisition rules.
#
# First we build a Gaussian process model of the objective function using the initial data.

# %%
gpflow_model = build_gpr(
    initial_data, mixed_search_space, likelihood_variance=1e-7
)
model = GaussianProcessRegression(gpflow_model)

# %% [markdown]
# We create a trust region acquisition rule that uses the efficient global optimization (EGO)
# acquisition rule as the base rule. The trust region acquisition rule is initialized with a set of
# trust regions; 5 in this example. Each trust region is defined as a product of a discrete and a
# continuous trust sub-region. The base rule is then used to optimize the acquisition function
# within each trust region. This setup is similar to the one used in the "Batch trust region rule"
# section of the [trust region Bayesian optimization notebook](trust_region.ipynb).
#
# Analogous to a `TaggedProductSearchSpace`, each trust region is a product of a discrete and a
# continuous trust (sub-)region. The discrete sub-region is defined by
# `FixedPointTrustRegionDiscrete`, which selects a random point from the discrete space that is
# then kept fixed for the duration of the optimization. The continuous sub-region is defined by
# `SingleObjectiveTrustRegionBox`, just as in the trust region notebook, and is updated at each
# step of the optimization.

# %%
from trieste.acquisition import ParallelContinuousThompsonSampling
from trieste.acquisition.rule import (
    BatchTrustRegionProduct,
    EfficientGlobalOptimization,
    FixedPointTrustRegionDiscrete,
    SingleObjectiveTrustRegionBox,
    UpdatableTrustRegionProduct,
)

num_query_points = 5
init_regions = [
    UpdatableTrustRegionProduct(
        [
            FixedPointTrustRegionDiscrete(discrete_space),
            SingleObjectiveTrustRegionBox(continuous_space),
        ]
    )
    for _ in range(num_query_points)
]
base_rule = EfficientGlobalOptimization(  # type: ignore[var-annotated]
    builder=ParallelContinuousThompsonSampling(),
    num_query_points=num_query_points,
)
tr_acq_rule = BatchTrustRegionProduct(init_regions, base_rule)

# %% [markdown]
# We run the optimization loop for 15 steps using the trust region acquisition rule.

# %%
bo = BayesianOptimizer(observer, mixed_search_space)

num_steps = 15
tr_result = bo.optimize(
    num_steps, initial_data, model, tr_acq_rule, track_state=True
)
dataset = tr_result.try_get_final_dataset()

# %% [markdown]
# The best point found by the optimizer is obtained as before.

# %%
query_point, observation, arg_min_idx = tr_result.try_get_optimal_point()

print(f"query point: {query_point}")
print(f"observation: {observation}")

# %% [markdown]
# We plot the points queried during the optimization loop over the mixed search space, as in the
# earlier plot.

# %%
query_points = dataset.query_points.numpy()
observations = dataset.observations.numpy()

_, ax = plot_function_2d(
    scaled_branin,
    ScaledBranin.search_space.lower,
    ScaledBranin.search_space.upper,
    contour=True,
)
plot_bo_points(query_points, ax[0, 0], num_initial_points, arg_min_idx)
ax[0, 0].set_xlabel(r"$x_1$")
ax[0, 0].set_ylabel(r"$x_2$")

for point in points:
    ax[0, 0].vlines(
        point,
        mixed_search_space.lower[1],
        mixed_search_space.upper[1],
        colors="b",
        linestyles="dashed",
        alpha=0.6,
    )

# %% [markdown]
# Finally, we visualize the optimization progress by plotting the 5 (product) trust regions at each
# step. The trust regions are shown as translucent boxes, with each box in a different color. The
# new query point for each region is plotted in a matching color.
#
# Note that since the discrete dimension is on the x-axis, the trust regions appear as vertical
# lines with zero width.

# %%
import base64
from typing import Optional

import IPython
import matplotlib.pyplot as plt

from trieste.bayesian_optimizer import OptimizationResult
from trieste.experimental.plotting import (
    convert_figure_to_frame,
    convert_frames_to_gif,
    plot_trust_region_history_2d,
)


def plot_history(
    result: OptimizationResult,
    num_query_points: Optional[int] = None,
) -> None:
    frames = []
    for step, hist in enumerate(
        result.history + [result.final_result.unwrap()]
    ):
        fig, _ = plot_trust_region_history_2d(
            scaled_branin,
            ScaledBranin.search_space.lower,
            ScaledBranin.search_space.upper,
            hist,
            num_query_points=num_query_points,
            num_init=num_initial_points,
            alpha=1.0,
        )

        if fig is not None:
            fig.suptitle(f"step number {step}")
            frames.append(convert_figure_to_frame(fig))
            plt.close(fig)

    gif_file = convert_frames_to_gif(frames)
    gif = IPython.display.HTML(
        '<img src="data:image/gif;base64,{0}"/>'.format(
            base64.b64encode(gif_file.getvalue()).decode()
        )
    )
    IPython.display.display(gif)


plot_history(tr_result)

# %% [markdown]
# ## LICENSE
#
81 changes: 72 additions & 9 deletions tests/integration/test_mixed_space_bayesian_optimization.py
@@ -13,6 +13,9 @@
# limitations under the License.
from __future__ import annotations

from typing import cast

import numpy as np
import numpy.testing as npt
import pytest
import tensorflow as tf
@@ -22,8 +25,16 @@
    AcquisitionFunctionClass,
    BatchMonteCarloExpectedImprovement,
    LocalPenalization,
    ParallelContinuousThompsonSampling,
)
from trieste.acquisition.rule import (
    AcquisitionRule,
    BatchTrustRegionProduct,
    EfficientGlobalOptimization,
    FixedPointTrustRegionDiscrete,
    SingleObjectiveTrustRegionBox,
    UpdatableTrustRegionProduct,
)
from trieste.bayesian_optimizer import BayesianOptimizer
from trieste.models import TrainableProbabilisticModel
from trieste.models.gpflow import GaussianProcessRegression, build_gpr
@@ -34,6 +45,34 @@
from trieste.types import TensorType


def _get_mixed_search_space() -> TaggedProductSearchSpace:
    # The discrete space is defined by a set of 10 equally spaced points, chosen so that the
    # three Branin minimizers (along dimension 0) are included in the set. The continuous
    # dimension is defined by the interval [0, 1].
    # Since the first and third minimizers are equidistant from the middle minimizer, we choose
    # the discretization points to be equally spaced around the middle minimizer.
    minimizers0 = ScaledBranin.minimizers[:, 0]
    step = (minimizers0[1] - minimizers0[0]) / 4
    points = np.concatenate(
        [
            # Equally spaced points to the left of the middle minimizer. Skip the last point
            # as it is the same as the first point in the next array.
            np.flip(np.arange(minimizers0[1], 0.0, -step))[:-1],
            # Equally spaced points to the right of the middle minimizer.
            np.arange(minimizers0[1], 1.0, step),
        ]
    )
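    # With the scaled Branin minimizers at x ≈ 0.124, 0.543 and 0.962, the step is ≈ 0.105,
    # giving 10 points from ≈ 0.019 to ≈ 0.962 that include all three minimizers.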
    discrete_space = DiscreteSearchSpace(points[:, None])
    continuous_space = Box([0], [1])
    return TaggedProductSearchSpace(
        spaces=[discrete_space, continuous_space],
        tags=["discrete", "continuous"],
    )


mixed_search_space = _get_mixed_search_space()


@random_seed
@pytest.mark.parametrize(
"num_steps, acquisition_rule",
@@ -57,6 +96,35 @@
            ),
            id="LocalPenalization",
        ),
        pytest.param(
            8,
            BatchTrustRegionProduct(
                [
                    UpdatableTrustRegionProduct(
                        [
                            FixedPointTrustRegionDiscrete(
                                cast(
                                    DiscreteSearchSpace,
                                    mixed_search_space.get_subspace("discrete"),
                                )
                            ),
                            SingleObjectiveTrustRegionBox(
                                mixed_search_space.get_subspace("continuous")
                            ),
                        ],
                        tags=mixed_search_space.subspace_tags,
                    )
                    for _ in range(10)
                ],
                EfficientGlobalOptimization(
                    ParallelContinuousThompsonSampling(),
                    # Use a large batch to ensure discrete init finds a good point.
                    # We are using a fixed point trust region for the discrete space, so
                    # the init point is randomly chosen and then never updated.
                    num_query_points=10,
                ),
            ),
            id="TrustRegionSingleObjectiveFixed",
        ),
    ],
)
def test_optimizer_finds_minima_of_the_scaled_branin_function(
@@ -65,20 +133,15 @@ def test_optimizer_finds_minima_of_the_scaled_branin_function(
        TensorType, TaggedProductSearchSpace, TrainableProbabilisticModel
    ],
) -> None:
    initial_query_points = mixed_search_space.sample(5)
    observer = mk_observer(ScaledBranin.objective)
    initial_data = observer(initial_query_points)
    model = GaussianProcessRegression(
        build_gpr(initial_data, mixed_search_space, likelihood_variance=1e-8)
    )

    dataset = (
        BayesianOptimizer(observer, mixed_search_space)
        .optimize(num_steps, initial_data, model, acquisition_rule)
        .try_get_final_dataset()
    )
