From 33a0e2bb042b18393355c6cb0b3418ba740080e6 Mon Sep 17 00:00:00 2001
From: Khurram Ghani
Date: Thu, 1 Feb 2024 18:15:00 +0000
Subject: [PATCH 1/4] Add mixed search spaces notebook

---
 docs/notebooks/mixed_search_spaces.pct.py | 207 ++++++++++++++++++++++
 docs/tutorials.rst                        |   1 +
 2 files changed, 208 insertions(+)
 create mode 100644 docs/notebooks/mixed_search_spaces.pct.py

diff --git a/docs/notebooks/mixed_search_spaces.pct.py b/docs/notebooks/mixed_search_spaces.pct.py
new file mode 100644
index 0000000000..d67d2b3d71
--- /dev/null
+++ b/docs/notebooks/mixed_search_spaces.pct.py
@@ -0,0 +1,207 @@
+# ---
+# jupyter:
+#   jupytext:
+#     cell_metadata_filter: -all
+#     custom_cell_magics: kql
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.11.2
+#   kernelspec:
+#     display_name: .venv_310
+#     language: python
+#     name: python3
+# ---
+
+# %% [markdown]
+# # Mixed search spaces
+#
+# This notebook demonstrates the use of mixed search spaces in Trieste.
+# It shows an example of optimizing a function over a 2D search space that is a combination of a
+# discrete and a continuous search space. The problem is a modification of the Branin function
+# where one of the input dimensions is discretized.
+
+# %%
+import numpy as np
+import tensorflow as tf
+
+np.random.seed(1793)
+tf.random.set_seed(1793)
+
+# %% [markdown]
+# ## The problem
+#
+# The Branin function is a common benchmark function for optimization algorithms that has three
+# global minima. It is normally defined over a 2D continuous search space.
+#
+# We first show the Branin function over its original search space.
+
+# %%
+from trieste.experimental.plotting import plot_function_plotly
+from trieste.objectives import ScaledBranin
+
+scaled_branin = ScaledBranin.objective
+
+fig = plot_function_plotly(
+    scaled_branin,
+    ScaledBranin.search_space.lower,
+    ScaledBranin.search_space.upper,
+)
+fig.show()
+
+# %% [markdown]
+# In order to convert the Branin function from a continuous to a mixed search space problem, we
+# modify it by discretizing its first input dimension.
+#
+# The discrete dimension is defined by a set of 10 points that are equally spaced, ensuring that
+# that the three minima are included in the set. The continuous dimension is defined by the
+# interval [0, 1].
+#
+# We observe the first and third minima are equidistant from the middle minimizer, so we choose
+# the discretization points to be equally spaced around the middle minimizer.
+
+# %%
+from trieste.space import Box, DiscreteSearchSpace
+
+minimizers0 = ScaledBranin.minimizers[:, 0]
+step = (minimizers0[1] - minimizers0[0]) / 4
+points = np.concatenate(
+    [
+        # Equally spaced points to the left of the middle minimizer. Skip the last point as it is
+        # the same as the first point in the next array.
+        np.flip(np.arange(minimizers0[1], 0.0, -step))[:-1],
+        # Equally spaced points to the right of the middle minimizer.
+        np.arange(minimizers0[1], 1.0, step),
+    ]
+)
+discrete_space = DiscreteSearchSpace(points[:, None])
+continuous_space = Box([0.0], [1.0])
+mixed_search_space = discrete_space * continuous_space
+
+
+# %% [markdown]
+# Next we demonstrate the Branin function over the mixed search space, by plotting the original
+# function contours and highlighting the discretization points.
+# The discrete dimension is on the x-axis and the continuous dimension is on the y-axis, with
+# the vertical dashed lines indicating the discretization points.
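+
+# %% [markdown]
+# Before plotting, we can sanity-check the product space. The following cell is illustrative
+# only and assumes nothing beyond the generic `SearchSpace` interface used elsewhere in this
+# notebook (`sample` and membership via `in`), together with the `points` array defined above.
+
+# %%
+# Each random sample should lie in the space, with its first coordinate on the discrete grid.
+samples = mixed_search_space.sample(5)
+print(samples)
+for sample in samples:
+    assert sample in mixed_search_space
+    assert np.any(np.isclose(sample[0], points))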
+ +# %% +from trieste.experimental.plotting import plot_function_2d + +# Plot over the predefined search space. +fig, ax = plot_function_2d( + scaled_branin, + ScaledBranin.search_space.lower, + ScaledBranin.search_space.upper, + contour=True, +) + +ax[0, 0].set_xlabel(r"$x_1$") +ax[0, 0].set_ylabel(r"$x_2$") + +# Draw vertical lines at the discrete points +for point in points: + ax[0, 0].vlines( + point, + mixed_search_space.lower[1], + mixed_search_space.upper[1], + colors="b", + linestyles="dashed", + alpha=0.6, + ) + +# %% [markdown] +# ## Sample the observer over the search space +# +# We begin our optimization by first collecting five function evaluations from random locations in +# the mixed search space. + +# %% +from trieste.objectives import mk_observer + +observer = mk_observer(scaled_branin) + +num_initial_points = 5 +initial_query_points = mixed_search_space.sample(num_initial_points) +initial_data = observer(initial_query_points) + +# %% [markdown] +# ## Model the objective function +# +# We now build a Gaussian process model of the objective function using the initial data, similar +# to the [introduction notebook](expected_improvement.ipynb). + +# %% +from trieste.models.gpflow import GaussianProcessRegression, build_gpr + +gpflow_model = build_gpr( + initial_data, mixed_search_space, likelihood_variance=1e-7 +) +model = GaussianProcessRegression(gpflow_model) + +# %% [markdown] +# ## Run the optimization loop +# +# The Bayesian optimization loop is run for 15 steps over the mixed search space. +# For each step, the optimizer fixes the discrete dimension to the best points found from a random +# initial search, and then optimizes the continuous dimension using a gradient-based optimizer. + +# %% +from trieste.bayesian_optimizer import BayesianOptimizer + +bo = BayesianOptimizer(observer, mixed_search_space) + +num_steps = 15 +result = bo.optimize(num_steps, initial_data, model) +dataset = result.try_get_final_dataset() + +# %% [markdown] +# ## Explore the results +# +# We can now get the best point found by the optimizer. Note this isn't necessarily the point that +# was last evaluated. + +# %% +query_point, observation, arg_min_idx = result.try_get_optimal_point() + +print(f"query point: {query_point}") +print(f"observation: {observation}") + +# %% [markdown] +# The plot below shows how the optimizer explored the mixed search space over the course of the +# optimization loop. The blue 'x' markers indicate the initial points, the blue circles indicate +# the points that were evaluated during the optimization loop, and the purple circle indicates the +# optimal point found by the optimizer. 
+ +# %% +from trieste.experimental.plotting import plot_bo_points + +query_points = dataset.query_points.numpy() +observations = dataset.observations.numpy() + +_, ax = plot_function_2d( + scaled_branin, + ScaledBranin.search_space.lower, + ScaledBranin.search_space.upper, + contour=True, +) +plot_bo_points(query_points, ax[0, 0], num_initial_points, arg_min_idx) +ax[0, 0].set_xlabel(r"$x_1$") +ax[0, 0].set_ylabel(r"$x_2$") + +# Draw vertical lines at the discrete points +for point in points: + ax[0, 0].vlines( + point, + mixed_search_space.lower[1], + mixed_search_space.upper[1], + colors="b", + linestyles="dashed", + alpha=0.6, + ) + +# %% [markdown] +# ## LICENSE +# +# [Apache License 2.0](https://github.com/secondmind-labs/trieste/blob/develop/LICENSE) diff --git a/docs/tutorials.rst b/docs/tutorials.rst index 749a2eca19..59aaa2324f 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -41,6 +41,7 @@ The following tutorials illustrate solving different types of optimization probl notebooks/multifidelity_modelling notebooks/rembo notebooks/trust_region + notebooks/mixed_search_spaces Frequently asked questions -------------------------- From 3635756d3ff8f2cda6fa66ab53309302fb49c88d Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Fri, 2 Feb 2024 14:34:54 +0000 Subject: [PATCH 2/4] Improve markdown text --- docs/notebooks/mixed_search_spaces.pct.py | 34 +++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/notebooks/mixed_search_spaces.pct.py b/docs/notebooks/mixed_search_spaces.pct.py index d67d2b3d71..56ca5f6142 100644 --- a/docs/notebooks/mixed_search_spaces.pct.py +++ b/docs/notebooks/mixed_search_spaces.pct.py @@ -17,8 +17,9 @@ # %% [markdown] # # Mixed search spaces # -# This notebook demonstrates the use of mixed search spaces in Trieste. -# It shows an example of optimizing a function over a 2D search space that is a combination of a +# This notebook demonstrates optimization of mixed search spaces in Trieste. +# +# The example function is defined over 2D input space that is a combination of a # discrete and a continuous search space. The problem is a modification of the Branin function # where one of the input dimensions is discretized. @@ -32,10 +33,10 @@ # %% [markdown] # ## The problem # -# The Branin function is a common benchmark function for optimization algorithms that has three -# global minima. It is normally defined over a 2D continuous search space. +# The Branin function is a common optimization benchmark that has three global minima. It is normally +# defined over a 2D continuous search space. # -# We first show the Branin function over its original search space. +# We first show the Branin function over its original continuous search space. # %% from trieste.experimental.plotting import plot_function_plotly @@ -55,11 +56,11 @@ # modify it by discretizing its first input dimension. # # The discrete dimension is defined by a set of 10 points that are equally spaced, ensuring that -# that the three minima are included in the set. The continuous dimension is defined by the +# the three minimizers are included in this set. The continuous dimension is defined by the # interval [0, 1]. # -# We observe the first and third minima are equidistant from the middle minimizer, so we choose -# the discretization points to be equally spaced around the middle minimizer. 
+# We observe that the first and third minimizers are equidistant from the middle minimizer, so we +# choose the discretization points to be equally spaced around the middle minimizer. # %% from trieste.space import Box, DiscreteSearchSpace @@ -83,7 +84,7 @@ # %% [markdown] # Next we demonstrate the Branin function over the mixed search space, by plotting the original # function contours and highlighting the discretization points. -# The discrete dimension is on the x-axis and the continuous dimension is on the y-axis, with +# The discrete dimension is along the x-axis and the continuous dimension is on the y-axis, with # the vertical dashed lines indicating the discretization points. # %% @@ -100,7 +101,7 @@ ax[0, 0].set_xlabel(r"$x_1$") ax[0, 0].set_ylabel(r"$x_2$") -# Draw vertical lines at the discrete points +# Draw vertical lines at the discrete points. for point in points: ax[0, 0].vlines( point, @@ -145,7 +146,7 @@ # # The Bayesian optimization loop is run for 15 steps over the mixed search space. # For each step, the optimizer fixes the discrete dimension to the best points found from a random -# initial search, and then optimizes the continuous dimension using a gradient-based optimizer. +# initial search, and then optimizes the continuous dimension using a gradient-based method. # %% from trieste.bayesian_optimizer import BayesianOptimizer @@ -159,8 +160,8 @@ # %% [markdown] # ## Explore the results # -# We can now get the best point found by the optimizer. Note this isn't necessarily the point that -# was last evaluated. +# We can now get the best point found by the optimizer. Note that this isn't necessarily the last +# evaluated point. # %% query_point, observation, arg_min_idx = result.try_get_optimal_point() @@ -169,9 +170,9 @@ print(f"observation: {observation}") # %% [markdown] -# The plot below shows how the optimizer explored the mixed search space over the course of the -# optimization loop. The blue 'x' markers indicate the initial points, the blue circles indicate -# the points that were evaluated during the optimization loop, and the purple circle indicates the +# The plot below highlights how the optimizer explored the mixed search space over the course of the +# optimization loop. The green 'x' markers indicate the initial points, the green circles mark +# the points evaluated during the optimization loop, and the purple circle indicates the # optimal point found by the optimizer. # %% @@ -190,7 +191,6 @@ ax[0, 0].set_xlabel(r"$x_1$") ax[0, 0].set_ylabel(r"$x_2$") -# Draw vertical lines at the discrete points for point in points: ax[0, 0].vlines( point, From da82c3112a2777e0cf7f3a5c51f1fcf647b259fc Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Mon, 5 Feb 2024 16:51:09 +0000 Subject: [PATCH 3/4] Used tagged-product space and add quickrun --- docs/notebooks/mixed_search_spaces.pct.py | 6 ++++-- docs/notebooks/quickrun/mixed_search_spaces.yaml | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 docs/notebooks/quickrun/mixed_search_spaces.yaml diff --git a/docs/notebooks/mixed_search_spaces.pct.py b/docs/notebooks/mixed_search_spaces.pct.py index 56ca5f6142..3fd93ec928 100644 --- a/docs/notebooks/mixed_search_spaces.pct.py +++ b/docs/notebooks/mixed_search_spaces.pct.py @@ -63,7 +63,7 @@ # choose the discretization points to be equally spaced around the middle minimizer. 
 
 # %%
-from trieste.space import Box, DiscreteSearchSpace
+from trieste.space import Box, DiscreteSearchSpace, TaggedProductSearchSpace
 
 minimizers0 = ScaledBranin.minimizers[:, 0]
 step = (minimizers0[1] - minimizers0[0]) / 4
@@ -78,7 +78,9 @@
 )
 discrete_space = DiscreteSearchSpace(points[:, None])
 continuous_space = Box([0.0], [1.0])
-mixed_search_space = discrete_space * continuous_space
+mixed_search_space = TaggedProductSearchSpace(
+    [discrete_space, continuous_space]
+)
 
 
 # %% [markdown]
diff --git a/docs/notebooks/quickrun/mixed_search_spaces.yaml b/docs/notebooks/quickrun/mixed_search_spaces.yaml
new file mode 100644
index 0000000000..dff966f5c3
--- /dev/null
+++ b/docs/notebooks/quickrun/mixed_search_spaces.yaml
@@ -0,0 +1,2 @@
+replace:
+  - { from: "num_steps = \\d+", to: "num_steps = 2" }

From fda2347b9c637df2de0e064a75eeffcc81d2bba3 Mon Sep 17 00:00:00 2001
From: Khurram Ghani
Date: Wed, 7 Feb 2024 13:31:31 +0000
Subject: [PATCH 4/4] Add more notebook explanations

---
 docs/notebooks/mixed_search_spaces.pct.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/docs/notebooks/mixed_search_spaces.pct.py b/docs/notebooks/mixed_search_spaces.pct.py
index 3fd93ec928..60127116dc 100644
--- a/docs/notebooks/mixed_search_spaces.pct.py
+++ b/docs/notebooks/mixed_search_spaces.pct.py
@@ -61,6 +61,12 @@
 #
 # We observe that the first and third minimizers are equidistant from the middle minimizer, so we
 # choose the discretization points to be equally spaced around the middle minimizer.
+#
+# The `TaggedProductSearchSpace` class is a convenient way to define a search space
+# that is a combination of multiple search spaces, each with an optional tag.
+# We create our mixed search space by instantiating this class with a list containing the discrete
+# and continuous spaces, without any explicit tags (hence using default tags).
+# This can be easily extended to more than two search spaces by adding more elements to the list.
 
 # %%
 from trieste.space import Box, DiscreteSearchSpace, TaggedProductSearchSpace
@@ -118,7 +124,9 @@
 # ## Sample the observer over the search space
 #
 # We begin our optimization by first collecting five function evaluations from random locations in
-# the mixed search space.
+# the mixed search space. Samples from the discrete dimension are drawn uniformly at random with
+# replacement, and samples from the continuous dimension are drawn from a uniform distribution.
+# Observe that the `sample` method deals with the mixed search space automatically.
 
 # %%
 from trieste.objectives import mk_observer
@@ -134,6 +142,11 @@
 #
 # We now build a Gaussian process model of the objective function using the initial data, similar
 # to the [introduction notebook](expected_improvement.ipynb).
+#
+# Since all of the data in this example is quantitative, the model does not differentiate between
+# the discrete and continuous dimensions of the search space. The Gaussian process regression model
+# treats all dimensions as continuous variables, allowing for a seamless integration of both types
+# of dimensions in the optimization process.
 
 # %%
 from trieste.models.gpflow import GaussianProcessRegression, build_gpr
@@ -149,6 +162,7 @@
 # The Bayesian optimization loop is run for 15 steps over the mixed search space.
 # For each step, the optimizer fixes the discrete dimension to the best points found from a random
 # initial search, and then optimizes the continuous dimension using a gradient-based method.
+# This split between discrete and continuous optimization is handled automatically by the optimizer.
 
 # %%
 from trieste.bayesian_optimizer import BayesianOptimizer