Categorical trust regions (#865)
uri-granta authored Aug 27, 2024
1 parent f07f2ea commit 2c725f9
Showing 5 changed files with 197 additions and 75 deletions.
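
The commit extends trieste's trust-region acquisition rules to work with categorical subspaces: `SingleObjectiveTrustRegionDiscrete` can now wrap a `CategoricalSearchSpace`, and the updated integration and unit tests below exercise this on a categorical variant of the scaled Branin problem. As a quick orientation, here is a minimal sketch of the setup built by the integration test; the module paths are assumptions about trieste's public import layout (the test's own import block is only partially shown in this diff), and the mypy `cast` calls used in the test are omitted.

```python
from trieste.acquisition import ParallelContinuousThompsonSampling
from trieste.acquisition.rule import (
    BatchTrustRegionProduct,
    EfficientGlobalOptimization,
    SingleObjectiveTrustRegionBox,
    SingleObjectiveTrustRegionDiscrete,
    UpdatableTrustRegionProduct,
)
from trieste.space import Box, CategoricalSearchSpace, TaggedProductSearchSpace

# A mixed search space: one categorical feature with 6 categories and one
# continuous feature, tagged so each subspace gets its own region type.
search_space = TaggedProductSearchSpace(
    spaces=[CategoricalSearchSpace([6]), Box([0.0], [1.0])],
    tags=["categorical", "continuous"],
)

# Three independent product regions, each pairing a discrete region over the
# categorical subspace with a box region over the continuous subspace, driven
# by parallel continuous Thompson sampling with one query point per region.
acquisition_rule = BatchTrustRegionProduct(
    [
        UpdatableTrustRegionProduct(
            [
                SingleObjectiveTrustRegionDiscrete(
                    search_space.get_subspace("categorical")
                ),
                SingleObjectiveTrustRegionBox(search_space.get_subspace("continuous")),
            ],
            tags=search_space.subspace_tags,
        )
        for _ in range(3)
    ],
    EfficientGlobalOptimization(
        ParallelContinuousThompsonSampling(), num_query_points=3
    ),
)
```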
105 changes: 69 additions & 36 deletions tests/integration/test_mixed_space_bayesian_optimization.py
@@ -13,6 +13,7 @@
# limitations under the License.
from __future__ import annotations

import dataclasses
from typing import cast

import numpy as np
@@ -47,6 +48,7 @@
Box,
CategoricalSearchSpace,
DiscreteSearchSpace,
EncoderFunction,
TaggedProductSearchSpace,
one_hot_encoder,
)
@@ -167,15 +169,32 @@ def test_optimizer_finds_minima_of_the_scaled_branin_function(
TensorType, TaggedProductSearchSpace, TrainableProbabilisticModel
],
) -> None:
initial_query_points = mixed_search_space.sample(5)
observer = mk_observer(ScaledBranin.objective)
mixed_branin = cast(SingleObjectiveTestProblem[TaggedProductSearchSpace], ScaledBranin)
_test_optimizer_finds_problem_minima(
dataclasses.replace(mixed_branin, search_space=mixed_search_space),
num_steps,
acquisition_rule,
)


def _test_optimizer_finds_problem_minima(
problem: SingleObjectiveTestProblem[TaggedProductSearchSpace],
num_steps: int,
acquisition_rule: AcquisitionRule[
TensorType, TaggedProductSearchSpace, TrainableProbabilisticModel
],
encoder: EncoderFunction | None = None,
) -> None:
initial_query_points = problem.search_space.sample(5)
observer = mk_observer(problem.objective)
initial_data = observer(initial_query_points)
model = GaussianProcessRegression(
build_gpr(initial_data, mixed_search_space, likelihood_variance=1e-8)
build_gpr(initial_data, problem.search_space, likelihood_variance=1e-8),
encoder=encoder,
)

dataset = (
BayesianOptimizer(observer, mixed_search_space)
BayesianOptimizer(observer, problem.search_space)
.optimize(num_steps, initial_data, model, acquisition_rule)
.try_get_final_dataset()
)
@@ -185,7 +204,7 @@ def test_optimizer_finds_minima_of_the_scaled_branin_function(
best_y = dataset.observations[arg_min_idx]
best_x = dataset.query_points[arg_min_idx]

relative_minimizer_err = tf.abs((best_x - ScaledBranin.minimizers) / ScaledBranin.minimizers)
relative_minimizer_err = tf.abs((best_x - problem.minimizers) / problem.minimizers)
# these accuracies are the current best for the given number of optimization steps, which
# makes this a regression test
assert tf.reduce_any(tf.reduce_all(relative_minimizer_err < 0.1, axis=-1), axis=0)
@@ -210,7 +229,7 @@ def categorical_scaled_branin(
continuous_space = Box([0], [1])
search_space = TaggedProductSearchSpace(
spaces=[categorical_space, continuous_space],
tags=["discrete", "continuous"],
tags=["categorical", "continuous"],
)

def objective(x: TensorType) -> TensorType:
@@ -234,11 +253,50 @@ def objective(x: TensorType) -> TensorType:
)


def _get_categorical_problem() -> SingleObjectiveTestProblem[TaggedProductSearchSpace]:
# a categorical scaled branin problem with 6 categories mapping to 3 random points
# plus the 3 minimizer points (to guarantee that the minimum is present)
points = tf.concat(
[tf.random.uniform([3], dtype=tf.float64), ScaledBranin.minimizers[..., 0]], 0
)
return categorical_scaled_branin(tf.random.shuffle(points))


cat_problem = _get_categorical_problem()


@random_seed
@pytest.mark.parametrize(
"num_steps, acquisition_rule",
[
pytest.param(25, EfficientGlobalOptimization(), id="EfficientGlobalOptimization"),
pytest.param(
8,
BatchTrustRegionProduct(
[
UpdatableTrustRegionProduct(
[
SingleObjectiveTrustRegionDiscrete(
cast(
CategoricalSearchSpace,
cat_problem.search_space.get_subspace("categorical"),
)
),
SingleObjectiveTrustRegionBox(
cast(Box, cat_problem.search_space.get_subspace("continuous"))
),
],
tags=cat_problem.search_space.subspace_tags,
)
for _ in range(3)
],
EfficientGlobalOptimization(
ParallelContinuousThompsonSampling(),
num_query_points=3,
),
),
id="TrustRegionSingleObjective",
),
],
)
def test_optimizer_finds_minima_of_the_categorical_scaled_branin_function(
@@ -247,35 +305,10 @@ def test_optimizer_finds_minima_of_the_categorical_scaled_branin_function(
TensorType, TaggedProductSearchSpace, TrainableProbabilisticModel
],
) -> None:
# 6 categories mapping to 3 random points plus the 3 minimizer points
points = tf.concat(
[tf.random.uniform([3], dtype=tf.float64), ScaledBranin.minimizers[..., 0]], 0
)
problem = categorical_scaled_branin(tf.random.shuffle(points))
initial_query_points = problem.search_space.sample(5)
observer = mk_observer(problem.objective)
initial_data = observer(initial_query_points)

# model uses one-hot encoding for the categorical inputs
encoder = one_hot_encoder(problem.search_space)
model = GaussianProcessRegression(
build_gpr(initial_data, problem.search_space, likelihood_variance=1e-8),
encoder=encoder,
_test_optimizer_finds_problem_minima(
cat_problem,
num_steps,
acquisition_rule,
encoder=one_hot_encoder(cat_problem.search_space),
)

dataset = (
BayesianOptimizer(observer, problem.search_space)
.optimize(num_steps, initial_data, model, acquisition_rule)
.try_get_final_dataset()
)

arg_min_idx = tf.squeeze(tf.argmin(dataset.observations, axis=0))

best_y = dataset.observations[arg_min_idx]
best_x = dataset.query_points[arg_min_idx]

relative_minimizer_err = tf.abs((best_x - problem.minimizers) / problem.minimizers)
assert tf.reduce_any(
tf.reduce_all(relative_minimizer_err < 0.1, axis=-1), axis=0
), relative_minimizer_err
npt.assert_allclose(best_y, problem.minimum, rtol=0.005)
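
The GP model in the refactored test handles categorical inputs by passing an encoder to `GaussianProcessRegression`, so raw category indices are one-hot encoded before they reach the kernel. Below is a small sketch of that encoding step in isolation, assuming the object returned by `one_hot_encoder` is a plain callable on query-point tensors (consistent with the `EncoderFunction` type imported above); the example values and the exact output layout are illustrative, not taken from the diff.

```python
import tensorflow as tf

from trieste.space import Box, CategoricalSearchSpace, TaggedProductSearchSpace, one_hot_encoder

# A product space with one 3-category feature and one continuous feature.
search_space = TaggedProductSearchSpace(
    spaces=[CategoricalSearchSpace([3]), Box([0.0], [1.0])],
    tags=["categorical", "continuous"],
)
encoder = one_hot_encoder(search_space)

# Two query points: category index in the first column, continuous value in the second.
query_points = tf.constant([[0.0, 0.2], [2.0, 0.7]], dtype=tf.float64)

# Expected (under the assumptions above): the categorical column expands to three
# one-hot columns, e.g. roughly [[1, 0, 0, 0.2], [0, 0, 1, 0.7]].
print(encoder(query_points))
```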
101 changes: 77 additions & 24 deletions tests/unit/acquisition/test_rule.py
@@ -77,6 +77,7 @@
from trieste.observer import OBJECTIVE
from trieste.space import (
Box,
CategoricalSearchSpace,
DiscreteSearchSpace,
SearchSpace,
TaggedMultiSearchSpace,
@@ -2057,29 +2058,41 @@ def discrete_search_space() -> DiscreteSearchSpace:
return DiscreteSearchSpace(points)


@pytest.fixture
def categorical_search_space() -> CategoricalSearchSpace:
return CategoricalSearchSpace([10, 3])


@pytest.fixture
def continuous_search_space() -> Box:
return Box([0.0], [1.0])


@pytest.mark.parametrize("space_fixture", ["discrete_search_space", "categorical_search_space"])
@pytest.mark.parametrize("with_initialize", [True, False])
def test_fixed_trust_region_discrete_initialize(
discrete_search_space: DiscreteSearchSpace, with_initialize: bool
space_fixture: str,
with_initialize: bool,
request: Any,
) -> None:
"""Check that FixedTrustRegionDiscrete inits correctly by picking a single point from the global
search space."""
tr = FixedPointTrustRegionDiscrete(discrete_search_space)
search_space = request.getfixturevalue(space_fixture)
tr = FixedPointTrustRegionDiscrete(search_space)
if with_initialize:
tr.initialize()
assert tr.location.shape == (2,)
assert tr.location in discrete_search_space
assert tr.location in search_space


@pytest.mark.parametrize("space_fixture", ["discrete_search_space", "categorical_search_space"])
def test_fixed_trust_region_discrete_update(
discrete_search_space: DiscreteSearchSpace,
space_fixture: str,
request: Any,
) -> None:
"""Update call should not change the location of the region."""
tr = FixedPointTrustRegionDiscrete(discrete_search_space)
search_space = request.getfixturevalue(space_fixture)
tr = FixedPointTrustRegionDiscrete(search_space)
tr.initialize()
orig_location = tr.location.numpy()
assert not tr.requires_initialization
@@ -2103,13 +2116,16 @@ def test_trust_region_discrete_get_dataset_min_raises_if_dataset_is_faulty(
tr.get_dataset_min(datasets)


@pytest.mark.parametrize("space_fixture", ["discrete_search_space", "categorical_search_space"])
def test_trust_region_discrete_raises_on_location_not_found(
discrete_search_space: DiscreteSearchSpace,
space_fixture: str,
request: Any,
) -> None:
"""Check that an error is raised if the location is not found in the global search space."""
tr = SingleObjectiveTrustRegionDiscrete(discrete_search_space)
search_space = request.getfixturevalue(space_fixture)
tr = SingleObjectiveTrustRegionDiscrete(search_space)
with pytest.raises(ValueError, match="location .* not found in the global search space"):
tr.location = tf.constant([0.0, 0.0], dtype=tf.float64)
tr.location = tf.constant([0.1, 0.0], dtype=tf.float64)


def test_trust_region_discrete_get_dataset_min(discrete_search_space: DiscreteSearchSpace) -> None:
@@ -2172,6 +2188,24 @@ def test_trust_region_discrete_initialize(
npt.assert_array_equal(tr._y_min, tf.constant([np.inf], dtype=tf.float64))


def test_trust_region_categorical_initialize(
categorical_search_space: CategoricalSearchSpace,
) -> None:
"""Check initialize sets the region to a random location, and sets the eps and y_min values."""
datasets = {
OBJECTIVE: Dataset( # Points outside the search space should be ignored.
tf.constant([[0, 1, 2, 0], [4, -4, -5, 3]], dtype=tf.float64),
tf.constant([[0.7], [0.9]], dtype=tf.float64),
)
}
tr = SingleObjectiveTrustRegionDiscrete(categorical_search_space, input_active_dims=[1, 2])
tr.initialize(datasets=datasets)

npt.assert_array_equal(tr.eps, 1)
assert tr.location in categorical_search_space
npt.assert_array_equal(tr._y_min, tf.constant([np.inf], dtype=tf.float64))


def test_trust_region_discrete_requires_initialization(
discrete_search_space: DiscreteSearchSpace,
) -> None:
@@ -2223,20 +2257,28 @@ def test_trust_region_discrete_update_no_initialize(

@pytest.mark.parametrize("dtype", [tf.float32, tf.float64])
@pytest.mark.parametrize("success", [True, False])
@pytest.mark.parametrize("space_fixture", ["discrete_search_space", "categorical_search_space"])
def test_trust_region_discrete_update_size(
dtype: tf.DType, success: bool, discrete_search_space: DiscreteSearchSpace
dtype: tf.DType, success: bool, space_fixture: str, request: Any
) -> None:
discrete_search_space = DiscreteSearchSpace( # Convert to the correct dtype.
tf.cast(discrete_search_space.points, dtype=dtype)
)
search_space = request.getfixturevalue(space_fixture)
categorical = isinstance(search_space, CategoricalSearchSpace)

# Convert to the correct dtype.
if isinstance(search_space, DiscreteSearchSpace):
search_space = DiscreteSearchSpace(tf.cast(search_space.points, dtype=dtype))
else:
assert isinstance(search_space, CategoricalSearchSpace)
search_space = CategoricalSearchSpace(search_space.tags, dtype=dtype)

"""Check that update shrinks/expands region on successful/unsuccessful step."""
datasets = {
OBJECTIVE: Dataset(
tf.constant([[5, 4], [0, 1], [1, 1]], dtype=dtype),
tf.constant([[0.5], [0.3], [1.0]], dtype=dtype),
)
}
tr = SingleObjectiveTrustRegionDiscrete(discrete_search_space, min_eps=0.1)
tr = SingleObjectiveTrustRegionDiscrete(search_space, min_eps=0.1)
tr.initialize(datasets=datasets)

# Ensure there is at least one point captured in the region.
@@ -2252,11 +2294,17 @@ def test_trust_region_discrete_update_size(
eps = tr.eps

if success:
# Sample a point from the region.
new_point = tr.sample(1)
# Sample a point from the region. For categorical spaces, ensure it is a
# different point from tr.location (such a point must exist).
for _ in range(10):
new_point = tr.sample(1)
if not (categorical and tf.reduce_all(new_point[0] == tr.location)):
break
else:
assert False, "TR contains just one point"
else:
# Pick point outside the region.
new_point = tf.constant([[1, 2]], dtype=dtype)
new_point = tf.constant([[10, 1]], dtype=dtype)

# Add a new min point to the dataset.
assert not tr.requires_initialization
@@ -2269,28 +2317,33 @@ def test_trust_region_discrete_update_size(
tr.update(datasets=datasets)

assert tr.location.dtype == dtype
assert tr.eps.dtype == dtype
assert tr.eps == 1 if categorical else tr.eps.dtype == dtype
assert tr.points.dtype == dtype

if success:
# Check that the location is the new min point.
new_point = np.squeeze(new_point)
npt.assert_array_equal(new_point, tr.location)
npt.assert_allclose(new_min, tr._y_min)
# Check that the region is larger by beta.
npt.assert_allclose(eps / tr._beta, tr.eps)
# Check that the region is larger by beta (except for categorical)
npt.assert_allclose(1 if categorical else eps / tr._beta, tr.eps)
else:
# Check that the location is the old min point.
orig_point = np.squeeze(orig_point)
npt.assert_array_equal(orig_point, tr.location)
npt.assert_allclose(orig_min, tr._y_min)
# Check that the region is smaller by beta.
npt.assert_allclose(eps * tr._beta, tr.eps)
# Check that the region is smaller by beta (except for categorical)
npt.assert_allclose(1 if categorical else eps * tr._beta, tr.eps)

# Check the new set of neighbors.
neighbors_mask = tf.abs(discrete_search_space.points - tr.location) <= tr.eps
neighbors_mask = tf.reduce_all(neighbors_mask, axis=-1)
neighbors = tf.boolean_mask(discrete_search_space.points, neighbors_mask)
if categorical:
# Hamming distance
neighbors_mask = tf.where(search_space.points != tr.location, 1, 0)
neighbors_mask = tf.reduce_sum(neighbors_mask, axis=-1) <= tr.eps
else:
neighbors_mask = tf.abs(search_space.points - tr.location) <= tr.eps
neighbors_mask = tf.reduce_all(neighbors_mask, axis=-1)
neighbors = tf.boolean_mask(search_space.points, neighbors_mask)
npt.assert_array_equal(tr.points, neighbors)
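
For reference, the neighbourhood check at the end of this test reflects how the categorical trust region is expected to behave: a point belongs to the region when it differs from the region's location in at most `eps` features (a Hamming ball), and `eps` stays fixed at 1 rather than shrinking or expanding. A standalone sketch of that mask, using only TensorFlow and hypothetical example values:

```python
import tensorflow as tf


def hamming_neighbors(points: tf.Tensor, location: tf.Tensor, eps: int) -> tf.Tensor:
    """Return the rows of `points` within Hamming distance `eps` of `location`."""
    # Count, per point, how many features differ from the region location.
    distances = tf.reduce_sum(tf.where(points != location, 1, 0), axis=-1)
    return tf.boolean_mask(points, distances <= eps)


points = tf.constant([[0.0, 1.0], [0.0, 2.0], [3.0, 2.0]], dtype=tf.float64)
location = tf.constant([0.0, 1.0], dtype=tf.float64)
# With eps = 1, only points differing from `location` in at most one feature remain:
# [[0., 1.], [0., 2.]]
print(hamming_neighbors(points, location, eps=1))
```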

