From 2c725f9c96a737676a2942fa905729ce8880567c Mon Sep 17 00:00:00 2001 From: uri-granta <50578464+uri-granta@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:22:19 +0100 Subject: [PATCH] Categorical trust regions (#865) --- .../test_mixed_space_bayesian_optimization.py | 105 ++++++++++++------ tests/unit/acquisition/test_rule.py | 101 +++++++++++++---- trieste/acquisition/rule.py | 50 ++++++--- trieste/models/gpflow/sampler.py | 9 ++ trieste/models/interfaces.py | 7 ++ 5 files changed, 197 insertions(+), 75 deletions(-) diff --git a/tests/integration/test_mixed_space_bayesian_optimization.py b/tests/integration/test_mixed_space_bayesian_optimization.py index 84cbe3bf2..ce80c030c 100644 --- a/tests/integration/test_mixed_space_bayesian_optimization.py +++ b/tests/integration/test_mixed_space_bayesian_optimization.py @@ -13,6 +13,7 @@ # limitations under the License. from __future__ import annotations +import dataclasses from typing import cast import numpy as np @@ -47,6 +48,7 @@ Box, CategoricalSearchSpace, DiscreteSearchSpace, + EncoderFunction, TaggedProductSearchSpace, one_hot_encoder, ) @@ -167,15 +169,32 @@ def test_optimizer_finds_minima_of_the_scaled_branin_function( TensorType, TaggedProductSearchSpace, TrainableProbabilisticModel ], ) -> None: - initial_query_points = mixed_search_space.sample(5) - observer = mk_observer(ScaledBranin.objective) + mixed_branin = cast(SingleObjectiveTestProblem[TaggedProductSearchSpace], ScaledBranin) + _test_optimizer_finds_problem_minima( + dataclasses.replace(mixed_branin, search_space=mixed_search_space), + num_steps, + acquisition_rule, + ) + + +def _test_optimizer_finds_problem_minima( + problem: SingleObjectiveTestProblem[TaggedProductSearchSpace], + num_steps: int, + acquisition_rule: AcquisitionRule[ + TensorType, TaggedProductSearchSpace, TrainableProbabilisticModel + ], + encoder: EncoderFunction | None = None, +) -> None: + initial_query_points = problem.search_space.sample(5) + observer = mk_observer(problem.objective) initial_data = observer(initial_query_points) model = GaussianProcessRegression( - build_gpr(initial_data, mixed_search_space, likelihood_variance=1e-8) + build_gpr(initial_data, problem.search_space, likelihood_variance=1e-8), + encoder=encoder, ) dataset = ( - BayesianOptimizer(observer, mixed_search_space) + BayesianOptimizer(observer, problem.search_space) .optimize(num_steps, initial_data, model, acquisition_rule) .try_get_final_dataset() ) @@ -185,7 +204,7 @@ def test_optimizer_finds_minima_of_the_scaled_branin_function( best_y = dataset.observations[arg_min_idx] best_x = dataset.query_points[arg_min_idx] - relative_minimizer_err = tf.abs((best_x - ScaledBranin.minimizers) / ScaledBranin.minimizers) + relative_minimizer_err = tf.abs((best_x - problem.minimizers) / problem.minimizers) # these accuracies are the current best for the given number of optimization steps, which makes # this is a regression test assert tf.reduce_any(tf.reduce_all(relative_minimizer_err < 0.1, axis=-1), axis=0) @@ -210,7 +229,7 @@ def categorical_scaled_branin( continuous_space = Box([0], [1]) search_space = TaggedProductSearchSpace( spaces=[categorical_space, continuous_space], - tags=["discrete", "continuous"], + tags=["categorical", "continuous"], ) def objective(x: TensorType) -> TensorType: @@ -234,11 +253,50 @@ def objective(x: TensorType) -> TensorType: ) +def _get_categorical_problem() -> SingleObjectiveTestProblem[TaggedProductSearchSpace]: + # a categorical scaled branin problem with 6 categories mapping to 3 random points + # plus the 3 minimizer points (to guarantee that the minimum is present) + points = tf.concat( + [tf.random.uniform([3], dtype=tf.float64), ScaledBranin.minimizers[..., 0]], 0 + ) + return categorical_scaled_branin(tf.random.shuffle(points)) + + +cat_problem = _get_categorical_problem() + + @random_seed @pytest.mark.parametrize( "num_steps, acquisition_rule", [ pytest.param(25, EfficientGlobalOptimization(), id="EfficientGlobalOptimization"), + pytest.param( + 8, + BatchTrustRegionProduct( + [ + UpdatableTrustRegionProduct( + [ + SingleObjectiveTrustRegionDiscrete( + cast( + CategoricalSearchSpace, + cat_problem.search_space.get_subspace("categorical"), + ) + ), + SingleObjectiveTrustRegionBox( + cast(Box, cat_problem.search_space.get_subspace("continuous")) + ), + ], + tags=cat_problem.search_space.subspace_tags, + ) + for _ in range(3) + ], + EfficientGlobalOptimization( + ParallelContinuousThompsonSampling(), + num_query_points=3, + ), + ), + id="TrustRegionSingleObjective", + ), ], ) def test_optimizer_finds_minima_of_the_categorical_scaled_branin_function( @@ -247,35 +305,10 @@ def test_optimizer_finds_minima_of_the_categorical_scaled_branin_function( TensorType, TaggedProductSearchSpace, TrainableProbabilisticModel ], ) -> None: - # 6 categories mapping to 3 random points plus the 3 minimizer points - points = tf.concat( - [tf.random.uniform([3], dtype=tf.float64), ScaledBranin.minimizers[..., 0]], 0 - ) - problem = categorical_scaled_branin(tf.random.shuffle(points)) - initial_query_points = problem.search_space.sample(5) - observer = mk_observer(problem.objective) - initial_data = observer(initial_query_points) - # model uses one-hot encoding for the categorical inputs - encoder = one_hot_encoder(problem.search_space) - model = GaussianProcessRegression( - build_gpr(initial_data, problem.search_space, likelihood_variance=1e-8), - encoder=encoder, + _test_optimizer_finds_problem_minima( + cat_problem, + num_steps, + acquisition_rule, + encoder=one_hot_encoder(cat_problem.search_space), ) - - dataset = ( - BayesianOptimizer(observer, problem.search_space) - .optimize(num_steps, initial_data, model, acquisition_rule) - .try_get_final_dataset() - ) - - arg_min_idx = tf.squeeze(tf.argmin(dataset.observations, axis=0)) - - best_y = dataset.observations[arg_min_idx] - best_x = dataset.query_points[arg_min_idx] - - relative_minimizer_err = tf.abs((best_x - problem.minimizers) / problem.minimizers) - assert tf.reduce_any( - tf.reduce_all(relative_minimizer_err < 0.1, axis=-1), axis=0 - ), relative_minimizer_err - npt.assert_allclose(best_y, problem.minimum, rtol=0.005) diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index 57d8d73b8..f07d280f1 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -77,6 +77,7 @@ from trieste.observer import OBJECTIVE from trieste.space import ( Box, + CategoricalSearchSpace, DiscreteSearchSpace, SearchSpace, TaggedMultiSearchSpace, @@ -2057,29 +2058,41 @@ def discrete_search_space() -> DiscreteSearchSpace: return DiscreteSearchSpace(points) +@pytest.fixture +def categorical_search_space() -> CategoricalSearchSpace: + return CategoricalSearchSpace([10, 3]) + + @pytest.fixture def continuous_search_space() -> Box: return Box([0.0], [1.0]) +@pytest.mark.parametrize("space_fixture", ["discrete_search_space", "categorical_search_space"]) @pytest.mark.parametrize("with_initialize", [True, False]) def test_fixed_trust_region_discrete_initialize( - discrete_search_space: DiscreteSearchSpace, with_initialize: bool + space_fixture: str, + with_initialize: bool, + request: Any, ) -> None: """Check that FixedTrustRegionDiscrete inits correctly by picking a single point from the global search space.""" - tr = FixedPointTrustRegionDiscrete(discrete_search_space) + search_space = request.getfixturevalue(space_fixture) + tr = FixedPointTrustRegionDiscrete(search_space) if with_initialize: tr.initialize() assert tr.location.shape == (2,) - assert tr.location in discrete_search_space + assert tr.location in search_space +@pytest.mark.parametrize("space_fixture", ["discrete_search_space", "categorical_search_space"]) def test_fixed_trust_region_discrete_update( - discrete_search_space: DiscreteSearchSpace, + space_fixture: str, + request: Any, ) -> None: """Update call should not change the location of the region.""" - tr = FixedPointTrustRegionDiscrete(discrete_search_space) + search_space = request.getfixturevalue(space_fixture) + tr = FixedPointTrustRegionDiscrete(search_space) tr.initialize() orig_location = tr.location.numpy() assert not tr.requires_initialization @@ -2103,13 +2116,16 @@ def test_trust_region_discrete_get_dataset_min_raises_if_dataset_is_faulty( tr.get_dataset_min(datasets) +@pytest.mark.parametrize("space_fixture", ["discrete_search_space", "categorical_search_space"]) def test_trust_region_discrete_raises_on_location_not_found( - discrete_search_space: DiscreteSearchSpace, + space_fixture: str, + request: Any, ) -> None: """Check that an error is raised if the location is not found in the global search space.""" - tr = SingleObjectiveTrustRegionDiscrete(discrete_search_space) + search_space = request.getfixturevalue(space_fixture) + tr = SingleObjectiveTrustRegionDiscrete(search_space) with pytest.raises(ValueError, match="location .* not found in the global search space"): - tr.location = tf.constant([0.0, 0.0], dtype=tf.float64) + tr.location = tf.constant([0.1, 0.0], dtype=tf.float64) def test_trust_region_discrete_get_dataset_min(discrete_search_space: DiscreteSearchSpace) -> None: @@ -2172,6 +2188,24 @@ def test_trust_region_discrete_initialize( npt.assert_array_equal(tr._y_min, tf.constant([np.inf], dtype=tf.float64)) +def test_trust_region_categorical_initialize( + categorical_search_space: CategoricalSearchSpace, +) -> None: + """Check initialize sets the region to a random location, and sets the eps and y_min values.""" + datasets = { + OBJECTIVE: Dataset( # Points outside the search space should be ignored. + tf.constant([[0, 1, 2, 0], [4, -4, -5, 3]], dtype=tf.float64), + tf.constant([[0.7], [0.9]], dtype=tf.float64), + ) + } + tr = SingleObjectiveTrustRegionDiscrete(categorical_search_space, input_active_dims=[1, 2]) + tr.initialize(datasets=datasets) + + npt.assert_array_equal(tr.eps, 1) + assert tr.location in categorical_search_space + npt.assert_array_equal(tr._y_min, tf.constant([np.inf], dtype=tf.float64)) + + def test_trust_region_discrete_requires_initialization( discrete_search_space: DiscreteSearchSpace, ) -> None: @@ -2223,12 +2257,20 @@ def test_trust_region_discrete_update_no_initialize( @pytest.mark.parametrize("dtype", [tf.float32, tf.float64]) @pytest.mark.parametrize("success", [True, False]) +@pytest.mark.parametrize("space_fixture", ["discrete_search_space", "categorical_search_space"]) def test_trust_region_discrete_update_size( - dtype: tf.DType, success: bool, discrete_search_space: DiscreteSearchSpace + dtype: tf.DType, success: bool, space_fixture: str, request: Any ) -> None: - discrete_search_space = DiscreteSearchSpace( # Convert to the correct dtype. - tf.cast(discrete_search_space.points, dtype=dtype) - ) + search_space = request.getfixturevalue(space_fixture) + categorical = isinstance(search_space, CategoricalSearchSpace) + + # Convert to the correct dtype. + if isinstance(search_space, DiscreteSearchSpace): + search_space = DiscreteSearchSpace(tf.cast(search_space.points, dtype=dtype)) + else: + assert isinstance(search_space, CategoricalSearchSpace) + search_space = CategoricalSearchSpace(search_space.tags, dtype=dtype) + """Check that update shrinks/expands region on successful/unsuccessful step.""" datasets = { OBJECTIVE: Dataset( @@ -2236,7 +2278,7 @@ def test_trust_region_discrete_update_size( tf.constant([[0.5], [0.3], [1.0]], dtype=dtype), ) } - tr = SingleObjectiveTrustRegionDiscrete(discrete_search_space, min_eps=0.1) + tr = SingleObjectiveTrustRegionDiscrete(search_space, min_eps=0.1) tr.initialize(datasets=datasets) # Ensure there is at least one point captured in the region. @@ -2252,11 +2294,17 @@ def test_trust_region_discrete_update_size( eps = tr.eps if success: - # Sample a point from the region. - new_point = tr.sample(1) + # Sample a point from the region. For categorical spaces ensure that + # it's a different point to tr.location (this must exist) + for _ in range(10): + new_point = tr.sample(1) + if not (categorical and tf.reduce_all(new_point[0] == tr.location)): + break + else: + assert False, "TR contains just one point" else: # Pick point outside the region. - new_point = tf.constant([[1, 2]], dtype=dtype) + new_point = tf.constant([[10, 1]], dtype=dtype) # Add a new min point to the dataset. assert not tr.requires_initialization @@ -2269,7 +2317,7 @@ def test_trust_region_discrete_update_size( tr.update(datasets=datasets) assert tr.location.dtype == dtype - assert tr.eps.dtype == dtype + assert tr.eps == 1 if categorical else tr.eps.dtype == dtype assert tr.points.dtype == dtype if success: @@ -2277,20 +2325,25 @@ def test_trust_region_discrete_update_size( new_point = np.squeeze(new_point) npt.assert_array_equal(new_point, tr.location) npt.assert_allclose(new_min, tr._y_min) - # Check that the region is larger by beta. - npt.assert_allclose(eps / tr._beta, tr.eps) + # Check that the region is larger by beta (except for categorical) + npt.assert_allclose(1 if categorical else eps / tr._beta, tr.eps) else: # Check that the location is the old min point. orig_point = np.squeeze(orig_point) npt.assert_array_equal(orig_point, tr.location) npt.assert_allclose(orig_min, tr._y_min) - # Check that the region is smaller by beta. - npt.assert_allclose(eps * tr._beta, tr.eps) + # Check that the region is smaller by beta (except for categorical) + npt.assert_allclose(1 if categorical else eps * tr._beta, tr.eps) # Check the new set of neighbors. - neighbors_mask = tf.abs(discrete_search_space.points - tr.location) <= tr.eps - neighbors_mask = tf.reduce_all(neighbors_mask, axis=-1) - neighbors = tf.boolean_mask(discrete_search_space.points, neighbors_mask) + if categorical: + # Hamming distance + neighbors_mask = tf.where(search_space.points != tr.location, 1, 0) + neighbors_mask = tf.reduce_sum(neighbors_mask, axis=-1) <= tr.eps + else: + neighbors_mask = tf.abs(search_space.points - tr.location) <= tr.eps + neighbors_mask = tf.reduce_all(neighbors_mask, axis=-1) + neighbors = tf.boolean_mask(search_space.points, neighbors_mask) npt.assert_array_equal(tr.points, neighbors) diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 4a5446560..9719e0dd1 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -64,6 +64,8 @@ from ..space import ( Box, DiscreteSearchSpace, + GeneralDiscreteSearchSpace, + HasOneHotEncoder, SearchSpace, TaggedMultiSearchSpace, TaggedProductSearchSpace, @@ -1624,7 +1626,13 @@ def __init__( self._y_min = tf.constant(np.inf, dtype=self.location.dtype) def _init_eps(self) -> None: - self.eps = self._zeta * (self.global_search_space.upper - self.global_search_space.lower) + if not isinstance(self.global_search_space, HasOneHotEncoder): + self.eps = self._zeta * ( + self.global_search_space.upper - self.global_search_space.lower + ) + else: + # categorical space distance is hardcoded to a (Hamming) distance of 1 + self.eps = 1 @abstractmethod def _update_domain(self) -> None: @@ -1691,9 +1699,12 @@ def update( datasets = self.select_in_region(datasets) # See `select_in_region` comment above. x_min, y_min = self.get_dataset_min(datasets) - tr_volume = tf.reduce_prod(self.upper - self.lower) - self._step_is_success = y_min < self._y_min - self._kappa * tr_volume - self.eps = self.eps / self._beta if self._step_is_success else self.eps * self._beta + if isinstance(self.global_search_space, HasOneHotEncoder): + self._step_is_success = y_min < self._y_min + else: + tr_volume = tf.reduce_prod(self.upper - self.lower) + self._step_is_success = y_min < self._y_min - self._kappa * tr_volume + self.eps = self.eps / self._beta if self._step_is_success else self.eps * self._beta # Only update the location if the step was successful. if self._step_is_success: @@ -2225,7 +2236,7 @@ class UpdatableTrustRegionDiscrete(DiscreteSearchSpace, UpdatableTrustRegion): def __init__( self, - global_search_space: DiscreteSearchSpace, + global_search_space: GeneralDiscreteSearchSpace, region_index: Optional[int] = None, input_active_dims: Optional[Union[slice, Sequence[int]]] = None, ): @@ -2256,27 +2267,36 @@ def location(self, location: TensorType) -> None: self._location_ix = tf.squeeze(location_ix, axis=-1) @property - def global_search_space(self) -> DiscreteSearchSpace: + def global_search_space(self) -> GeneralDiscreteSearchSpace: return self._global_search_space def _compute_global_distances(self) -> TensorType: # Helper method to compute and return pairwise distances along each axis in the # global search space. + points = self.global_search_space.points - return tf.abs( - tf.expand_dims(points, -2) - tf.expand_dims(points, -3) - ) # [num_points, num_points, D] + if isinstance(self.global_search_space, HasOneHotEncoder): + # use Hamming distance for categorical spaces + return tf.math.reduce_sum( + tf.where(tf.expand_dims(points, -2) == tf.expand_dims(points, -3), 0, 1), + axis=-1, + keepdims=True, # (keep last dim for reduce_all distance calculation below) + ) # [num_points, num_points, 1] + else: + return tf.abs( + tf.expand_dims(points, -2) - tf.expand_dims(points, -3) + ) # [num_points, num_points, D] def _get_points_within_distance( - self, global_distances: TensorType, eps: TensorType + self, global_distances: TensorType, distance: TensorType ) -> TensorType: - # Helper method to return subset of global points within a given `eps` distance of the + # Helper method to return subset of global points within a given distance of the # region location. Takes the precomputed pairwise distances tensor and the trust region - # size `eps`. + # size `eps` (or a hard-coded value of 1 in the case of categorical spaces). # Indices of the neighbors within the trust region. neighbors_mask = tf.reduce_all( - tf.gather(global_distances, self._location_ix) <= eps, axis=-1 + tf.gather(global_distances, self._location_ix) <= distance, axis=-1 ) neighbors_mask = tf.reshape(neighbors_mask, (-1,)) neighbor_ixs = tf.where(neighbors_mask) @@ -2294,7 +2314,7 @@ class FixedPointTrustRegionDiscrete(UpdatableTrustRegionDiscrete): def __init__( self, - global_search_space: DiscreteSearchSpace, + global_search_space: GeneralDiscreteSearchSpace, region_index: Optional[int] = None, input_active_dims: Optional[Union[slice, Sequence[int]]] = None, ): @@ -2339,7 +2359,7 @@ class SingleObjectiveTrustRegionDiscrete(UpdatableTrustRegionDiscrete, Hypercube def __init__( self, - global_search_space: DiscreteSearchSpace, + global_search_space: GeneralDiscreteSearchSpace, beta: float = 0.7, kappa: float = 1e-4, zeta: float = 0.5, diff --git a/trieste/models/gpflow/sampler.py b/trieste/models/gpflow/sampler.py index 26053a58a..d1aa8f1ce 100644 --- a/trieste/models/gpflow/sampler.py +++ b/trieste/models/gpflow/sampler.py @@ -29,6 +29,7 @@ from gpflux.math import compute_A_inv_b from typing_extensions import Protocol, TypeGuard, runtime_checkable +from ...space import EncoderFunction from ...types import TensorType from ...utils import DEFAULTS, flatten_leading_dims from ..interfaces import ( @@ -43,6 +44,7 @@ TrajectoryFunction, TrajectoryFunctionClass, TrajectorySampler, + get_encoder, ) _IntTensorType = Union[tf.Tensor, int] @@ -397,6 +399,7 @@ def get_trajectory(self) -> TrajectoryFunction: feature_functions=self._feature_functions, weight_sampler=weight_sampler, mean_function=self._mean_function, + encoder=get_encoder(self._model), ) def update_trajectory(self, trajectory: TrajectoryFunction) -> TrajectoryFunction: @@ -873,15 +876,18 @@ def __init__( feature_functions: Callable[[TensorType], TensorType], weight_sampler: Callable[[int], TensorType], mean_function: Callable[[TensorType], TensorType], + encoder: EncoderFunction | None = None, ): """ :param feature_functions: Set of feature function. :param weight_sampler: New sampler that generates feature weight samples. :param mean_function: The underlying model's mean function. + :param encoder: Optional encoder with which to transform input points. """ self._feature_functions = feature_functions self._mean_function = mean_function self._weight_sampler = weight_sampler + self._encoder = encoder self._initialized = tf.Variable(False) self._weights_sample = tf.Variable( # dummy init to be updated before trajectory evaluation @@ -896,6 +902,9 @@ def __init__( def __call__(self, inputs: TensorType) -> TensorType: # [N, B, D] -> [N, B, L] """Call trajectory function.""" + if self._encoder is not None: + inputs = self._encoder(inputs) + if not self._initialized: # work out desired batch size from input self._batch_size.assign(tf.shape(inputs)[-2]) # B self.resample() # sample B feature weights diff --git a/trieste/models/interfaces.py b/trieste/models/interfaces.py index 223408ce1..a1b1bf1f6 100644 --- a/trieste/models/interfaces.py +++ b/trieste/models/interfaces.py @@ -904,3 +904,10 @@ def conditional_predict_y( return self.conditional_predict_y_encoded( self.encode(query_points), self.encode(additional_data) ) + + +def get_encoder(model: ProbabilisticModel) -> EncoderFunction | None: + """Helper function for getting an encoder from model (which may or may not have one).""" + if isinstance(model, EncodedProbabilisticModel): + return model.encoder + return None