Skip to content

Commit

Permalink
Local models and datasets (#788)
Browse files Browse the repository at this point in the history
* Add support for local models and datasets (WIP)

* Add unit test for local models (WIP)

* Update multi model/dataset test (WIP)

* Add unit test for keep datasets in regions

* Add more tests and move to local tags class

* Always include global dataset in mapping

* Add filter_mask method to trust region

* Add more testing

* Fix mypy model type issues

* Add ask_tell testing

* Fix summary init when only global dataset

* Remove walrus operator

* Update test, ask_tell data not changed in-place

* Add some test comments

* Add some rule comments

* Allow input-multi-observers for batch observer

* Allow multiple models/datasets for base rule

* Support multiple models/datasets in region selects

* Fix TR plotting history colors

* Add notebook init points explanation

* Rename region index and add init param

* Remove old comment

* Tidy-up redundant expression

* Keep full datasets along with filtered ones

* Make changes from PR feedback

* Address some of the recent feedback

* Fix dataset mypy error

* Copy dataset in optimizers to avoid changing it

* Share DatasetChecker and tidy-up exp values in tests

* Address more feedback

* Avoid default num_models in integ tests

* Fix old python typing issue

* Use flatten_... func and add comment
  • Loading branch information
khurram-ghani authored Dec 14, 2023
1 parent c537c5b commit f766953
Show file tree
Hide file tree
Showing 15 changed files with 1,198 additions and 165 deletions.
107 changes: 68 additions & 39 deletions tests/integration/test_ask_tell_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,31 @@
import copy
import pickle
import tempfile
from typing import Callable
from typing import Callable, Tuple, Union

import numpy.testing as npt
import pytest
import tensorflow as tf

from tests.util.misc import random_seed
from trieste.acquisition import LocalPenalization
from trieste.acquisition import LocalPenalization, ParallelContinuousThompsonSampling
from trieste.acquisition.rule import (
AcquisitionRule,
AsynchronousGreedy,
AsynchronousRuleState,
BatchTrustRegionBox,
EfficientGlobalOptimization,
SingleObjectiveTrustRegionBox,
TREGOBox,
)
from trieste.acquisition.utils import copy_to_local_models
from trieste.ask_tell_optimization import AskTellOptimizer
from trieste.bayesian_optimizer import OptimizationResult, Record
from trieste.logging import set_step_number, tensorboard_writer
from trieste.models import TrainableProbabilisticModel
from trieste.models.gpflow import GaussianProcessRegression, build_gpr
from trieste.objectives import ScaledBranin, SimpleQuadratic
from trieste.objectives.utils import mk_observer
from trieste.objectives.utils import mk_batch_observer, mk_observer
from trieste.observer import OBJECTIVE
from trieste.space import Box, SearchSpace
from trieste.types import State, TensorType
Expand All @@ -59,14 +61,44 @@
id="EfficientGlobalOptimization/reload_state",
),
pytest.param(
15, False, lambda: BatchTrustRegionBox(TREGOBox(ScaledBranin.search_space)), id="TREGO"
15,
False,
lambda: BatchTrustRegionBox(TREGOBox(ScaledBranin.search_space)),
id="TREGO",
),
pytest.param(
16,
True,
lambda: BatchTrustRegionBox(TREGOBox(ScaledBranin.search_space)),
id="TREGO/reload_state",
),
pytest.param(
10,
False,
lambda: BatchTrustRegionBox(
[SingleObjectiveTrustRegionBox(ScaledBranin.search_space) for _ in range(3)],
EfficientGlobalOptimization(
ParallelContinuousThompsonSampling(),
num_query_points=3,
),
),
id="BatchTrustRegionBox",
),
pytest.param(
10,
False,
(
lambda: BatchTrustRegionBox(
[SingleObjectiveTrustRegionBox(ScaledBranin.search_space) for _ in range(3)],
EfficientGlobalOptimization(
ParallelContinuousThompsonSampling(),
num_query_points=2,
),
),
3,
),
id="BatchTrustRegionBox/LocalModels",
),
pytest.param(
10,
False,
Expand All @@ -92,23 +124,26 @@
)


@random_seed
@pytest.mark.slow # to run this, add --runslow yes to the pytest command
@pytest.mark.parametrize(*OPTIMIZER_PARAMS)
def test_ask_tell_optimizer_finds_minima_of_the_scaled_branin_function(
num_steps: int,
reload_state: bool,
acquisition_rule_fn: Callable[
[], AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModel]
]
| Callable[
AcquisitionRuleFunction = Union[
Callable[[], AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModel]],
Callable[
[],
AcquisitionRule[
State[TensorType, AsynchronousRuleState | BatchTrustRegionBox.State],
State[TensorType, Union[AsynchronousRuleState, BatchTrustRegionBox.State]],
Box,
TrainableProbabilisticModel,
],
],
]


@random_seed
@pytest.mark.slow # to run this, add --runslow yes to the pytest command
@pytest.mark.parametrize(*OPTIMIZER_PARAMS)
def test_ask_tell_optimizer_finds_minima_of_the_scaled_branin_function(
num_steps: int,
reload_state: bool,
acquisition_rule_fn: AcquisitionRuleFunction | Tuple[AcquisitionRuleFunction, int],
) -> None:
_test_ask_tell_optimization_finds_minima(True, num_steps, reload_state, acquisition_rule_fn)

Expand All @@ -118,17 +153,7 @@ def test_ask_tell_optimizer_finds_minima_of_the_scaled_branin_function(
def test_ask_tell_optimizer_finds_minima_of_simple_quadratic(
num_steps: int,
reload_state: bool,
acquisition_rule_fn: Callable[
[], AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModel]
]
| Callable[
[],
AcquisitionRule[
State[TensorType, AsynchronousRuleState | BatchTrustRegionBox.State],
Box,
TrainableProbabilisticModel,
],
],
acquisition_rule_fn: AcquisitionRuleFunction | Tuple[AcquisitionRuleFunction, int],
) -> None:
# for speed reasons we sometimes test with a simple quadratic defined on the same search space
# as branin; currently assume that every rule should be able to solve this in 5 steps
Expand All @@ -141,17 +166,7 @@ def _test_ask_tell_optimization_finds_minima(
optimize_branin: bool,
num_steps: int,
reload_state: bool,
acquisition_rule_fn: Callable[
[], AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModel]
]
| Callable[
[],
AcquisitionRule[
State[TensorType, AsynchronousRuleState | BatchTrustRegionBox.State],
Box,
TrainableProbabilisticModel,
],
],
acquisition_rule_fn: AcquisitionRuleFunction | Tuple[AcquisitionRuleFunction, int],
) -> None:
# For the case when optimization state is saved and reloaded on each iteration
# we need to use a new acquisition function object to imitate real-life usage
Expand All @@ -160,17 +175,27 @@ def _test_ask_tell_optimization_finds_minima(
search_space = ScaledBranin.search_space
initial_query_points = search_space.sample(5)
observer = mk_observer(ScaledBranin.objective if optimize_branin else SimpleQuadratic.objective)
batch_observer = mk_batch_observer(observer)
initial_data = observer(initial_query_points)

if isinstance(acquisition_rule_fn, tuple):
acquisition_rule_fn, num_models = acquisition_rule_fn
else:
num_models = 1

model = GaussianProcessRegression(
build_gpr(initial_data, search_space, likelihood_variance=1e-7)
)
models = copy_to_local_models(model, num_models) if num_models > 1 else {OBJECTIVE: model}
initial_dataset = {OBJECTIVE: initial_data}

with tempfile.TemporaryDirectory() as tmpdirname:
summary_writer = tf.summary.create_file_writer(tmpdirname)
with tensorboard_writer(summary_writer):
set_step_number(0)
ask_tell = AskTellOptimizer(search_space, initial_data, model, acquisition_rule_fn())
ask_tell = AskTellOptimizer(
search_space, initial_dataset, models, acquisition_rule_fn()
)

for i in range(1, num_steps + 1):
# two scenarios are tested here, depending on `reload_state` parameter
Expand All @@ -185,7 +210,11 @@ def _test_ask_tell_optimization_finds_minima(
] = ask_tell.to_record()
written_state = pickle.dumps(state)

new_data_point = observer(new_point)
# If query points are rank 3, then use a batched observer.
if tf.rank(new_point) == 3:
new_data_point = batch_observer(new_point)
else:
new_data_point = observer(new_point)

if reload_state:
state = pickle.loads(written_state)
Expand Down
74 changes: 51 additions & 23 deletions tests/integration/test_bayesian_optimization.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2021 The Trieste Contributors
# Copyright 2021 The Trieste Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,7 +16,7 @@
import tempfile
from functools import partial
from pathlib import Path
from typing import Any, List, Mapping, Optional, Tuple, Type, cast
from typing import Any, List, Mapping, Optional, Tuple, Type, Union, cast

import dill
import gpflow
Expand Down Expand Up @@ -58,6 +58,7 @@
TREGOBox,
)
from trieste.acquisition.sampler import ThompsonSamplerFromTrajectory
from trieste.acquisition.utils import copy_to_local_models
from trieste.bayesian_optimizer import (
BayesianOptimizer,
FrozenRecord,
Expand Down Expand Up @@ -227,6 +228,23 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]:
),
id="BatchTrustRegionBox",
),
pytest.param(
10,
(
BatchTrustRegionBox(
[
SingleObjectiveTrustRegionBox(ScaledBranin.search_space)
for _ in range(3)
],
EfficientGlobalOptimization(
ParallelContinuousThompsonSampling(),
num_query_points=2,
),
),
3,
),
id="BatchTrustRegionBox/LocalModels",
),
pytest.param(15, DiscreteThompsonSampling(500, 5), id="DiscreteThompsonSampling"),
pytest.param(
15,
Expand Down Expand Up @@ -262,33 +280,38 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]:
)


# Type alias for the acquisition rules accepted by the optimizer tests in this module.
# It is a union of two `AcquisitionRule` forms:
#   * a rule producing `TensorType` over any `SearchSpace`;
#   * a rule producing a `State` over `TensorType` whose state is either
#     `AsynchronousRuleState` or `BatchTrustRegion.State`, defined on a `Box`.
# The alias is subscripted with a concrete model type where it is used (e.g.
# `AcquisitionRuleType[GaussianProcessRegression]`), so `TrainableProbabilisticModelType`
# is presumably a TypeVar defined elsewhere -- confirm against its definition.
AcquisitionRuleType = Union[
AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModelType],
AcquisitionRule[
State[TensorType, Union[AsynchronousRuleState, BatchTrustRegion.State]],
Box,
TrainableProbabilisticModelType,
],
]


@random_seed
@pytest.mark.slow # to run this, add --runslow yes to the pytest command
@pytest.mark.parametrize(*GPR_OPTIMIZER_PARAMS())
def test_bayesian_optimizer_with_gpr_finds_minima_of_scaled_branin(
num_steps: int,
acquisition_rule: AcquisitionRule[TensorType, SearchSpace, GaussianProcessRegression]
| AcquisitionRule[
State[TensorType, AsynchronousRuleState | BatchTrustRegion.State],
Box,
GaussianProcessRegression,
],
acquisition_rule: AcquisitionRuleType[GaussianProcessRegression]
| Tuple[AcquisitionRuleType[GaussianProcessRegression], int],
) -> None:
_test_optimizer_finds_minimum(
GaussianProcessRegression, num_steps, acquisition_rule, optimize_branin=True
GaussianProcessRegression,
num_steps,
acquisition_rule,
optimize_branin=True,
)


@random_seed
@pytest.mark.parametrize(*GPR_OPTIMIZER_PARAMS())
def test_bayesian_optimizer_with_gpr_finds_minima_of_simple_quadratic(
num_steps: int,
acquisition_rule: AcquisitionRule[TensorType, SearchSpace, GaussianProcessRegression]
| AcquisitionRule[
State[TensorType, AsynchronousRuleState | BatchTrustRegion.State],
Box,
GaussianProcessRegression,
],
acquisition_rule: AcquisitionRuleType[GaussianProcessRegression]
| Tuple[AcquisitionRuleType[GaussianProcessRegression], int],
) -> None:
# for speed reasons we sometimes test with a simple quadratic defined on the same search space
# as branin; currently assume that every rule should be able to solve this in 6 steps
Expand Down Expand Up @@ -556,12 +579,8 @@ def test_bayesian_optimizer_with_PCTS_and_deep_ensemble_finds_minima_of_simple_q
def _test_optimizer_finds_minimum(
model_type: Type[TrainableProbabilisticModelType],
num_steps: Optional[int],
acquisition_rule: AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModelType]
| AcquisitionRule[
State[TensorType, AsynchronousRuleState | BatchTrustRegion.State],
Box,
TrainableProbabilisticModelType,
],
acquisition_rule: AcquisitionRuleType[TrainableProbabilisticModelType]
| Tuple[AcquisitionRuleType[TrainableProbabilisticModelType], int],
optimize_branin: bool = False,
model_args: Optional[Mapping[str, Any]] = None,
check_regret: bool = False,
Expand Down Expand Up @@ -590,6 +609,11 @@ def _test_optimizer_finds_minimum(
observer = mk_observer(ScaledBranin.objective if optimize_branin else SimpleQuadratic.objective)
initial_data = observer(initial_query_points)

if isinstance(acquisition_rule, tuple):
acquisition_rule, num_models = acquisition_rule
else:
num_models = 1

model: TrainableProbabilisticModel # (really TPMType, but that's too complicated for mypy)

if model_type is GaussianProcessRegression:
Expand Down Expand Up @@ -647,13 +671,17 @@ def _test_optimizer_finds_minimum(
else:
raise ValueError(f"Unsupported model_type '{model_type}'")

model = cast(TrainableProbabilisticModelType, model)
models = copy_to_local_models(model, num_models) if num_models > 1 else {OBJECTIVE: model}
dataset = {OBJECTIVE: initial_data}

with tempfile.TemporaryDirectory() as tmpdirname:
summary_writer = tf.summary.create_file_writer(tmpdirname)
with tensorboard_writer(summary_writer):
result = BayesianOptimizer(observer, search_space).optimize(
num_steps or 2,
initial_data,
cast(TrainableProbabilisticModelType, model),
dataset,
models,
acquisition_rule,
track_state=True,
track_path=Path(tmpdirname) / "history",
Expand Down
Loading

0 comments on commit f766953

Please sign in to comment.