
Commit b66e134

Rewrite for named tuples (#852)
## Title

Refactor mlos_core APIs to encapsulate related data fields.

## Description

Refactors the mlos_core Optimizer APIs to accept the new data types `Observation` and `Observations`, and to return `Suggestion`, instead of a mess of `Tuple[DataFrame, DataFrame, Optional[DataFrame], Optional[DataFrame]]` that must be named and checked everywhere. Additionally, this makes it more explicit that `_register` is a bulk operation that is not actually supported by the underlying optimizers yet, though it leaves notes on how we can do that in the future.

## Type of Change

- Refactor

## Testing

Usual CI plus some new unit tests for the new data type operations.

## Additional Notes

A more significant rewrite of named tuple support inside mlos_core, based on comments in #811 as well as conversations with @bpkroth.

---------

Co-authored-by: Brian Kroth <[email protected]>
Co-authored-by: Brian Kroth <[email protected]>
1 parent: 644a718

21 files changed: +1393 −677 lines
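For orientation before the per-file diffs: a minimal sketch of the optimization loop under the new API. The `Suggestion`/`Observations` calls match the diffs below; the search space and `OptimizerFactory.create` arguments are illustrative assumptions, not taken from this commit.

```python
import ConfigSpace
import pandas as pd

from mlos_core.optimizers import OptimizerFactory, OptimizerType

# A one-parameter search space (illustrative).
space = ConfigSpace.ConfigurationSpace(seed=1234)
space.add_hyperparameter(ConfigSpace.UniformIntegerHyperparameter("x", lower=0, upper=10))

# The factory arguments here are assumptions based on the docstring context below.
opt = OptimizerFactory.create(
    parameter_space=space,
    optimization_targets=["y"],
    optimizer_type=OptimizerType.RANDOM,
)

# New style: suggest() returns one Suggestion object instead of a
# (config DataFrame, metadata DataFrame) pair; .config is a pandas Series.
suggestion = opt.suggest()
score = float(suggestion.config["x"]) ** 2  # a made-up objective to minimize

# complete() attaches the scores to the suggestion, producing an Observation
# that register() accepts directly (as in the updated docstring below).
opt.register(suggestion.complete(pd.Series({"y": score})))

# get_best_observations() now returns a single Observations object with
# .configs and .scores DataFrames, instead of a 3-tuple of DataFrames.
best = opt.get_best_observations()
print(best.configs)
print(best.scores)
```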

Diff for: .cspell.json (+1)

```diff
@@ -21,6 +21,7 @@
     "discretization",
    "discretize",
    "drivername",
+   "dropna",
    "dstpath",
    "dtype",
    "duckdb",
```

Diff for: mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py (+13 −10)

```diff
@@ -21,6 +21,7 @@
 from mlos_bench.services.base_service import Service
 from mlos_bench.tunables.tunable import TunableValue
 from mlos_bench.tunables.tunable_groups import TunableGroups
+from mlos_core.data_classes import Observations
 from mlos_core.optimizers import (
     DEFAULT_OPTIMIZER_TYPE,
     BaseOptimizer,
@@ -128,7 +129,7 @@ def bulk_register(

         # TODO: Specify (in the config) which metrics to pass to the optimizer.
         # Issue: https://github.com/microsoft/MLOS/issues/745
-        self._opt.register(configs=df_configs, scores=df_scores)
+        self._opt.register(observations=Observations(configs=df_configs, scores=df_scores))

         if _LOG.isEnabledFor(logging.DEBUG):
             (score, _) = self.get_best_observation()
@@ -198,10 +199,10 @@ def suggest(self) -> TunableGroups:
         tunables = super().suggest()
         if self._start_with_defaults:
             _LOG.info("Use default values for the first trial")
-        df_config, _metadata = self._opt.suggest(defaults=self._start_with_defaults)
+        suggestion = self._opt.suggest(defaults=self._start_with_defaults)
         self._start_with_defaults = False
-        _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
-        return tunables.assign(configspace_data_to_tunable_values(df_config.loc[0].to_dict()))
+        _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, suggestion.config)
+        return tunables.assign(configspace_data_to_tunable_values(suggestion.config.to_dict()))

     def register(
         self,
@@ -221,18 +222,20 @@ def register(
         # TODO: Specify (in the config) which metrics to pass to the optimizer.
         # Issue: https://github.com/microsoft/MLOS/issues/745
         self._opt.register(
-            configs=df_config,
-            scores=pd.DataFrame([registered_score], dtype=float),
+            observations=Observations(
+                configs=df_config,
+                scores=pd.DataFrame([registered_score], dtype=float),
+            )
         )
         return registered_score

     def get_best_observation(
         self,
     ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
-        (df_config, df_score, _df_context) = self._opt.get_best_observations()
-        if len(df_config) == 0:
+        best_observations = self._opt.get_best_observations()
+        if len(best_observations) == 0:
             return (None, None)
-        params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
-        scores = self._adjust_signs_df(df_score).iloc[0].to_dict()
+        params = configspace_data_to_tunable_values(best_observations.configs.iloc[0].to_dict())
+        scores = self._adjust_signs_df(best_observations.scores).iloc[0].to_dict()
         _LOG.debug("Best observation: %s score: %s", params, scores)
         return (scores, self._tunables.copy().assign(params))
```
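The same conventions in isolation: a hedged sketch of the calling pattern this file now follows, where `opt` stands for any `mlos_core` `BaseOptimizer` and the DataFrame shapes mirror the diff above.

```python
import pandas as pd

from mlos_core.data_classes import Observations


def bulk_register(opt, df_configs: pd.DataFrame, df_scores: pd.DataFrame) -> None:
    """Register many (config, score) rows as one Observations bundle,
    rather than as parallel keyword DataFrames."""
    opt.register(observations=Observations(configs=df_configs, scores=df_scores))


def best_params_and_scores(opt):
    """Unpack the Observations object that get_best_observations() now returns."""
    best = opt.get_best_observations()
    if len(best) == 0:  # Observations supports len() directly
        return (None, None)
    # .configs and .scores are DataFrames with one row per observation.
    return (best.configs.iloc[0].to_dict(), best.scores.iloc[0].to_dict())
```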

Diff for: mlos_bench/mlos_bench/tests/optimizers/toy_optimization_loop_test.py (+7 −3)

```diff
@@ -16,8 +16,9 @@
 from mlos_bench.optimizers.mlos_core_optimizer import MlosCoreOptimizer
 from mlos_bench.optimizers.mock_optimizer import MockOptimizer
 from mlos_bench.tunables.tunable_groups import TunableGroups
+from mlos_core.data_classes import Suggestion
 from mlos_core.optimizers.bayesian_optimizers.smac_optimizer import SmacOptimizer
-from mlos_core.util import config_to_dataframe
+from mlos_core.util import config_to_series

 # For debugging purposes output some warnings which are captured with failed tests.
 DEBUG = True
@@ -40,10 +41,13 @@ def _optimize(env: Environment, opt: Optimizer) -> Tuple[float, TunableGroups]:
     # pylint: disable=protected-access
     if isinstance(opt, MlosCoreOptimizer) and isinstance(opt._opt, SmacOptimizer):
         config = tunable_values_to_configuration(tunables)
-        config_df = config_to_dataframe(config)
+        config_series = config_to_series(config)
         logger("config: %s", str(config))
         try:
-            logger("prediction: %s", opt._opt.surrogate_predict(configs=config_df))
+            logger(
+                "prediction: %s",
+                opt._opt.surrogate_predict(suggestion=Suggestion(config=config_series)),
+            )
         except RuntimeError:
             pass
```
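Standalone, the updated test idiom looks roughly like this. The `SmacOptimizer` construction is an assumption for illustration; only the `config_to_series` and `surrogate_predict` calls come from the diff.

```python
import ConfigSpace

from mlos_core.data_classes import Suggestion
from mlos_core.optimizers.bayesian_optimizers.smac_optimizer import SmacOptimizer
from mlos_core.util import config_to_series

space = ConfigSpace.ConfigurationSpace(seed=1234)
space.add_hyperparameter(ConfigSpace.UniformFloatHyperparameter("x", lower=0.0, upper=1.0))

# Constructor arguments are an assumption; defaults may suffice for a sketch.
smac_opt = SmacOptimizer(parameter_space=space, optimization_targets=["y"])

# config_to_series() replaces config_to_dataframe(): one Configuration -> one pandas Series.
config_series = config_to_series(space.sample_configuration())

try:
    # surrogate_predict() now takes a Suggestion instead of a configs DataFrame.
    print("prediction:", smac_opt.surrogate_predict(suggestion=Suggestion(config=config_series)))
except RuntimeError:
    pass  # as in the test: the surrogate may not be trained yet early in a run
```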

Diff for: mlos_core/mlos_core/__init__.py (+20 −21)

```diff
@@ -62,39 +62,38 @@
 ...     space_adapter_kwargs=space_adpaters_kwargs,
 ... )
 >>> # Get a new configuration suggestion.
->>> (config_df, _metadata_df) = opt.suggest()
+>>> suggestion = opt.suggest()
 >>> # Examine the suggested configuration.
->>> assert len(config_df) == 1
->>> config_df.iloc[0]
+>>> assert len(suggestion.config) == 1
+>>> suggestion.config
 x    3
-Name: 0, dtype: int64
+dtype: object
 >>> # Register the configuration and its corresponding target value
 >>> score = 42  # a made up score
->>> scores_df = pandas.DataFrame({"y": [score]})
->>> opt.register(configs=config_df, scores=scores_df)
+>>> scores_sr = pandas.Series({"y": score})
+>>> opt.register(suggestion.complete(scores_sr))
 >>> # Get a new configuration suggestion.
->>> (config_df, _metadata_df) = opt.suggest()
->>> config_df.iloc[0]
+>>> suggestion = opt.suggest()
+>>> suggestion.config
 x    10
-Name: 0, dtype: int64
+dtype: object
 >>> score = 7  # a better made up score
 >>> # Optimizers minimize by convention, so a lower score is better
 >>> # You can use a negative score to maximize values instead
 >>> #
->>> # Convert it to a DataFrame again
->>> scores_df = pandas.DataFrame({"y": [score]})
->>> opt.register(configs=config_df, scores=scores_df)
+>>> # Convert it to a Series again
+>>> scores_sr = pandas.Series({"y": score})
+>>> opt.register(suggestion.complete(scores_sr))
 >>> # Get the best observations.
->>> (configs_df, scores_df, _contexts_df) = opt.get_best_observations()
+>>> observations = opt.get_best_observations()
 >>> # The default is to only return one
->>> assert len(configs_df) == 1
->>> assert len(scores_df) == 1
->>> configs_df.iloc[0]
-x    10
-Name: 1, dtype: int64
->>> scores_df.iloc[0]
-y    7
-Name: 1, dtype: int64
+>>> assert len(observations) == 1
+>>> observations.configs
+    x
+0  10
+>>> observations.scores
+   y
+0  7

 Notes
 -----
```
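One subtle change the doctest above surfaces: a single suggestion's config is now a pandas `Series` (shown with `dtype: object`, presumably so heterogeneous parameter types can share one container), while bulk `Observations` keep one-row-per-observation `DataFrame`s. A pandas-only illustration of the two shapes:

```python
import pandas as pd

# One suggestion's config: a Series keyed by parameter name. Mixed parameter
# types force object dtype, matching the `dtype: object` repr in the doctest.
# (The "kernel" parameter is invented purely for this illustration.)
config = pd.Series({"x": 3, "kernel": "rbf"})
print(config.dtype)      # object
print(config.to_dict())  # {'x': 3, 'kernel': 'rbf'}

# Bulk observations: DataFrames with one row per observation, aligned by index.
configs = pd.DataFrame({"x": [10]})
scores = pd.DataFrame({"y": [7]})
print(len(configs), len(scores))  # 1 1
```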
