diff --git a/CHANGELOG.md b/CHANGELOG.md index f6768b6898..72bdf29228 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +# 2.2.0 + +## Features +- Add example to specify total budget (fidelity units) instead of n_trials for multi-fidelity/Hyperband (#1121) + +## Dependencies +- Update numpy NaN (#1122) and restrict numpy version +- Upgrade to ConfigSpace 1.x.x (#1124) + # 2.1.0 ## Improvements diff --git a/CITATION.cff b/CITATION.cff index ece8325c64..2279146d72 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -9,7 +9,7 @@ date-released: "2016-08-17" url: "https://automl.github.io/SMAC3/master/index.html" repository-code: "https://github.com/automl/SMAC3" -version: "2.1.0" +version: "2.2.0" type: "software" keywords: diff --git a/Makefile b/Makefile index 195773082e..145d1bb14c 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ SHELL := /bin/bash NAME := SMAC3 PACKAGE_NAME := smac -VERSION := 2.1.0 +VERSION := 2.2.0 DIR := "${CURDIR}" SOURCE_DIR := ${PACKAGE_NAME} diff --git a/benchmark/src/benchmark.py b/benchmark/src/benchmark.py index 55cabd6d4e..9ae74a5fba 100644 --- a/benchmark/src/benchmark.py +++ b/benchmark/src/benchmark.py @@ -17,6 +17,8 @@ from collections import defaultdict from pathlib import Path +from smac.utils.numpyencoder import NumpyEncoder + import pandas as pd from src.tasks import TASKS # noqa: E402 from src.utils.exceptions import NotSupportedError # noqa: E402 @@ -79,7 +81,7 @@ def _save_data(self) -> None: """Saves the internal data to the file.""" print("Saving data...") with open(str(RAW_FILENAME), "w") as f: - json.dump(self._data, f, indent=4) + json.dump(self._data, f, indent=4, cls=NumpyEncoder) def _fill_keys(self) -> None: """Fill data with keys based on computer name, tasks, and selected version.""" diff --git a/benchmark/src/models/ac_branin.py b/benchmark/src/models/ac_branin.py index 12287520fb..e55c2862a8 100644 --- a/benchmark/src/models/ac_branin.py +++ b/benchmark/src/models/ac_branin.py @@ -20,7 +20,7 @@ def configspace(self) -> ConfigurationSpace: x2 = Float("x2", (0, 15), default=7.5) # Add hyperparameters and conditions to our configspace - cs.add_hyperparameters([x2]) + cs.add([x2]) return cs diff --git a/benchmark/src/models/branin.py b/benchmark/src/models/branin.py index 1fd20554fb..0f86d82ac6 100644 --- a/benchmark/src/models/branin.py +++ b/benchmark/src/models/branin.py @@ -20,7 +20,7 @@ def configspace(self) -> ConfigurationSpace: x2 = Float("x2", (0, 15), default=0) # Add hyperparameters and conditions to our configspace - cs.add_hyperparameters([x1, x2]) + cs.add([x1, x2]) return cs diff --git a/benchmark/src/models/himmelblau.py b/benchmark/src/models/himmelblau.py index cab99019a0..c12029e1ad 100644 --- a/benchmark/src/models/himmelblau.py +++ b/benchmark/src/models/himmelblau.py @@ -19,7 +19,7 @@ def configspace(self) -> ConfigurationSpace: y = Float("y", (-5, 5)) # Add hyperparameters and conditions to our configspace - cs.add_hyperparameters([x, y]) + cs.add([x, y]) return cs diff --git a/benchmark/src/models/mlp.py b/benchmark/src/models/mlp.py index 4867a2f42b..7329de0dfd 100644 --- a/benchmark/src/models/mlp.py +++ b/benchmark/src/models/mlp.py @@ -33,7 +33,7 @@ def configspace(self) -> ConfigurationSpace: learning_rate_init = Float("learning_rate_init", (0.0001, 1.0), default=0.001, log=True) # Add all hyperparameters at once: - cs.add_hyperparameters([n_layer, n_neurons, activation, solver, batch_size, learning_rate, learning_rate_init]) + cs.add([n_layer, n_neurons, activation, solver, batch_size, learning_rate, 
learning_rate_init]) # Adding conditions to restrict the hyperparameter space... # ... since learning rate is used when solver is 'sgd'. @@ -44,7 +44,7 @@ def configspace(self) -> ConfigurationSpace: use_batch_size = InCondition(child=batch_size, parent=solver, values=["sgd", "adam"]) # We can also add multiple conditions on hyperparameters at once: - cs.add_conditions([use_lr, use_batch_size, use_lr_init]) + cs.add([use_lr, use_batch_size, use_lr_init]) return cs diff --git a/benchmark/src/models/svm.py b/benchmark/src/models/svm.py index 88159ab709..3ed294ad0f 100644 --- a/benchmark/src/models/svm.py +++ b/benchmark/src/models/svm.py @@ -34,8 +34,8 @@ def configspace(self) -> ConfigurationSpace: use_gamma_value = InCondition(child=gamma_value, parent=gamma, values=["value"]) # Add hyperparameters and conditions to our configspace - cs.add_hyperparameters([kernel, C, shrinking, degree, coef, gamma, gamma_value]) - cs.add_conditions([use_degree, use_coef, use_gamma, use_gamma_value]) + cs.add([kernel, C, shrinking, degree, coef, gamma, gamma_value]) + cs.add([use_degree, use_coef, use_gamma, use_gamma_value]) return cs diff --git a/docs/3_getting_started.rst b/docs/3_getting_started.rst index dc568184a7..dbc2873681 100644 --- a/docs/3_getting_started.rst +++ b/docs/3_getting_started.rst @@ -27,7 +27,7 @@ ranges and default values. "species": ["mouse", "cat", "dog"], # Categorical }) -Please see the documentation of `ConfigSpace `_ for more details. +Please see the documentation of `ConfigSpace `_ for more details. Target Function diff --git a/docs/advanced_usage/9_parallelism.rst b/docs/advanced_usage/9_parallelism.rst index 67e555215b..9912c782f5 100644 --- a/docs/advanced_usage/9_parallelism.rst +++ b/docs/advanced_usage/9_parallelism.rst @@ -21,39 +21,6 @@ SMAC supports multiple workers natively via Dask. Just specify ``n_workers`` in When using multiple workers, SMAC is not reproducible anymore. -.. warning :: - - You cannot use resource limitation (pynisher, via the `scenario` arguments `trail_walltime_limit` and `trial_memory_limit`). - This is because pynisher works by running your function inside of a subprocess. - Once in the subprocess, the resources will be limited for that process before running your function. - This does not work together with pickling - which is required by dask to schedule jobs on the cluster, even on a local one. - - -.. warning :: - - Start/run SMAC inside ``if __name__ == "__main__"`` in your script otherwise Dask is not able to correctly - spawn jobs and probably this runtime error will be raised: - - .. code-block :: - - RuntimeError: - An attempt has been made to start a new process before the - current process has finished its bootstrapping phase. - - This probably means that you are not using fork to start your - child processes and you have forgotten to use the proper idiom - in the main module: - - if __name__ == '__main__': - freeze_support() - ... - - The "freeze_support()" line can be omitted if the program - is not going to be frozen to produce an executable. - - - - Running on a Cluster -------------------- You can also pass a custom dask client, e.g. to run on a slurm cluster. 
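As a rough sketch of the two modes described above (local parallelization via `n_workers`, cluster parallelization via a custom Dask client), usage might look as follows. This is a minimal example under stated assumptions, not part of the patch: the SLURM queue name and resource values are placeholders, and only one of the two options should be active at a time.

    from ConfigSpace import Configuration, ConfigurationSpace, Float
    from dask.distributed import Client
    from dask_jobqueue import SLURMCluster

    from smac import HyperparameterOptimizationFacade, Scenario


    def train(config: Configuration, seed: int = 0) -> float:
        return config["x"] ** 2


    if __name__ == "__main__":
        cs = ConfigurationSpace(seed=0)
        cs.add([Float("x", (-5, 5), default=0)])

        # Option 1: local parallelization. SMAC spawns the Dask workers itself.
        scenario = Scenario(cs, n_trials=100, n_workers=4)
        smac = HyperparameterOptimizationFacade(scenario, train, overwrite=True)

        # Option 2: cluster parallelization. Pass a custom Dask client instead
        # (queue name and resource values are placeholders for your cluster).
        # cluster = SLURMCluster(queue="short", cores=1, memory="4 GB", walltime="00:10:00", nanny=False)
        # cluster.scale(jobs=4)
        # smac = HyperparameterOptimizationFacade(scenario, train, dask_client=Client(cluster))

        incumbent = smac.optimize()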
diff --git a/docs/conf.py b/docs/conf.py index 198361fb9e..4adbf4cd28 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,6 +12,8 @@ "version": version, "versions": { f"v{version}": "#", + "v2.2.0": "https://automl.github.io/SMAC3/v2.2.0/", + "v2.1.0": "https://automl.github.io/SMAC3/v2.1.0/", "v2.0.1": "https://automl.github.io/SMAC3/v2.0.1/", "v2.0.0": "https://automl.github.io/SMAC3/v2.0.0/", "v2.0.0b1": "https://automl.github.io/SMAC3/v2.0.0b1/", diff --git a/examples/1_basics/1_quadratic_function.py b/examples/1_basics/1_quadratic_function.py index 3cafb846b8..4d27c7ae6d 100644 --- a/examples/1_basics/1_quadratic_function.py +++ b/examples/1_basics/1_quadratic_function.py @@ -26,7 +26,7 @@ class QuadraticFunction: def configspace(self) -> ConfigurationSpace: cs = ConfigurationSpace(seed=0) x = Float("x", (-5, 5), default=-5) - cs.add_hyperparameters([x]) + cs.add([x]) return cs diff --git a/examples/1_basics/2_svm_cv.py b/examples/1_basics/2_svm_cv.py index cfe39d4bb1..345fcffb07 100644 --- a/examples/1_basics/2_svm_cv.py +++ b/examples/1_basics/2_svm_cv.py @@ -46,8 +46,8 @@ def configspace(self) -> ConfigurationSpace: use_gamma_value = InCondition(child=gamma_value, parent=gamma, values=["value"]) # Add hyperparameters and conditions to our configspace - cs.add_hyperparameters([kernel, C, shrinking, degree, coef, gamma, gamma_value]) - cs.add_conditions([use_degree, use_coef, use_gamma, use_gamma_value]) + cs.add([kernel, C, shrinking, degree, coef, gamma, gamma_value]) + cs.add([use_degree, use_coef, use_gamma, use_gamma_value]) return cs diff --git a/examples/1_basics/3_ask_and_tell.py b/examples/1_basics/3_ask_and_tell.py index 5d0e5e78c5..6ab8b5ba80 100644 --- a/examples/1_basics/3_ask_and_tell.py +++ b/examples/1_basics/3_ask_and_tell.py @@ -20,7 +20,7 @@ def configspace(self) -> ConfigurationSpace: cs = ConfigurationSpace(seed=0) x0 = Float("x0", (-5, 10), default=-3) x1 = Float("x1", (-5, 10), default=-4) - cs.add_hyperparameters([x0, x1]) + cs.add([x0, x1]) return cs diff --git a/examples/1_basics/4_callback.py b/examples/1_basics/4_callback.py index c3d66a4b94..0fd9e9d9d7 100644 --- a/examples/1_basics/4_callback.py +++ b/examples/1_basics/4_callback.py @@ -27,7 +27,7 @@ def configspace(self) -> ConfigurationSpace: cs = ConfigurationSpace(seed=0) x0 = Float("x0", (-5, 10), default=-3) x1 = Float("x1", (-5, 10), default=-4) - cs.add_hyperparameters([x0, x1]) + cs.add([x0, x1]) return cs diff --git a/examples/1_basics/5_continue.py b/examples/1_basics/5_continue.py index 025856fee0..63cfb3957f 100644 --- a/examples/1_basics/5_continue.py +++ b/examples/1_basics/5_continue.py @@ -47,7 +47,7 @@ class QuadraticFunction: def configspace(self) -> ConfigurationSpace: cs = ConfigurationSpace(seed=0) x = Float("x", (-5, 5), default=-5) - cs.add_hyperparameters([x]) + cs.add([x]) return cs diff --git a/examples/1_basics/6_priors.py b/examples/1_basics/6_priors.py index 218bd8f460..691460c0b2 100644 --- a/examples/1_basics/6_priors.py +++ b/examples/1_basics/6_priors.py @@ -95,13 +95,13 @@ def configspace(self) -> ConfigurationSpace: "learning_rate_init", lower=1e-5, upper=1.0, - mu=np.log(1e-3), - sigma=np.log(10), + mu=1e-3, # will be transformed to log space later + sigma=10, # will be transformed to log space later log=True, ) # Add all hyperparameters at once: - cs.add_hyperparameters([n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init]) + cs.add([n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init]) return cs diff --git 
a/examples/1_basics/7_parallelization_cluster.py b/examples/1_basics/7_parallelization_cluster.py
index 36f79586e3..2467f7d229 100644
--- a/examples/1_basics/7_parallelization_cluster.py
+++ b/examples/1_basics/7_parallelization_cluster.py
@@ -6,6 +6,9 @@
 SLURM cluster. If you do not want to use a cluster but your local machine, set dask_client
 to `None` and pass `n_workers` to the `Scenario`.
 
+Sometimes, jobs submitted via the SLURM client might be cancelled as soon as they start. In that
+case, try starting your job from a compute node instead.
+
 :warning: On some clusters you cannot spawn new jobs when running a SLURMCluster inside a
 job instead of on the login node. No obvious errors might be raised but it can hang silently.
 
@@ -41,7 +44,7 @@ def configspace(self) -> ConfigurationSpace:
         cs = ConfigurationSpace(seed=0)
         x0 = Float("x0", (-5, 10), default=-5, log=False)
         x1 = Float("x1", (0, 15), default=2, log=False)
-        cs.add_hyperparameters([x0, x1])
+        cs.add([x0, x1])
 
         return cs
 
@@ -77,7 +80,7 @@ def train(self, config: Configuration, seed: int = 0) -> float:
     model = Branin()
 
     # Scenario object specifying the optimization "environment"
-    scenario = Scenario(model.configspace, deterministic=True, n_trials=100)
+    scenario = Scenario(model.configspace, deterministic=True, n_trials=100, trial_walltime_limit=100)
 
     # Create cluster
     n_workers = 4  # Use 4 workers on the cluster
@@ -97,6 +100,10 @@ def train(self, config: Configuration, seed: int = 0) -> float:
         walltime="00:10:00",
         processes=1,
         log_directory="tmp/smac_dask_slurm",
+        # If you want to limit the resource consumption of each function evaluation with pynisher, you need to
+        # set nanny to False.
+        # Otherwise, the error `daemonic processes are not allowed to have children` will be raised!
+        nanny=False,  # if you do not use pynisher to limit memory/time usage, feel free to set this to True
     )
     cluster.scale(jobs=n_workers)
diff --git a/examples/2_multi_fidelity/1_mlp_epochs.py b/examples/2_multi_fidelity/1_mlp_epochs.py
index 48c027d3bd..5cb0aefa05 100644
--- a/examples/2_multi_fidelity/1_mlp_epochs.py
+++ b/examples/2_multi_fidelity/1_mlp_epochs.py
@@ -65,7 +65,7 @@ def configspace(self) -> ConfigurationSpace:
         learning_rate_init = Float("learning_rate_init", (0.0001, 1.0), default=0.001, log=True)
 
         # Add all hyperparameters at once:
-        cs.add_hyperparameters([n_layer, n_neurons, activation, solver, batch_size, learning_rate, learning_rate_init])
+        cs.add([n_layer, n_neurons, activation, solver, batch_size, learning_rate, learning_rate_init])
 
         # Adding conditions to restrict the hyperparameter space...
         # ... since learning rate is only used when solver is 'sgd'.
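An aside on the recurring API change in the hunks above and below: ConfigSpace 1.x (which this release now requires per setup.py) unifies `add_hyperparameters` and `add_conditions` into a single `add` method. A minimal standalone sketch, with illustrative names not taken from the patch:

    from ConfigSpace import Categorical, ConfigurationSpace, EqualsCondition, Float

    cs = ConfigurationSpace(seed=0)
    solver = Categorical("solver", ["lbfgs", "sgd", "adam"], default="adam")
    learning_rate_init = Float("learning_rate_init", (1e-4, 1.0), default=1e-3, log=True)

    # ConfigSpace 1.x: the same `add` accepts hyperparameters...
    cs.add([solver, learning_rate_init])
    # ...and conditions (here: learning_rate_init is only active when solver == "sgd").
    cs.add([EqualsCondition(child=learning_rate_init, parent=solver, value="sgd")])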
@@ -76,7 +76,7 @@ def configspace(self) -> ConfigurationSpace:
         use_batch_size = InCondition(child=batch_size, parent=solver, values=["sgd", "adam"])
 
         # We can also add multiple conditions on hyperparameters at once:
-        cs.add_conditions([use_lr, use_batch_size, use_lr_init])
+        cs.add([use_lr, use_batch_size, use_lr_init])
 
         return cs
 
diff --git a/examples/2_multi_fidelity/2_sgd_datasets.py b/examples/2_multi_fidelity/2_sgd_datasets.py
index 09864a963a..178ea21c2b 100644
--- a/examples/2_multi_fidelity/2_sgd_datasets.py
+++ b/examples/2_multi_fidelity/2_sgd_datasets.py
@@ -76,7 +76,7 @@ def configspace(self) -> ConfigurationSpace:
         learning_rate = Categorical("learning_rate", ["constant", "invscaling", "adaptive"], default="constant")
         eta0 = Float("eta0", (0.00001, 1), default=0.1, log=True)
         # Add the parameters to configuration space
-        cs.add_hyperparameters([alpha, l1_ratio, learning_rate, eta0])
+        cs.add([alpha, l1_ratio, learning_rate, eta0])
 
         return cs
 
diff --git a/examples/2_multi_fidelity/3_specify_HB_via_total_budget.py b/examples/2_multi_fidelity/3_specify_HB_via_total_budget.py
new file mode 100644
index 0000000000..7c0ebdcf0d
--- /dev/null
+++ b/examples/2_multi_fidelity/3_specify_HB_via_total_budget.py
@@ -0,0 +1,112 @@
+"""
+Specify Number of Trials via a Total Budget in Hyperband
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+This example uses a dummy function but illustrates how to set up Hyperband if you
+want to specify a total optimization budget in terms of fidelity units.
+
+Normally, SMAC calculates a typical Hyperband round internally.
+If the number of trials is not used up by a single round, the next round is started.
+Instead of specifying the number of trials beforehand, specify the total budget
+in terms of fidelity units and let SMAC calculate how many trials that corresponds to.
+ + +""" +from __future__ import annotations + +import numpy as np +from ConfigSpace import Configuration, ConfigurationSpace, Float +from matplotlib import pyplot as plt + +from smac import MultiFidelityFacade, RunHistory, Scenario +from smac.intensifier.hyperband_utils import get_n_trials_for_hyperband_multifidelity + +__copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" +__license__ = "3-clause BSD" + + +class QuadraticFunction: + max_budget = 500 + + @property + def configspace(self) -> ConfigurationSpace: + cs = ConfigurationSpace(seed=0) + x = Float("x", (-5, 5), default=-5) + cs.add([x]) + + return cs + + def train(self, config: Configuration, seed: int = 0, budget: float | None = None) -> float: + """Returns the y value of a quadratic function with a minimum we know to be at x=0.""" + x = config["x"] + + if budget is None: + multiplier = 1 + else: + multiplier = 1 + budget / self.max_budget + + return x**2 * multiplier + + +def plot(runhistory: RunHistory, incumbent: Configuration) -> None: + plt.figure() + + # Plot ground truth + x = list(np.linspace(-5, 5, 100)) + y = [xi * xi for xi in x] + plt.plot(x, y) + + # Plot all trials + for k, v in runhistory.items(): + config = runhistory.get_config(k.config_id) + x = config["x"] + y = v.cost # type: ignore + plt.scatter(x, y, c="blue", alpha=0.1, zorder=9999, marker="o") + + # Plot incumbent + plt.scatter(incumbent["x"], incumbent["x"] * incumbent["x"], c="red", zorder=10000, marker="x") + + plt.show() + + +if __name__ == "__main__": + model = QuadraticFunction() + + min_budget = 10 # minimum budget per trial + max_budget = 500 # maximum budget per trial + eta = 3 # standard HB parameter influencing the number of stages + + # Let's calculate how many trials we need to exhaust the total optimization budget (in terms of + # fidelity units) + n_trials = get_n_trials_for_hyperband_multifidelity( + total_budget=10000, # this is the total optimization budget we specify in terms of fidelity units + min_budget=min_budget, # This influences the Hyperband rounds, minimum budget per trial + max_budget=max_budget, # This influences the Hyperband rounds, maximum budget per trial + eta=eta, # This influences the Hyperband rounds + print_summary=True, + ) + + # Scenario object specifying the optimization "environment" + scenario = Scenario( + model.configspace, deterministic=True, n_trials=n_trials, min_budget=min_budget, max_budget=max_budget + ) + + # Now we use SMAC to find the best hyperparameters + smac = MultiFidelityFacade( + scenario, + model.train, # We pass the target function here + overwrite=True, # Overrides any previous results that are found that are inconsistent with the meta-data + intensifier=MultiFidelityFacade.get_intensifier(scenario=scenario, eta=eta), + ) + + incumbent = smac.optimize() + + # Get cost of default configuration + default_cost = smac.validate(model.configspace.get_default_configuration()) + print(f"Default cost: {default_cost}") + + # Let's calculate the cost of the incumbent + incumbent_cost = smac.validate(incumbent) + print(f"Incumbent cost: {incumbent_cost}") + + # Let's plot it too + plot(smac.runhistory, incumbent) diff --git a/examples/3_multi_objective/2_parego.py b/examples/3_multi_objective/2_parego.py index 856c2e857f..b5294fb98b 100644 --- a/examples/3_multi_objective/2_parego.py +++ b/examples/3_multi_objective/2_parego.py @@ -54,14 +54,14 @@ def configspace(self) -> ConfigurationSpace: learning_rate = Categorical("learning_rate", ["constant", "invscaling", "adaptive"], default="constant") 
learning_rate_init = Float("learning_rate_init", (0.0001, 1.0), default=0.001, log=True) - cs.add_hyperparameters([n_layer, n_neurons, activation, solver, batch_size, learning_rate, learning_rate_init]) + cs.add([n_layer, n_neurons, activation, solver, batch_size, learning_rate, learning_rate_init]) use_lr = EqualsCondition(child=learning_rate, parent=solver, value="sgd") use_lr_init = InCondition(child=learning_rate_init, parent=solver, values=["sgd", "adam"]) use_batch_size = InCondition(child=batch_size, parent=solver, values=["sgd", "adam"]) # We can also add multiple conditions on hyperparameters at once: - cs.add_conditions([use_lr, use_batch_size, use_lr_init]) + cs.add([use_lr, use_batch_size, use_lr_init]) return cs diff --git a/examples/4_advanced_optimizer/1_turbo_optimizer.py b/examples/4_advanced_optimizer/1_turbo_optimizer.py index 860243c028..dc936f7079 100644 --- a/examples/4_advanced_optimizer/1_turbo_optimizer.py +++ b/examples/4_advanced_optimizer/1_turbo_optimizer.py @@ -28,7 +28,7 @@ # cs = ConfigurationSpace(seed=0) # x0 = Float("x0", (-5, 10), default=-3) # x1 = Float("x1", (-5, 10), default=-4) -# cs.add_hyperparameters([x0, x1]) +# cs.add([x0, x1]) # return cs diff --git a/examples/4_advanced_optimizer/2_boing_optimizer.py b/examples/4_advanced_optimizer/2_boing_optimizer.py index 3eb1a18691..815b1f3401 100644 --- a/examples/4_advanced_optimizer/2_boing_optimizer.py +++ b/examples/4_advanced_optimizer/2_boing_optimizer.py @@ -27,7 +27,7 @@ # cs = ConfigurationSpace(seed=0) # x0 = Float("x0", (-5, 10), default=-3) # x1 = Float("x1", (-5, 10), default=-4) -# cs.add_hyperparameters([x0, x1]) +# cs.add([x0, x1]) # return cs diff --git a/examples/4_advanced_optimizer/3_metadata_callback.py b/examples/4_advanced_optimizer/3_metadata_callback.py index ab35f28627..b82670dbc6 100644 --- a/examples/4_advanced_optimizer/3_metadata_callback.py +++ b/examples/4_advanced_optimizer/3_metadata_callback.py @@ -36,7 +36,7 @@ def configspace(self) -> ConfigurationSpace: cs = ConfigurationSpace(seed=0) x0 = Float("x0", (-5, 10), default=-3) x1 = Float("x1", (-5, 10), default=-4) - cs.add_hyperparameters([x0, x1]) + cs.add([x0, x1]) return cs diff --git a/examples/4_advanced_optimizer/4_intensify_crossvalidation.py b/examples/4_advanced_optimizer/4_intensify_crossvalidation.py index d215dd8ec6..679253da18 100644 --- a/examples/4_advanced_optimizer/4_intensify_crossvalidation.py +++ b/examples/4_advanced_optimizer/4_intensify_crossvalidation.py @@ -35,17 +35,17 @@ def configspace(self) -> ConfigurationSpace: cs = ConfigurationSpace(seed=0) # First we create our hyperparameters - C = Float("C", (2 ** - 5, 2 ** 15), default=1.0, log=True) - gamma = Float("gamma", (2 ** -15, 2 ** 3), default=1.0, log=True) + C = Float("C", (2**-5, 2**15), default=1.0, log=True) + gamma = Float("gamma", (2**-15, 2**3), default=1.0, log=True) # Add hyperparameters to our configspace - cs.add_hyperparameters([C, gamma]) + cs.add([C, gamma]) return cs def train(self, config: Configuration, instance: str, seed: int = 0) -> float: """Creates a SVM based on a configuration and evaluate on the given fold of the digits dataset - + Parameters ---------- config: Configuration @@ -81,15 +81,14 @@ def train(self, config: Configuration, instance: str, seed: int = 0) -> float: scenario = Scenario( classifier.configspace, n_trials=50, # We want to run max 50 trials (combination of config and instances in the case of - # deterministic=True. 
In the case of deterministic=False, this would be the - # combination of instances, seeds and configs). The number of distinct configurations - # evaluated by SMAC will be lower than this number because some of the configurations - # will be executed on more than one instance (CV fold). + # deterministic=True. In the case of deterministic=False, this would be the + # combination of instances, seeds and configs). The number of distinct configurations + # evaluated by SMAC will be lower than this number because some of the configurations + # will be executed on more than one instance (CV fold). instances=[f"{i}" for i in range(N_FOLDS)], # Specify all instances by their name (as a string) - instance_features={f"{i}": [i] for i in range(N_FOLDS)}, # breaks SMAC + instance_features={f"{i}": [i] for i in range(N_FOLDS)}, # breaks SMAC deterministic=True # To simplify the problem we make SMAC believe that we have a deterministic - # optimization problem. - + # optimization problem. ) # We want to run the facade's default initial design, but we want to change the number @@ -102,12 +101,12 @@ def train(self, config: Configuration, instance: str, seed: int = 0) -> float: classifier.train, initial_design=initial_design, overwrite=True, # If the run exists, we overwrite it; alternatively, we can continue from last state - # The next line defines the intensifier, i.e., the module that governs the selection of + # The next line defines the intensifier, i.e., the module that governs the selection of # instance-seed pairs. Since we set deterministic to True above, it only governs the instance in # this example. Technically, it is not necessary to create the intensifier as a user, but it is # necessary to do so because we change the argument max_config_calls (the number of instance-seed pairs # per configuration to try) to the number of cross-validation folds, while the default would be 3. - intensifier=Intensifier(scenario=scenario, max_config_calls=N_FOLDS, seed=0) + intensifier=Intensifier(scenario=scenario, max_config_calls=N_FOLDS, seed=0), ) incumbent = smac.optimize() @@ -124,4 +123,4 @@ def train(self, config: Configuration, instance: str, seed: int = 0) -> float: # at more configurations than would have been possible with regular cross-validation, where the number # of configurations would be determined by the number of trials divided by the number of folds (50 / 10). runhistory = smac.runhistory - print(f"Number of evaluated configurations: {len(runhistory.config_ids)}") \ No newline at end of file + print(f"Number of evaluated configurations: {len(runhistory.config_ids)}") diff --git a/setup.py b/setup.py index 27ec7e6954..3967dcf1f1 100644 --- a/setup.py +++ b/setup.py @@ -57,16 +57,16 @@ def read_file(filepath: str) -> str: include_package_data=True, python_requires=">=3.8", install_requires=[ - "numpy>=1.23.3", + "numpy>=1.23.3,<2.0.0", "scipy>=1.9.2", "psutil", "pynisher>=1.0.0", - "ConfigSpace>=0.6.1", + "ConfigSpace>=1.0.0", "joblib", "scikit-learn>=1.1.2", "pyrfr>=0.9.0", "dask[distributed]", - "dask_jobqueue", + "dask_jobqueue>=0.8.2", "emcee>=3.0.0", "regex", "pyyaml", diff --git a/smac/__init__.py b/smac/__init__.py index 3241da0628..439dc48fae 100644 --- a/smac/__init__.py +++ b/smac/__init__.py @@ -12,14 +12,14 @@ description = "SMAC3, a Python implementation of 'Sequential Model-based Algorithm Configuration'." 
url = "https://www.automl.org/" project_urls = { - "Documentation": "https://https://github.com/automl.github.io/SMAC3/main", - "Source Code": "https://github.com/https://github.com/automl/smac", + "Documentation": "https://automl.github.io/SMAC3/main", + "Source Code": "https://github.com/automl/SMAC3", } copyright = f""" Copyright {datetime.date.today().strftime('%Y')}, Marius Lindauer, Katharina Eggensperger, Matthias Feurer, André Biedenkapp, Difan Deng, Carolin Benjamins, Tim Ruhkopf, René Sass and Frank Hutter""" -version = "2.1.0" +version = "2.2.0" try: diff --git a/smac/acquisition/maximizer/local_and_random_search.py b/smac/acquisition/maximizer/local_and_random_search.py index d81af2f699..71c7f86c47 100644 --- a/smac/acquisition/maximizer/local_and_random_search.py +++ b/smac/acquisition/maximizer/local_and_random_search.py @@ -145,7 +145,6 @@ def _maximize( previous_configs: list[Configuration], n_points: int, ) -> list[tuple[float, Configuration]]: - if self._uniform_configspace is not None and self._prior_sampling_fraction is not None: # Get configurations sorted by acquisition function value next_configs_by_prior_random_search_sorted = self._prior_random_search._maximize( diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py index 3ef1ae96e7..297c032a22 100644 --- a/smac/acquisition/maximizer/local_search.py +++ b/smac/acquisition/maximizer/local_search.py @@ -389,7 +389,7 @@ def _search( if acq_val[acq_index] > acq_val_candidates[i]: is_valid = False try: - neighbors[acq_index].is_valid_configuration() + neighbors[acq_index].check_valid_configuration() is_valid = True except (ValueError, ForbiddenValueError) as e: logger.debug("Local search %d: %s", i, e) diff --git a/smac/callback/metadata_callback.py b/smac/callback/metadata_callback.py index 626de5dee5..95a382b5f6 100644 --- a/smac/callback/metadata_callback.py +++ b/smac/callback/metadata_callback.py @@ -7,6 +7,7 @@ import smac from smac.callback.callback import Callback from smac.main.smbo import SMBO +from smac.utils.numpyencoder import NumpyEncoder __copyright__ = "Copyright 2023, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" @@ -31,4 +32,4 @@ def on_start(self, smbo: SMBO) -> None: path.mkdir(parents=True, exist_ok=True) with open(path / "metadata.json", "w") as fp: - json.dump(meta_dict, fp, indent=2) + json.dump(meta_dict, fp, indent=2, cls=NumpyEncoder) diff --git a/smac/facade/abstract_facade.py b/smac/facade/abstract_facade.py index 6ca49b057c..9a2031099f 100644 --- a/smac/facade/abstract_facade.py +++ b/smac/facade/abstract_facade.py @@ -463,18 +463,6 @@ def _validate(self) -> None: # Make sure the same acquisition function is used assert self._acquisition_function == self._acquisition_maximizer._acquisition_function - if isinstance(self._runner, DaskParallelRunner) and ( - self.scenario.trial_walltime_limit is not None or self.scenario.trial_memory_limit is not None - ): - # This is probably due to pickling dask jobs - raise ValueError( - "Parallelization via Dask cannot be used in combination with limiting " - "the resources " - "of the target function via `scenario.trial_walltime_limit` or " - "`scenario.trial_memory_limit`. Set those to `None` if you want " - "parallelization. 
" - ) - def _get_signature_arguments(self) -> list[str]: """Returns signature arguments, which are required by the intensifier.""" arguments = [] diff --git a/smac/initial_design/abstract_initial_design.py b/smac/initial_design/abstract_initial_design.py index 7e17c88c24..d561a6772f 100644 --- a/smac/initial_design/abstract_initial_design.py +++ b/smac/initial_design/abstract_initial_design.py @@ -176,7 +176,6 @@ def _transform_continuous_designs( """ params = configspace.get_hyperparameters() for idx, param in enumerate(params): - if isinstance(param, IntegerHyperparameter): design[:, idx] = param._inverse_transform(param._transform(design[:, idx])) elif isinstance(param, NumericalHyperparameter): diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index cb537e9ccb..b7a5ae1ca6 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -26,6 +26,7 @@ from smac.scenario import Scenario from smac.utils.configspace import get_config_hash, print_config_changes from smac.utils.logging import get_logger +from smac.utils.numpyencoder import NumpyEncoder from smac.utils.pareto_front import calculate_pareto_front, sort_by_crowding_distance __copyright__ = "Copyright 2022, automl.org" @@ -666,7 +667,7 @@ def save(self, filename: str | Path) -> None: } with open(filename, "w") as fp: - json.dump(data, fp, indent=2) + json.dump(data, fp, indent=2, cls=NumpyEncoder) def load(self, filename: str | Path) -> None: """Loads the latest state of the intensifier including the incumbents and trajectory.""" diff --git a/smac/intensifier/hyperband_utils.py b/smac/intensifier/hyperband_utils.py new file mode 100644 index 0000000000..77f6a748c6 --- /dev/null +++ b/smac/intensifier/hyperband_utils.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +import numpy as np + +from smac.intensifier.successive_halving import SuccessiveHalving + + +def determine_HB(min_budget: float, max_budget: float, eta: int = 3) -> dict: + """Determine one Hyperband round + + Parameters + ---------- + min_budget : float + Minimum budget per trial in fidelity units + max_budget : float + Maximum budget per trial in fidelity units + eta : int, defaults to 3 + Input that controls the proportion of configurations discarded in each round of Successive Halving. 
+ + Returns + ------- + dict + Info about the Hyperband round + "max_iterations" + "n_configs_in_stage" + "budgets_in_stage" + "trials_used" + "budget_used" + "number_of_brackets" + + """ + _s_max = SuccessiveHalving._get_max_iterations(eta, max_budget, min_budget) + + _max_iterations: dict[int, int] = {} + _n_configs_in_stage: dict[int, list] = {} + _budgets_in_stage: dict[int, list] = {} + + for i in range(_s_max + 1): + max_iter = _s_max - i + + _budgets_in_stage[i], _n_configs_in_stage[i] = SuccessiveHalving._compute_configs_and_budgets_for_stages( + eta, max_budget, max_iter, _s_max + ) + _max_iterations[i] = max_iter + 1 + + total_trials = np.sum([np.sum(v) for v in _n_configs_in_stage.values()]) + total_budget = np.sum([np.sum(v) for v in _budgets_in_stage.values()]) + + return { + "max_iterations": _max_iterations, + "n_configs_in_stage": _n_configs_in_stage, + "budgets_in_stage": _budgets_in_stage, + "trials_used": total_trials, + "budget_used": total_budget, + "number_of_brackets": len(_max_iterations), + } + + +def determine_hyperband_for_multifidelity( + total_budget: float, min_budget: float, max_budget: float, eta: int = 3 +) -> dict: + """Determine how many Hyperband rounds should happen based on a total budget + + Parameters + ---------- + total_budget : float + Total budget for the complete optimization in fidelity units + min_budget : float + Minimum budget per trial in fidelity units + max_budget : float + Maximum budget per trial in fidelity units + eta : int, defaults to 3 + Input that controls the proportion of configurations discarded in each round of Successive Halving. + + Returns + ------- + dict + Info about one Hyperband round + "max_iterations" + "n_configs_in_stage" + "budgets_in_stage" + "trials_used" + "budget_used" + "number_of_brackets" + Info about whole optimization + "n_trials" + "total_budget" + "eta" + "min_budget" + "max_budget" + + """ + # Determine the HB + hyperband_round = determine_HB(eta=eta, min_budget=min_budget, max_budget=max_budget) + + # Calculate how many HB rounds we can have + budget_used_per_hyperband_round = hyperband_round["budget_used"] + number_of_full_hb_rounds = int(np.floor(total_budget / budget_used_per_hyperband_round)) + remaining_budget = total_budget % budget_used_per_hyperband_round + trials_used_per_hb_round = hyperband_round["trials_used"] + n_configs_in_stage = hyperband_round["n_configs_in_stage"] + budgets_in_stage = hyperband_round["budgets_in_stage"] + + remaining_trials = 0 + for stage in n_configs_in_stage.keys(): + B = budgets_in_stage[stage] + C = n_configs_in_stage[stage] + for b, c in zip(B, C): + # How many trials are left? + # If b * c is lower than remaining budget, we can add full c + # otherwise we need to find out how many trials we can do with this budget + remaining_trials += min(c, int(np.floor(remaining_budget / b))) + # We cannot go lower than 0 + # If we are in the case of b*c > remaining_budget, we will not have any + # budget left. 
we cannot add full c, but only the number of trials that still fit
+            remaining_budget = max(0, remaining_budget - b * c)
+
+    n_trials = int(number_of_full_hb_rounds * trials_used_per_hb_round + remaining_trials)
+
+    hyperband_info = hyperband_round
+    hyperband_info["n_trials"] = n_trials
+    hyperband_info["total_budget"] = total_budget
+    hyperband_info["eta"] = eta
+    hyperband_info["min_budget"] = min_budget
+    hyperband_info["max_budget"] = max_budget
+
+    return hyperband_info
+
+
+def print_hyperband_summary(hyperband_info: dict) -> None:
+    """Print summary about Hyperband as used in the MultiFidelityFacade
+
+    Parameters
+    ----------
+    hyperband_info : dict
+        Info dict about Hyperband
+    """
+    print("-" * 30, "HYPERBAND IN MULTI-FIDELITY", "-" * 30)
+    print("total budget:\t\t", hyperband_info["total_budget"])
+    print("total number of trials:\t", hyperband_info["n_trials"])
+    print("number of HB rounds:\t", hyperband_info["total_budget"] / hyperband_info["budget_used"])
+    print()
+
+    print("\t~~~~~~~~~~~~HYPERBAND ROUND")
+    print("\teta:\t\t\t\t\t", hyperband_info["eta"])
+    print("\tmin budget per trial:\t\t\t", hyperband_info["min_budget"])
+    print("\tmax budget per trial:\t\t\t", hyperband_info["max_budget"])
+    print("\ttotal number of trials per HB round:\t", hyperband_info["trials_used"])
+    print("\tbudget used per HB round:\t\t", hyperband_info["budget_used"])
+    print("\tnumber of brackets:\t\t\t", hyperband_info["number_of_brackets"])
+    print("\tbudgets per stage:\t\t\t", hyperband_info["budgets_in_stage"])
+    print("\tn configs per stage:\t\t\t", hyperband_info["n_configs_in_stage"])
+    print("-" * (2 * 30 + len("HYPERBAND IN MULTI-FIDELITY") + 2))
+
+
+def get_n_trials_for_hyperband_multifidelity(
+    total_budget: float, min_budget: float, max_budget: float, eta: int = 3, print_summary: bool = True
+) -> int:
+    """Calculate the number of trials needed for multi-fidelity optimization
+
+    Specify the total budget and find out how many trials that equals.
+
+    Parameters
+    ----------
+    total_budget : float
+        Total budget for the complete optimization in fidelity units.
+        A fidelity unit can be one epoch or a fraction of a dataset size.
+    min_budget : float
+        Minimum budget per trial in fidelity units
+    max_budget : float
+        Maximum budget per trial in fidelity units
+    eta : int, defaults to 3
+        Input that controls the proportion of configurations discarded in each round of Successive Halving.
+
+    Returns
+    -------
+    int
+        Number of trials needed for the specified total budget
+    """
+    hyperband_info = determine_hyperband_for_multifidelity(
+        total_budget=total_budget, eta=eta, min_budget=min_budget, max_budget=max_budget
+    )
+    if print_summary:
+        print_hyperband_summary(hyperband_info=hyperband_info)
+    return hyperband_info["n_trials"]
diff --git a/smac/intensifier/successive_halving.py b/smac/intensifier/successive_halving.py
index 01b95c6d86..58960ca67e 100644
--- a/smac/intensifier/successive_halving.py
+++ b/smac/intensifier/successive_halving.py
@@ -384,7 +384,7 @@ def __iter__(self) -> Iterator[TrialInfo]:  # noqa: D102
         logger.debug("Updating tracker:")
 
         # TODO: Process stages ascending or descending?
- for (bracket, stage) in list(self._tracker.keys()): + for bracket, stage in list(self._tracker.keys()): pairs = self._tracker[(bracket, stage)].copy() for seed, configs in pairs: isb_keys = self._get_instance_seed_budget_keys_by_stage(bracket=bracket, stage=stage, seed=seed) diff --git a/smac/main/smbo.py b/smac/main/smbo.py index 6138e4d64f..dd6bfd3548 100644 --- a/smac/main/smbo.py +++ b/smac/main/smbo.py @@ -24,6 +24,7 @@ from smac.scenario import Scenario from smac.utils.data_structures import recursively_compare_dicts from smac.utils.logging import get_logger +from smac.utils.numpyencoder import NumpyEncoder __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -414,7 +415,7 @@ def save(self) -> None: # Save optimization data with open(str(path / "optimization.json"), "w") as file: - json.dump(data, file, indent=2) + json.dump(data, file, indent=2, cls=NumpyEncoder) # And save runhistory and intensifier self._runhistory.save(path / "runhistory.json") diff --git a/smac/model/gaussian_process/kernels/rbf_kernel.py b/smac/model/gaussian_process/kernels/rbf_kernel.py index 5bf2076588..13aaac49f1 100644 --- a/smac/model/gaussian_process/kernels/rbf_kernel.py +++ b/smac/model/gaussian_process/kernels/rbf_kernel.py @@ -24,7 +24,6 @@ def __init__( has_conditions: bool = False, prior: AbstractPrior | None = None, ) -> None: - super().__init__( operate_on=operate_on, has_conditions=has_conditions, diff --git a/smac/model/gaussian_process/kernels/white_kernel.py b/smac/model/gaussian_process/kernels/white_kernel.py index a3fa4a61b3..f8a7814a77 100644 --- a/smac/model/gaussian_process/kernels/white_kernel.py +++ b/smac/model/gaussian_process/kernels/white_kernel.py @@ -21,7 +21,6 @@ def __init__( has_conditions: bool = False, prior: AbstractPrior | None = None, ) -> None: - super().__init__( operate_on=operate_on, has_conditions=has_conditions, diff --git a/smac/model/gaussian_process/mcmc_gaussian_process.py b/smac/model/gaussian_process/mcmc_gaussian_process.py index 7c4ff40287..d6a098159c 100644 --- a/smac/model/gaussian_process/mcmc_gaussian_process.py +++ b/smac/model/gaussian_process/mcmc_gaussian_process.py @@ -247,7 +247,6 @@ def _train( assert self._samples is not None for sample in self._samples: - if (sample < -50).any(): sample[sample < -50] = -50 if (sample > 50).any(): diff --git a/smac/model/random_forest/random_forest.py b/smac/model/random_forest/random_forest.py index 634e0af06f..72685803f9 100644 --- a/smac/model/random_forest/random_forest.py +++ b/smac/model/random_forest/random_forest.py @@ -213,7 +213,7 @@ def _predict( third_dimension = max(max_num_leaf_data, third_dimension) # Transform list of 2d arrays into a 3d array - preds_as_array = np.zeros((X.shape[0], self._rf_opts.num_trees, third_dimension)) * np.NaN + preds_as_array = np.zeros((X.shape[0], self._rf_opts.num_trees, third_dimension)) * np.nan for i, preds_per_tree in enumerate(all_preds): for j, pred in enumerate(preds_per_tree): preds_as_array[i, j, : len(pred)] = pred diff --git a/smac/runhistory/encoder/abstract_encoder.py b/smac/runhistory/encoder/abstract_encoder.py index 9a891229a4..e9de8b14cb 100644 --- a/smac/runhistory/encoder/abstract_encoder.py +++ b/smac/runhistory/encoder/abstract_encoder.py @@ -80,9 +80,9 @@ def __init__( ) # Learned statistics - self._min_y = np.array([np.NaN] * self._n_objectives) - self._max_y = np.array([np.NaN] * self._n_objectives) - self._percentile = np.array([np.NaN] * self._n_objectives) + self._min_y = np.array([np.nan] * self._n_objectives) + 
self._max_y = np.array([np.nan] * self._n_objectives) + self._percentile = np.array([np.nan] * self._n_objectives) self._multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None self._runhistory: RunHistory | None = None diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index c713841074..091e3f95b2 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -24,6 +24,7 @@ from smac.utils.configspace import get_config_hash from smac.utils.logging import get_logger from smac.utils.multi_objective import normalize_costs +from smac.utils.numpyencoder import NumpyEncoder __copyright__ = "Copyright 2022, automl.org" __license__ = "3-clause BSD" @@ -803,6 +804,7 @@ def save(self, filename: str | Path = "runhistory.json") -> None: }, fp, indent=2, + cls=NumpyEncoder, ) def load(self, filename: str | Path, configspace: ConfigurationSpace) -> None: @@ -955,7 +957,7 @@ def _check_json_serializable( trial_value: TrialValue, ) -> None: try: - json.dumps(obj) + json.dumps(obj, cls=NumpyEncoder) except Exception as e: raise ValueError( "Cannot add %s: %s of type %s to runhistory because it raises an error during JSON encoding, " diff --git a/smac/scenario.py b/smac/scenario.py index 133ae57b07..ca0df81a20 100644 --- a/smac/scenario.py +++ b/smac/scenario.py @@ -11,9 +11,9 @@ import numpy as np from ConfigSpace import ConfigurationSpace -from ConfigSpace.read_and_write import json as cs_json from smac.utils.logging import get_logger +from smac.utils.numpyencoder import NumpyEncoder logger = get_logger(__name__) @@ -203,12 +203,11 @@ def save(self) -> None: # Save everything filename = self.output_directory / "scenario.json" with open(filename, "w") as fh: - json.dump(data, fh, indent=4) + json.dump(data, fh, indent=4, cls=NumpyEncoder) # Save configspace on its own configspace_filename = self.output_directory / "configspace.json" - with open(configspace_filename, "w") as f: - f.write(cs_json.write(self.configspace)) + self.configspace.to_json(configspace_filename) @staticmethod def load(path: Path) -> Scenario: @@ -224,9 +223,7 @@ def load(path: Path) -> Scenario: # Read configspace configspace_filename = path / "configspace.json" - with open(configspace_filename, "r") as f: - - configspace = cs_json.read(f.read()) + configspace = ConfigurationSpace.from_json(configspace_filename) data["configspace"] = configspace diff --git a/smac/utils/configspace.py b/smac/utils/configspace.py index 8f281ed3e9..8224f3ef90 100644 --- a/smac/utils/configspace.py +++ b/smac/utils/configspace.py @@ -101,22 +101,22 @@ def get_types( if can_be_inactive: raise ValueError("Inactive parameters not supported for Beta and Normal Hyperparameters") - bounds[i] = (param._lower, param._upper) + bounds[i] = (param.lower_vectorized, param.upper_vectorized) elif isinstance(param, NormalIntegerHyperparameter): if can_be_inactive: raise ValueError("Inactive parameters not supported for Beta and Normal Hyperparameters") - bounds[i] = (param.nfhp._lower, param.nfhp._upper) + bounds[i] = (param.lower_vectorized, param.upper_vectorized) elif isinstance(param, BetaFloatHyperparameter): if can_be_inactive: raise ValueError("Inactive parameters not supported for Beta and Normal Hyperparameters") - bounds[i] = (param._lower, param._upper) + bounds[i] = (param.lower_vectorized, param.upper_vectorized) elif isinstance(param, BetaIntegerHyperparameter): if can_be_inactive: raise ValueError("Inactive parameters not supported for Beta and Normal Hyperparameters") - bounds[i] = 
(param.bfhp._lower, param.bfhp._upper) + bounds[i] = (param.lower_vectorized, param.upper_vectorized) elif not isinstance( param, ( diff --git a/smac/utils/numpyencoder.py b/smac/utils/numpyencoder.py new file mode 100644 index 0000000000..c7b2b6c7b4 --- /dev/null +++ b/smac/utils/numpyencoder.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from typing import Any + +import json + +import numpy as np + + +class NumpyEncoder(json.JSONEncoder): + """Custom encoder for numpy data types + + From https://stackoverflow.com/a/61903895 + """ + + def default(self, obj: Any) -> Any: + """Handle numpy datatypes if present by converting to native python + + Parameters + ---------- + obj : Any + Object to serialize + + Returns + ------- + Any + Object in native python + """ + if isinstance( + obj, + ( + np.int_, + np.intc, + np.intp, + np.int8, + np.int16, + np.int32, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + ), + ): + return int(obj) + + elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)): + return float(obj) + + elif isinstance(obj, (np.complex_, np.complex64, np.complex128)): + return {"real": obj.real, "imag": obj.imag} + + elif isinstance(obj, (np.ndarray,)): + return obj.tolist() + + elif isinstance(obj, (np.bool_)): + return bool(obj) + + elif isinstance(obj, (np.void)): + return None + + return json.JSONEncoder.default(self, obj) diff --git a/smac/utils/subspaces/__init__.py b/smac/utils/subspaces/__init__.py index 686099830f..77d5b97f5a 100644 --- a/smac/utils/subspaces/__init__.py +++ b/smac/utils/subspaces/__init__.py @@ -322,7 +322,7 @@ # hp_list.append(hp_new) # # We only consider plain hyperparameters -# self.cs_local.add_hyperparameters(hp_list) +# self.cs_local.add(hp_list) # forbiddens_ss = [] # forbiddens = config_space.get_forbiddens() # for forbidden in forbiddens: diff --git a/tests/conftest.py b/tests/conftest.py index 92a415162e..5b26d6b708 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -143,7 +143,6 @@ def pytest_sessionfinish(session: Session, exitstatus: ExitCode) -> None: proc = psutil.Process() kill_signal = signal.SIGTERM for child in proc.children(recursive=True): - # https://stackoverflow.com/questions/57336095/access-verbosity-level-in-a-pytest-helper-function if session.config.getoption("verbose") > 0: print(child, child.cmdline()) diff --git a/tests/fixtures/configspace.py b/tests/fixtures/configspace.py index 77cf9522d3..5e6e0ca3ff 100644 --- a/tests/fixtures/configspace.py +++ b/tests/fixtures/configspace.py @@ -18,7 +18,7 @@ def configspace_small() -> ConfigurationSpace: c = Categorical("c", ["cat", "dog", "mouse"], default="cat") # Add all hyperparameters at once: - cs.add_hyperparameters([a, b, c]) + cs.add([a, b, c]) return cs @@ -36,7 +36,7 @@ def configspace_large() -> ConfigurationSpace: learning_rate_init = Float("learning_rate_init", (0.0001, 1.0), default=0.001, log=True) # Add all hyperparameters at once: - cs.add_hyperparameters( + cs.add( [ n_layer, n_neurons, @@ -57,6 +57,6 @@ def configspace_large() -> ConfigurationSpace: use_batch_size = InCondition(child=batch_size, parent=solver, values=["sgd", "adam"]) # We can also add multiple conditions on hyperparameters at once: - cs.add_conditions([use_lr, use_batch_size, use_lr_init]) + cs.add([use_lr, use_batch_size, use_lr_init]) return cs diff --git a/tests/fixtures/models.py b/tests/fixtures/models.py index 29f9bf152e..7a7a8fcc09 100644 --- a/tests/fixtures/models.py +++ b/tests/fixtures/models.py @@ -18,7 +18,7 @@ def configspace(self) -> 
ConfigurationSpace: cs = ConfigurationSpace(seed=0) x0 = Float("x0", (-5, 10), default=-3) x1 = Float("x1", (-5, 10), default=-4) - cs.add_hyperparameters([x0, x1]) + cs.add([x0, x1]) return cs @@ -58,7 +58,7 @@ def configspace(self) -> ConfigurationSpace: eta0 = Float("eta0", (0.00001, 1), default=0.1, log=True) # Add the parameters to configuration space - cs.add_hyperparameters([alpha, l1_ratio, learning_rate, eta0]) + cs.add([alpha, l1_ratio, learning_rate, eta0]) return cs diff --git a/tests/test_acquisition/test_maximizers.py b/tests/test_acquisition/test_maximizers.py index 6d373c237c..d7698e0a29 100644 --- a/tests/test_acquisition/test_maximizers.py +++ b/tests/test_acquisition/test_maximizers.py @@ -54,8 +54,8 @@ def get_array(self): def configspace_branin() -> ConfigurationSpace: """Returns the branin configspace.""" cs = ConfigurationSpace() - cs.add_hyperparameter(Float("x", (-5, 10))) - cs.add_hyperparameter(Float("y", (0, 15))) + cs.add(Float("x", (-5, 10))) + cs.add(Float("y", (0, 15))) return cs @@ -195,7 +195,7 @@ def configspace() -> ConfigurationSpace: c = Float("c", (0, 1), default=0.5) # Add all hyperparameters at once: - cs.add_hyperparameters([a, b, c]) + cs.add([a, b, c]) return cs @@ -262,7 +262,6 @@ def predict_marginalized(self, X): return X, X class AcquisitionFunction: - model = Model() def __call__(self, X): @@ -333,7 +332,7 @@ def test_local_and_random_search(configspace, acquisition_function): values = rs._maximize(start_points, 100) config_origins = [] v_old = np.inf - for (v, config) in values: + for v, config in values: config_origins += [config.origin] if isinstance(v, np.ndarray): v = float(v[0]) @@ -357,7 +356,7 @@ def configspace_rosenbrock(): x2 = UniformIntegerHyperparameter("x2", -5, 5, default_value=5) x3 = CategoricalHyperparameter("x3", [5, 2, 0, 1, -1, -2, 4, -3, 3, -5, -4], default_value=5) x4 = UniformIntegerHyperparameter("x4", -5, 5, default_value=5) - uniform_cs.add_hyperparameters([x1, x2, x3, x4]) + uniform_cs.add([x1, x2, x3, x4]) return uniform_cs @@ -373,7 +372,7 @@ def configspace_prior(): "x3", [5, 2, 0, 1, -1, -2, 4, -3, 3, -5, -4], default_value=5, weights=[999, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] ) x4 = UniformIntegerHyperparameter("x4", lower=-5, upper=5, default_value=5) - prior_cs.add_hyperparameters([x1, x2, x3, x4]) + prior_cs.add([x1, x2, x3, x4]) return prior_cs diff --git a/tests/test_initial_design/test_sobol_design.py b/tests/test_initial_design/test_sobol_design.py index 8663e2972f..8fbb427e8f 100644 --- a/tests/test_initial_design/test_sobol_design.py +++ b/tests/test_initial_design/test_sobol_design.py @@ -24,7 +24,7 @@ def test_sobol_design(make_scenario, configspace_large): def test_max_hyperparameters(make_scenario): cs = ConfigurationSpace() hyperparameters = [Float("x%d" % (i + 1), (0, 1)) for i in range(21202)] - cs.add_hyperparameters(hyperparameters) + cs.add(hyperparameters) scenario = make_scenario(cs) diff --git a/tests/test_intensifier/test_abstract_intensifier.py b/tests/test_intensifier/test_abstract_intensifier.py index b8dc91a1cb..6d878f9154 100644 --- a/tests/test_intensifier/test_abstract_intensifier.py +++ b/tests/test_intensifier/test_abstract_intensifier.py @@ -110,7 +110,7 @@ def test_incumbent_selection_multi_objective(make_scenario, configspace_small, m def test_config_rejection_single_objective(configspace_small, make_scenario): - """ Tests whether configs are rejected properly if they are worse than the incumbent. 
""" + """Tests whether configs are rejected properly if they are worse than the incumbent.""" scenario = make_scenario(configspace_small, use_instances=False) runhistory = RunHistory() intensifier = Intensifier(scenario=scenario) @@ -118,36 +118,21 @@ def test_config_rejection_single_objective(configspace_small, make_scenario): configs = configspace_small.sample_configuration(3) - runhistory.add(config=configs[0], - cost=5, - time=0.0, - seed=0, - status=StatusType.SUCCESS, - force_update=True) + runhistory.add(config=configs[0], cost=5, time=0.0, seed=0, status=StatusType.SUCCESS, force_update=True) intensifier.update_incumbents(configs[0]) assert intensifier._rejected_config_ids == [] # add config that yielded better results, updating incumbent and sending prior incumbent to rejected - runhistory.add(config=configs[1], - cost=1, - time=0.0, - seed=0, - status=StatusType.SUCCESS, - force_update=True) + runhistory.add(config=configs[1], cost=1, time=0.0, seed=0, status=StatusType.SUCCESS, force_update=True) intensifier.update_incumbents(config=configs[1]) - + assert intensifier._rejected_config_ids == [1] # add config that is no better should thus go to rejected - runhistory.add(config=configs[2], - cost=1, - time=0.0, - seed=0, - status=StatusType.SUCCESS, - force_update=True) + runhistory.add(config=configs[2], cost=1, time=0.0, seed=0, status=StatusType.SUCCESS, force_update=True) intensifier.update_incumbents(config=configs[2]) - + assert intensifier._rejected_config_ids == [1, 3] diff --git a/tests/test_intensifier/test_hyperband_utils.py b/tests/test_intensifier/test_hyperband_utils.py new file mode 100644 index 0000000000..33179d0ad1 --- /dev/null +++ b/tests/test_intensifier/test_hyperband_utils.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +from smac.intensifier.hyperband_utils import ( + determine_HB, + determine_hyperband_for_multifidelity, + get_n_trials_for_hyperband_multifidelity, +) + + +def test_determine_HB(): + min_budget = 1.0 + max_budget = 81.0 + eta = 3 + + result = determine_HB(min_budget=min_budget, max_budget=max_budget, eta=eta) + + # Follow algorithm (not the table!) 
from https://arxiv.org/pdf/1603.06560.pdf (see https://github.com/automl/SMAC3/issues/977) + expected_max_iterations = {0: 5, 1: 4, 2: 3, 3: 2, 4: 1} + expected_n_configs_in_stage = { + 0: [81, 27, 9, 3, 1], + 1: [34, 11, 3, 1], + 2: [15, 5, 1], + 3: [8, 2], + 4: [5], + } + expected_budgets_in_stage = { + 0: [1, 3, 9, 27, 81], + 1: [3, 9, 27, 81], + 2: [9, 27, 81], + 3: [27, 81], + 4: [81], + } + expected_trials_used = 206 + expected_budget_used = 547 + expected_number_of_brackets = 5 + + assert result["max_iterations"] == expected_max_iterations + assert result["n_configs_in_stage"] == expected_n_configs_in_stage + assert result["budgets_in_stage"] == expected_budgets_in_stage + assert result["trials_used"] == expected_trials_used + assert result["budget_used"] == expected_budget_used + assert result["number_of_brackets"] == expected_number_of_brackets + + +def test_determine_hyperband_for_multifidelity(): + total_budget = 1000.0 + min_budget = 1.0 + max_budget = 81.0 + eta = 3 + + result = determine_hyperband_for_multifidelity( + total_budget=total_budget, min_budget=min_budget, max_budget=max_budget, eta=eta + ) + + expected_n_trials = 206 + 137 # 206 trials for one full round, and additional trials for the remaining budget + + assert result["n_trials"] == expected_n_trials + assert result["total_budget"] == total_budget + assert result["eta"] == eta + assert result["min_budget"] == min_budget + assert result["max_budget"] == max_budget + + +def test_get_n_trials_for_hyperband_multifidelity(): + total_budget = 1000.0 + min_budget = 1.0 + max_budget = 81.0 + eta = 3 + + n_trials = get_n_trials_for_hyperband_multifidelity( + total_budget=total_budget, min_budget=min_budget, max_budget=max_budget, eta=eta + ) + + assert n_trials == (206 + 137) diff --git a/tests/test_model/_test_gp_gpytorch.py b/tests/test_model/_test_gp_gpytorch.py index 95f3a59d89..d9e90f9b3a 100644 --- a/tests/test_model/_test_gp_gpytorch.py +++ b/tests/test_model/_test_gp_gpytorch.py @@ -325,7 +325,6 @@ def test_sampling_shape(self): X = np.arange(-5, 5, 0.1).reshape((-1, 1)) X_test = np.arange(-5.05, 5.05, 0.1).reshape((-1, 1)) for shape in (None, (-1, 1)): - if shape is None: y = np.sin(X).flatten() else: diff --git a/tests/test_model/test_gp.py b/tests/test_model/test_gp.py index 0c72eb2fa5..9e21925bc1 100644 --- a/tests/test_model/test_gp.py +++ b/tests/test_model/test_gp.py @@ -240,7 +240,6 @@ def __call__(self, X, eval_gradient=True, clone_kernel=True): raise np.linalg.LinAlgError with patch.object(sklearn.gaussian_process.GaussianProcessRegressor, "log_marginal_likelihood", Dummy().__call__): - seed = 1 rs = np.random.RandomState(seed) X, Y, n_dims = get_cont_data(rs) @@ -265,7 +264,6 @@ def __call__(self, X, Y=None): dummy = Dummy() with patch.object(GaussianProcess, "predict", dummy.__call__): - seed = 1 rs = np.random.RandomState(seed) @@ -375,7 +373,6 @@ def test_sampling_shape(): X = np.arange(-5, 5, 0.1).reshape((-1, 1)) X_test = np.arange(-5.05, 5.05, 0.1).reshape((-1, 1)) for shape in (None, (-1, 1)): - if shape is None: y = np.sin(X).flatten() else: diff --git a/tests/test_model/test_rf.py b/tests/test_model/test_rf.py index e59b2ded11..c81549a16e 100644 --- a/tests/test_model/test_rf.py +++ b/tests/test_model/test_rf.py @@ -127,7 +127,6 @@ def test_predict_marginalized(): def test_predict_marginalized_mocked(): - rs = np.random.RandomState(1) F = {} for i in range(10): diff --git a/tests/test_runhistory/test_runhistory.py b/tests/test_runhistory/test_runhistory.py index a1b7616ca4..428ec54adc 100644 
--- a/tests/test_runhistory/test_runhistory.py +++ b/tests/test_runhistory/test_runhistory.py @@ -75,7 +75,6 @@ def test_add_and_pickle(runhistory, config1): def test_illegal_input(runhistory): - with pytest.raises(TypeError, match="Configuration must not be None."): runhistory.add(config=None, cost=1.23, time=2.34, status=StatusType.SUCCESS) @@ -87,7 +86,6 @@ def test_illegal_input(runhistory): def test_add_multiple_times(runhistory, config1): - for i in range(5): runhistory.add( config=config1, @@ -294,7 +292,6 @@ def test_full_update2(runhistory, config1, config2): def test_incremental_update(runhistory, config1): - runhistory.add( config=config1, cost=10, @@ -319,7 +316,6 @@ def test_incremental_update(runhistory, config1): def test_multiple_budgets(runhistory, config1): - runhistory.add( config=config1, cost=10, @@ -382,7 +378,6 @@ def test_get_configs_per_budget(runhistory, config1, config2, config3): def test_json_origin(configspace_small, config1): - for i, origin in enumerate(["test_origin", None]): config1.origin = origin runhistory = RunHistory() diff --git a/tests/test_runhistory/test_runhistory_encoder.py b/tests/test_runhistory/test_runhistory_encoder.py index e3a8f57303..ac9824534c 100644 --- a/tests/test_runhistory/test_runhistory_encoder.py +++ b/tests/test_runhistory/test_runhistory_encoder.py @@ -1,5 +1,7 @@ import numpy as np import pytest +from ConfigSpace import Configuration +from ConfigSpace.hyperparameters import CategoricalHyperparameter from smac.multi_objective.aggregation_strategy import MeanAggregationStrategy from smac.runhistory.encoder import ( @@ -13,9 +15,6 @@ from smac.runhistory.encoder.encoder import RunHistoryEncoder from smac.runner.abstract_runner import StatusType -from ConfigSpace import Configuration -from ConfigSpace.hyperparameters import CategoricalHyperparameter - @pytest.fixture def configs(configspace_small): @@ -42,9 +41,7 @@ def test_transform(runhistory, make_scenario, configspace_small, configs): ) # Normal encoder - encoder = RunHistoryEncoder( - scenario=scenario, considered_states=[StatusType.SUCCESS] - ) + encoder = RunHistoryEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS]) encoder.runhistory = runhistory # TODO: Please replace with the more general solution once ConfigSpace 1.0 @@ -54,9 +51,7 @@ def test_transform(runhistory, make_scenario, configspace_small, configs): # Categoricals are upperbounded by their size, rest of hyperparameters are # upperbounded by 1. 
     upper_bounds = {
-        hp.name: (hp.get_size() - 1)
-        if isinstance(hp, CategoricalHyperparameter)
-        else 1.0
+        hp.name: (hp.size - 1) if isinstance(hp, CategoricalHyperparameter) else 1.0
         for hp in configspace_small.get_hyperparameters()
     }
     # Need to ensure they match the order in the Configuration vectorized form
@@ -73,49 +68,37 @@ def test_transform(runhistory, make_scenario, configspace_small, configs):
     assert ((X1 <= upper) & (X1 >= lower)).all()

     # Log encoder
-    encoder = RunHistoryLogEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistoryLogEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory
     X, Y = encoder.transform()
     assert Y.tolist() != Y1.tolist()
     assert ((X <= upper) & (X >= lower)).all()

-    encoder = RunHistoryLogScaledEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistoryLogScaledEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory
     X, Y = encoder.transform()
     assert Y.tolist() != Y1.tolist()
     assert ((X <= upper) & (X >= lower)).all()

-    encoder = RunHistoryScaledEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistoryScaledEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory
     X, Y = encoder.transform()
     assert Y.tolist() != Y1.tolist()
     assert ((X <= upper) & (X >= lower)).all()

-    encoder = RunHistoryInverseScaledEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistoryInverseScaledEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory
     X, Y = encoder.transform()
     assert Y.tolist() != Y1.tolist()
     assert ((X <= upper) & (X >= lower)).all()

-    encoder = RunHistorySqrtScaledEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistorySqrtScaledEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory
     X, Y = encoder.transform()
     assert Y.tolist() != Y1.tolist()
     assert ((X <= upper) & (X >= lower)).all()

-    encoder = RunHistoryEIPSEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistoryEIPSEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory
     X, Y = encoder.transform()
     assert Y.tolist() != Y1.tolist()
@@ -160,9 +143,7 @@ def test_transform_conditionals(runhistory, make_scenario, configspace_large):
         status=StatusType.SUCCESS,
     )

-    encoder = RunHistoryEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistoryEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory
     X, Y = encoder.transform()
@@ -184,9 +165,7 @@ def test_multi_objective(runhistory, make_scenario, configspace_small, configs):

     # Multi objective algorithm must be set
     with pytest.raises(AssertionError):
-        encoder = RunHistoryEncoder(
-            scenario=scenario, considered_states=[StatusType.SUCCESS]
-        )
+        encoder = RunHistoryEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
         encoder.runhistory = runhistory
         _, Y = encoder.transform()
@@ -242,9 +221,7 @@ def test_ignore(runhistory, make_scenario, configspace_small, configs):
     )

     # Normal encoder
-    encoder = RunHistoryEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistoryEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory
     X1, Y1 = encoder.transform()
@@ -283,14 +260,10 @@ def test_budgets(runhistory, make_scenario, configspace_small, configs):
         budget=2,
     )

-    runhistory.add(
-        config=configs[1], cost=5, time=4, status=StatusType.SUCCESS, budget=2
-    )
+    runhistory.add(config=configs[1], cost=5, time=4, status=StatusType.SUCCESS, budget=2)

     # Normal encoder
-    encoder = RunHistoryEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistoryEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory
     X, Y = encoder.transform(budget_subset=[2])
     assert Y.tolist() == [[99999999]]
@@ -319,14 +292,10 @@ def test_budgets(runhistory, make_scenario, configspace_small, configs):
         budget=2,
     )

-    runhistory.add(
-        config=configs[1], cost=5, time=4, status=StatusType.SUCCESS, budget=2
-    )
+    runhistory.add(config=configs[1], cost=5, time=4, status=StatusType.SUCCESS, budget=2)

     # Normal encoder
-    encoder = RunHistoryEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistoryEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory
     X, Y = encoder.transform(budget_subset=[2])
     assert Y.tolist() == [[99999999]]
@@ -338,20 +307,12 @@ def test_lower_budget_states(runhistory, make_scenario, configspace_small, configs):
     """Tests lower budgets based on budget subset and considered states."""
     scenario = make_scenario(configspace_small)
-    encoder = RunHistoryEncoder(
-        scenario=scenario, considered_states=[StatusType.SUCCESS]
-    )
+    encoder = RunHistoryEncoder(scenario=scenario, considered_states=[StatusType.SUCCESS])
     encoder.runhistory = runhistory

-    runhistory.add(
-        config=configs[0], cost=1, time=1, status=StatusType.SUCCESS, budget=3
-    )
-    runhistory.add(
-        config=configs[0], cost=2, time=2, status=StatusType.SUCCESS, budget=4
-    )
-    runhistory.add(
-        config=configs[0], cost=3, time=4, status=StatusType.TIMEOUT, budget=5
-    )
+    runhistory.add(config=configs[0], cost=1, time=1, status=StatusType.SUCCESS, budget=3)
+    runhistory.add(config=configs[0], cost=2, time=2, status=StatusType.SUCCESS, budget=4)
+    runhistory.add(config=configs[0], cost=3, time=4, status=StatusType.TIMEOUT, budget=5)

     # We request a higher budget but can't find it, so we expect an empty list
     X, Y = encoder.transform(budget_subset=[500])
diff --git a/tests/test_utils/test_numpy_encoder.py b/tests/test_utils/test_numpy_encoder.py
new file mode 100644
index 0000000000..9074e3cd80
--- /dev/null
+++ b/tests/test_utils/test_numpy_encoder.py
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+import json
+
+import numpy as np
+import pytest
+
+from smac.utils.numpyencoder import NumpyEncoder
+
+
+# Test cases for NumpyEncoder
+def test_numpy_encoder():
+    data = {
+        "int": np.int32(1),
+        "float": np.float32(1.23),
+        "complex": np.complex64(1 + 2j),
+        "array": np.array([1, 2, 3]),
+        "bool": np.bool_(True),
+        "void": np.void(b"void"),
+    }
+
+    expected_output = {
+        "int": 1,
+        "float": 1.23,
+        "complex": {"real": 1.0, "imag": 2.0},
+        "array": [1, 2, 3],
+        "bool": True,
+        "void": None,
+    }
+
+    encoded_data = json.dumps(data, cls=NumpyEncoder)
+    decoded_data = json.loads(encoded_data)
+
+    assert np.isclose(decoded_data["float"], expected_output["float"])  # float is not exactly the same
+    del decoded_data["float"]
+    del expected_output["float"]
+    assert decoded_data == expected_output
+
+
+# Test if default method raises TypeError for unsupported types
+def test_numpy_encoder_unsupported_type():
+    with pytest.raises(TypeError):
+        json.dumps(set([1, 2, 3]), cls=NumpyEncoder)
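For context, `NumpyEncoder` follows the usual `json.JSONEncoder` extension pattern: a `default()` hook maps numpy scalars, arrays, bools, and voids to plain Python values, and anything else falls through to the base class, which raises the `TypeError` the last test expects. A minimal usage sketch follows; the payload is made up for illustration, and only the `NumpyEncoder` import mirrors the tests above:

```python
# Serializing numpy-typed results that the stdlib json encoder would reject.
import json

import numpy as np

from smac.utils.numpyencoder import NumpyEncoder

stats = {
    "n_trials": np.int64(206),             # numpy int -> int
    "incumbent_cost": np.float32(0.137),   # numpy float -> float (not exactly equal after the round trip)
    "costs": np.array([0.9, 0.5, 0.137]),  # ndarray -> list
    "finished": np.bool_(True),            # numpy bool -> bool
}

# json.dumps(stats) alone raises TypeError; the cls hook fixes that.
print(json.dumps(stats, cls=NumpyEncoder, indent=2))
```

This is the same hook the benchmark's `_save_data` relies on when dumping its collected results with `cls=NumpyEncoder`.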