From 6bc9eec1557583cab1a4d0841d0bf09f94671772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s?= Date: Mon, 25 Dec 2023 19:28:24 -0300 Subject: [PATCH 1/7] support wrapped models in grid search --- darts/models/forecasting/forecasting_model.py | 119 +++++++++++++++++- .../forecasting/test_regression_models.py | 57 +++++++++ 2 files changed, 173 insertions(+), 3 deletions(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index f1ab933b05..f1efa57f6d 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -1388,6 +1388,69 @@ def gridsearch( Currently this method only supports deterministic predictions (i.e. when models' predictions have only 1 sample). + Some darts models wrap scikit-learn like models (See, for instance :class:`RegressionModel`), i.e + they accept an argument ``model``. With the purpose of including 'model' in the grid search, + there are two possible options: + + 1. Give the key ``model`` in parameter as a valid list of instances of the scikit-learn like model used. + + Example + .. highlight:: python + .. code-block:: python + + from sklearn.ensemble import RandomForestRegressor + + from darts.models import RegressionModel + from darts.utils import timeseries_generation as tg + + parameters = { + "model": [ + RandomForestRegressor(min_samples_split=2, min_samples_leaf=1), + RandomForestRegressor(min_samples_split=3, min_samples_leaf=2), + ], + "lags": [1,2,3], + } + series = tg.sine_timeseries(length=100) + + RegressionModel.gridsearch( + parameters=parameters, series=series, forecast_horizon=1 + ) + .. + + 2. Give the key ``model`` in parameter as dictionary containing a special key + ``model_class`` which is the scikit-learn like model class that will be used + to pass arguments from the grid. 
The other keys/values are arguments passed to the wrapped + model class and they behave as an inner parameters dictionary. + + Example + .. highlight:: python + .. code-block:: python + + from sklearn.ensemble import RandomForestRegressor + + from darts.models import RegressionModel + from darts.utils import timeseries_generation as tg + + parameters = { + "model": { + "model_class": RandomForestRegressor, + "min_samples_split": [2,3], + "min_samples_leaf": [1,2], + }, + "lags": [1,2,3], + } + series = tg.sine_timeseries(length=100) + + RegressionModel.gridsearch( + parameters=parameters, series=series, forecast_horizon=1 + ) + .. + + In order to keep consistency in the best-performing hyper-parameters returned in this method, + wrapped model arguments are returned with a prefix containing the name of the wrapped model class + and a dot separator. For example, the parameter ``min_samples_split`` in the example above will be + returned as ``RandomForestRegressor.min_samples_split``. + Parameters ---------- model_class @@ -1502,6 +1565,33 @@ def gridsearch( if predict_kwargs is None: predict_kwargs = dict() + # Used if the darts model wraps a scikit-learn like model + wrapped_model_class = None + + if "model" in parameters: + # Ask if model has been passed as a dictionary. This implies that the arguments + # of the wrapped model must be passed to the grid. If 'model' is passed as a + # list of instances of scikit-learn models, the behavior should work like + # any argument passed to the Darts model." + if isinstance(parameters["model"], dict): + if not "model_class" in parameters["model"]: + raise_log( + ValueError( + "When the 'model' key is set as a dictionary, it must contain " + "the 'model_class' key, which represents the class of the model " + "to be wrapped." 
+ ) + ) + wrapped_model_class = parameters["model"].pop("model_class") + # Create a flat dictionary by adding a suffix to the arguments of the wrapped model in + # order to distinguish them from the other arguments of the Darts model + parameters.update( + { + f"{wrapped_model_class.__name__}.{k}": v + for k, v in parameters.pop("model").items() + } + ) + # compute all hyperparameter combinations from selection params_cross_product = list(product(*parameters.values())) @@ -1516,6 +1606,25 @@ def gridsearch( zip(params_cross_product), verbose, total=len(params_cross_product) ) + def _init_model_from_combination(param_combination_dict): + if wrapped_model_class is None: + return model_class(**param_combination_dict) + + # Decode new keys created with the suffix wrapped_model. + wrapped_model_kwargs = {} + darts_model_kwargs = {} + for k, v in param_combination_dict.items(): + if k.startswith(f"{wrapped_model_class.__name__}."): + wrapped_model_kwargs[ + k.replace(f"{wrapped_model_class.__name__}.", "") + ] = v + else: + darts_model_kwargs[k] = v + return model_class( + model=wrapped_model_class(**wrapped_model_kwargs), + **darts_model_kwargs, + ) + def _evaluate_combination(param_combination) -> float: param_combination_dict = dict( list(zip(parameters.keys(), param_combination)) @@ -1526,7 +1635,8 @@ def _evaluate_combination(param_combination) -> float: "model_name" ] = f"{current_time}_{param_combination_dict['model_name']}" - model = model_class(**param_combination_dict) + model = _init_model_from_combination(param_combination_dict) + if use_fitted_values: # fitted value mode model._fit_wrapper( series=series, @@ -1587,8 +1697,11 @@ def _evaluate_combination(param_combination) -> float: ) logger.info("Chosen parameters: " + str(best_param_combination)) - - return model_class(**best_param_combination), best_param_combination, min_error + return ( + _init_model_from_combination(best_param_combination), + best_param_combination, + min_error, + ) def residuals( self, 
diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 9d5c369526..bc7274290a 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -2441,6 +2441,63 @@ def test_lgbm_categorical_features_passed_to_fit_correctly(self, lgb_fit_patch): ) = self.lgbm_w_categorical_covariates._categorical_fit_param assert kwargs[cat_param_name] == [2, 3, 5] + def test_grid_search(self): + # Create grid over wrapped model parameters too + parameters = { + "model": { + "model_class": RandomForestRegressor, + "min_samples_split": [2, 3], + }, + "lags": [1], + } + result = RegressionModel.gridsearch( + parameters=parameters, series=self.sine_multivariate1, forecast_horizon=1 + ) + assert isinstance(result[0], RegressionModel) + assert { + "lags", + "RandomForestRegressor.min_samples_split", + } == set(result[1]) + assert isinstance(result[2], float) + + # Use model as instances of RandomForestRegressor directly + parameters = { + "model": [ + RandomForestRegressor(min_samples_split=2), + RandomForestRegressor(min_samples_split=3), + ], + "lags": [1], + } + + result = RegressionModel.gridsearch( + parameters=parameters, series=self.sine_multivariate1, forecast_horizon=1 + ) + + assert isinstance(result[0], RegressionModel) + assert { + "lags", + "model", + } == set(result[1]) + assert isinstance(result[1]["model"], RandomForestRegressor) + assert isinstance(result[2], float) + + def test_grid_search_invalid_wrapped_model_dict(self): + parameters = { + "model": {"fit_intercept": [True, False]}, + "lags": [1, 2, 3], + } + with pytest.raises( + ValueError, + match="When the 'model' key is set as a dictionary, it must contain " + "the 'model_class' key, which represents the class of the model " + "to be wrapped.", + ): + RegressionModel.gridsearch( + parameters=parameters, + series=self.sine_multivariate1, + forecast_horizon=1, + ) + 
def helper_create_LinearModel(self, multi_models=True, extreme_lags=False): if not extreme_lags: lags, lags_pc, lags_fc = 3, 3, [-3, -2, -1, 0] From 75ede7cc0ca2f7e344a3c9dcbf0dba2cba527eb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s?= Date: Mon, 25 Dec 2023 19:46:50 -0300 Subject: [PATCH 2/7] remove extra comment --- darts/models/forecasting/forecasting_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index f1efa57f6d..9a4ae89186 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -1610,7 +1610,7 @@ def _init_model_from_combination(param_combination_dict): if wrapped_model_class is None: return model_class(**param_combination_dict) - # Decode new keys created with the suffix wrapped_model. + # Decode new keys created with the suffix. wrapped_model_kwargs = {} darts_model_kwargs = {} for k, v in param_combination_dict.items(): From 7354b08d88ae67d91cd091a583b97959a4a436a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s?= Date: Wed, 27 Dec 2023 22:23:52 -0300 Subject: [PATCH 3/7] lint --- darts/models/forecasting/forecasting_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 9a4ae89186..6634203762 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -1574,7 +1574,7 @@ def gridsearch( # list of instances of scikit-learn models, the behavior should work like # any argument passed to the Darts model." 
if isinstance(parameters["model"], dict): - if not "model_class" in parameters["model"]: + if "model_class" not in parameters["model"]: raise_log( ValueError( "When the 'model' key is set as a dictionary, it must contain " From fa31d06413e0d64300df704a4984aaf2fd95779e Mon Sep 17 00:00:00 2001 From: madtoinou Date: Tue, 12 Nov 2024 15:29:54 +0200 Subject: [PATCH 4/7] linting --- darts/models/forecasting/forecasting_model.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index a9ec972f1d..35aded11d1 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -1827,12 +1827,10 @@ def gridsearch( wrapped_model_class = parameters["model"].pop("model_class") # Create a flat dictionary by adding a suffix to the arguments of the wrapped model in # order to distinguish them from the other arguments of the Darts model - parameters.update( - { - f"{wrapped_model_class.__name__}.{k}": v - for k, v in parameters.pop("model").items() - } - ) + parameters.update({ + f"{wrapped_model_class.__name__}.{k}": v + for k, v in parameters.pop("model").items() + }) # compute all hyperparameter combinations from selection params_cross_product = list(product(*parameters.values())) From 8514feb973bbaab0b8535f8bd94b38faaf01f717 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Tue, 12 Nov 2024 19:59:00 +0200 Subject: [PATCH 5/7] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66f6f90538..e17bed69dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co **Improved** - Improvements to `ForecastingModel`: Improved `start` handling for historical forecasts, backtest, residuals, and gridsearch. 
If `start` is not within the trainable / forecastable points, uses the closest valid start point that is a round multiple of `stride` ahead of start. Raises a ValueError, if no valid start point exists. This guarantees that all historical forecasts are `n * stride` points away from start, and will simplify many downstream tasks. [#2560](https://github.com/unit8co/darts/issues/2560) by [Dennis Bader](https://github.com/dennisbader). +- Improvement to `gridsearch()`, now supports optimization of models wrapped in `RegressionModel`. [#2594](https://github.com/unit8co/darts/pull/2594) by [Andrés Sandoval](https://github.com/andresliszt) **Fixed** From 367829df46dd680489012a7e2e64011c6a2d8512 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Thu, 14 Nov 2024 13:48:08 +0200 Subject: [PATCH 6/7] fix: gridsearch parameters check --- darts/models/forecasting/forecasting_model.py | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 35aded11d1..6340e92420 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -1775,12 +1775,33 @@ def gridsearch( ) ) + if "model" in parameters: + valid_model_list = isinstance(parameters["model"], list) + valid_nested_params = parameters["model"].get( + "wrapped_model_class" + ) and all( + isinstance(params, (list, np.ndarray)) + for p_name, params in parameters["model"].items() + if p_name != "wrapped_model_class" + ) + if not (valid_model_list or valid_nested_params): + raise_log( + ValueError( + "The 'model' entry in `parameters` must either be a list of instantiated models or " + "a dictionary containing as keys hyperparameter names, and as values lists of values " + "plus a 'wrapped_model_class': model_cls item.", + logger, + ) + ) + if not all( - isinstance(params, (list, np.ndarray)) for params in parameters.values() + isinstance(params, (list, 
np.ndarray)) + for p_name, params in parameters.items() + if p_name != "model" ): raise_log( ValueError( - "Every value in the `parameters` dictionary should be a list or a np.ndarray." + "Every hyper-parameter value in the `parameters` dictionary should be a list or a np.ndarray." ), logger, ) @@ -1812,19 +1833,12 @@ def gridsearch( if "model" in parameters: # Ask if model has been passed as a dictionary. This implies that the arguments - # of the wrapped model must be passed to the grid. If 'model' is passed as a - # list of instances of scikit-learn models, the behavior should work like - # any argument passed to the Darts model." - if isinstance(parameters["model"], dict): - if "model_class" not in parameters["model"]: - raise_log( - ValueError( - "When the 'model' key is set as a dictionary, it must contain " - "the 'model_class' key, which represents the class of the model " - "to be wrapped." - ) - ) - wrapped_model_class = parameters["model"].pop("model_class") + # of the wrapped model must be passed to the grid. 
+ if ( + isinstance(parameters["model"], dict) + and "wrapped_model_class" in parameters["model"] + ): + wrapped_model_class = parameters["model"].pop("wrapped_model_class") # Create a flat dictionary by adding a suffix to the arguments of the wrapped model in # order to distinguish them from the other arguments of the Darts model parameters.update({ From ef416bb4177acdef6319356d37ccb448ff12f4c5 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Sun, 17 Nov 2024 21:44:29 +0100 Subject: [PATCH 7/7] fix: typo --- darts/models/forecasting/forecasting_model.py | 19 ++++++++++--------- .../forecasting/test_regression_models.py | 4 ++-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 6340e92420..44de994960 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -1777,19 +1777,20 @@ def gridsearch( if "model" in parameters: valid_model_list = isinstance(parameters["model"], list) - valid_nested_params = parameters["model"].get( - "wrapped_model_class" - ) and all( - isinstance(params, (list, np.ndarray)) - for p_name, params in parameters["model"].items() - if p_name != "wrapped_model_class" + valid_nested_params = ( + not valid_model_list + and parameters["model"].get("wrapped_model_class") + and all( + isinstance(params, (list, np.ndarray)) + for p_name, params in parameters["model"].items() + if p_name != "wrapped_model_class" + ) ) if not (valid_model_list or valid_nested_params): raise_log( ValueError( - "The 'model' entry in `parameters` must either be a list of instantiated models or " - "a dictionary containing as keys hyperparameter names, and as values lists of values " - "plus a 'wrapped_model_class': model_cls item.", + "When the 'model' key is set as a dictionary, it must contain the 'wrapped_model_class' key, " + "which represents the class of the model to be wrapped.", logger, ) ) diff 
--git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 4207fd1964..1e4a19e186 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -3253,7 +3253,7 @@ def test_grid_search(self): # Create grid over wrapped model parameters too parameters = { "model": { - "model_class": RandomForestRegressor, + "wrapped_model_class": RandomForestRegressor, "min_samples_split": [2, 3], }, "lags": [1], @@ -3297,7 +3297,7 @@ def test_grid_search_invalid_wrapped_model_dict(self): with pytest.raises( ValueError, match="When the 'model' key is set as a dictionary, it must contain " - "the 'model_class' key, which represents the class of the model " + "the 'wrapped_model_class' key, which represents the class of the model " "to be wrapped.", ): RegressionModel.gridsearch(