From e692c87e34d2fb913a0679b0d45b8c5ca23b3fbf Mon Sep 17 00:00:00 2001 From: v1docq Date: Tue, 16 Apr 2024 17:45:11 +0300 Subject: [PATCH] add monash example --- .../ts_forecasting/ts_forecasting_example.py | 1 + ...al_reserve_economic_data_forecasting.ipynb | 2820 +++++++++++++++++ fedot_ind/api/main.py | 16 +- .../interfaces/industrial_model_strategy.py | 3 +- .../core/repository/constanst_repository.py | 9 +- .../industrial_implementations/abstract.py | 32 +- .../initializer_industrial_models.py | 4 +- fedot_ind/core/tuning/search_space.py | 110 - 8 files changed, 2872 insertions(+), 123 deletions(-) create mode 100644 examples/real_world_examples/industrial_examples/economic_analysis/ts_forecasting/federal_reserve_economic_data_forecasting.ipynb diff --git a/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py b/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py index 18087c64a..978f7eea8 100644 --- a/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py +++ b/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py @@ -26,6 +26,7 @@ metric='rmse', timeout=15, with_tuning=False, + pop_size=10, industrial_strategy='forecasting_assumptions', n_jobs=2, logging_level=30) diff --git a/examples/real_world_examples/industrial_examples/economic_analysis/ts_forecasting/federal_reserve_economic_data_forecasting.ipynb b/examples/real_world_examples/industrial_examples/economic_analysis/ts_forecasting/federal_reserve_economic_data_forecasting.ipynb new file mode 100644 index 000000000..5138b925f --- /dev/null +++ b/examples/real_world_examples/industrial_examples/economic_analysis/ts_forecasting/federal_reserve_economic_data_forecasting.ipynb @@ -0,0 +1,2820 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## Predict historical prices of Brent Oil, Crude Oil WTI, Natural Gas, Heating Oil from 2000-2022 with 
Fedot.Industrial" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "The dataset published on Kaggle consists of historical prices of Brent Oil, Crude Oil WTI, Natural Gas, and Heating Oil from 2000 to 2022. This sample of DailyOilGasPrices was created by using 30 consecutive business days of **Crude Oil WTI close prices** and **traded volumes** as **predictors** and the **average natural gas close** price during each 30-day time frame as the **target** variable. The final dataset has 191 2-dimensional time series of length 30, of which 70% were randomly sampled as training data and the remaining 30% as testing data. This type of model could help companies and governments to better analyse and predict economic situations and correlations regarding oil and natural gas.\n", + "Link to the dataset - https://www.kaggle.com/datasets/prasertk/historical-daily-oil-and-natural-gas-prices" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-28T10:34:48.354623Z", + "start_time": "2023-08-28T10:34:39.594404Z" + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from fedot.core.pipelines.pipeline_builder import PipelineBuilder\n", + "from fedot_ind.api.main import FedotIndustrial" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "def evaluate_loop(train_data, api_params, finetune: bool = False):\n", + " industrial = FedotIndustrial(**api_params)\n", + " if finetune:\n", + " industrial.finetune(train_data)\n", + " else:\n", + " industrial.fit(train_data)\n", + " return industrial" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [ + { + "name":
"stdout", + "output_type": "stream", + "text": [ + "2024-04-16 17:16:54,982 - PyTorch version 1.12.1+cu113 available.\n" + ] + } + ], + "source": [ + "from datasets import load_dataset\n", + "dataset_name = 'fred_md'\n", + "horizon = 12\n", + "metric_names = ('smape', 'rmse', 'median_absolute_error')\n", + "train_data = load_dataset('monash_tsf', 'fred_md')\n", + "forecasting_metrics = ('smape', 'rmse')\n", + "params = dict(problem='ts_forecasting',\n", + " metric='rmse',\n", + " timeout=15,\n", + " pop_size = 10,\n", + " with_tuning = False,\n", + " task_params={'forecast_length': horizon},\n", + " industrial_strategy='forecasting_assumptions',\n", + " n_jobs=2,\n", + " logging_level=40)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Now we must download the dataset. It could be done by using `DataReader` class that implemented as attribute of `FedotIndustrial` class. This class firstly tries to read the data from local project folder `data_path` and then if it is not possible, it downloads the data from the UCR/UEA archive. The data will be saved in the `data` folder." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "df_pandas = train_data['train'].to_pandas()\n", + "id_list = df_pandas['item_id'].values.tolist()\n", + "ts_list = [df_pandas[df_pandas['item_id']==id]['target'].values[0] for id in id_list]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "train_data = ts_list[0]\n", + "target = train_data[-horizon:].flatten()\n", + "input_data = (train_data,target)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Lets check our data." 
+ ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Let's visualise our predictors." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib import pyplot as plt\n", + "pd.DataFrame(train_data).plot(title='Example of FRED time series')\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Next steps are quite straightforward. We need to fit the model and then predict the values for the test data just like for any other model in sklearn.\n", + "\n", + "At the `fit` stage FedotIndustrial will transform initial time series data into features dataframe and will train regression model." + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "start_time": "2023-08-28T10:35:27.965798Z" + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-04-16 17:16:59,719 - Initialising experiment setup\n", + "2024-04-16 17:16:59,828 - DataSourceSplitter - Stratificated splitting of data is disabled.\n", + "2024-04-16 17:16:59,829 - DataSourceSplitter - Hold out validation is applied.\n", + "2024-04-16 17:16:59,830 - SequentialTuner - Hyperparameters optimization start: estimation of metric for initial graph\n", + "2024-04-16 17:17:00,271 - SequentialTuner - Initial graph: {'depth': 2, 'length': 2, 'nodes': [ar, eigen_basis]}\n", + "ar - {'lag_1': 7, 'lag_2': 12}\n", + "eigen_basis - {'low_rank_approximation': False, 'rank_regularization': 'explained_dispersion'} \n", + "Initial metric: [249.69]\n", + " 0%| | 0/100 [00:00\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
rmsemedian_absolute_errorsmape
061.10959.410.34
\n" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## AutoML approach" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-04-16 17:19:52,536 - Initialising experiment setup\n", + "2024-04-16 17:19:52,537 - Initialising Industrial Repository\n", + "2024-04-16 17:19:52,538 - Initialising Dask Server\n", + "Creating Dask Server\n", + "2024-04-16 17:19:53,023 - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy\n", + "2024-04-16 17:19:53,053 - State start\n", + "2024-04-16 17:19:53,180 - Scheduler at: inproc://10.64.4.32/16652/1\n", + "2024-04-16 17:19:53,180 - dashboard at: http://10.64.4.32:56716/status\n", + "2024-04-16 17:19:53,181 - Registering Worker plugin shuffle\n", + "2024-04-16 17:19:53,321 - Start worker at: inproc://10.64.4.32/16652/4\n", + "2024-04-16 17:19:53,322 - Listening to: inproc10.64.4.32\n", + "2024-04-16 17:19:53,322 - Worker name: 0\n", + "2024-04-16 17:19:53,323 - dashboard at: 10.64.4.32:56717\n", + "2024-04-16 17:19:53,324 - Waiting to connect to: inproc://10.64.4.32/16652/1\n", + "2024-04-16 17:19:53,324 - -------------------------------------------------\n", + "2024-04-16 17:19:53,325 - Threads: 8\n", + "2024-04-16 17:19:53,325 - Memory: 31.95 GiB\n", + "2024-04-16 17:19:53,325 - Local Directory: C:\\Users\\user\\AppData\\Local\\Temp\\dask-scratch-space\\worker-mdbnx57f\n", + "2024-04-16 17:19:53,326 - -------------------------------------------------\n", + "2024-04-16 17:19:53,330 - Register worker \n", + "2024-04-16 17:19:53,332 - Starting worker compute stream, 
inproc://10.64.4.32/16652/4\n", + "2024-04-16 17:19:53,332 - Starting established connection to inproc://10.64.4.32/16652/5\n", + "2024-04-16 17:19:53,333 - Starting Worker plugin shuffle\n", + "2024-04-16 17:19:53,333 - Registered to: inproc://10.64.4.32/16652/1\n", + "2024-04-16 17:19:53,334 - -------------------------------------------------\n", + "2024-04-16 17:19:53,335 - Starting established connection to inproc://10.64.4.32/16652/1\n", + "2024-04-16 17:19:53,337 - Receive client connection: Client-649c4fdd-fbfc-11ee-810c-b42e99a00ea1\n", + "2024-04-16 17:19:53,338 - Starting established connection to inproc://10.64.4.32/16652/6\n", + "2024-04-16 17:19:53,340 - LinK Dask Server - http://10.64.4.32:56716/status\n", + "2024-04-16 17:19:53,341 - Initialising solver\n", + "2024-04-16 17:19:53,411 - LaggedTransformationImplementation - Window size of lagged transformation was changed by WindowSizeSelector from 0 to 162\n", + "2024-04-16 17:19:53,446 - AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 0.5 MiB, max: 1.0 MiB\n", + "2024-04-16 17:19:53,448 - ApiComposer - Initial pipeline was fitted in 0.1 sec.\n", + "2024-04-16 17:19:53,449 - AssumptionsHandler - Preset was changed to best_quality due to fit time estimation for initial model.\n", + "2024-04-16 17:19:53,459 - ApiComposer - AutoML configured. Parameters tuning: False. Time limit: 5 min. Set of candidate models: ['ar', 'stl_arima', 'ets', 'cgru', 'glm', 'lagged', 'sparse_lagged', 'smoothing', 'gaussian_filter', 'ridge', 'lasso', 'eigen_basis', 'wavelet_basis', 'fourier_basis'].\n", + "2024-04-16 17:19:53,487 - ApiComposer - Pipeline composition started.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generations: 0%| | 0/10000 [00:00\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
rmsemedian_absolute_errorsmape
049.41638.5810.265
\n" + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "auto_metrics['eigen_ar']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [], + "source": [ + "import numpy as np\n", + "border = 100\n", + "baseline = labels\n", + "ridge = auto_labels['lagged_ridge']\n", + "eigen = auto_labels['eigen_ar']\n", + "cgru = auto_labels['cgru']\n", + "plt.close()\n", + "real_values = train_data\n", + "if len(real_values) > border:\n", + " real_values = real_values[-border:]\n", + "indicies = np.arange(real_values.shape[0])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.xlabel('Time index')\n", + "plt.ylabel('Series values')\n", + "plt.title('Forecating by Industrial')\n", + "plt.plot(real_values)\n", + "\n", + "real_last_value = real_values[-horizon- 1]\n", + "plt.plot(indicies[-horizon- 1:],\n", + " np.insert(baseline, 0, real_last_value), label='Finetune')\n", + "plt.plot(indicies[-horizon- 1:],\n", + " np.insert(eigen, 0, real_last_value), label='Eigen_AR')\n", + "\n", + "plt.grid()\n", + "plt.legend()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Compare with State of Art (SOTA) models" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/fedot_ind/api/main.py b/fedot_ind/api/main.py index 83bf295f4..8b86a7d50 100644 --- a/fedot_ind/api/main.py +++ b/fedot_ind/api/main.py @@ -12,9 +12,6 @@ from fedot.core.pipelines.pipeline import Pipeline from golem.core.optimisers.opt_history_objects.opt_history import OptHistory - -from fedot.core.pipelines.adapters import PipelineAdapter -from fedot.core.utils import fedot_project_root from fedot.core.visualisation.pipeline_specific_visuals import PipelineHistoryVisualizer from fedot_ind.api.utils.checkers_collections 
import DataCheck from fedot_ind.api.utils.industrial_strategy import IndustrialStrategy @@ -107,6 +104,7 @@ def __init__(self, **kwargs): self.predicted_probs = None self.predict_data = None self.target_encoder = None + self.is_finetuned = False # map Fedot params to Industrial params self.config_dict = kwargs @@ -177,6 +175,7 @@ def fit(self, self.solver = self.industrial_strategy_class.fit(self.train_data) else: self.solver.fit(self.train_data) + self.is_finetuned = False def predict(self, predict_data: tuple, @@ -196,7 +195,7 @@ def predict(self, self.predict_data = DataCheck(input_data=self.predict_data, task=self.config_dict['problem'], task_params=self.task_params).check_input_data() - if self.industrial_strategy is not None: + if self.industrial_strategy is not None and not self.is_finetuned: self.predicted_labels = self.industrial_strategy_class.predict(self.predict_data, predict_mode) else: if self.condition_check.solver_is_fedot_class(self.solver): @@ -234,7 +233,7 @@ def predict_proba(self, self.predict_data = DataCheck(input_data=self.predict_data, task=self.config_dict['problem'], task_params=self.task_params).check_input_data() - if self.industrial_strategy is not None: + if self.industrial_strategy is not None and not self.is_finetuned: self.predicted_labels = self.industrial_strategy_class.predict(self.predict_data, predict_mode) else: if self.condition_check.solver_is_fedot_class(self.solver): @@ -263,7 +262,9 @@ def finetune(self, """ if not self.condition_check.input_data_is_fedot_type(train_data): - input_preproc = DataCheck(input_data=train_data, task=self.config_dict['problem']) + input_preproc = DataCheck(input_data=train_data, + task=self.config_dict['problem'], + task_params=self.task_params) train_data = input_preproc.check_input_data() self.target_encoder = input_preproc.get_target_encoder() tuning_params = ApiConverter.tuning_params_is_none(tuning_params) @@ -275,12 +276,13 @@ def finetune(self, elif not 
self.condition_check.solver_is_none(model_to_tune): model_to_tune = model_to_tune else: - model_to_tune = deepcopy(self.config_dict['initial_assumption']) + model_to_tune = deepcopy(self.config_dict['initial_assumption']).build() tuning_params['tuner'] = tuner_type pipeline_tuner, model_to_tune = build_tuner(self, model_to_tune, tuning_params, train_data, mode) if abs(pipeline_tuner.obtained_metric) > tuned_metric: tuned_metric = abs(pipeline_tuner.obtained_metric) self.solver = model_to_tune + self.is_finetuned = True def get_metrics(self, target: Union[list, np.array] = None, diff --git a/fedot_ind/core/operation/interfaces/industrial_model_strategy.py b/fedot_ind/core/operation/interfaces/industrial_model_strategy.py index 759aaf942..1e394ed82 100644 --- a/fedot_ind/core/operation/interfaces/industrial_model_strategy.py +++ b/fedot_ind/core/operation/interfaces/industrial_model_strategy.py @@ -157,10 +157,11 @@ def _create_channel_params(self, train_data): family, link = self._check_glm_params(kurtosis(train_data.features), skew(train_data.features)) self.multi_dim_dispatcher.params_for_fit = {'family': family, 'link': link} + return train_data def fit(self, train_data: InputData): train_data = self.multi_dim_dispatcher._convert_input_data(train_data) - self._create_channel_params(train_data) + train_data = self._create_channel_params(train_data) return self.multi_dim_dispatcher.fit(train_data) def predict(self, trained_operation, predict_data: InputData, output_mode: str = 'labels') -> OutputData: diff --git a/fedot_ind/core/repository/constanst_repository.py b/fedot_ind/core/repository/constanst_repository.py index c55ad2c02..3e3471cc5 100644 --- a/fedot_ind/core/repository/constanst_repository.py +++ b/fedot_ind/core/repository/constanst_repository.py @@ -254,7 +254,10 @@ class FedotOperationConstant(Enum): 'classification': PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node( 'logit'), 'regression': 
PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node('treg'), - 'ts_forecasting': PipelineBuilder().add_node('ar') + 'ts_forecasting': PipelineBuilder().add_node('eigen_basis', + params={'low_rank_approximation': False, + 'rank_regularization': 'explained_dispersion'}).add_node( + 'ar') } FEDOT_TS_FORECASTING_ASSUMPTIONS = { @@ -262,8 +265,8 @@ class FedotOperationConstant(Enum): 'eigen_ar': PipelineBuilder().add_node('eigen_basis', params={'low_rank_approximation': False, 'rank_regularization': 'explained_dispersion'}).add_node('ar'), - 'glm': PipelineBuilder().add_node('glm') - } + 'cgru': PipelineBuilder().add_node("lagged").add_node('cgru', params={'loss': 'mse', + 'optimizer': 'adamw'})} FEDOT_ENSEMBLE_ASSUMPTIONS = { 'classification': PipelineBuilder().add_node('logit'), diff --git a/fedot_ind/core/repository/industrial_implementations/abstract.py b/fedot_ind/core/repository/industrial_implementations/abstract.py index ec8afbf87..2a8ac2c78 100644 --- a/fedot_ind/core/repository/industrial_implementations/abstract.py +++ b/fedot_ind/core/repository/industrial_implementations/abstract.py @@ -235,6 +235,33 @@ def transform_smoothing(self, input_data: InputData) -> OutputData: return output_data +def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_length: int): + """ Method check if the length of the time series is not enough for + lagged transformation + + Args: + time_series: time series for transformation + forecast_length: forecast length + + Returns: + + """ + max_allowed_window_size = max(1, round((len(time_series) - forecast_length - 1) * 0.25)) + window_list = list(range(3 * forecast_length, max_allowed_window_size, round(1.5 * forecast_length))) + + if self.window_size == 0 or self.window_size > max_allowed_window_size: + window_size = np.random.choice(window_list) + self.log.message((f"Window size of lagged transformation was changed " + f"by WindowSizeSelector from 
{self.params.get('window_size')} to {window_size}")) + self.params.update(window_size=window_size) + + # Minimum threshold + if self.window_size < self.window_size_minimum: + self.log.info((f"Warning: window size of lagged transformation was changed " + f"from {self.params.get('window_size')} to {self.window_size_minimum}")) + self.params.update(window_size=self.window_size_minimum) + + def transform_lagged_for_fit(self, input_data: InputData) -> OutputData: """Method for transformation of time series to lagged form for fit stage @@ -249,7 +276,10 @@ def transform_lagged_for_fit(self, input_data: InputData) -> OutputData: forecast_length = new_input_data.task.task_params.forecast_length # Correct window size parameter self._check_and_correct_window_size(new_input_data.features, forecast_length) - window_size = 3*forecast_length + window_list = list(range(3 * forecast_length, + round(input_data.features.shape[0] * 0.25), + round(1.5 * forecast_length))) + window_size = np.random.choice(window_list) new_idx, transformed_cols, new_target = transform_features_and_target_into_lagged( input_data, forecast_length, diff --git a/fedot_ind/core/repository/initializer_industrial_models.py b/fedot_ind/core/repository/initializer_industrial_models.py index 55e2e58de..4b46df7a1 100644 --- a/fedot_ind/core/repository/initializer_industrial_models.py +++ b/fedot_ind/core/repository/initializer_industrial_models.py @@ -17,7 +17,7 @@ from fedot_ind.api.utils.path_lib import PROJECT_PATH from fedot_ind.core.repository.industrial_implementations.abstract import merge_predicts, preprocess_predicts, \ predict_for_fit, predict, predict_operation, postprocess_predicts, update_column_types, transform_lagged, \ - transform_lagged_for_fit, transform_smoothing, _build, split_any + transform_lagged_for_fit, transform_smoothing, _build, split_any, _check_and_correct_window_size from fedot_ind.core.repository.industrial_implementations.optimisation import _get_default_industrial_mutations, \ 
MutationStrengthEnumIndustrial, has_no_data_flow_conflicts_in_industrial_pipeline, _crossover_by_type from fedot_ind.core.tuning.search_space import get_industrial_search_space @@ -79,6 +79,8 @@ def setup_repository(self): setattr(LaggedImplementation, 'transform', transform_lagged) setattr(LaggedImplementation, 'transform_for_fit', transform_lagged_for_fit) + setattr(LaggedImplementation, '_check_and_correct_window_size', + _check_and_correct_window_size) setattr(TsSmoothingImplementation, 'transform', transform_smoothing) class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline) diff --git a/fedot_ind/core/tuning/search_space.py b/fedot_ind/core/tuning/search_space.py index 4706c4e2c..38c7faf4d 100644 --- a/fedot_ind/core/tuning/search_space.py +++ b/fedot_ind/core/tuning/search_space.py @@ -519,116 +519,6 @@ def get_industrial_search_space(self): 'sampling-scope': [['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']], 'type': 'categorical'} }, - 'fast_ica': { - 'n_components': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 20], - 'type': 'discrete'}, - 'fun': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['logcosh', 'exp', 'cube']], - 'type': 'categorical'} - }, - 'ransac_lin_reg': { - 'min_samples': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.1, 0.9], - 'type': 'continuous'}, - 'residual_threshold': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [0.1, 1000], - 'type': 'continuous'}, - 'max_trials': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [50, 500], - 'type': 'continuous'}, - 'max_skips': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [50, 500000], - 'type': 'continuous'} - }, - 'ransac_non_lin_reg': { - 'min_samples': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.1, 0.9], - 'type': 'continuous'}, - 'residual_threshold': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [0.1, 1000], - 'type': 'continuous'}, - 'max_trials': { - 'hyperopt-dist': hp.uniform, - 
'sampling-scope': [50, 500], - 'type': 'continuous'}, - 'max_skips': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [50, 500000], - 'type': 'continuous'} - }, - 'isolation_forest_reg': { - 'max_samples': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 0.99], - 'type': 'continuous'}, - 'max_features': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 0.99], - 'type': 'continuous'}, - 'bootstrap': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'} - }, - 'isolation_forest_class': { - 'max_samples': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 0.99], - 'type': 'continuous'}, - 'max_features': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 0.99], - 'type': 'continuous'}, - 'bootstrap': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'} - }, - 'rfe_lin_reg': { - 'n_features_to_select': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.5, 0.9], - 'type': 'continuous'}, - 'step': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.1, 0.2], - 'type': 'continuous'} - }, - 'rfe_non_lin_reg': { - 'n_features_to_select': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.5, 0.9], - 'type': 'continuous'}, - 'step': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.1, 0.2], - 'type': 'continuous'} - }, - 'poly_features': { - 'degree': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 5], - 'type': 'discrete'}, - 'interaction_only': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'} - }, - 'polyfit': { - 'degree': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 6], - 'type': 'discrete'} - }, 'lagged': { 'window_size': { 'hyperopt-dist': hp.uniformint,