From 27b42cd0b0d3f1b916488f05e8ba47cc760ec2e8 Mon Sep 17 00:00:00 2001 From: v1docq Date: Tue, 9 Apr 2024 13:44:35 +0300 Subject: [PATCH] examples updates part 2 --- .../ts_classification_example.py | 34 ++---- .../ts_forecasting/forecasting_analysis.py | 38 ++++++ .../ts_forecasting/ts_forecasting_example.py | 108 ++++++++---------- .../ts_regression/ts_regression_example.py | 37 +++--- .../transformation/basis/eigen_basis.py | 7 +- .../core/optimizer/IndustrialEvoOptimizer.py | 1 + .../initializer_industrial_models.py | 2 +- fedot_ind/tools/example_utils.py | 61 +++++++++- 8 files changed, 176 insertions(+), 112 deletions(-) create mode 100644 examples/automl_example/api_example/time_series/ts_forecasting/forecasting_analysis.py diff --git a/examples/automl_example/api_example/time_series/ts_classification/ts_classification_example.py b/examples/automl_example/api_example/time_series/ts_classification/ts_classification_example.py index ca02ae164..495082016 100644 --- a/examples/automl_example/api_example/time_series/ts_classification/ts_classification_example.py +++ b/examples/automl_example/api_example/time_series/ts_classification/ts_classification_example.py @@ -1,31 +1,21 @@ from fedot.core.pipelines.pipeline_builder import PipelineBuilder +from fedot_ind.tools.example_utils import industrial_common_modelling_loop -from fedot_ind.api.main import FedotIndustrial -from fedot_ind.tools.loader import DataLoader if __name__ == "__main__": dataset_name = 'Handwriting' finetune = True - initial_assumption = PipelineBuilder().add_node('channel_filtration').\ + initial_assumption = PipelineBuilder().add_node('channel_filtration'). \ add_node('quantile_extractor').add_node('rf') + metric_names = ('f1', 'accuracy', 'precision', 'roc_auc') + api_config = dict(problem='classification', + metric='f1', + timeout=5, + initial_assumption=initial_assumption, + n_jobs=2, + logging_level=20) - industrial = FedotIndustrial(problem='classification', - metric='f1', - timeout=5, - initial_assumption=initial_assumption, - n_jobs=2, - logging_level=20) - - train_data, test_data = DataLoader(dataset_name=dataset_name).load_data() - if finetune: - model = industrial.finetune(train_data) - else: - model = industrial.fit(train_data) - - labels = industrial.predict(test_data) - probs = industrial.predict_proba(test_data) - metrics = industrial.get_metrics(target=test_data[1], - rounding_order=3, - metric_names=['f1', 'accuracy', 'precision', 'roc_auc']) + model, labels, metrics = industrial_common_modelling_loop(api_config=api_config, + dataset_name=dataset_name, + finetune=finetune) print(metrics) - _ = 1 diff --git a/examples/automl_example/api_example/time_series/ts_forecasting/forecasting_analysis.py b/examples/automl_example/api_example/time_series/ts_forecasting/forecasting_analysis.py new file mode 100644 index 000000000..7e434eb99 --- /dev/null +++ b/examples/automl_example/api_example/time_series/ts_forecasting/forecasting_analysis.py @@ -0,0 +1,38 @@ +import os +import pandas as pd + +from fedot_ind.api.utils.path_lib import PROJECT_PATH + +forecast_result_path = PROJECT_PATH + '/examples/automl_example/api_example/time_series/ts_forecasting/forecasts/' + + +def read_results(forecast_result_path): + results = os.listdir(forecast_result_path) + df_forecast = [] + df_metrics = [] + for file in results: + df = pd.read_csv(f'{forecast_result_path}/{file}') + name = file.split('_')[0] + df['dataset_name'] = name + if file.__contains__('forecast'): + df_forecast.append(df) + else: + df_metrics.append(df) + return df_forecast, df_metrics + + +def create_comprasion_df(df, metric: str = 'rmse'): + df_full = pd.concat(df) + df_full = df_full[df_full['Unnamed: 0'] == metric] + df_full = df_full .drop('Unnamed: 0', axis=1) + df_full['Difference_industrial'] = (df_full.iloc[:, 1:2].min(axis=1) - df_full['industrial']) + df_full['industrial_Wins'] = df_full.apply(lambda row: 'Win' if row.loc['Difference_industrial'] > 0 else 'Loose', + axis=1) + return df_full + + +if __name__ == "__main__": + for metric in ['rmse', 'smape']: + df_forecast, df_metrics = read_results(forecast_result_path) + df_comprasion = create_comprasion_df(df_metrics, metric) + print(df_comprasion['industrial_Wins'].value_counts()) diff --git a/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py b/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py index 6955712c8..368929af3 100644 --- a/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py +++ b/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py @@ -1,69 +1,61 @@ +import os + import pandas as pd from fedot.core.pipelines.pipeline_builder import PipelineBuilder -from fedot_ind.api.main import FedotIndustrial from fedot_ind.api.utils.path_lib import PROJECT_PATH -from fedot_ind.core.metrics.metrics_implementation import calculate_forecasting_metric from fedot_ind.core.repository.constanst_repository import M4_FORECASTING_BENCH -from fedot_ind.tools.loader import DataLoader +from fedot_ind.tools.example_utils import industrial_forecasting_modelling_loop, compare_forecast_with_sota if __name__ == "__main__": - - #dataset_name = 'D1317' + forecast_result_path = os.listdir(PROJECT_PATH + + '/examples/automl_example/api_example/time_series/ts_forecasting/forecasts/') + forecast_result_path = set([x.split('_')[0] for x in forecast_result_path]) + forecast_col = ['industrial', 'target', 'AG', 'NBEATS'] + metric_col = ['industrial', 'AG', 'NBEATS'] benchmark = 'M4' horizon = 14 finetune = False - for dataset_name in M4_FORECASTING_BENCH: - try: - autogluon = PROJECT_PATH + f'/benchmark/results/benchmark_results/autogluon/' \ - f'{dataset_name}_{horizon}_forecast_vs_actual.csv' - n_beats = PROJECT_PATH + f'/benchmark/results/benchmark_results/nbeats/' \ - f'{dataset_name}_{horizon}_forecast_vs_actual.csv' - n_beats = pd.read_csv(n_beats) - autogluon = pd.read_csv(autogluon) - - n_beats_forecast = calculate_forecasting_metric(target=n_beats['value'].values, - labels=n_beats['predict'].values) - autogluon_forecast = calculate_forecasting_metric(target=autogluon['value'].values, - labels=autogluon['predict'].values) - - initial_assumption = PipelineBuilder().add_node('eigen_basis', - params={'low_rank_approximation': False, - 'rank_regularization': 'explained_dispersion'}).add_node( - 'ar') - industrial = FedotIndustrial(problem='ts_forecasting', - metric='rmse', - task_params={'forecast_length': horizon}, - timeout=5, - with_tuning=False, - initial_assumption=initial_assumption, - n_jobs=2, - logging_level=30) - - train_data, _ = DataLoader(dataset_name=dataset_name).load_forecast_data(folder=benchmark) - - if finetune: - model = industrial.finetune(train_data) - else: - model = industrial.fit(train_data) - - labels = industrial.predict(train_data) - metrics = industrial.get_metrics(target=train_data.values[-horizon:].flatten(), - metric_names=('smape', 'rmse', 'median_absolute_error')) - industrial.save_best_model() - forecast = pd.DataFrame([labels, - train_data.values[-horizon:].flatten(), - autogluon['predict'].values, - n_beats['predict'].values]).T - forecast.columns = ['industrial', 'target', - 'AG', - 'NBEATS'] - metrics_comprasion = pd.concat([metrics, autogluon_forecast, n_beats_forecast]).T - metrics_comprasion.columns = ['industrial', - 'AG', - 'NBEATS'] - forecast.to_csv(f'./{dataset_name}_forecast.csv') - metrics_comprasion.to_csv(f'./{dataset_name}_metrics.csv') - except Exception: - print(f'Skip {dataset_name}') + initial_assumption = PipelineBuilder().add_node('eigen_basis', + params={'low_rank_approximation': False, + 'rank_regularization': 'explained_dispersion'}).add_node( + 'ar') + api_config = dict(problem='ts_forecasting', + metric='rmse', + task_params={'forecast_length': horizon}, + timeout=5, + with_tuning=False, + initial_assumption=initial_assumption, + n_jobs=2, + logging_level=30) + for dataset_name in M4_FORECASTING_BENCH: + if dataset_name in forecast_result_path: + print('Already evaluated') + else: + try: + n_beats_forecast, n_beats_metrics, \ + autogluon_forecast, autogluon_metrics = compare_forecast_with_sota(dataset_name=dataset_name, + horizon=horizon) + model, labels, metrics, target = industrial_forecasting_modelling_loop(dataset_name=dataset_name, + benchmark=benchmark, + horizon=horizon, + api_config=api_config, + finetune=finetune) + + forecast = pd.DataFrame([labels, + target, + n_beats_forecast, + autogluon_forecast]).T + forecast.columns = forecast_col + + metrics_comprasion = pd.concat([metrics, + autogluon_forecast, + n_beats_forecast]).T + metrics_comprasion.columns = metric_col + + forecast.to_csv(f'./{dataset_name}_forecast.csv') + metrics_comprasion.to_csv(f'./{dataset_name}_metrics.csv') + + except Exception as ex: + print(f'Skip {dataset_name}. Reason - {ex}') diff --git a/examples/automl_example/api_example/time_series/ts_regression/ts_regression_example.py b/examples/automl_example/api_example/time_series/ts_regression/ts_regression_example.py index ed825ef12..fa7839561 100644 --- a/examples/automl_example/api_example/time_series/ts_regression/ts_regression_example.py +++ b/examples/automl_example/api_example/time_series/ts_regression/ts_regression_example.py @@ -1,29 +1,20 @@ from fedot.core.pipelines.pipeline_builder import PipelineBuilder -from fedot_ind.api.main import FedotIndustrial -from fedot_ind.tools.loader import DataLoader +from fedot_ind.tools.example_utils import industrial_common_modelling_loop if __name__ == "__main__": - dataset_name = 'IEEEPPG' #BeijingPM10Quality + dataset_name = 'IEEEPPG' # BeijingPM10Quality finetune = True - initial_assumption = PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node('treg') - - industrial = FedotIndustrial(problem='regression', - metric='rmse', - timeout=5, - initial_assumption=initial_assumption, - n_jobs=2, - logging_level=20) - - train_data, test_data = DataLoader(dataset_name=dataset_name).load_data() - if finetune: - model = industrial.finetune(train_data) - else: - model = industrial.fit(train_data) - - labels = industrial.predict(test_data) - metrics = industrial.get_metrics(target=test_data[1], - rounding_order=3, - metric_names=('r2', 'rmse', 'mae')) + initial_assumption = PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node( + 'treg') + api_config = dict(problem='regression', + metric='rmse', + timeout=5, + initial_assumption=initial_assumption, + n_jobs=2, + logging_level=20) + metric_names = ('r2', 'rmse', 'mae') + model, labels, metrics = industrial_common_modelling_loop(api_config=api_config, + dataset_name=dataset_name, + finetune=finetune) print(metrics) - diff --git a/fedot_ind/core/operation/transformation/basis/eigen_basis.py b/fedot_ind/core/operation/transformation/basis/eigen_basis.py index ab4e58821..3a707b40f 100644 --- a/fedot_ind/core/operation/transformation/basis/eigen_basis.py +++ b/fedot_ind/core/operation/transformation/basis/eigen_basis.py @@ -157,10 +157,9 @@ def mode_func(x): number_of_dim = list(range(data.shape[1])) if len(number_of_dim) == 1: - try: - svd_numbers = [self._transform_one_sample(signal, svd_flag=True) for signal in data[:, 0, :]] - except Exception: - _ = 1 + svd_numbers = [self._transform_one_sample(signal, svd_flag=True) for signal in data[:, 0, :]] + if len(svd_numbers) == 0: + raise ValueError('Error in spectrum calculation') else: for dimension in number_of_dim: dimension_rank = [] diff --git a/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py b/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py index 8ee78e056..13941bc9e 100644 --- a/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py +++ b/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py @@ -25,6 +25,7 @@ def __init__(self, super().__init__(objective, initial_graphs, requirements, graph_generation_params, graph_optimizer_params) + self.operators.remove(self.crossover) self.eval_dispatcher = IndustrialDispatcher(adapter=graph_generation_params.adapter, n_jobs=requirements.n_jobs, graph_cleanup_fn=_try_unfit_graph, diff --git a/fedot_ind/core/repository/initializer_industrial_models.py b/fedot_ind/core/repository/initializer_industrial_models.py index 71d918d69..55e2e58de 100644 --- a/fedot_ind/core/repository/initializer_industrial_models.py +++ b/fedot_ind/core/repository/initializer_industrial_models.py @@ -60,7 +60,7 @@ def setup_repository(self): get_industrial_search_space) setattr(ApiParamsRepository, "_get_default_mutations", _get_default_industrial_mutations) - setattr(Crossover, '_crossover_by_type', _crossover_by_type) + #setattr(Crossover, '_crossover_by_type', _crossover_by_type) ## replace data merger setattr(ImageDataMerger, "preprocess_predicts", preprocess_predicts) setattr(ImageDataMerger, "merge_predicts", merge_predicts) diff --git a/fedot_ind/tools/example_utils.py b/fedot_ind/tools/example_utils.py index e1369cadf..e69a8af60 100644 --- a/fedot_ind/tools/example_utils.py +++ b/fedot_ind/tools/example_utils.py @@ -1,11 +1,11 @@ import random from pathlib import Path +from fedot_ind.api.main import FedotIndustrial +from fedot_ind.core.metrics.metrics_implementation import calculate_forecasting_metric + +from fedot_ind.tools.loader import DataLoader import pandas as pd -from fedot.core.data.data import InputData -from fedot.core.data.data_split import train_test_data_setup -from fedot.core.repository.dataset_types import DataTypesEnum -from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams from sklearn.metrics import f1_score, roc_auc_score from fedot_ind.api.utils.path_lib import PROJECT_PATH from fedot_ind.core.architecture.settings.computational import backend_methods as np @@ -28,3 +28,56 @@ def evaluate_metric(target, prediction): metric = f1_score(target, np.argmax( prediction, axis=1), average='weighted') return metric + + +def compare_forecast_with_sota(dataset_name, horizon): + autogluon = PROJECT_PATH + f'/benchmark/results/benchmark_results/autogluon/' \ + f'{dataset_name}_{horizon}_forecast_vs_actual.csv' + n_beats = PROJECT_PATH + f'/benchmark/results/benchmark_results/nbeats/' \ + f'{dataset_name}_{horizon}_forecast_vs_actual.csv' + n_beats = pd.read_csv(n_beats) + autogluon = pd.read_csv(autogluon) + + n_beats_forecast = calculate_forecasting_metric(target=n_beats['value'].values, + labels=n_beats['predict'].values) + autogluon_forecast = calculate_forecasting_metric(target=autogluon['value'].values, + labels=autogluon['predict'].values) + return n_beats['predict'].values,n_beats_forecast, autogluon['predict'].values, autogluon_forecast + + +def industrial_forecasting_modelling_loop(dataset_name: str = None, + benchmark: str = None, + horizon: int = 1, + finetune: bool = False, + api_config: dict = None): + industrial = FedotIndustrial(**api_config) + train_data, _ = DataLoader(dataset_name=dataset_name).load_forecast_data(folder=benchmark) + target = train_data.values[-horizon:].flatten() + if finetune: + model = industrial.finetune(train_data) + else: + model = industrial.fit(train_data) + + labels = industrial.predict(train_data) + metrics = industrial.get_metrics(target=target, + metric_names=('smape', 'rmse', 'median_absolute_error')) + return model, labels, metrics, target + + +def industrial_common_modelling_loop(dataset_name: str = None, + finetune: bool = False, + api_config: dict = None, + metric_names: tuple = ('r2', 'rmse', 'mae')): + industrial = FedotIndustrial(**api_config) + + train_data, test_data = DataLoader(dataset_name=dataset_name).load_data() + if finetune: + model = industrial.finetune(train_data) + else: + model = industrial.fit(train_data) + + labels = industrial.predict(test_data) + metrics = industrial.get_metrics(target=test_data[1], + rounding_order=3, + metric_names=metric_names) + return model, labels, metrics