diff --git a/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py b/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py
index 3ed5a8a7d..6955712c8 100644
--- a/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py
+++ b/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py
@@ -1,38 +1,69 @@
+import pandas as pd
 from fedot.core.pipelines.pipeline_builder import PipelineBuilder
 from fedot_ind.api.main import FedotIndustrial
+from fedot_ind.api.utils.path_lib import PROJECT_PATH
+from fedot_ind.core.metrics.metrics_implementation import calculate_forecasting_metric
+from fedot_ind.core.repository.constanst_repository import M4_FORECASTING_BENCH
 from fedot_ind.tools.loader import DataLoader


 if __name__ == "__main__":
-    dataset_name = 'D1317'
+
+    # dataset_name = 'D1317'
     benchmark = 'M4'
     horizon = 14
     finetune = False
-    initial_assumption = PipelineBuilder().add_node('eigen_basis',
-                                                    params={'low_rank_approximation': False,
-                                                            'rank_regularization': 'explained_dispersion'}).add_node(
-        'ar')
-
-    industrial = FedotIndustrial(problem='ts_forecasting',
-                                 metric='rmse',
-                                 task_params={'forecast_length': horizon},
-                                 timeout=5,
-                                 with_tuning=False,
-                                 initial_assumption=initial_assumption,
-                                 n_jobs=2,
-                                 logging_level=10)
-
-    train_data, test_data = DataLoader(dataset_name=dataset_name).load_forecast_data(folder=benchmark)
-
-    if finetune:
-        model = industrial.finetune(train_data)
-    else:
-        model = industrial.fit(train_data)
-
-    labels = industrial.predict(test_data)
-    probs = industrial.predict_proba(test_data)
-    metrics = industrial.get_metrics(target=test_data[1],
-                                     rounding_order=3,
-                                     metric_names=['f1', 'accuracy', 'precision', 'roc_auc'])
-    print(metrics)
-    _ = 1
+    for dataset_name in M4_FORECASTING_BENCH:
+        try:
+            autogluon = PROJECT_PATH + f'/benchmark/results/benchmark_results/autogluon/' \
+                                       f'{dataset_name}_{horizon}_forecast_vs_actual.csv'
+            n_beats = PROJECT_PATH + f'/benchmark/results/benchmark_results/nbeats/' \
+                                     f'{dataset_name}_{horizon}_forecast_vs_actual.csv'
+            n_beats = pd.read_csv(n_beats)
+            autogluon = pd.read_csv(autogluon)
+
+            n_beats_forecast = calculate_forecasting_metric(target=n_beats['value'].values,
+                                                            labels=n_beats['predict'].values)
+            autogluon_forecast = calculate_forecasting_metric(target=autogluon['value'].values,
+                                                              labels=autogluon['predict'].values)
+
+            initial_assumption = PipelineBuilder().add_node('eigen_basis',
+                                                            params={'low_rank_approximation': False,
+                                                                    'rank_regularization': 'explained_dispersion'}).add_node(
+                'ar')
+            industrial = FedotIndustrial(problem='ts_forecasting',
+                                         metric='rmse',
+                                         task_params={'forecast_length': horizon},
+                                         timeout=5,
+                                         with_tuning=False,
+                                         initial_assumption=initial_assumption,
+                                         n_jobs=2,
+                                         logging_level=30)
+
+            train_data, _ = DataLoader(dataset_name=dataset_name).load_forecast_data(folder=benchmark)
+
+            if finetune:
+                model = industrial.finetune(train_data)
+            else:
+                model = industrial.fit(train_data)
+
+            labels = industrial.predict(train_data)
+            metrics = industrial.get_metrics(target=train_data.values[-horizon:].flatten(),
+                                             metric_names=('smape', 'rmse', 'median_absolute_error'))
+            industrial.save_best_model()
+            forecast = pd.DataFrame([labels,
+                                     train_data.values[-horizon:].flatten(),
+                                     autogluon['predict'].values,
+                                     n_beats['predict'].values]).T
+            forecast.columns = ['industrial', 'target',
+                                'AG',
+                                'NBEATS']
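+            # collect the Industrial metrics next to the pre-computed AutoGluon and N-BEATS baseline metrics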
+            metrics_comparison = pd.concat([metrics, autogluon_forecast,
+                                            n_beats_forecast]).T
+            metrics_comparison.columns = ['industrial',
+                                          'AG',
+                                          'NBEATS']
+            forecast.to_csv(f'./{dataset_name}_forecast.csv')
+            metrics_comparison.to_csv(f'./{dataset_name}_metrics.csv')
+        except Exception:
+            print(f'Skip {dataset_name}')
+
diff --git a/examples/automl_example/api_example/time_series/ts_regression/ts_regression_example.py b/examples/automl_example/api_example/time_series/ts_regression/ts_regression_example.py
index 0d68d36f4..ed825ef12 100644
--- a/examples/automl_example/api_example/time_series/ts_regression/ts_regression_example.py
+++ b/examples/automl_example/api_example/time_series/ts_regression/ts_regression_example.py
@@ -22,9 +22,8 @@
     model = industrial.fit(train_data)

     labels = industrial.predict(test_data)
-    probs = industrial.predict_proba(test_data)
     metrics = industrial.get_metrics(target=test_data[1],
                                      rounding_order=3,
                                      metric_names=('r2', 'rmse', 'mae'))
     print(metrics)
-    _ = 1
+
diff --git a/examples/real_world_examples/industrial_examples/economic_analysis/requirements.txt b/examples/real_world_examples/industrial_examples/economic_analysis/requirements.txt
deleted file mode 100644
index 1fe3f5d55..000000000
--- a/examples/real_world_examples/industrial_examples/economic_analysis/requirements.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-fedot @ https://github.com/aimclub/FEDOT.git@fi_exp_prep
-
-# workaround for macos
-catboost==1.1.1; sys_platform == 'darwin'
-
-giotto_tda==0.6.0
-hyperopt==0.2.7
-matplotlib>=3.3.1; python_version >= '3.8'
-MKLpy==0.6
-
-numpy>=1.16.0, !=1.24.0
-pandas>=1.3.0; python_version >='3.8'
-
-Pillow==10.2.0
-PyMonad==2.4.0
-PyWavelets==1.4.1
-PyYAML==6.0.1
-
-ripser==0.6.4
-
-scikit_learn>=1.0.0; python_version >= '3.8'
-
-scipy>=1.10.0
-sktime==0.16.1
-tensorly==0.8.1
-torch==2.0.0
-torchmetrics==0.11.4
-torchvision==0.15.1
-tensorboard>=2.12.0
-statsforecast==1.5.0
-
-chardet
-tqdm
diff --git a/examples/real_world_examples/industrial_examples/economic_analysis/ts_regression/__init__.py b/examples/real_world_examples/industrial_examples/economic_analysis/ts_regression/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/real_world_examples/industrial_examples/economic_analysis/oil_gas_prices.ipynb b/examples/real_world_examples/industrial_examples/economic_analysis/ts_regression/oil_gas_prices.ipynb
similarity index 100%
rename from examples/real_world_examples/industrial_examples/economic_analysis/oil_gas_prices.ipynb
rename to examples/real_world_examples/industrial_examples/economic_analysis/ts_regression/oil_gas_prices.ipynb
diff --git a/examples/real_world_examples/industrial_examples/economic_analysis/scoring_prediction.py b/examples/real_world_examples/industrial_examples/economic_analysis/ts_regression/scoring_prediction.py
similarity index 100%
rename from examples/real_world_examples/industrial_examples/economic_analysis/scoring_prediction.py
rename to examples/real_world_examples/industrial_examples/economic_analysis/ts_regression/scoring_prediction.py
diff --git a/examples/real_world_examples/industrial_examples/economic_analysis/scoring_train.csv b/examples/real_world_examples/industrial_examples/economic_analysis/ts_regression/scoring_train.csv
similarity index 100%
rename from examples/real_world_examples/industrial_examples/economic_analysis/scoring_train.csv
rename to examples/real_world_examples/industrial_examples/economic_analysis/ts_regression/scoring_train.csv
diff --git a/examples/real_world_examples/industrial_examples/economic_analysis/scroing_prediction.ipynb b/examples/real_world_examples/industrial_examples/economic_analysis/ts_regression/scroing_prediction.ipynb
similarity index 100%
rename from examples/real_world_examples/industrial_examples/economic_analysis/scroing_prediction.ipynb
rename to examples/real_world_examples/industrial_examples/economic_analysis/ts_regression/scroing_prediction.ipynb
diff --git a/examples/real_world_examples/industrial_examples/energy_monitoring/ts_regression/__init__.py b/examples/real_world_examples/industrial_examples/energy_monitoring/ts_regression/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/real_world_examples/industrial_examples/energy_monitoring/building_energy_consumption_electricity.ipynb b/examples/real_world_examples/industrial_examples/energy_monitoring/ts_regression/building_energy_consumption_electricity.ipynb
similarity index 100%
rename from examples/real_world_examples/industrial_examples/energy_monitoring/building_energy_consumption_electricity.ipynb
rename to examples/real_world_examples/industrial_examples/energy_monitoring/ts_regression/building_energy_consumption_electricity.ipynb
diff --git a/examples/real_world_examples/industrial_examples/energy_monitoring/building_energy_consumption_hotwater.ipynb b/examples/real_world_examples/industrial_examples/energy_monitoring/ts_regression/building_energy_consumption_hotwater.ipynb
similarity index 100%
rename from examples/real_world_examples/industrial_examples/energy_monitoring/building_energy_consumption_hotwater.ipynb
rename to examples/real_world_examples/industrial_examples/energy_monitoring/ts_regression/building_energy_consumption_hotwater.ipynb
diff --git a/examples/real_world_examples/industrial_examples/energy_monitoring/building_energy_consumption_steam.ipynb b/examples/real_world_examples/industrial_examples/energy_monitoring/ts_regression/building_energy_consumption_steam.ipynb
similarity index 100%
rename from examples/real_world_examples/industrial_examples/energy_monitoring/building_energy_consumption_steam.ipynb
rename to examples/real_world_examples/industrial_examples/energy_monitoring/ts_regression/building_energy_consumption_steam.ipynb
diff --git a/examples/real_world_examples/industrial_examples/sentiment_analysis/ts_regression/__init__.py b/examples/real_world_examples/industrial_examples/sentiment_analysis/ts_regression/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/real_world_examples/industrial_examples/sentiment_analysis/bitcoin_analysis.ipynb b/examples/real_world_examples/industrial_examples/sentiment_analysis/ts_regression/bitcoin_analysis.ipynb
similarity index 100%
rename from examples/real_world_examples/industrial_examples/sentiment_analysis/bitcoin_analysis.ipynb
rename to examples/real_world_examples/industrial_examples/sentiment_analysis/ts_regression/bitcoin_analysis.ipynb
diff --git a/examples/real_world_examples/industrial_examples/sentiment_analysis/ethereum_analysis.ipynb b/examples/real_world_examples/industrial_examples/sentiment_analysis/ts_regression/ethereum_analysis.ipynb
similarity index 100%
rename from examples/real_world_examples/industrial_examples/sentiment_analysis/ethereum_analysis.ipynb
rename to examples/real_world_examples/industrial_examples/sentiment_analysis/ts_regression/ethereum_analysis.ipynb
diff --git a/fedot_ind/core/metrics/metrics_implementation.py b/fedot_ind/core/metrics/metrics_implementation.py
index d4a70723c..cc8c7f001 100644
--- a/fedot_ind/core/metrics/metrics_implementation.py
+++ b/fedot_ind/core/metrics/metrics_implementation.py
@@ -9,6 +9,9 @@
 from sklearn.metrics import d2_absolute_error_score, explained_variance_score, max_error, median_absolute_error

 from fedot_ind.core.architecture.settings.computational import backend_methods as np
+from sktime.performance_metrics.forecasting import mean_absolute_scaled_error
+
+import numpy as np


 class ParetoMetrics:
@@ -70,8 +73,8 @@ def metric(self) -> float:
 class SMAPE(QualityMetric):
     def metric(self):
         return 1 / len(self.predicted_labels) \
-            * np.sum(2 * np.abs(self.target - self.predicted_labels) / (np.abs(self.predicted_labels)
-                                                                        + np.abs(self.target)) * 100)
+               * np.sum(2 * np.abs(self.target - self.predicted_labels) / (np.abs(self.predicted_labels)
+                                                                           + np.abs(self.target)) * 100)


 class MSE(QualityMetric):
@@ -167,6 +170,14 @@ def metric(self) -> float:
         return accuracy_score(y_true=self.target, y_pred=self.predicted_labels)


+def MASE(A, F, y_train):
+    return mean_absolute_scaled_error(A, F, y_train=y_train)
+
+
+def SMAPE(a, f, _=None):
+    return 1 / len(a) * np.sum(2 * np.abs(f - a) / (np.abs(a) + np.abs(f)) * 100)
+
+
 def calculate_regression_metric(target,
                                 labels,
                                 rounding_order=3,
@@ -194,6 +205,30 @@ def rmse(y_true, y_pred):
     return df.round(rounding_order)


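+# forecasting counterpart of calculate_regression_metric: returns a one-row DataFrame with the requested metrics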
+def calculate_forecasting_metric(target,
+                                 labels,
+                                 rounding_order=3,
+                                 metric_names=('smape', 'rmse', 'median_absolute_error'),
+                                 **kwargs):
+    target = target.astype(float)
+
+    def rmse(y_true, y_pred):
+        return np.sqrt(mean_squared_error(y_true, y_pred))
+
+    metric_dict = {
+        'rmse': rmse,
+        'mae': mean_absolute_error,
+        'median_absolute_error': median_absolute_error,
+        'smape': SMAPE,
+        'mase': MASE
+    }
+
+    df = pd.DataFrame({name: func(target, labels) for name, func in metric_dict.items()
+                       if name in metric_names},
+                      index=[0])
+    return df.round(rounding_order)
+
+
 def calculate_classification_metric(target,
                                     labels,
                                     probs,
diff --git a/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py b/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py
index cb24f9203..37001157b 100644
--- a/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py
+++ b/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py
@@ -324,6 +324,8 @@ def __init__(self, operation_type: str, params: Optional[OperationParameters] =
         params = IndustrialOperationParameters().from_params(operation_type, params) if params \
             else IndustrialOperationParameters().from_operation_type(operation_type)
         super().__init__(operation_type, params)
+        self.multi_dim_dispatcher.concat_func = np.vstack
+        self.ensemble_func = np.sum

     def fit(self, train_data: InputData):
         train_data = self.multi_dim_dispatcher._convert_input_data(train_data)
@@ -334,15 +336,23 @@ def predict(self, trained_operation, output_mode: str = 'default'):
         converted_predict_data = self.multi_dim_dispatcher._convert_input_data(
             predict_data)
-        return self.multi_dim_dispatcher.predict(trained_operation, converted_predict_data, output_mode=output_mode)
+        predict_output = self.multi_dim_dispatcher.predict(trained_operation, converted_predict_data,
+                                                           output_mode=output_mode)
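+        # reshape the stacked predictions to (len(trained_operation), -1) and aggregate them with ensemble_func (np.sum)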
+        predict_output.predict = np.reshape(predict_output.predict, (len(trained_operation), -1))
+        predict_output.predict = self.ensemble_func(predict_output.predict, axis=0)
+        return predict_output

     def predict_for_fit(self, trained_operation, predict_data: InputData,
                         output_mode: str = 'default') -> OutputData:
         converted_predict_data = self.multi_dim_dispatcher._convert_input_data(
             predict_data)
-        return self.multi_dim_dispatcher.predict_for_fit(trained_operation, converted_predict_data,
-                                                         output_mode=output_mode)
+        predict_output = self.multi_dim_dispatcher.predict_for_fit(trained_operation,
+                                                                   converted_predict_data,
+                                                                   output_mode=output_mode)
+        predict_output.predict = np.reshape(predict_output.predict, (len(trained_operation), -1))
+        predict_output.predict = self.ensemble_func(predict_output.predict, axis=0)
+        return predict_output


 class IndustrialClassificationPreprocessingStrategy(IndustrialCustomPreprocessingStrategy):
diff --git a/fedot_ind/core/repository/constanst_repository.py b/fedot_ind/core/repository/constanst_repository.py
index 15a2b4439..688d28ed7 100644
--- a/fedot_ind/core/repository/constanst_repository.py
+++ b/fedot_ind/core/repository/constanst_repository.py
@@ -14,7 +14,8 @@
 from torch import nn
 from golem.core.tuning.simultaneous import SimultaneousTuner
 from golem.core.tuning.sequential import SequentialTuner
-from fedot_ind.core.metrics.metrics_implementation import calculate_classification_metric, calculate_regression_metric
+from fedot_ind.core.metrics.metrics_implementation import calculate_classification_metric, calculate_regression_metric, \
+    calculate_forecasting_metric
 from fedot_ind.core.models.nn.network_modules.losses import CenterLoss, CenterPlusLoss, ExpWeightedLoss, FocalLoss, \
     HuberLoss, LogCoshLoss, MaskedLossWrapper, RMSELoss, SMAPELoss, TweedieLoss
 from fedot_ind.core.models.quantile.stat_features import autocorrelation, ben_corr, crest_factor, energy, \
@@ -193,8 +194,10 @@ class FedotOperationConstant(Enum):
         with_tuning=True
     )
     FEDOT_GET_METRICS = {'regression': calculate_regression_metric,
+                         'ts_forecasting': calculate_forecasting_metric,
                          'classification': calculate_classification_metric}
     FEDOT_TUNING_METRICS = {'classification': ClassificationMetricsEnum.accuracy,
+                            'ts_forecasting': RegressionMetricsEnum.RMSE,
                             'regression': RegressionMetricsEnum.RMSE}
     FEDOT_TUNER_STRATEGY = {
         'sequential': partial(SequentialTuner, inverse_node_order=True),
@@ -229,7 +232,7 @@ class FedotOperationConstant(Enum):
         'classification': PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node(
             'logit'),
         'regression': PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node('treg'),
-        'ts_forecasting': PipelineBuilder().add_node('ssa_forecaster')
+        'ts_forecasting': PipelineBuilder().add_node('ar')
     }

     FEDOT_ENSEMBLE_ASSUMPTIONS = {
diff --git a/fedot_ind/core/repository/data/industrial_data_operation_repository.json b/fedot_ind/core/repository/data/industrial_data_operation_repository.json
index 8478907d5..efcb3f594 100644
--- a/fedot_ind/core/repository/data/industrial_data_operation_repository.json
+++ b/fedot_ind/core/repository/data/industrial_data_operation_repository.json
@@ -169,7 +169,9 @@
       "fast_train"
     ],
     "tags": [
-      "basis"
+      "basis",
+      "non_lagged",
+      "ts_to_ts"
     ]
   },
   "wavelet_basis": {
@@ -178,7 +180,7 @@
       "fast_train"
     ],
     "tags": [
-      "basis"
+      "basis","non_applicable_for_ts"
     ]
   },
   "fourier_basis": {
@@ -187,7 +189,7 @@
       "fast_train"
     ],
     "tags": [
-      "basis"
+      "basis","non_applicable_for_ts"
     ]
   },
   "quantile_extractor": {
@@ -205,7 +207,7 @@
       "fast_train"
     ],
     "tags": [
-      "extractor"
+      "extractor","non_applicable_for_ts"
     ]
   },
   "recurrence_extractor": {
@@ -214,7 +216,7 @@
       "fast_train"
     ],
     "tags": [
-      "extractor"
+      "extractor","non_applicable_for_ts"
     ]
   },
   "topological_features": {
@@ -231,7 +233,7 @@
       "fast_train"
     ],
     "tags": [
-      "extractor"
+      "extractor","non_applicable_for_ts"
     ]
   },
   "chronos_extractor": {
@@ -240,7 +242,7 @@
       "fast_train"
     ],
     "tags": [
-      "extractor"
+      "extractor","non_applicable_for_ts"
     ]
   },
   "channel_filtration": {
@@ -249,7 +251,7 @@
       "fast_train"
     ],
     "tags": [
-      "extractor"
+      "extractor","non_applicable_for_ts"
     ]
   },
   "cat_features": {
@@ -258,7 +260,7 @@
       "fast_train"
     ],
     "tags": [
-      "cat_features"
+      "cat_features","non_applicable_for_ts"
     ]
   },
   "data_source_img": {
diff --git a/fedot_ind/core/repository/initializer_industrial_models.py b/fedot_ind/core/repository/initializer_industrial_models.py
index 79cba4449..71d918d69 100644
--- a/fedot_ind/core/repository/initializer_industrial_models.py
+++ b/fedot_ind/core/repository/initializer_industrial_models.py
@@ -81,7 +81,7 @@ def setup_repository(self):
                 transform_lagged_for_fit)
         setattr(TsSmoothingImplementation, 'transform', transform_smoothing)

-        # class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)
+        class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)
         MutationStrengthEnum = MutationStrengthEnumIndustrial

         return OperationTypesRepository
@@ -108,7 +108,6 @@ def __enter__(self):
             get_industrial_search_space)
         setattr(ApiComposer, "_get_default_mutations",
                 _get_default_industrial_mutations)
-        class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)

     def __exit__(self, exc_type, exc_val, exc_tb):
         """
diff --git a/fedot_ind/core/repository/model_repository.py b/fedot_ind/core/repository/model_repository.py
index a638c66cd..eb6c8ae41 100644
--- a/fedot_ind/core/repository/model_repository.py
+++ b/fedot_ind/core/repository/model_repository.py
@@ -181,7 +181,7 @@ class AtomizedModel(Enum):
         'ets': ExpSmoothingImplementation,
         'cgru': CGRUImplementation,
         'glm': GLMImplementation,
-        'locf': RepeatLastValueImplementation,
+        #'locf': RepeatLastValueImplementation,
         #'ssa_forecaster': SSAForecasterImplementation
     }

diff --git a/fedot_ind/tools/loader.py b/fedot_ind/tools/loader.py
index fd8678ef9..97edee104 100644
--- a/fedot_ind/tools/loader.py
+++ b/fedot_ind/tools/loader.py
@@ -4,7 +4,7 @@
 import urllib.request as request
 import zipfile
 from pathlib import Path
-
+from datasets import load_dataset
 import chardet
 import pandas as pd
 from datasetsforecast.m3 import M3
@@ -39,7 +39,8 @@ def __init__(self, dataset_name: str, folder: str = None):
         self.forecast_data_source = {'M3': M3.load,
                                      # 'M4': M4.load,
                                      'M4': self.local_m4_load,
-                                     'M5': M5.load
+                                     'M5': M5.load,
+                                     'monash_tsf': load_dataset
                                      }

     def load_forecast_data(self, folder=None):