From a8658f827d4bde2837be300c2bf9e95d07972a9a Mon Sep 17 00:00:00 2001
From: VK-notebook <119884857+PvtKaefsky@users.noreply.github.com>
Date: Mon, 20 May 2024 18:27:38 +0300
Subject: [PATCH] Markov AR init based on Fedot AR implementation

---
 .../core/models/ts_forecasting/markov_ar.py   | 189 ++++++++++++++++++
 .../data/industrial_model_repository.json     |  11 ++
 fedot_ind/core/repository/model_repository.py |   4 +-
 3 files changed, 203 insertions(+), 1 deletion(-)
 create mode 100644 fedot_ind/core/models/ts_forecasting/markov_ar.py

diff --git a/fedot_ind/core/models/ts_forecasting/markov_ar.py b/fedot_ind/core/models/ts_forecasting/markov_ar.py
new file mode 100644
index 000000000..f9e17c3e5
--- /dev/null
+++ b/fedot_ind/core/models/ts_forecasting/markov_ar.py
@@ -0,0 +1,189 @@
+from copy import copy
+
+import numpy as np
+import statsmodels.api as sm
+from statsmodels.tsa.ar_model import AutoReg
+from statsmodels.tsa.exponential_smoothing.ets import ETSModel
+
+from fedot.core.data.data import InputData, OutputData
+from fedot.core.operations.evaluation.operation_implementations.data_operations.ts_transformations import ts_to_table
+from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import ModelImplementation
+from fedot.core.operations.operation_parameters import OperationParameters
+from fedot.core.repository.dataset_types import DataTypesEnum
+
+
+class MarkovAR(ModelImplementation):
+    """ Markov switching autoregression from statsmodels
+
+    Optional params ``k_regimes``, ``order`` and ``switching_ar`` are passed to
+    ``sm.tsa.MarkovAutoregression``; the defaults are 2, 4 and False
+    """
+
+    def __init__(self, params: OperationParameters):
+        super().__init__(params)
+        self.autoreg = None
+        self.actual_ts_len = None
+        self.k_regimes = int(self.params.get('k_regimes', 2))
+        self.order = int(self.params.get('order', 4))
+        self.switching_ar = bool(self.params.get('switching_ar', False))
+
+    def _build_model(self, time_series):
+        """ Create an unfitted statsmodels model for the given series """
+        return sm.tsa.MarkovAutoregression(np.array(time_series),
+                                           k_regimes=self.k_regimes,
+                                           order=self.order,
+                                           switching_ar=self.switching_ar)
+
+    def fit(self, input_data):
+        """ Class fit ar model on data
+
+        :param input_data: data with features, target and ids to process
+        :return: fitted statsmodels results object
+        """
+        self.autoreg = self._build_model(input_data.features).fit()
+        self.actual_ts_len = input_data.idx.shape[0]
+        return self.autoreg
+
+    def predict(self, input_data):
+        """ Method for time series prediction on forecast length
+
+        :param input_data: data with features, target and ids to process
+        :return output_data: output data with smoothed time series
+        """
+        input_data = copy(input_data)
+        parameters = input_data.task.task_params
+        forecast_length = parameters.forecast_length
+
+        # in case in(out) sample forecasting
+        self.handle_new_data(input_data)
+        start_id = self.actual_ts_len
+        end_id = start_id + forecast_length - 1
+        # NOTE(review): this requests points past the fitted sample; confirm
+        # that the installed statsmodels supports it for Markov switching models
+        predicted = self.autoreg.predict(start=start_id, end=end_id)
+        predict = np.array(predicted).reshape(1, -1)
+
+        output_data = self._convert_to_output(input_data,
+                                              predict=predict,
+                                              data_type=DataTypesEnum.table)
+        return output_data
+
+    def predict_for_fit(self, input_data: InputData) -> OutputData:
+        """ In-sample prediction converted to a table of forecast windows
+
+        :param input_data: data with features, target and ids to process
+        :return output_data: windows of in-sample predictions, target is windowed alike
+        """
+        input_data = copy(input_data)
+        parameters = input_data.task.task_params
+        forecast_length = parameters.forecast_length
+        idx = input_data.idx
+        target = input_data.target
+        predicted = np.asarray(self.autoreg.predict(), dtype=float).ravel()
+        # NOTE(review): the in-sample prediction is assumed to be shorter than the
+        # series (first lags have no prediction); pad with nan to align with idx
+        n_missing = len(idx) - len(predicted)
+        if n_missing > 0:
+            predicted = np.concatenate([np.full(n_missing, np.nan), predicted])
+        # adding nan to target as in predicted
+        nan_mask = np.isnan(predicted)
+        target = target.astype(float)
+        target = target[~nan_mask]
+        idx = idx[~nan_mask]
+        predicted = predicted[~nan_mask]
+        new_idx, predict = ts_to_table(idx=idx,
+                                       time_series=predicted,
+                                       window_size=forecast_length)
+        _, target_columns = ts_to_table(idx=idx,
+                                        time_series=target,
+                                        window_size=forecast_length)
+        input_data.idx = new_idx
+        input_data.target = target_columns
+        output_data = self._convert_to_output(input_data,
+                                              predict=predict,
+                                              data_type=DataTypesEnum.table)
+        return output_data
+
+    def handle_new_data(self, input_data: InputData):
+        """
+        Method to update x samples inside a model (used when we want to use old model to a new data)
+
+        :param input_data: new input_data
+        """
+        if input_data.idx[0] > self.actual_ts_len:
+            # a model is rebuilt on the new data and the fitted parameters are re-applied
+            # to it (no re-estimation); NOTE(review): confirm smooth() suits this use
+            new_model = self._build_model(input_data.features[-self.actual_ts_len:])
+            self.autoreg = new_model.smooth(self.autoreg.params)
+
+
+class ExpSmoothingImplementation(ModelImplementation):
+    """ Exponential smoothing implementation from statsmodels """
+
+    def __init__(self, params: OperationParameters):
+        super().__init__(params)
+        self.model = None
+        if self.params.get("seasonal"):
+            self.seasonal_periods = int(self.params.get("seasonal_periods"))
+        else:
+            self.seasonal_periods = None
+
+    def fit(self, input_data):
+        """ Fit ETSModel on features using error, trend, seasonal and damped_trend params """
+        self.model = ETSModel(
+            input_data.features.astype("float64"),
+            error=self.params.get("error"),
+            trend=self.params.get("trend"),
+            seasonal=self.params.get("seasonal"),
+            damped_trend=self.params.get("damped_trend") if self.params.get("trend") else None,
+            seasonal_periods=self.seasonal_periods
+        )
+        self.model = self.model.fit(disp=False)
+        return self.model
+
+    def predict(self, input_data):
+        """ Predict values for ids from the first to the last one in input_data.idx """
+        input_data = copy(input_data)
+        idx = input_data.idx
+
+        start_id = idx[0]
+        end_id = idx[-1]
+        predictions = self.model.predict(start=start_id,
+                                         end=end_id)
+        predict = np.array(predictions).reshape(1, -1)
+        new_idx = np.arange(start_id, end_id + 1)
+
+        input_data.idx = new_idx
+
+        output_data = self._convert_to_output(input_data,
+                                              predict=predict,
+                                              data_type=DataTypesEnum.table)
+        return output_data
+
+    def predict_for_fit(self, input_data: InputData) -> OutputData:
+        """ Predict on fitted ids and convert prediction and target to forecast windows """
+        input_data = copy(input_data)
+        parameters = input_data.task.task_params
+        forecast_length = parameters.forecast_length
+        idx = input_data.idx
+        target = input_data.target
+
+        # Indexing for statsmodels is different
+        start_id = idx[0]
+        end_id = idx[-1]
+        predictions = self.model.predict(start=start_id,
+                                         end=end_id)
+        _, predict = ts_to_table(idx=idx,
+                                 time_series=predictions,
+                                 window_size=forecast_length)
+        new_idx, target_columns = ts_to_table(idx=idx,
+                                              time_series=target,
+                                              window_size=forecast_length)
+
+        input_data.idx = new_idx
+        input_data.target = target_columns
+
+        output_data = self._convert_to_output(input_data,
+                                              predict=predict,
+                                              data_type=DataTypesEnum.table)
+        return output_data
diff --git a/fedot_ind/core/repository/data/industrial_model_repository.json b/fedot_ind/core/repository/data/industrial_model_repository.json
index 9c4ee72f9..faf731095 100644
--- a/fedot_ind/core/repository/data/industrial_model_repository.json
+++ b/fedot_ind/core/repository/data/industrial_model_repository.json
@@ -311,6 +311,17 @@
       ],
       "input_type": "[DataTypesEnum.ts]"
     },
+    "markov_ar": {
+      "meta": "ts_model",
+      "presets": ["fast_train", "ts"],
+      "tags": [
+        "simple",
+        "interpretable",
+        "non_lagged",
+        "linear"
+      ],
+      "input_type": "[DataTypesEnum.ts]"
+    },
     "arima": {
       "meta": "ts_model",
       "presets": ["ts"],
diff --git a/fedot_ind/core/repository/model_repository.py b/fedot_ind/core/repository/model_repository.py
index b37ade244..b5ed890a6 100644
--- a/fedot_ind/core/repository/model_repository.py
+++ b/fedot_ind/core/repository/model_repository.py
@@ -53,6 +53,7 @@
 from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor
 from fedot_ind.core.models.recurrence.reccurence_extractor import RecurrenceExtractor
 from fedot_ind.core.models.ts_forecasting.glm import GLMIndustrial
+from fedot_ind.core.models.ts_forecasting.markov_ar import MarkovAR
 from fedot_ind.core.operation.dummy.dummy_operation import DummyOperation
 from fedot_ind.core.operation.filtration.channel_filtration import ChannelCentroidFilter
 from fedot_ind.core.operation.filtration.feature_filtration import FeatureFilter
@@ -176,7 +177,8 @@ class AtomizedModel(Enum):
         'stl_arima': STLForecastARIMAImplementation,
         'ets': ExpSmoothingImplementation,
         'cgru': CGRUImplementation,
-        'glm': GLMIndustrial
+        'glm': GLMIndustrial,
+        'markov_ar': MarkovAR
     }
 
     FORECASTING_PREPROC = {