-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Ts forecasting with decomposition (#81)
SSA method for ts forecasting was implemented
- Loading branch information
1 parent
9047c31
commit 2e0df00
Showing
39 changed files
with
786,071 additions
and
43,580 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import random | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from fedot.core.composer.metrics import smape | ||
from fedot.core.data.data import InputData | ||
from fedot.core.data.data_split import train_test_data_setup | ||
from fedot.core.pipelines.pipeline_builder import PipelineBuilder | ||
from fedot.core.repository.dataset_types import DataTypesEnum | ||
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams | ||
from matplotlib import pyplot as plt | ||
|
||
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels | ||
|
||
# Mapping from dataset alias to the CSV file holding the corresponding M4 test
# series. Paths are relative to the script location — callers must run from the
# expected working directory.
# Fix: dropped the `f` prefixes — the literals contain no placeholders.
datasets = {
    'm4_yearly': '../data/ts/M4YearlyTest.csv',
    'm4_weekly': '../data/ts/M4WeeklyTest.csv',
    'm4_daily': '../data/ts/M4DailyTest.csv',
    'm4_monthly': '../data/ts/M4MonthlyTest.csv',
    'm4_quarterly': '../data/ts/M4QuarterlyTest.csv'}
|
||
|
||
def get_ts_data(dataset='m4_monthly', horizon: int = 30, m4_id=None):
    """Load one M4 series and split it into train/test parts for forecasting.

    Args:
        dataset: key into the module-level ``datasets`` mapping.
        horizon: forecast length used for the task definition and the split.
        m4_id: explicit series label to select; when ``None`` a random label
            from the file is chosen.

    Returns:
        Tuple ``(train_data, test_data, label)`` where the first two are FEDOT
        ``InputData`` splits produced by ``train_test_data_setup`` and
        ``label`` is the id of the selected series.
    """
    time_series = pd.read_csv(datasets[dataset])

    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=horizon))
    # Fix: compare with `is None` instead of truthiness so falsy-but-valid
    # ids (e.g. 0 or '') are not silently replaced by a random label.
    if m4_id is None:
        label = random.choice(np.unique(time_series['label']))
    else:
        label = m4_id
    print(label)
    time_series = time_series[time_series['label'] == label]

    if dataset not in ['australia']:
        idx = pd.to_datetime(time_series['idx'].values)
    else:
        # non datetime indexes
        idx = time_series['idx'].values

    # Features and target are the same series; FEDOT derives the forecast
    # window from the task's forecast_length.
    values = time_series['value'].values
    train_input = InputData(idx=idx,
                            features=values,
                            target=values,
                            task=task,
                            data_type=DataTypesEnum.ts)
    train_data, test_data = train_test_data_setup(train_input)
    return train_data, test_data, label
|
||
|
||
if __name__ == '__main__':

    forecast_length = 13

    train_data, test_data, label = get_ts_data('m4_monthly', forecast_length)

    # Forecast with the SSA decomposition pipeline; the industrial model
    # repository must be active while building/fitting it.
    with IndustrialModels():
        window = int(len(train_data.features) * 0.35)
        ssa_pipeline = PipelineBuilder().add_node(
            'data_driven_basis_for_forecasting',
            params={'window_size': window}).build()
        ssa_pipeline.fit(train_data)
        ssa_predict = np.ravel(ssa_pipeline.predict(test_data).predict)

    # Plain autoregressive pipeline as a no-decomposition baseline.
    ar_pipeline = PipelineBuilder().add_node('ar').build()
    ar_pipeline.fit(train_data)
    no_ssa = np.ravel(ar_pipeline.predict(test_data).predict)

    # NOTE(review): test_data.features is plotted against train_data.idx —
    # presumably both cover the historical part of the series; confirm the
    # lengths match for this FEDOT split.
    plt.title(label)
    plt.plot(train_data.idx, test_data.features, label='features')
    plt.plot(test_data.idx, test_data.target, label='target')
    plt.plot(test_data.idx, ssa_predict, label='predicted ssa')
    plt.plot(test_data.idx, no_ssa, label='predicted baseline')
    plt.grid()
    plt.legend()
    plt.show()

    print(f"SSA smape: {smape(test_data.target, ssa_predict)}")
    print(f"no SSA smape: {smape(test_data.target, no_ssa)}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
148 changes: 148 additions & 0 deletions
148
fedot_ind/core/architecture/experiment/TimeSeriesForecasingWithDecomposition.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
import logging | ||
from typing import List, Union | ||
from typing import Optional | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from fedot.api.main import Fedot | ||
from fedot.core.data.data import InputData | ||
from fedot.core.operations.operation_parameters import OperationParameters | ||
from fedot.core.pipelines.node import PipelineNode | ||
from fedot.core.pipelines.pipeline import Pipeline | ||
from fedot.core.pipelines.pipeline_builder import PipelineBuilder | ||
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder | ||
from fedot.core.repository.dataset_types import DataTypesEnum | ||
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum | ||
from fedot.core.repository.tasks import Task, TaskTypesEnum | ||
from golem.core.tuning.simultaneous import SimultaneousTuner | ||
|
||
from fedot_ind.api.utils.path_lib import default_path_to_save_results | ||
from fedot_ind.core.metrics.evaluation import PerformanceAnalyzer | ||
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels | ||
|
||
# Fix the global NumPy RNG so experiment runs are reproducible.
np.random.seed(0)
|
||
|
||
class TimeSeriesForecastingWithDecompositionPreset:
    """Forecasting preset that decomposes a series with an SSA-style basis,
    reconstructs it, and fits a FEDOT predictor on the preprocessed data.

    NOTE(review): several pieces look carried over from a classification
    preset (f1/roc_auc metric selection, ``num_classes``, table data type
    under a ts_forecasting task) — confirm these are intentional.
    """

    def __init__(self, params: Optional[OperationParameters] = None):
        # Fix: honor the Optional annotation — tolerate params=None instead
        # of crashing on .get().
        params = params if params is not None else {}

        self.test_data_preprocessed = None

        self.model_params = params.get('model_params')
        self.dataset_name = params.get('dataset')
        self.output_dir = params.get('output_dir', default_path_to_save_results())

        self.logger = logging.getLogger('TimeSeriesForecastingWithDecomposition')

        self.prediction_label = None
        # Fix: initialize so get_metrics() does not raise AttributeError when
        # predict_proba() was never called.
        self.prediction_proba = None
        self.predictor = None
        self.y_train = None
        self.train_features = None
        self.test_features = None
        self.input_test_data = None

        self.logger.info('initialised')

    # TODO: put some datatype
    # TODO: add multidata option
    def _init_input_data(self, X, y):
        """Wrap (X, y) frames into a ts_forecasting ``InputData``.

        NOTE(review): data_type is DataTypesEnum.table while the task is
        ts_forecasting — confirm this combination is intended.
        """
        input_data = InputData(idx=np.arange(len(X)),
                               features=X.values,
                               target=y.values,
                               task=Task(TaskTypesEnum.ts_forecasting),
                               data_type=DataTypesEnum.table)

        # Multidata option (kept for reference):
        # train_data = InputData(idx=np.arange(len(train_data[0])),
        #                        features=np.array(train_data[0].values.tolist()),
        #                        target=train_data[1].reshape(-1, 1),
        #                        task=Task(TaskTypesEnum.classification), data_type=DataTypesEnum.image)

        return input_data

    def _build_pipeline(self):
        """Build the basis-decomposition -> series-reconstruction pipeline.

        Fix: the original called ``.build()`` inside the builder expression and
        then ``.build()`` again on the already-built pipeline; a pipeline must
        be built exactly once.
        """
        return (PipelineBuilder()
                .add_node('data_driven_basis_for_forecasting')
                .add_node('series_reconstruction')
                .build())

    def _tune_pipeline(self, pipeline: Pipeline, train_data: InputData):
        """Tune pipeline hyperparameters with a simultaneous tuner.

        NOTE(review): tunes with a classification metric (f1) on a forecasting
        task — confirm this is intentional.
        """
        pipeline_tuner = TunerBuilder(train_data.task) \
            .with_tuner(SimultaneousTuner) \
            .with_metric(ClassificationMetricsEnum.f1) \
            .with_iterations(30) \
            .build(train_data)
        return pipeline_tuner.tune(pipeline)

    def fit(self, train_ts_frame,
            train_target: np.ndarray = None,
            **kwargs) -> object:
        """Fit the preprocessing pipeline, then a FEDOT predictor on its output.

        Args:
            train_ts_frame: training series/frame (passed to ``_init_input_data``).
            train_target: target values aligned with ``train_ts_frame``.

        Returns:
            The fitted FEDOT predictor.
        """
        with IndustrialModels():
            self.train_data = self._init_input_data(train_ts_frame, train_target)
            # NOTE: the misspelled attribute name 'prerpocessing_pipeline' is
            # kept for backward compatibility with existing callers.
            self.prerpocessing_pipeline = self._build_pipeline()
            self.prerpocessing_pipeline = self._tune_pipeline(self.prerpocessing_pipeline,
                                                              self.train_data)
            self.prerpocessing_pipeline.fit(self.train_data)

        # Replace the head node with 'cat_features' and refit outside the
        # industrial-models context.
        rf_node = self.prerpocessing_pipeline.nodes[0]
        self.prerpocessing_pipeline.update_node(rf_node, PipelineNode('cat_features'))
        rf_node.nodes_from = []
        rf_node.unfit()
        self.prerpocessing_pipeline.fit(self.train_data)

        train_data_preprocessed = self.prerpocessing_pipeline.root_node.predict(self.train_data)
        train_data_preprocessed.predict = np.squeeze(train_data_preprocessed.predict)

        # Re-wrap the pipeline output as fresh InputData with the predictions
        # as features.
        train_data_preprocessed = InputData(idx=train_data_preprocessed.idx,
                                            features=train_data_preprocessed.predict,
                                            target=train_data_preprocessed.target,
                                            data_type=train_data_preprocessed.data_type,
                                            task=train_data_preprocessed.task)

        # NOTE(review): classification metric selection on forecasting data —
        # confirm this is the intended behavior.
        metric = 'roc_auc' if train_data_preprocessed.num_classes == 2 else 'f1'
        self.model_params.update({'metric': metric})
        self.predictor = Fedot(**self.model_params)

        self.predictor.fit(train_data_preprocessed)

        return self.predictor

    def _ensure_test_data_preprocessed(self, test_features, test_target):
        """Lazily build and cache the preprocessed test ``InputData``."""
        if self.test_data_preprocessed is None:
            test_data = self._init_input_data(test_features, test_target)
            preprocessed = self.prerpocessing_pipeline.root_node.predict(test_data)
            preprocessed.predict = np.squeeze(preprocessed.predict)
            self.test_data_preprocessed = InputData(idx=preprocessed.idx,
                                                    features=preprocessed.predict,
                                                    target=preprocessed.target,
                                                    data_type=preprocessed.data_type,
                                                    task=preprocessed.task)

    def predict(self, test_features, test_target) -> dict:
        """Predict labels for the test data, preprocessing it on first call."""
        self._ensure_test_data_preprocessed(test_features, test_target)
        self.prediction_label = self.predictor.predict(self.test_data_preprocessed)
        return self.prediction_label

    def predict_proba(self, test_features, test_target) -> dict:
        """Predict probabilities for the test data.

        Fix: the original assigned to ``self.test_data_preprocessed.predict``
        while the attribute could still be ``None`` (AttributeError); the test
        data is now built the same way ``predict`` builds it.
        """
        self._ensure_test_data_preprocessed(test_features, test_target)
        self.prediction_proba = self.predictor.predict_proba(self.test_data_preprocessed)
        return self.prediction_proba

    def get_metrics(self, target: Union[np.ndarray, pd.Series], metric_names: Union[str, List[str]]):
        """Compute the requested metrics from the stored predictions.

        ``prediction_proba`` may be None when only ``predict`` was called.
        """
        analyzer = PerformanceAnalyzer()
        return analyzer.calculate_metrics(target=target,
                                          predicted_labels=self.prediction_label,
                                          predicted_probs=self.prediction_proba,
                                          target_metrics=metric_names)

    def save_prediction(self, predicted_data: np.ndarray, kind: str):
        # NOTE(review): self.saver is never assigned in this class — calling
        # this raises AttributeError unless a saver is injected externally.
        self.saver.save(predicted_data, kind)

    def save_metrics(self, metrics: dict):
        # NOTE(review): same missing-saver concern as save_prediction.
        self.saver.save(metrics, 'metrics')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.