Skip to content

Commit

Permalink
Ts forecasting with decomposition (#81)
Browse files Browse the repository at this point in the history
SSA method for ts forecasting was implemented
  • Loading branch information
valer1435 authored and technocreep committed Sep 29, 2023
1 parent 9047c31 commit 2e0df00
Show file tree
Hide file tree
Showing 39 changed files with 786,071 additions and 43,580 deletions.
510,082 changes: 510,082 additions & 0 deletions examples/data/ts/M4DailyTest.csv

Large diffs are not rendered by default.

46,989 changes: 46,989 additions & 0 deletions examples/data/ts/M4MonthlyTest.csv

Large diffs are not rendered by default.

19,814 changes: 19,814 additions & 0 deletions examples/data/ts/M4QuarterlyTest.csv

Large diffs are not rendered by default.

207,465 changes: 207,465 additions & 0 deletions examples/data/ts/M4WeeklyTest.csv

Large diffs are not rendered by default.

3,693 changes: 0 additions & 3,693 deletions examples/data/ts/M4Yearly.csv

This file was deleted.

35,041 changes: 0 additions & 35,041 deletions examples/data/ts/ts_long.csv

This file was deleted.

3,785 changes: 0 additions & 3,785 deletions examples/data/ts/ts_sea_level.csv

This file was deleted.

78 changes: 78 additions & 0 deletions examples/ts_forecasting/with_decomposition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import random

import numpy as np
import pandas as pd
from fedot.core.composer.metrics import smape
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams
from matplotlib import pyplot as plt

from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels

# Mapping of dataset aliases to M4 subset CSV files, relative to this
# example's directory. The f-string prefixes were dropped: none of the
# literals contain placeholders.
# NOTE(review): 'm4_yearly' points at M4YearlyTest.csv, which is not added
# by this commit (M4Yearly.csv was deleted) — confirm the file exists.
datasets = {
    'm4_yearly': '../data/ts/M4YearlyTest.csv',
    'm4_weekly': '../data/ts/M4WeeklyTest.csv',
    'm4_daily': '../data/ts/M4DailyTest.csv',
    'm4_monthly': '../data/ts/M4MonthlyTest.csv',
    'm4_quarterly': '../data/ts/M4QuarterlyTest.csv',
}


def get_ts_data(dataset='m4_monthly', horizon: int = 30, m4_id=None):
    """Load one M4 series and split it into train/test for forecasting.

    Args:
        dataset: key into the module-level ``datasets`` path mapping.
        horizon: forecast length used both for the task definition and
            the train/test split.
        m4_id: specific series label to load; when omitted, a label is
            drawn at random from the file.

    Returns:
        Tuple ``(train_data, test_data, label)`` where the first two are
        fedot ``InputData`` splits and ``label`` is the chosen series id.
    """
    time_series = pd.read_csv(datasets[dataset])

    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=horizon))
    # Explicit ``is None`` check (the original used truthiness), so falsy
    # but valid ids such as 0 or '' are still honoured.
    if m4_id is None:
        label = random.choice(np.unique(time_series['label']))
    else:
        label = m4_id
    print(label)
    time_series = time_series[time_series['label'] == label]

    if dataset not in ['australia']:
        idx = pd.to_datetime(time_series['idx'].values)
    else:
        # non datetime indexes
        idx = time_series['idx'].values

    time_series = time_series['value'].values
    # For ts forecasting fedot expects the same series as both features
    # and target; train_test_data_setup carves out the last ``horizon``
    # points as the test target.
    train_input = InputData(idx=idx,
                            features=time_series,
                            target=time_series,
                            task=task,
                            data_type=DataTypesEnum.ts)
    train_data, test_data = train_test_data_setup(train_input)
    return train_data, test_data, label


if __name__ == '__main__':

    forecast_length = 13

    train_data, test_data, label = get_ts_data('m4_monthly', forecast_length)

    # SSA (decomposition-based) forecasting pipeline; the window size is
    # taken as 35% of the training series length.
    with IndustrialModels():
        window_size = int(len(train_data.features) * 0.35)
        ssa_pipeline = PipelineBuilder().add_node(
            'data_driven_basis_for_forecasting',
            params={'window_size': window_size}).build()
        ssa_pipeline.fit(train_data)
        ssa_predict = np.ravel(ssa_pipeline.predict(test_data).predict)

    # Plain autoregressive pipeline as the baseline for comparison.
    ar_pipeline = PipelineBuilder().add_node('ar').build()
    ar_pipeline.fit(train_data)
    no_ssa = np.ravel(ar_pipeline.predict(test_data).predict)

    # NOTE(review): history is plotted via test_data.features against
    # train_data.idx — presumably these coincide under fedot's ts split;
    # confirm lengths match.
    plt.title(label)
    plt.plot(train_data.idx, test_data.features, label='features')
    plt.plot(test_data.idx, test_data.target, label='target')
    plt.plot(test_data.idx, ssa_predict, label='predicted ssa')
    plt.plot(test_data.idx, no_ssa, label='predicted baseline')
    plt.grid()
    plt.legend()
    plt.show()

    print(f"SSA smape: {smape(test_data.target, ssa_predict)}")
    print(f"no SSA smape: {smape(test_data.target, no_ssa)}")
3 changes: 3 additions & 0 deletions fedot_ind/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ def __init_solver(self):
# solver = TaskEnum[self.config_dict['task']].value['nn']
else:
solver = TaskEnum[self.config_dict['task']].value['default']
elif self.config_dict['task'] == 'ts_forecasting':
if self.config_dict['strategy'] == 'decomposition':
solver = TaskEnum[self.config_dict['task']].value['fedot_preset']

else:
solver = TaskEnum[self.config_dict['task']].value[0]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import logging
from typing import List, Union
from typing import Optional

import numpy as np
import pandas as pd
from fedot.api.main import Fedot
from fedot.core.data.data import InputData
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum
from golem.core.tuning.simultaneous import SimultaneousTuner

from fedot_ind.api.utils.path_lib import default_path_to_save_results
from fedot_ind.core.metrics.evaluation import PerformanceAnalyzer
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels

np.random.seed(0)  # fixed global seed so repeated runs are reproducible


class TimeSeriesForecastingWithDecompositionPreset:
    """Forecasting preset that preprocesses a series with a decomposition basis.

    A tuned preprocessing pipeline (data-driven basis + series
    reconstruction) transforms the input, then a Fedot AutoML model is
    fitted on the transformed data.

    NOTE(review): several pieces (f1/roc_auc metric choice,
    ``num_classes``, predict_proba) look copied from a classification
    preset — confirm they are intended for a forecasting task.
    """

    def __init__(self, params: Optional[OperationParameters] = None):
        # Robustness fix: the declared default is None, but the original
        # body called ``params.get`` unconditionally and would crash.
        params = params or {}

        self.test_data_preprocessed = None

        self.model_params = params.get('model_params')
        self.dataset_name = params.get('dataset')
        self.output_dir = params.get('output_dir', default_path_to_save_results())

        self.logger = logging.getLogger('TimeSeriesForecastingWithDecomposition')

        self.prediction_label = None
        # Fix: initialised here so get_metrics() never hits an undefined
        # attribute when predict_proba() was not called.
        self.prediction_proba = None
        self.predictor = None
        self.y_train = None
        self.train_features = None
        self.test_features = None
        self.input_test_data = None

        self.logger.info('initialised')

    # TODO: put some datatype
    # TODO: add multidata option
    def _init_input_data(self, X, y):
        """Wrap feature/target frames into a fedot ``InputData`` container."""
        input_data = InputData(idx=np.arange(len(X)),
                               features=X.values,
                               target=y.values,
                               task=Task(TaskTypesEnum.ts_forecasting),
                               data_type=DataTypesEnum.table)
        return input_data

    def _build_pipeline(self):
        """Build the decomposition preprocessing pipeline.

        Bug fix: the original called ``.build()`` twice — once on the
        builder and once more on the resulting ``Pipeline`` (which has no
        ``build`` method), raising ``AttributeError`` at runtime.
        """
        return PipelineBuilder() \
            .add_node('data_driven_basis_for_forecasting') \
            .add_node('series_reconstruction') \
            .build()

    def _tune_pipeline(self, pipeline: Pipeline, train_data: InputData):
        """Tune pipeline hyperparameters with a simultaneous tuner (30 iters).

        NOTE(review): f1 is a classification metric — confirm it is the
        intended tuning objective for a forecasting pipeline.
        """
        pipeline_tuner = TunerBuilder(train_data.task) \
            .with_tuner(SimultaneousTuner) \
            .with_metric(ClassificationMetricsEnum.f1) \
            .with_iterations(30) \
            .build(train_data)
        return pipeline_tuner.tune(pipeline)

    def fit(self, train_ts_frame,
            train_target: np.ndarray = None,
            **kwargs) -> object:
        """Fit the preprocessing pipeline, then an AutoML model on its output.

        Returns the fitted Fedot predictor.
        """
        # NOTE(review): attribute name 'prerpocessing_pipeline' is kept
        # misspelled for backward compatibility with any external readers.
        with IndustrialModels():
            self.train_data = self._init_input_data(train_ts_frame, train_target)
            self.prerpocessing_pipeline = self._build_pipeline()
            self.prerpocessing_pipeline = self._tune_pipeline(self.prerpocessing_pipeline,
                                                              self.train_data)
            self.prerpocessing_pipeline.fit(self.train_data)

            # Replace the root node with 'cat_features' and refit, dropping
            # its incoming edges first.
            rf_node = self.prerpocessing_pipeline.nodes[0]
            self.prerpocessing_pipeline.update_node(rf_node, PipelineNode('cat_features'))
            rf_node.nodes_from = []
            rf_node.unfit()
            self.prerpocessing_pipeline.fit(self.train_data)

            train_data_preprocessed = self.prerpocessing_pipeline.root_node.predict(self.train_data)
            train_data_preprocessed.predict = np.squeeze(train_data_preprocessed.predict)

            train_data_preprocessed = InputData(idx=train_data_preprocessed.idx,
                                                features=train_data_preprocessed.predict,
                                                target=train_data_preprocessed.target,
                                                data_type=train_data_preprocessed.data_type,
                                                task=train_data_preprocessed.task)

        # NOTE(review): classification-style metric selection on a
        # forecasting preset — confirm this is intentional.
        metric = 'roc_auc' if train_data_preprocessed.num_classes == 2 else 'f1'
        self.model_params.update({'metric': metric})
        self.predictor = Fedot(**self.model_params)

        self.predictor.fit(train_data_preprocessed)

        return self.predictor

    def _preprocess_test_data(self, test_features, test_target) -> None:
        """Run the fitted preprocessing pipeline over test data and cache it."""
        test_data = self._init_input_data(test_features, test_target)
        preprocessed = self.prerpocessing_pipeline.root_node.predict(test_data)
        preprocessed.predict = np.squeeze(preprocessed.predict)
        self.test_data_preprocessed = InputData(idx=preprocessed.idx,
                                                features=preprocessed.predict,
                                                target=preprocessed.target,
                                                data_type=preprocessed.data_type,
                                                task=preprocessed.task)

    def predict(self, test_features, test_target) -> dict:
        """Predict labels for the (lazily preprocessed and cached) test data."""
        if self.test_data_preprocessed is None:
            self._preprocess_test_data(test_features, test_target)

        self.prediction_label = self.predictor.predict(self.test_data_preprocessed)
        return self.prediction_label

    def predict_proba(self, test_features, test_target) -> dict:
        """Predict class probabilities for the test data.

        Bug fix: the original assigned ``self.test_data_preprocessed.predict``
        inside the branch where ``self.test_data_preprocessed`` was still
        ``None`` and never built the cached ``InputData``, raising
        ``AttributeError``. Preprocessing is now shared with ``predict``.
        """
        if self.test_data_preprocessed is None:
            self._preprocess_test_data(test_features, test_target)

        self.prediction_proba = self.predictor.predict_proba(self.test_data_preprocessed)
        return self.prediction_proba

    def get_metrics(self, target: Union[np.ndarray, pd.Series], metric_names: Union[str, List[str]]):
        """Compute the requested metrics from stored labels/probabilities."""
        analyzer = PerformanceAnalyzer()
        return analyzer.calculate_metrics(target=target,
                                          predicted_labels=self.prediction_label,
                                          predicted_probs=self.prediction_proba,
                                          target_metrics=metric_names)

    def save_prediction(self, predicted_data: np.ndarray, kind: str):
        # NOTE(review): ``self.saver`` is never initialised in this class —
        # calling this raises AttributeError; confirm where saver comes from.
        self.saver.save(predicted_data, kind)

    def save_metrics(self, metrics: dict):
        # NOTE(review): see save_prediction — ``self.saver`` is undefined here.
        self.saver.save(metrics, 'metrics')
2 changes: 1 addition & 1 deletion fedot_ind/core/architecture/pipelines/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pymonad.either import Right
from fedot_ind.core.architecture.pipelines.abstract_pipeline import AbstractPipelines
from fedot_ind.core.architecture.preprocessing.DatasetLoader import DataLoader
from fedot_ind.core.operation.transformation.basis.data_driven import DataDrivenBasisImplementation
from fedot_ind.core.operation.implementation.basis.data_driven import DataDrivenBasisImplementation
from functools import partial


Expand Down
5 changes: 3 additions & 2 deletions fedot_ind/core/architecture/settings/pipeline_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
from fedot_ind.core.models.recurrence.RecurrenceExtractor import RecurrenceExtractor
from fedot_ind.core.models.signal.SignalExtractor import SignalExtractor
from fedot_ind.core.models.topological.TopologicalExtractor import TopologicalExtractor
from fedot_ind.core.operation.implementation.basis.fourier import FourierBasisImplementation
from fedot_ind.core.operation.implementation.basis.wavelet import WaveletBasisImplementation
from fedot_ind.core.operation.transformation.basis.data_driven import DataDrivenBasisImplementation
from fedot_ind.core.operation.transformation.basis.fourier import FourierBasisImplementation
from fedot_ind.core.operation.transformation.basis.wavelet import WaveletBasisImplementation



class BasisTransformations(Enum):
Expand Down
3 changes: 3 additions & 0 deletions fedot_ind/core/architecture/settings/task_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from fedot_ind.core.architecture.experiment.TimeSeriesClassifierNN import TimeSeriesClassifierNN
from fedot_ind.core.architecture.experiment.TimeSeriesClassifierPreset import TimeSeriesClassifierPreset
from fedot_ind.core.architecture.experiment.TimeSeriesRegression import TimeSeriesRegression
from fedot_ind.core.architecture.experiment import TimeSeriesForecasingWithDecomposition
from fedot_ind.core.ensemble.rank_ensembler import RankEnsemble


Expand All @@ -24,3 +25,5 @@ class TaskEnum(Enum):
image_classification = (CVExperimenter,)
object_detection = (CVExperimenter,)
semantic_segmentation = (CVExperimenter,)


2 changes: 1 addition & 1 deletion fedot_ind/core/models/signal/SignalExtractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from fedot_ind.core.metrics.metrics_implementation import *
from fedot_ind.core.models.WindowedFeaturesExtractor import WindowedFeatureExtractor
from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor
from fedot_ind.core.operation.transformation.basis.wavelet import WaveletBasisImplementation
from fedot_ind.core.operation.implementation.basis.wavelet import WaveletBasisImplementation


class SignalExtractor(WindowedFeatureExtractor):
Expand Down
5 changes: 1 addition & 4 deletions fedot_ind/core/operation/IndustrialCachableOperation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,5 @@ def transform(self, input_data: InputData, use_cache: bool = False) -> OutputDat
predict = self._convert_to_output(input_data, predict, data_type=self.data_type)
return predict

def _transform(self, input_data) -> np.array:
"""
Method for feature generation for all series
"""
def _transform(self, input_data):
pass
Loading

0 comments on commit 2e0df00

Please sign in to comment.