Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ts forecasting with decomposition #81

Merged
merged 21 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
510,082 changes: 510,082 additions & 0 deletions examples/data/ts/M4DailyTest.csv

Large diffs are not rendered by default.

46,989 changes: 46,989 additions & 0 deletions examples/data/ts/M4MonthlyTest.csv

Large diffs are not rendered by default.

19,814 changes: 19,814 additions & 0 deletions examples/data/ts/M4QuarterlyTest.csv

Large diffs are not rendered by default.

207,465 changes: 207,465 additions & 0 deletions examples/data/ts/M4WeeklyTest.csv

Large diffs are not rendered by default.

3,693 changes: 0 additions & 3,693 deletions examples/data/ts/M4Yearly.csv

This file was deleted.

35,041 changes: 0 additions & 35,041 deletions examples/data/ts/ts_long.csv

This file was deleted.

3,785 changes: 0 additions & 3,785 deletions examples/data/ts/ts_sea_level.csv

This file was deleted.

78 changes: 78 additions & 0 deletions examples/ts_forecasting/with_decomposition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import random

import numpy as np
import pandas as pd
from fedot.core.composer.metrics import smape
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams
from matplotlib import pyplot as plt

from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels

# Mapping from short dataset aliases to CSV paths, relative to the
# examples/ts_forecasting/ directory this script lives in.
# The original used f-strings with no placeholders (ruff F541) — plain
# literals are equivalent.
# NOTE(review): 'm4_yearly' points to M4YearlyTest.csv, which is not among
# the files added by this change set — confirm the file exists.
datasets = {
    'm4_yearly': '../data/ts/M4YearlyTest.csv',
    'm4_weekly': '../data/ts/M4WeeklyTest.csv',
    'm4_daily': '../data/ts/M4DailyTest.csv',
    'm4_monthly': '../data/ts/M4MonthlyTest.csv',
    'm4_quarterly': '../data/ts/M4QuarterlyTest.csv'}


def get_ts_data(dataset='m4_monthly', horizon: int = 30, m4_id=None):
    """Load one series from an M4 CSV and split it for forecasting.

    Args:
        dataset: Key into the module-level ``datasets`` mapping.
        horizon: Forecast length used both for the task definition and the
            train/test split.
        m4_id: Optional label of a specific series; when omitted a random
            series is drawn from the file.

    Returns:
        Tuple ``(train_data, test_data, label)`` — two fedot ``InputData``
        objects and the series label that was selected.
    """
    time_series = pd.read_csv(datasets[dataset])

    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=horizon))
    # BUG FIX: the original tested `if not m4_id`, which treats falsy-but-valid
    # ids (0, '') as "not provided" and silently picks a random series.
    if m4_id is None:
        label = random.choice(np.unique(time_series['label']))
    else:
        label = m4_id
    print(label)
    time_series = time_series[time_series['label'] == label]

    if dataset not in ['australia']:
        idx = pd.to_datetime(time_series['idx'].values)
    else:
        # non datetime indexes
        idx = time_series['idx'].values

    time_series = time_series['value'].values
    # Features and target are the same array: fedot's ts split carves the
    # last `horizon` points off as the test target.
    train_input = InputData(idx=idx,
                            features=time_series,
                            target=time_series,
                            task=task,
                            data_type=DataTypesEnum.ts)
    train_data, test_data = train_test_data_setup(train_input)
    return train_data, test_data, label


if __name__ == '__main__':

    forecast_length = 13

    train_data, test_data, label = get_ts_data('m4_monthly', forecast_length)

    # Fit the SSA-style (data-driven basis) forecasting pipeline inside the
    # industrial-models context so the custom node is registered.
    with IndustrialModels():
        pipeline = PipelineBuilder().add_node('data_driven_basis_for_forecasting',
                                              params={'window_size': int(len(train_data.features) * 0.35)}
                                              ).build()
        pipeline.fit(train_data)
        ssa_predict = np.ravel(pipeline.predict(test_data).predict)

    # Plain auto-regression pipeline as the baseline for comparison.
    baseline = PipelineBuilder().add_node('ar').build()
    baseline.fit(train_data)
    no_ssa = np.ravel(baseline.predict(test_data).predict)

    plt.title(label)
    # FIX: plot the train history against the train index. The original
    # paired train_data.idx with test_data.features — presumably the same
    # array in fedot's ts split, but mismatched pairing reads as a bug and
    # breaks if the split semantics change. TODO confirm against fedot docs.
    plt.plot(train_data.idx, train_data.features, label='features')
    plt.plot(test_data.idx, test_data.target, label='target')
    plt.plot(test_data.idx, ssa_predict, label='predicted ssa')
    plt.plot(test_data.idx, no_ssa, label='predicted baseline')
    plt.grid()
    plt.legend()
    plt.show()

    print(f"SSA smape: {smape(test_data.target, ssa_predict)}")
    print(f"no SSA smape: {smape(test_data.target, no_ssa)}")
3 changes: 3 additions & 0 deletions fedot_ind/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ def __init_solver(self):
# solver = TaskEnum[self.config_dict['task']].value['nn']
else:
solver = TaskEnum[self.config_dict['task']].value['default']
elif self.config_dict['task'] == 'ts_forecasting':
if self.config_dict['strategy'] == 'decomposition':
solver = TaskEnum[self.config_dict['task']].value['fedot_preset']

else:
solver = TaskEnum[self.config_dict['task']].value[0]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import logging
from typing import List, Union
from typing import Optional

import numpy as np
import pandas as pd
from fedot.api.main import Fedot
from fedot.core.data.data import InputData
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum
from golem.core.tuning.simultaneous import SimultaneousTuner

from fedot_ind.api.utils.path_lib import default_path_to_save_results
from fedot_ind.core.metrics.evaluation import PerformanceAnalyzer
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels

np.random.seed(0)


class TimeSeriesForecastingWithDecompositionPreset:
    """Forecasting preset: decompose the series with a data-driven basis,
    reconstruct it, then fit a Fedot model on the preprocessed data.

    NOTE(review): several pieces look copied from a classification preset —
    ``ClassificationMetricsEnum.f1`` as the tuning metric, the
    ``roc_auc``/``f1`` selection in :meth:`fit`, the ``cat_features`` node and
    ``Task(TaskTypesEnum.ts_forecasting)`` built without forecasting params.
    Confirm these are intended for a forecasting task.
    """

    def __init__(self, params: Optional[OperationParameters] = None):
        # Cached preprocessed test InputData; filled lazily by predict().
        self.test_data_preprocessed = None

        self.model_params = params.get('model_params')
        self.dataset_name = params.get('dataset')
        # BUG FIX: the original passed default_path_to_save_results() as the
        # .get() default, which evaluated it (and created the results path)
        # even when an explicit output_dir was supplied. Evaluate lazily.
        output_dir = params.get('output_dir')
        self.output_dir = output_dir if output_dir is not None else default_path_to_save_results()

        self.logger = logging.getLogger('TimeSeriesForecastingWithDecomposition')

        self.prediction_label = None
        # BUG FIX: initialised here so get_metrics() does not raise
        # AttributeError when predict_proba() was never called.
        self.prediction_proba = None
        self.predictor = None
        self.y_train = None
        self.train_features = None
        self.test_features = None
        self.input_test_data = None

        self.logger.info('initialised')

    # TODO: put some datatype
    # TODO: add multidata option
    def _init_input_data(self, X, y):
        """Wrap pandas features/target into a fedot ``InputData`` table.

        Assumes ``X`` and ``y`` expose ``.values`` (pandas objects) —
        TODO confirm against callers.
        """
        input_data = InputData(idx=np.arange(len(X)),
                               features=X.values,
                               target=y.values,
                               task=Task(TaskTypesEnum.ts_forecasting),
                               data_type=DataTypesEnum.table)
        return input_data

    def _build_pipeline(self):
        """Build the decomposition -> reconstruction preprocessing pipeline."""
        # BUG FIX: the original called .build() twice — once at the end of the
        # builder chain (returning a Pipeline) and again on that Pipeline,
        # which has no .build() method and raised AttributeError.
        return PipelineBuilder() \
            .add_node('data_driven_basis_for_forecasting') \
            .add_node('series_reconstruction') \
            .build()

    def _tune_pipeline(self, pipeline: Pipeline, train_data: InputData):
        """Tune the pipeline's hyperparameters with a simultaneous tuner."""
        # NOTE(review): f1 is a classification metric; a regression/forecasting
        # metric (e.g. RMSE) is probably intended here — confirm.
        pipeline_tuner = TunerBuilder(train_data.task) \
            .with_tuner(SimultaneousTuner) \
            .with_metric(ClassificationMetricsEnum.f1) \
            .with_iterations(30) \
            .build(train_data)
        pipeline = pipeline_tuner.tune(pipeline)
        return pipeline

    def fit(self, train_ts_frame,
            train_target: np.ndarray = None,
            **kwargs) -> object:
        """Fit the preprocessing pipeline, then a Fedot model on its output.

        Returns the fitted Fedot predictor.
        """
        with IndustrialModels():
            self.train_data = self._init_input_data(train_ts_frame, train_target)
            # Attribute name 'prerpocessing_pipeline' (sic) is kept as-is for
            # backward compatibility with any external references.
            self.prerpocessing_pipeline = self._build_pipeline()
            self.prerpocessing_pipeline = self._tune_pipeline(self.prerpocessing_pipeline,
                                                              self.train_data)
            self.prerpocessing_pipeline.fit(self.train_data)

            # Replace the first node with 'cat_features' and refit.
            # NOTE(review): a 'cat_features' node in a forecasting preset looks
            # carried over from a classification preset — confirm intent.
            rf_node = self.prerpocessing_pipeline.nodes[0]
            self.prerpocessing_pipeline.update_node(rf_node, PipelineNode('cat_features'))
            rf_node.nodes_from = []
            rf_node.unfit()
            self.prerpocessing_pipeline.fit(self.train_data)

            # Use the preprocessing pipeline's output as features for Fedot.
            train_data_preprocessed = self.prerpocessing_pipeline.root_node.predict(self.train_data)
            train_data_preprocessed.predict = np.squeeze(train_data_preprocessed.predict)

            train_data_preprocessed = InputData(idx=train_data_preprocessed.idx,
                                                features=train_data_preprocessed.predict,
                                                target=train_data_preprocessed.target,
                                                data_type=train_data_preprocessed.data_type,
                                                task=train_data_preprocessed.task)

        # NOTE(review): roc_auc/f1 are classification metrics — confirm for a
        # forecasting task.
        metric = 'roc_auc' if train_data_preprocessed.num_classes == 2 else 'f1'
        self.model_params.update({'metric': metric})
        self.predictor = Fedot(**self.model_params)

        self.predictor.fit(train_data_preprocessed)

        return self.predictor

    def predict(self, test_features, test_target) -> dict:
        """Preprocess the test data (cached after first call) and predict."""
        if self.test_data_preprocessed is None:
            test_data = self._init_input_data(test_features, test_target)
            test_data_preprocessed = self.prerpocessing_pipeline.root_node.predict(test_data)
            test_data_preprocessed.predict = np.squeeze(test_data_preprocessed.predict)
            self.test_data_preprocessed = InputData(idx=test_data_preprocessed.idx,
                                                    features=test_data_preprocessed.predict,
                                                    target=test_data_preprocessed.target,
                                                    data_type=test_data_preprocessed.data_type,
                                                    task=test_data_preprocessed.task)

        self.prediction_label = self.predictor.predict(self.test_data_preprocessed)
        return self.prediction_label

    def predict_proba(self, test_features, test_target) -> dict:
        """Preprocess the test data (cached after first call) and predict probabilities."""
        if self.test_data_preprocessed is None:
            test_data = self._init_input_data(test_features, test_target)
            test_data_preprocessed = self.prerpocessing_pipeline.root_node.predict(test_data)
            test_data_preprocessed.predict = np.squeeze(test_data_preprocessed.predict)
            # BUG FIX: the original assigned to self.test_data_preprocessed.predict
            # while self.test_data_preprocessed was still None (AttributeError).
            # Build and cache the InputData exactly as predict() does.
            self.test_data_preprocessed = InputData(idx=test_data_preprocessed.idx,
                                                    features=test_data_preprocessed.predict,
                                                    target=test_data_preprocessed.target,
                                                    data_type=test_data_preprocessed.data_type,
                                                    task=test_data_preprocessed.task)

        self.prediction_proba = self.predictor.predict_proba(self.test_data_preprocessed)
        return self.prediction_proba

    def get_metrics(self, target: Union[np.ndarray, pd.Series], metric_names: Union[str, List[str]]):
        """Compute the requested metrics from the last predict/predict_proba calls."""
        analyzer = PerformanceAnalyzer()
        return analyzer.calculate_metrics(target=target,
                                          predicted_labels=self.prediction_label,
                                          predicted_probs=self.prediction_proba,
                                          target_metrics=metric_names)

    def save_prediction(self, predicted_data: np.ndarray, kind: str):
        # NOTE(review): self.saver is never initialised anywhere in this class —
        # this call will raise AttributeError; confirm where saver comes from.
        self.saver.save(predicted_data, kind)

    def save_metrics(self, metrics: dict):
        # NOTE(review): self.saver is never initialised anywhere in this class —
        # this call will raise AttributeError; confirm where saver comes from.
        self.saver.save(metrics, 'metrics')
2 changes: 1 addition & 1 deletion fedot_ind/core/architecture/pipelines/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pymonad.either import Right
from fedot_ind.core.architecture.pipelines.abstract_pipeline import AbstractPipelines
from fedot_ind.core.architecture.preprocessing.DatasetLoader import DataLoader
from fedot_ind.core.operation.transformation.basis.data_driven import DataDrivenBasisImplementation
from fedot_ind.core.operation.implementation.basis.data_driven import DataDrivenBasisImplementation
from functools import partial


Expand Down
5 changes: 3 additions & 2 deletions fedot_ind/core/architecture/settings/pipeline_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
from fedot_ind.core.models.recurrence.RecurrenceExtractor import RecurrenceExtractor
from fedot_ind.core.models.signal.SignalExtractor import SignalExtractor
from fedot_ind.core.models.topological.TopologicalExtractor import TopologicalExtractor
from fedot_ind.core.operation.implementation.basis.fourier import FourierBasisImplementation
from fedot_ind.core.operation.implementation.basis.wavelet import WaveletBasisImplementation
from fedot_ind.core.operation.transformation.basis.data_driven import DataDrivenBasisImplementation
from fedot_ind.core.operation.transformation.basis.fourier import FourierBasisImplementation
from fedot_ind.core.operation.transformation.basis.wavelet import WaveletBasisImplementation



class BasisTransformations(Enum):
Expand Down
3 changes: 3 additions & 0 deletions fedot_ind/core/architecture/settings/task_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from fedot_ind.core.architecture.experiment.TimeSeriesClassifierNN import TimeSeriesClassifierNN
from fedot_ind.core.architecture.experiment.TimeSeriesClassifierPreset import TimeSeriesClassifierPreset
from fedot_ind.core.architecture.experiment.TimeSeriesRegression import TimeSeriesRegression
from fedot_ind.core.architecture.experiment import TimeSeriesForecasingWithDecomposition
from fedot_ind.core.ensemble.rank_ensembler import RankEnsemble


Expand All @@ -24,3 +25,5 @@ class TaskEnum(Enum):
image_classification = (CVExperimenter,)
object_detection = (CVExperimenter,)
semantic_segmentation = (CVExperimenter,)


2 changes: 1 addition & 1 deletion fedot_ind/core/models/signal/SignalExtractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from fedot_ind.core.metrics.metrics_implementation import *
from fedot_ind.core.models.WindowedFeaturesExtractor import WindowedFeatureExtractor
from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor
from fedot_ind.core.operation.transformation.basis.wavelet import WaveletBasisImplementation
from fedot_ind.core.operation.implementation.basis.wavelet import WaveletBasisImplementation


class SignalExtractor(WindowedFeatureExtractor):
Expand Down
5 changes: 1 addition & 4 deletions fedot_ind/core/operation/IndustrialCachableOperation.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,5 @@ def transform(self, input_data: InputData, use_cache: bool = False) -> OutputDat
predict = self._convert_to_output(input_data, predict, data_type=self.data_type)
return predict

def _transform(self, input_data) -> np.array:
"""
Method for feature generation for all series
"""
def _transform(self, input_data):
pass
Loading