add exog forecasting
v1docq committed Apr 18, 2024
1 parent fbd5e89 commit 2926a5e
Showing 11 changed files with 782 additions and 284 deletions.
@@ -0,0 +1,32 @@
import numpy as np
import pandas as pd

from fedot_ind.api.main import FedotIndustrial
from fedot_ind.api.utils.path_lib import PROJECT_PATH

if __name__ == "__main__":
    dataset_name = PROJECT_PATH + '/examples/data/forecasting/monash_benchmark/MonashBitcoin_30.csv'
    horizon = 30
    metric_names = ('smape', 'rmse', 'median_absolute_error')

    train_data = pd.read_csv(dataset_name)
    variables = train_data['label'].unique().tolist()
    exog_var = ['send_usd', 'market_cap', 'median_transaction_value', 'google_trends']
    exog_ts = np.vstack([train_data[train_data['label'] == var]['value'].values for var in exog_var])
    # only the first exogenous series (send_usd) is passed to the strategy
    exog_ts = exog_ts[0, :]
    ts = train_data[train_data['label'] == 'price']['value'].values
    target = ts[-horizon:].flatten()
    input_data = (ts, target)

    api_config = dict(problem='ts_forecasting',
                      metric='rmse',
                      timeout=15,
                      with_tuning=False,
                      pop_size=10,
                      industrial_strategy_params={'exog_variable': exog_ts},
                      task_params={'forecast_length': horizon},
                      industrial_strategy='forecasting_exogenous',
                      n_jobs=2,
                      logging_level=30)
    industrial = FedotIndustrial(**api_config)
    industrial.fit(input_data)
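After fitting, the forecast and quality metrics can be retrieved from the same object; a minimal follow-up sketch (not part of the commit), assuming the standard FedotIndustrial predict/get_metrics interface and reusing the metric_names tuple defined above:

# hedged sketch: query the fitted model and score the forecast
# against the held-out horizon
forecast = industrial.predict(input_data)
metrics = industrial.get_metrics(target=target, metric_names=metric_names)
print(metrics)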

8 changes: 5 additions & 3 deletions fedot_ind/api/main.py
@@ -179,7 +179,7 @@ def fit(self,
         """
         self.train_data = deepcopy(input_data)  # we do not want to make inplace changes
         input_preproc = DataCheck(input_data=self.train_data, task=self.config_dict['problem'],
-                                  task_params=self.task_params)
+                                  task_params=self.task_params, industrial_task_params=self.industrial_strategy_params)
         self.train_data = input_preproc.check_input_data()
         self.target_encoder = input_preproc.get_target_encoder()
         self.__init_solver()
@@ -207,7 +207,8 @@ def predict(self,
         self.predict_data = deepcopy(predict_data)  # we do not want to make inplace changes
         self.predict_data = DataCheck(input_data=self.predict_data,
                                       task=self.config_dict['problem'],
-                                      task_params=self.task_params).check_input_data()
+                                      task_params=self.task_params,
+                                      industrial_task_params=self.industrial_strategy_params).check_input_data()
         if self.industrial_strategy is not None and not self.is_finetuned:
             self.predicted_labels = self.industrial_strategy_class.predict(self.predict_data, predict_mode)
         else:
@@ -245,7 +246,8 @@ def predict_proba(self,
             predict_data)  # we do not want to make inplace changes
         self.predict_data = DataCheck(input_data=self.predict_data,
                                       task=self.config_dict['problem'],
-                                      task_params=self.task_params).check_input_data()
+                                      task_params=self.task_params,
+                                      industrial_task_params=self.industrial_strategy_params).check_input_data()
         if self.industrial_strategy is not None and not self.is_finetuned:
             self.predicted_labels = self.industrial_strategy_class.predict(self.predict_data, predict_mode)
         else:
9 changes: 6 additions & 3 deletions fedot_ind/api/utils/checkers_collections.py
@@ -31,8 +31,10 @@ class DataCheck:
     def __init__(self,
                  input_data: Union[tuple, InputData] = None,
                  task: str = None,
-                 task_params=None):
+                 task_params=None,
+                 industrial_task_params=None):
         self.logger = logging.getLogger(self.__class__.__name__)
+        self.industrial_task_params = industrial_task_params
         self.input_data = input_data
         self.data_convertor = DataConverter(data=self.input_data)
         self.task = task
@@ -98,8 +100,9 @@ def _init_input_data(self) -> None:
             features_array = self.data_convertor.convert_to_1d_array()
             task = Task(TaskTypesEnum.ts_forecasting,
                         TsForecastingParams(forecast_length=self.task_params['forecast_length']))
-            features_array = features_array[:-self.task_params['forecast_length']]
-            target = features_array[-self.task_params['forecast_length']:]
+            if self.industrial_task_params is None:
+                features_array = features_array[:-self.task_params['forecast_length']]
+                target = features_array[-self.task_params['forecast_length']:]
             self.input_data = InputData.from_numpy_time_series(
                 features_array=features_array,
                 target_array=target,
45 changes: 42 additions & 3 deletions fedot_ind/api/utils/industrial_strategy.py
@@ -2,11 +2,17 @@

 import numpy as np
 from fedot import Fedot
+from fedot.core.data.data import InputData
+from fedot.core.data.data_split import train_test_data_setup
+from fedot.core.data.multi_modal import MultiModalData
+from fedot.core.pipelines.pipeline_builder import PipelineBuilder
+from fedot.core.repository.dataset_types import DataTypesEnum

 from fedot_ind.core.ensemble.kernel_ensemble import KernelEnsembler
 from fedot_ind.core.ensemble.random_automl_forest import RAFensembler
 from fedot_ind.core.repository.constanst_repository import BATCH_SIZE_FOR_FEDOT_WORKER, FEDOT_WORKER_NUM, \
-    FEDOT_WORKER_TIMEOUT_PARTITION, FEDOT_TUNING_METRICS, FEDOT_TUNER_STRATEGY, FEDOT_TS_FORECASTING_ASSUMPTIONS
+    FEDOT_WORKER_TIMEOUT_PARTITION, FEDOT_TUNING_METRICS, FEDOT_TUNER_STRATEGY, FEDOT_TS_FORECASTING_ASSUMPTIONS, \
+    FEDOT_TASK
 from fedot_ind.core.repository.industrial_implementations.abstract import build_tuner
 from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels

@@ -21,10 +27,13 @@ def __init__(self, industrial_strategy_params,
         self.industrial_strategy = industrial_strategy
         self.industrial_strategy_fit = {'federated_automl': self._federated_strategy,
                                         'kernel_automl': self._kernel_strategy,
-                                        'forecasting_assumptions': self._forecasting_strategy}
+                                        'forecasting_assumptions': self._forecasting_strategy,
+                                        'forecasting_exogenous': self._forecasting_exogenous_strategy
+                                        }
         self.industrial_strategy_predict = {'federated_automl': self._federated_predict,
                                             'kernel_automl': self._kernel_predict,
-                                            'forecasting_assumptions': self._forecasting_predict}
+                                            'forecasting_assumptions': self._forecasting_predict,
+                                            'forecasting_exogenous': self._forecasting_predict}
         self.config_dict = api_config
         self.logger = logger
         self.repo = IndustrialModels().setup_repository()
@@ -66,6 +75,36 @@ def _forecasting_strategy(self, input_data):
             industrial.fit(input_data)
             self.solver.update({model_name: industrial})

+    def _forecasting_exogenous_strategy(self, input_data):
+        self.logger.info('TS exogenous forecasting algorithm was applied')
+        self.solver = {}
+        init_assumption = PipelineBuilder().add_node('lagged', 0)
+        task = FEDOT_TASK[self.config_dict['problem']]
+        train_lagged, predict_lagged = train_test_data_setup(InputData(idx=np.arange(len(input_data.features)),
+                                                                       features=input_data.features,
+                                                                       target=input_data.features,
+                                                                       task=task,
+                                                                       data_type=DataTypesEnum.ts), 2)
+        dataset_dict = {'lagged': train_lagged}
+        exog_variable = self.industrial_strategy_params['exog_variable']
+        init_assumption.add_node('exog_ts', 1)
+
+        # Exogenous time series
+        train_exog, predict_exog = train_test_data_setup(InputData(idx=np.arange(len(exog_variable)),
+                                                                   features=exog_variable,
+                                                                   target=input_data.features,
+                                                                   task=task,
+                                                                   data_type=DataTypesEnum.ts), 2)
+        dataset_dict.update({f'exog_ts': train_exog})
+
+        train_dataset = MultiModalData(dataset_dict)
+        init_assumption = init_assumption.join_branches('ridge')
+        self.config_dict['initial_assumption'] = init_assumption.build()
+
+        industrial = Fedot(**self.config_dict)
+        industrial.fit(train_dataset)
+        self.solver = {'exog_model': industrial}
+
     def _finetune_loop(self,
                        kernel_ensemble: dict,
                        kernel_data: dict,
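For reference, the initial assumption that _forecasting_exogenous_strategy assembles is a two-branch pipeline; a standalone sketch using only the builder calls that appear in the diff above:

from fedot.core.pipelines.pipeline_builder import PipelineBuilder

# branch 0: lagged transform of the endogenous series;
# branch 1: the exogenous series routed through 'exog_ts';
# both branches are merged by a ridge regression head, mirroring the
# initial_assumption built in _forecasting_exogenous_strategy
pipeline = (PipelineBuilder()
            .add_node('lagged', 0)
            .add_node('exog_ts', 1)
            .join_branches('ridge')
            .build())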
4 changes: 3 additions & 1 deletion fedot_ind/core/repository/model_repository.py
@@ -80,6 +80,7 @@
         'one_hot_encoding': OneHotEncodingImplementation,
         'label_encoding': LabelEncodingImplementation
     },
+    'FORECASTING_PREPROC': {'exog_ts': ExogDataTransformationImplementation},
     'INDUSTRIAL_PREPROC_MODEL': {
         'cat_features': DummyOperation,
         'dimension_reduction': FeatureFilter,
@@ -187,7 +188,8 @@ class AtomizedModel(Enum):
         'lagged': LaggedTransformationImplementation,
         'sparse_lagged': SparseLaggedTransformationImplementation,
         'smoothing': TsSmoothingImplementation,
-        'gaussian_filter': GaussianFilterImplementation
+        'gaussian_filter': GaussianFilterImplementation,
+        'exog_ts': ExogDataTransformationImplementation,
     }

     NEURAL_MODEL = {
1 change: 0 additions & 1 deletion requirements.txt
@@ -42,6 +42,5 @@ hyperopt~=0.2.7
statsmodels~=0.14.1
xgboost~=2.0.3
seaborn~=0.13.2
datasets
librosa
pywt
