Skip to content

Commit

Permalink
examples updates part 2
Browse files Browse the repository at this point in the history
  • Loading branch information
v1docq committed Apr 9, 2024
1 parent 787135d commit 27b42cd
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 112 deletions.
Original file line number Diff line number Diff line change
@@ -1,31 +1,21 @@
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot_ind.tools.example_utils import industrial_common_modelling_loop

from fedot_ind.api.main import FedotIndustrial
from fedot_ind.tools.loader import DataLoader

if __name__ == "__main__":
dataset_name = 'Handwriting'
finetune = True
initial_assumption = PipelineBuilder().add_node('channel_filtration').\
initial_assumption = PipelineBuilder().add_node('channel_filtration'). \
add_node('quantile_extractor').add_node('rf')
metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')
api_config = dict(problem='classification',
metric='f1',
timeout=5,
initial_assumption=initial_assumption,
n_jobs=2,
logging_level=20)

industrial = FedotIndustrial(problem='classification',
metric='f1',
timeout=5,
initial_assumption=initial_assumption,
n_jobs=2,
logging_level=20)

train_data, test_data = DataLoader(dataset_name=dataset_name).load_data()
if finetune:
model = industrial.finetune(train_data)
else:
model = industrial.fit(train_data)

labels = industrial.predict(test_data)
probs = industrial.predict_proba(test_data)
metrics = industrial.get_metrics(target=test_data[1],
rounding_order=3,
metric_names=['f1', 'accuracy', 'precision', 'roc_auc'])
# Forward the classification metrics explicitly: without `metric_names` the helper
# falls back to its regression defaults ('r2', 'rmse', 'mae').
model, labels, metrics = industrial_common_modelling_loop(api_config=api_config,
                                                          dataset_name=dataset_name,
                                                          finetune=finetune,
                                                          metric_names=metric_names)
print(metrics)
_ = 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
import pandas as pd

from fedot_ind.api.utils.path_lib import PROJECT_PATH

forecast_result_path = PROJECT_PATH + '/examples/automl_example/api_example/time_series/ts_forecasting/forecasts/'


def read_results(forecast_result_path):
    """Load every saved benchmark CSV from a directory.

    Each file name is expected to start with the dataset name followed by an
    underscore (e.g. ``D1317_forecast.csv``); that prefix is stored in a new
    ``dataset_name`` column of the loaded frame.

    Args:
        forecast_result_path: directory that contains the result CSV files.

    Returns:
        A ``(df_forecast, df_metrics)`` tuple of lists of DataFrames: files whose
        name contains ``'forecast'`` go to the first list, all other CSV files
        go to the second one.
    """
    df_forecast = []
    df_metrics = []
    # Sort the listing so the order of the returned frames is reproducible.
    for file in sorted(os.listdir(forecast_result_path)):
        # Anything that is not a CSV (notes, caches, ...) cannot be a result file.
        if not file.endswith('.csv'):
            continue
        df = pd.read_csv(os.path.join(forecast_result_path, file))
        df['dataset_name'] = file.split('_')[0]
        if 'forecast' in file:
            df_forecast.append(df)
        else:
            df_metrics.append(df)
    return df_forecast, df_metrics


def create_comprasion_df(df, metric: str = 'rmse'):
    """Build a win/lose table of the industrial model against the other models.

    Args:
        df: list of metric DataFrames. Each frame must hold the metric name in
            the ``'Unnamed: 0'`` column and an ``'industrial'`` column; every
            other column except ``'dataset_name'`` is treated as a competitor.
        metric: name of the metric whose rows are kept.

    Returns:
        The concatenated frame restricted to ``metric`` with two extra columns:
        ``'Difference_industrial'`` (best competitor value minus the industrial
        value) and ``'industrial_Wins'`` (``'Win'`` when that difference is
        positive, ``'Loose'`` otherwise).
    """
    df_full = pd.concat(df)
    df_full = df_full[df_full['Unnamed: 0'] == metric].drop('Unnamed: 0', axis=1)
    # Pick competitors by name: the former positional slice ``iloc[:, 1:2]`` covered
    # a single column only, so the row-wise minimum ignored the remaining models.
    competitor_cols = [col for col in df_full.columns
                       if col not in ('industrial', 'dataset_name')]
    best_competitor = df_full[competitor_cols].min(axis=1, numeric_only=True)
    df_full['Difference_industrial'] = best_competitor - df_full['industrial']
    # Lower is better for the error metrics, so a positive difference is a win.
    df_full['industrial_Wins'] = ['Win' if difference > 0 else 'Loose'
                                  for difference in df_full['Difference_industrial']]
    return df_full


if __name__ == "__main__":
    # The saved CSV files do not depend on the metric, so read them once
    # instead of re-reading the whole directory on every iteration.
    df_forecast, df_metrics = read_results(forecast_result_path)
    for metric in ['rmse', 'smape']:
        df_comprasion = create_comprasion_df(df_metrics, metric)
        print(df_comprasion['industrial_Wins'].value_counts())
Original file line number Diff line number Diff line change
@@ -1,69 +1,61 @@
import os

import pandas as pd
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

from fedot_ind.api.main import FedotIndustrial
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.core.metrics.metrics_implementation import calculate_forecasting_metric
from fedot_ind.core.repository.constanst_repository import M4_FORECASTING_BENCH
from fedot_ind.tools.loader import DataLoader
from fedot_ind.tools.example_utils import industrial_forecasting_modelling_loop, compare_forecast_with_sota

if __name__ == "__main__":

#dataset_name = 'D1317'
forecast_result_path = os.listdir(PROJECT_PATH +
'/examples/automl_example/api_example/time_series/ts_forecasting/forecasts/')
forecast_result_path = set([x.split('_')[0] for x in forecast_result_path])
forecast_col = ['industrial', 'target', 'AG', 'NBEATS']
metric_col = ['industrial', 'AG', 'NBEATS']
benchmark = 'M4'
horizon = 14
finetune = False
for dataset_name in M4_FORECASTING_BENCH:
try:
autogluon = PROJECT_PATH + f'/benchmark/results/benchmark_results/autogluon/' \
f'{dataset_name}_{horizon}_forecast_vs_actual.csv'
n_beats = PROJECT_PATH + f'/benchmark/results/benchmark_results/nbeats/' \
f'{dataset_name}_{horizon}_forecast_vs_actual.csv'
n_beats = pd.read_csv(n_beats)
autogluon = pd.read_csv(autogluon)

n_beats_forecast = calculate_forecasting_metric(target=n_beats['value'].values,
labels=n_beats['predict'].values)
autogluon_forecast = calculate_forecasting_metric(target=autogluon['value'].values,
labels=autogluon['predict'].values)

initial_assumption = PipelineBuilder().add_node('eigen_basis',
params={'low_rank_approximation': False,
'rank_regularization': 'explained_dispersion'}).add_node(
'ar')
industrial = FedotIndustrial(problem='ts_forecasting',
metric='rmse',
task_params={'forecast_length': horizon},
timeout=5,
with_tuning=False,
initial_assumption=initial_assumption,
n_jobs=2,
logging_level=30)

train_data, _ = DataLoader(dataset_name=dataset_name).load_forecast_data(folder=benchmark)

if finetune:
model = industrial.finetune(train_data)
else:
model = industrial.fit(train_data)

labels = industrial.predict(train_data)
metrics = industrial.get_metrics(target=train_data.values[-horizon:].flatten(),
metric_names=('smape', 'rmse', 'median_absolute_error'))
industrial.save_best_model()
forecast = pd.DataFrame([labels,
train_data.values[-horizon:].flatten(),
autogluon['predict'].values,
n_beats['predict'].values]).T
forecast.columns = ['industrial', 'target',
'AG',
'NBEATS']
metrics_comprasion = pd.concat([metrics, autogluon_forecast, n_beats_forecast]).T
metrics_comprasion.columns = ['industrial',
'AG',
'NBEATS']
forecast.to_csv(f'./{dataset_name}_forecast.csv')
metrics_comprasion.to_csv(f'./{dataset_name}_metrics.csv')
except Exception:
print(f'Skip {dataset_name}')
initial_assumption = PipelineBuilder().add_node('eigen_basis',
params={'low_rank_approximation': False,
'rank_regularization': 'explained_dispersion'}).add_node(
'ar')
api_config = dict(problem='ts_forecasting',
metric='rmse',
task_params={'forecast_length': horizon},
timeout=5,
with_tuning=False,
initial_assumption=initial_assumption,
n_jobs=2,
logging_level=30)

# Evaluate every M4 series that has no saved result yet and store the forecast
# and the metrics next to the AutoGluon / N-BEATS reference results.
for dataset_name in M4_FORECASTING_BENCH:
    if dataset_name in forecast_result_path:
        print('Already evaluated')
        continue
    try:
        # The helper returns (predictions, metrics) for N-BEATS first, then for AutoGluon.
        n_beats_forecast, n_beats_metrics, \
            autogluon_forecast, autogluon_metrics = compare_forecast_with_sota(dataset_name=dataset_name,
                                                                               horizon=horizon)
        model, labels, metrics, target = industrial_forecasting_modelling_loop(dataset_name=dataset_name,
                                                                               benchmark=benchmark,
                                                                               horizon=horizon,
                                                                               api_config=api_config,
                                                                               finetune=finetune)

        # Row order must follow `forecast_col`: industrial, target, AG, NBEATS.
        forecast = pd.DataFrame([labels,
                                 target,
                                 autogluon_forecast,
                                 n_beats_forecast]).T
        forecast.columns = forecast_col

        # Concatenate the metric tables (not the raw prediction arrays, which
        # `pd.concat` rejects) in the order given by `metric_col`.
        metrics_comprasion = pd.concat([metrics,
                                        autogluon_metrics,
                                        n_beats_metrics]).T
        metrics_comprasion.columns = metric_col

        forecast.to_csv(f'./{dataset_name}_forecast.csv')
        metrics_comprasion.to_csv(f'./{dataset_name}_metrics.csv')

    except Exception as ex:
        # Best effort: one failing series must not stop the whole benchmark run.
        print(f'Skip {dataset_name}. Reason - {ex}')
Original file line number Diff line number Diff line change
@@ -1,29 +1,20 @@
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

from fedot_ind.api.main import FedotIndustrial
from fedot_ind.tools.loader import DataLoader
from fedot_ind.tools.example_utils import industrial_common_modelling_loop

if __name__ == "__main__":
dataset_name = 'IEEEPPG' #BeijingPM10Quality
dataset_name = 'IEEEPPG' # BeijingPM10Quality
finetune = True
initial_assumption = PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node('treg')

industrial = FedotIndustrial(problem='regression',
metric='rmse',
timeout=5,
initial_assumption=initial_assumption,
n_jobs=2,
logging_level=20)

train_data, test_data = DataLoader(dataset_name=dataset_name).load_data()
if finetune:
model = industrial.finetune(train_data)
else:
model = industrial.fit(train_data)

labels = industrial.predict(test_data)
metrics = industrial.get_metrics(target=test_data[1],
rounding_order=3,
metric_names=('r2', 'rmse', 'mae'))
initial_assumption = PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node(
'treg')
api_config = dict(problem='regression',
metric='rmse',
timeout=5,
initial_assumption=initial_assumption,
n_jobs=2,
logging_level=20)
metric_names = ('r2', 'rmse', 'mae')
# Pass the metric list explicitly so that editing `metric_names` above takes effect
# instead of silently relying on the helper's default value.
model, labels, metrics = industrial_common_modelling_loop(api_config=api_config,
                                                          dataset_name=dataset_name,
                                                          finetune=finetune,
                                                          metric_names=metric_names)
print(metrics)

7 changes: 3 additions & 4 deletions fedot_ind/core/operation/transformation/basis/eigen_basis.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,9 @@ def mode_func(x):

number_of_dim = list(range(data.shape[1]))
if len(number_of_dim) == 1:
try:
svd_numbers = [self._transform_one_sample(signal, svd_flag=True) for signal in data[:, 0, :]]
except Exception:
_ = 1
svd_numbers = [self._transform_one_sample(signal, svd_flag=True) for signal in data[:, 0, :]]
if len(svd_numbers) == 0:
raise ValueError('Error in spectrum calculation')
else:
for dimension in number_of_dim:
dimension_rank = []
Expand Down
1 change: 1 addition & 0 deletions fedot_ind/core/optimizer/IndustrialEvoOptimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(self,

super().__init__(objective, initial_graphs, requirements,
graph_generation_params, graph_optimizer_params)
self.operators.remove(self.crossover)
self.eval_dispatcher = IndustrialDispatcher(adapter=graph_generation_params.adapter,
n_jobs=requirements.n_jobs,
graph_cleanup_fn=_try_unfit_graph,
Expand Down
2 changes: 1 addition & 1 deletion fedot_ind/core/repository/initializer_industrial_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def setup_repository(self):
get_industrial_search_space)
setattr(ApiParamsRepository, "_get_default_mutations",
_get_default_industrial_mutations)
setattr(Crossover, '_crossover_by_type', _crossover_by_type)
#setattr(Crossover, '_crossover_by_type', _crossover_by_type)
## replace data merger
setattr(ImageDataMerger, "preprocess_predicts", preprocess_predicts)
setattr(ImageDataMerger, "merge_predicts", merge_predicts)
Expand Down
61 changes: 57 additions & 4 deletions fedot_ind/tools/example_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import random
from pathlib import Path
from fedot_ind.api.main import FedotIndustrial
from fedot_ind.core.metrics.metrics_implementation import calculate_forecasting_metric

from fedot_ind.tools.loader import DataLoader

import pandas as pd
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams
from sklearn.metrics import f1_score, roc_auc_score
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.core.architecture.settings.computational import backend_methods as np
Expand All @@ -28,3 +28,56 @@ def evaluate_metric(target, prediction):
metric = f1_score(target, np.argmax(
prediction, axis=1), average='weighted')
return metric


def compare_forecast_with_sota(dataset_name, horizon):
    """Load the stored N-BEATS and AutoGluon forecasts and score them.

    Args:
        dataset_name: name of the benchmark series, used in the CSV file name.
        horizon: forecast horizon, used in the CSV file name.

    Returns:
        A 4-tuple ``(n_beats_predictions, n_beats_metrics,
        autogluon_predictions, autogluon_metrics)``. Predictions are the
        ``'predict'`` column values; metrics are whatever
        ``calculate_forecasting_metric`` returns for the ``'value'`` /
        ``'predict'`` columns.
    """
    results_root = PROJECT_PATH + '/benchmark/results/benchmark_results/'
    csv_name = f'{dataset_name}_{horizon}_forecast_vs_actual.csv'

    # Read both reference files first (N-BEATS, then AutoGluon), then score them.
    frames = {model_dir: pd.read_csv(f'{results_root}{model_dir}/{csv_name}')
              for model_dir in ('nbeats', 'autogluon')}
    scores = {model_dir: calculate_forecasting_metric(target=frame['value'].values,
                                                      labels=frame['predict'].values)
              for model_dir, frame in frames.items()}

    return (frames['nbeats']['predict'].values,
            scores['nbeats'],
            frames['autogluon']['predict'].values,
            scores['autogluon'])


def industrial_forecasting_modelling_loop(dataset_name: str = None,
                                          benchmark: str = None,
                                          horizon: int = 1,
                                          finetune: bool = False,
                                          api_config: dict = None,
                                          metric_names: tuple = ('smape', 'rmse', 'median_absolute_error')):
    """Fit (or fine-tune) a FedotIndustrial forecaster on one benchmark series.

    Args:
        dataset_name: name of the series passed to ``DataLoader``.
        benchmark: benchmark folder passed to ``load_forecast_data``.
        horizon: number of trailing observations used as the evaluation target.
        finetune: call ``finetune`` instead of ``fit`` when True.
        api_config: keyword arguments forwarded to ``FedotIndustrial``.
        metric_names: metrics requested from ``get_metrics``; the default keeps
            the previously hard-coded set.

    Returns:
        A 4-tuple ``(model, labels, metrics, target)``.
    """
    industrial = FedotIndustrial(**api_config)
    train_data, _ = DataLoader(dataset_name=dataset_name).load_forecast_data(folder=benchmark)
    # The last `horizon` values of the loaded series are the ground truth.
    target = train_data.values[-horizon:].flatten()
    if finetune:
        model = industrial.finetune(train_data)
    else:
        model = industrial.fit(train_data)

    labels = industrial.predict(train_data)
    metrics = industrial.get_metrics(target=target,
                                     metric_names=metric_names)
    return model, labels, metrics, target


def industrial_common_modelling_loop(dataset_name: str = None,
                                     finetune: bool = False,
                                     api_config: dict = None,
                                     metric_names: tuple = ('r2', 'rmse', 'mae')):
    """Train a FedotIndustrial model on a dataset and evaluate it on the test split.

    Args:
        dataset_name: name of the dataset passed to ``DataLoader``.
        finetune: call ``finetune`` instead of ``fit`` when True.
        api_config: keyword arguments forwarded to ``FedotIndustrial``.
        metric_names: metrics requested from ``get_metrics``.

    Returns:
        A 3-tuple ``(model, labels, metrics)``.
    """
    industrial = FedotIndustrial(**api_config)
    loader = DataLoader(dataset_name=dataset_name)
    train_data, test_data = loader.load_data()

    # Choose the training entry point once, then run it on the train split.
    train_method = industrial.finetune if finetune else industrial.fit
    model = train_method(train_data)

    labels = industrial.predict(test_data)
    # The second element of the test split is used as the target.
    metrics = industrial.get_metrics(target=test_data[1],
                                     rounding_order=3,
                                     metric_names=metric_names)
    return model, labels, metrics

0 comments on commit 27b42cd

Please sign in to comment.