Skip to content

Commit

Permalink
Release Fedot.Industrial 0.5. Anomaly detection, Forecasting,Sampling…
Browse files Browse the repository at this point in the history
…,Multimodal strategy. (#157)

* add random sampling example, refactor anomaly detection
* add statistical detector for anomaly detection, example for anomaly_detection, update search_space, update interfaces
* refactor in monadic way
* refactor mutation during optimisation, refactor IndustrialDispatcher, refactor centroid channel filtration

---------

Co-authored-by: v1docq <[email protected]>
Co-authored-by: Lopa10ko <[email protected]>
Co-authored-by: autopep8 bot <[email protected]>
Co-authored-by: technocreep <[email protected]>
  • Loading branch information
5 people authored Jul 12, 2024
1 parent 7a4843c commit 6440157
Show file tree
Hide file tree
Showing 144 changed files with 60,779 additions and 9,425 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/poetry_unit_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 30
timeout-minutes: 60
strategy:
matrix:
python-version: [3.9, '3.10']
Expand Down Expand Up @@ -38,6 +38,9 @@ jobs:
- name: Install dependencies
run: poetry install

- name: Bump up FEDOT to a stable revision (temporary)
run: poetry add git+https://github.com/aimclub/FEDOT.git@e0b4ee7

- name: Run tests with pytest
run: poetry run pytest --cov=fedot_ind --cov-report xml:coverage.xml tests/unit

Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,22 @@
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.tools.example_utils import industrial_common_modelling_loop
from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate

if __name__ == "__main__":
return_history = True
opt_hist = PROJECT_PATH + '/examples/data/forecasting/D1679_opt_history/'
dataset_name = 'Lightning7'
finetune = False
metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')
metric_names = ('f1', 'accuracy')
api_config = dict(problem='classification',
metric='f1',
timeout=5,
pop_size=10,
pop_size=5,
with_tuning=False,
n_jobs=2,
cv_folds=3,
n_jobs=-1,
logging_level=10)

industrial, labels, metrics = industrial_common_modelling_loop(
api_config=api_config, dataset_name=dataset_name, finetune=finetune)
if return_history:
opt_hist = industrial.save_optimization_history(return_history=True)
else:
# tutorial sample of opt history
opt_hist = PROJECT_PATH + '/examples/data/forecasting/D1679_opt_history/'
opt_hist = industrial.vis_optimisation_history(
result_dict = ApiTemplate(api_config=api_config,
metric_list=('f1', 'accuracy')).eval(dataset=dataset_name,
finetune=finetune)

opt_hist = result_dict['industrial_model'].save_optimization_history(return_history=True)
opt_hist = result_dict['industrial_model'].vis_optimisation_history(
opt_history_path=opt_hist, return_history=True)
Original file line number Diff line number Diff line change
@@ -1,40 +1,31 @@
import numpy as np
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

from fedot_ind.api.utils.data import init_input_data
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
from fedot_ind.tools.loader import DataLoader
from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate

if __name__ == "__main__":
dataset_name = 'Earthquakes'
train_data, test_data = DataLoader(dataset_name=dataset_name).load_data()
input_train_data = init_input_data(train_data[0], train_data[1])
input_test_data = init_input_data(test_data[0], test_data[1])

metric_dict = {'accuracy': accuracy_score,
'f1': f1_score, 'roc_auc': roc_auc_score}
with IndustrialModels():
pipeline = PipelineBuilder().add_node(
'recurrence_extractor',
params={
'window_size': 30,
'stride': 5,
'image_mode': True}) .add_node(
'resnet_model',
params={
'epochs': 50,
'model_name': 'ResNet50one'}) .build()
pipeline.fit(input_train_data)
output = pipeline.predict(input_test_data)

predict = np.array(output.predict.flatten()) + 1

metrics = {'accuracy': accuracy_score(test_data[1], predict)}
if len(set(test_data[1])) > 2:
metrics['f1'] = f1_score(test_data[1], predict)
else:
metrics['roc_auc'] = roc_auc_score(test_data[1], predict)
dataset_name = 'Lightning7'
finetune = False
metric_names = ('f1', 'accuracy')
multimodal_pipeline = {'recurrence_extractor': {
'window_size': 30,
'stride': 5,
'image_mode': True},
'resnet_model': {
'epochs': 1,
'batch_size': 16,
'model_name': 'ResNet50'}}
explain_config = {'method': 'recurrence',
'samples': 1,
'metric': 'mean'}
api_config = dict(problem='classification',
metric='f1',
timeout=0.1,
pop_size=5,
with_tuning=False,
cv_folds=3,
n_jobs=-1,
logging_level=10)

print(metrics)
_ = 1
result_dict = ApiTemplate(api_config=api_config,
metric_list=('f1', 'accuracy')).eval(dataset=dataset_name,
finetune=finetune,
initial_assumption=multimodal_pipeline)
result_dict['industrial_model'].explain(explain_config)

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
timeout=0.1,
n_jobs=2,
industrial_strategy='federated_automl',
industrial_task_params={'industrial_task': 'classification',
'data_type': 'time_series'},
industrial_strategy_params={},
logging_level=20)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,50 @@
from fedot_ind.api.main import FedotIndustrial
from fedot_ind.tools.loader import DataLoader

dataset_name = 'Lightning7'
metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')
api_config = dict(problem='ts_forecasting',
metric='rmse',
timeout=15,
with_tuning=False,
industrial_strategy='forecasting_assumptions',
industrial_strategy_params={},
logging_level=20)
train_data, test_data = DataLoader(dataset_name).load_data()
industrial = FedotIndustrial(**api_config)
industrial.fit(train_data)
predict = industrial.predict(test_data)
_ = 1
import pickle

from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
from fedot_ind.core.repository.constanst_repository import M4_FORECASTING_BENCH

finetune = False


def forecasting_loop(dataset_dict, api_config):
    """Evaluate a single forecasting dataset through ``ApiTemplate``.

    Args:
        dataset_dict: dataset description, forwarded as ``dataset`` to
            ``ApiTemplate.eval``.
        api_config: configuration forwarded as ``api_config`` to
            ``ApiTemplate``.

    Returns:
        Whatever ``ApiTemplate.eval`` returns for this dataset.
    """
    template = ApiTemplate(api_config=api_config,
                           metric_list=('rmse', 'smape'))
    # ``finetune`` is the module-level switch of this script.
    return template.eval(dataset=dataset_dict, finetune=finetune)


def evaluate_for_M4(type: str = 'M'):
    """Select the entries of ``M4_FORECASTING_BENCH`` that contain ``type``.

    Args:
        type: value looked up inside every benchmark entry with the ``in``
            operator. The name shadows the builtin; it is kept so that
            existing keyword callers keep working.

    Returns:
        list: the matching entries, in the order they appear in
        ``M4_FORECASTING_BENCH``.
    """
    # Use the membership operator rather than calling ``__contains__`` directly.
    return [data for data in M4_FORECASTING_BENCH if type in data]


if __name__ == "__main__":
    # Run the forecasting loop over one group of the M4 benchmark and
    # pickle the collected results.
    bench = 'M4'
    group = 'M'
    forecast_params = {'forecast_length': 8}
    horizon = forecast_params['forecast_length']
    dataset_list = evaluate_for_M4(group)
    api_config = dict(
        problem='ts_forecasting',
        metric='rmse',
        timeout=5,
        with_tuning=False,
        industrial_strategy='forecasting_assumptions',
        industrial_strategy_params={
            'industrial_task': 'ts_forecasting',
            'data_type': 'time_series'},
        task_params=forecast_params,
        logging_level=50)
    # Accumulator: one entry per dataset, keyed by dataset name.
    result_dict = {}

    for dataset_name in dataset_list:
        dataset_dict = {'benchmark': bench,
                        'dataset': dataset_name,
                        'task_params': forecast_params}
        # Keep the per-dataset result under its own name. Rebinding
        # ``result_dict`` here would drop the results of earlier datasets
        # and make the dict contain itself.
        dataset_result = forecasting_loop(dataset_dict, api_config)
        result_dict[dataset_name] = dataset_result

    with open(f'{bench}_{group}_forecast_length_{horizon}.pkl', 'wb') as f:
        pickle.dump(result_dict, f)
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,27 @@
from fedot_ind.tools.loader import DataLoader

dataset_name = 'Lightning7'
metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')
api_config = dict(problem='classification',
metric='f1',
timeout=5,
n_jobs=2,
with_tuning=False,
industrial_strategy='kernel_automl',
industrial_strategy_params={},
logging_level=20)
train_data, test_data = DataLoader(dataset_name).load_data()
industrial = FedotIndustrial(**api_config)
industrial.fit(train_data)
predict = industrial.predict(test_data, 'ensemble')
predict_proba = industrial.predict_proba(test_data, 'ensemble')
metric = industrial.get_metrics(target=test_data[1],
metric_names=metric_names)
_ = 1
# Metrics requested from ``get_metrics`` after prediction.
metric_names = ('f1', 'accuracy')

# Keyword arguments unpacked into ``FedotIndustrial``.
api_config = {
    'problem': 'classification',
    'metric': 'f1',
    'timeout': 5,
    'n_jobs': 2,
    'with_tuning': False,
    'industrial_strategy': 'kernel_automl',
    'industrial_strategy_params': {
        'industrial_task': 'classification',
        'data_type': 'tensor',
        'learning_strategy': 'all_classes',
        'head_model': 'rf',
    },
    'logging_level': 20,
}

if __name__ == "__main__":
    # Load the train/test split, fit the model, then predict in 'ensemble' mode.
    train_data, test_data = DataLoader(dataset_name).load_data()
    industrial = FedotIndustrial(**api_config)
    industrial.fit(train_data)
    predict = industrial.predict(test_data, 'ensemble')
    predict_proba = industrial.predict_proba(test_data, 'ensemble')
    # The second element of ``test_data`` is passed as the metric target.
    metric = industrial.get_metrics(target=test_data[1],
                                    metric_names=metric_names)
Original file line number Diff line number Diff line change
@@ -1,76 +1,20 @@
import numpy as np
from fedot.core.pipelines.pipeline import Pipeline
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

from fedot_ind.api.main import FedotIndustrial
from fedot_ind.api.utils.data import init_input_data
from fedot_ind.tools.loader import DataLoader


# sklearn-compatible interface
class SklearnCompatibleClassifier(BaseEstimator, ClassifierMixin):
    """Adapter that exposes a FedotIndustrial pipeline through the sklearn API.

    Args:
        estimator (Pipeline): FedotIndustrial pipeline to wrap.
    """

    def __init__(self, estimator: Pipeline):
        self.estimator = estimator
        self.classes_ = None

    def fit(self, X, y):
        """Fit the wrapped estimator on ``(X, y)`` and record the unique labels."""
        train_input = init_input_data(X, y)
        self.estimator.fit(train_input)
        self.classes_ = np.unique(y)
        return self

    def predict(self, X):
        """Return the ``predict`` attribute of the wrapped estimator's output."""
        features = init_input_data(X, None)
        return self.estimator.predict(features).predict

    def predict_proba(self, X):
        """Same as ``predict`` but requests ``output_mode='probs'``."""
        features = init_input_data(X, None)
        output = self.estimator.predict(features, output_mode='probs')
        return output.predict

from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate

if __name__ == "__main__":
dataset_name = 'Libras'
industrial = FedotIndustrial(problem='classification',
metric='f1',
timeout=2,
n_jobs=2,
logging_level=20)

train_data, test_data = DataLoader(dataset_name=dataset_name).load_data()
X_train, X_val, y_train, y_val = train_test_split(
train_data[0], train_data[1], test_size=0.2)
train_data_for_calibration = (X_train, y_train)
val_data = (X_val, y_val)

model = industrial.fit(train_data)

# uncalibrated prediction
proba = industrial.predict_proba(test_data)

# calibration
from sklearn.calibration import CalibratedClassifierCV

model_sklearn = SklearnCompatibleClassifier(model)
model_sklearn.fit(
train_data_for_calibration[0], train_data_for_calibration[1])
cal_clf = CalibratedClassifierCV(
model_sklearn, method="sigmoid", cv="prefit")
cal_clf.fit(val_data[0], val_data[1])
# calibrated prediction
calibrated_proba = cal_clf.predict_proba(test_data[0])

print('base')
print(classification_report(
test_data[1], model_sklearn.classes_[np.argmax(proba, axis=1)]))
print()
print('calibrated')
print(classification_report(test_data[1], model_sklearn.classes_[
np.argmax(calibrated_proba, axis=1)]))
finetune = False
api_config = dict(problem='classification',
metric='f1',
timeout=0.1,
n_jobs=2,
logging_level=20)
api_client = ApiTemplate(api_config=api_config,
metric_list=('f1', 'accuracy'))
result_dict = api_client.eval(dataset=dataset_name, finetune=finetune)
uncalibrated_labels, uncalibrated_probs = result_dict['industrial_model'].predicted_labels, \
result_dict['industrial_model'].predicted_probs
calibrated_probs = result_dict['industrial_model'].predict_proba(predict_data=api_client.test_data,
calibrate_probs=True)
calibrated_labels = np.argmax(calibrated_probs, axis=1) + np.min(uncalibrated_labels)
Loading

0 comments on commit 6440157

Please sign in to comment.