[FI-60] Unit and Integration tests improvement (#145)
technocreep authored Jul 4, 2024
1 parent d5ff39d commit 7a4843c
Showing 75 changed files with 1,477 additions and 1,030 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pep8_action.yml
@@ -18,7 +18,7 @@ jobs:
id: autopep8
uses: peter-evans/autopep8@v2
with:
args: --exit-code --recursive --in-place --aggressive --aggressive .
args: --exit-code --recursive --in-place --max-line-length 120 --experimental --aggressive --aggressive .
- name: Commit autopep8 changes
if: steps.autopep8.outputs.exit-code == 2
run: |
4 changes: 2 additions & 2 deletions .github/workflows/poetry_unit_test.yml
@@ -13,7 +13,7 @@ jobs:
timeout-minutes: 30
strategy:
matrix:
python-version: [3.8, 3.9, '3.10']
python-version: [3.9, '3.10']

steps:
- uses: actions/checkout@v2
@@ -44,6 +44,6 @@ jobs:
- name: Codecov-coverage
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
token: ${{ secrets.CODECOV_TOKEN }}
flags: unittests
@@ -31,7 +31,7 @@

api_config = dict(problem='classification',
metric='accuracy',
timeout=15,
timeout=0.1,
with_tuning=False,
industrial_strategy='lora_strategy',
industrial_strategy_params=lora_params,
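For orientation, a minimal sketch (not taken from the diff) of how an api_config like the one above is consumed; the dataset name is illustrative and the loader usage follows the example scripts changed elsewhere in this commit:

# Sketch only: assumes the api_config dict defined above is in scope.
from fedot_ind.api.main import FedotIndustrial
from fedot_ind.tools.loader import DataLoader

train_data, test_data = DataLoader('Lightning7').load_data()  # illustrative dataset

industrial = FedotIndustrial(**api_config)
industrial.fit(train_data)
labels = industrial.predict(test_data)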
@@ -1,17 +1,22 @@
from fedot_ind.api.main import FedotIndustrial
from fedot_ind.tools.loader import DataLoader
from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator

dataset_name = 'Lightning7'
metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')
api_config = dict(problem='classification',
metric='f1',
timeout=5,
timeout=0.1,
n_jobs=2,
industrial_strategy='federated_automl',
industrial_strategy_params={},
logging_level=20)
train_data, test_data = DataLoader(dataset_name).load_data()

# Huge synthetic dataset for experiment
train_data, test_data = TimeSeriesDatasetsGenerator(num_samples=1800,
task='classification',
max_ts_len=50,
binary=True,
test_size=0.5,
multivariate=False).generate_data()

industrial = FedotIndustrial(**api_config)
industrial.fit(train_data)
predict = industrial.predict(test_data)
_ = 1
@@ -252,9 +252,10 @@ def enable_disable_lora(enabled=True):
# The original weights have been moved to net.linear1.parametrizations.weight.original
# More info here:
# https://pytorch.org/tutorials/intermediate/parametrizations.html#inspecting-a-parametrized-module
assert torch.equal(docnn_model.linear1.weight, docnn_model.linear1.parametrizations.weight.original +
(docnn_model.linear1.parametrizations.weight[0].lora_B @ docnn_model.linear1.parametrizations.weight[0].lora_A) *
docnn_model.linear1.parametrizations.weight[0].scale)
assert torch.equal(
docnn_model.linear1.weight, docnn_model.linear1.parametrizations.weight.original +
(docnn_model.linear1.parametrizations.weight[0].lora_B @ docnn_model.linear1.parametrizations.weight[0].lora_A) *
docnn_model.linear1.parametrizations.weight[0].scale)

enable_disable_lora(enabled=False)
# If we disable LoRA, the linear1.weight is the original one
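For readers checking the identity asserted above: with LoRA the effective weight equals the frozen original weight plus a low-rank update scaled by a constant. A minimal standalone sketch, with illustrative shapes and scale rather than the project's actual values:

import torch

# Illustrative dimensions and scale; the real model uses its own values.
d_out, d_in, rank = 8, 16, 2
scale = 0.5

w_original = torch.randn(d_out, d_in)   # frozen pretrained weight
lora_a = torch.randn(rank, d_in)        # low-rank factor A
lora_b = torch.zeros(d_out, rank)       # low-rank factor B, zero-initialised

# Effective weight mirroring the assert above: W + (B @ A) * scale.
w_effective = w_original + (lora_b @ lora_a) * scale

# Because B starts at zero, enabling LoRA does not change the layer's output initially.
assert torch.equal(w_effective, w_original)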
2 changes: 1 addition & 1 deletion fedot_ind/api/main.py
@@ -146,7 +146,7 @@ def __init__(self, **kwargs):
api_config=self.config_dict,
industrial_strategy=self.industrial_strategy,
industrial_strategy_params=self.industrial_strategy_params,
logger=self.logger)
)

def __init_experiment_setup(self):
self.logger.info('Initialising experiment setup')
81 changes: 46 additions & 35 deletions fedot_ind/api/utils/industrial_strategy.py
@@ -1,3 +1,4 @@
import logging
from copy import deepcopy

import numpy as np
@@ -9,7 +10,7 @@
from fedot.core.repository.dataset_types import DataTypesEnum

from fedot_ind.core.ensemble.kernel_ensemble import KernelEnsembler
from fedot_ind.core.ensemble.random_automl_forest import RAFensembler
from fedot_ind.core.ensemble.random_automl_forest import RAFEnsembler
from fedot_ind.core.repository.constanst_repository import BATCH_SIZE_FOR_FEDOT_WORKER, FEDOT_WORKER_NUM, \
FEDOT_WORKER_TIMEOUT_PARTITION, FEDOT_TUNING_METRICS, FEDOT_TUNER_STRATEGY, FEDOT_TS_FORECASTING_ASSUMPTIONS, \
FEDOT_TASK
@@ -18,10 +19,10 @@


class IndustrialStrategy:
def __init__(self, industrial_strategy_params,
def __init__(self,
industrial_strategy_params,
industrial_strategy,
api_config,
logger=None
):
self.industrial_strategy_params = industrial_strategy_params
self.industrial_strategy = industrial_strategy
@@ -51,7 +52,7 @@ def __init__(self, industrial_strategy_params,
self.ensemble_strategy = list(self.ensemble_strategy_dict.keys())
self.random_label = None
self.config_dict = api_config
self.logger = logger
self.logger = logging.getLogger('IndustrialStrategy')
self.repo = IndustrialModels().setup_repository()
self.kernel_ensembler = KernelEnsembler
self.RAF_workers = None
@@ -62,33 +63,39 @@ def fit(self, input_data):
return self.solver

def predict(self, input_data, predict_mode):
return self.industrial_strategy_predict[self.industrial_strategy](
input_data, predict_mode)
return self.industrial_strategy_predict[self.industrial_strategy](input_data,
predict_mode)

def _federated_strategy(self, input_data):
if input_data.features.shape[0] > BATCH_SIZE_FOR_FEDOT_WORKER:

n_samples = input_data.features.shape[0]
if n_samples > BATCH_SIZE_FOR_FEDOT_WORKER:
self.logger.info('RAF algorithm was applied')

if self.RAF_workers is None:
batch_size = FEDOT_WORKER_NUM
else:
batch_size = round(
input_data.features.shape[0] /
self.RAF_workers)
# batch_size = round(input_data.features.shape[0] / self.RAF_workers if self.RAF_workers
# is not None else FEDOT_WORKER_NUM)
batch_timeout = round(
self.config_dict['timeout'] /
FEDOT_WORKER_TIMEOUT_PARTITION)
self.config_dict['timeout'] = batch_timeout
self.logger.info(
f'Batch_size - {batch_size}. Number of batches - {self.RAF_workers}')
self.solver = RAFensembler(composing_params=self.config_dict,
self.RAF_workers = FEDOT_WORKER_NUM
batch_size = round(input_data.features.shape[0] / self.RAF_workers)

min_timeout = 0.5
selected_timeout = round(self.config_dict['timeout'] / FEDOT_WORKER_TIMEOUT_PARTITION)
self.config_dict['timeout'] = max(min_timeout, selected_timeout)

self.logger.info(f'Batch_size - {batch_size}. Number of batches - {self.RAF_workers}')

self.solver = RAFEnsembler(composing_params=self.config_dict,
n_splits=self.RAF_workers,
batch_size=batch_size)
self.logger.info(
f'Number of AutoMl models in ensemble - {self.solver.n_splits}')

self.solver.fit(input_data)

else:
self.logger.info(f'RAF algorithm is not applicable: n_samples={n_samples} < {BATCH_SIZE_FOR_FEDOT_WORKER}. '
f'FEDOT algorithm was applied')
self.solver = Fedot(**self.config_dict)
self.solver.fit(input_data)

def _forecasting_strategy(self, input_data):
self.logger.info('TS forecasting algorithm was applied')
self.config_dict['timeout'] = round(self.config_dict['timeout'] / 3)
@@ -170,23 +177,27 @@ def _lora_strategy(self, input_data):
def _federated_predict(self,
input_data,
mode: str = 'labels'):
self.predicted_branch_probs = [
x.predict(input_data).predict for x in self.solver.root_node.nodes_from]
self.predicted_branch_labels = [
np.argmax(x, axis=1) for x in self.predicted_branch_probs]
n_samples, n_channels, n_classes = self.predicted_branch_probs[0].shape[0], \
len(self.predicted_branch_probs), \
self.predicted_branch_probs[0].shape[1]
head_model = deepcopy(self.solver.root_node)
valid_nodes = self.solver.current_pipeline.root_node.nodes_from
self.predicted_branch_probs = [x.predict(input_data).predict for x in valid_nodes]

# reshape if binary
if len(self.predicted_branch_probs[0].shape) < 2:
self.predicted_branch_probs = [np.array([x, 1 - x]).T for x in self.predicted_branch_probs]

self.predicted_branch_labels = [np.argmax(x, axis=1) for x in self.predicted_branch_probs]

n_samples = self.predicted_branch_probs[0].shape[0]
n_channels = len(self.predicted_branch_probs)

head_model = deepcopy(self.solver.current_pipeline.root_node)
head_model.nodes_from = []
input_data.features = np.hstack(
self.predicted_branch_labels).reshape(
n_samples, n_channels, 1)
head_predict = head_model.predict(self.predict_data).predict
input_data.features = np.hstack(self.predicted_branch_labels).reshape(n_samples,
n_channels,
1)
if mode == 'labels':
return head_predict
return head_model.predict(input_data, 'labels').predict
else:
return np.argmax(head_predict, axis=1)
return head_model.predict(input_data).predict

def _forecasting_predict(self,
input_data,
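The new worker-sizing logic in _federated_strategy (default worker count, per-worker batch size, and a lower bound on the per-batch timeout) can be read in isolation. A minimal sketch, using placeholder values for the constants that actually live in constanst_repository:

# Placeholder constants; the real values come from
# fedot_ind.core.repository.constanst_repository.
FEDOT_WORKER_NUM = 5
FEDOT_WORKER_TIMEOUT_PARTITION = 4


def raf_partition(n_samples, timeout, raf_workers=None):
    """Mirror the batch/timeout arithmetic used by _federated_strategy."""
    workers = raf_workers if raf_workers is not None else FEDOT_WORKER_NUM
    batch_size = round(n_samples / workers)
    # The per-batch timeout is clamped from below at 0.5, as in the new code.
    per_batch_timeout = max(0.5, round(timeout / FEDOT_WORKER_TIMEOUT_PARTITION))
    return batch_size, per_batch_timeout


# Example: 10 000 samples and a timeout of 10 give 2 000 samples per batch
# and a per-batch timeout of 2 under these placeholder constants.
batch_size, per_batch_timeout = raf_partition(10_000, 10)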
91 changes: 0 additions & 91 deletions fedot_ind/core/architecture/pipelines/abstract_pipeline.py
@@ -4,7 +4,6 @@
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
from fedot_ind.tools.loader import DataLoader


BENCHMARK = 'M4'


@@ -77,93 +76,3 @@ def evaluate_pipeline(self, node_list, dataset):
predict_labels=predict.predict,
predict_probs=predict_proba.predict,
quality_metric=metric)

# class AbstractPipelines:
# def __init__(self, train_data, test_data):
# self.train_features = train_data[0]
# self.train_target = train_data[1]
# self.test_features = test_data[0]
# self.test_target = test_data[1]
# self.basis = None
#
# self.basis_dict = {i.name: i.value for i in BasisTransformations}
# self.model_dict = {i.name: i.value for i in MlModel}
# self.feature_generator_dict = {
# i.name: i.value for i in FeatureGenerator}
#
# self.generators_with_matrix_input = ['topological',
# 'wavelet',
# 'recurrence',
# 'quantile']
#
# def _evaluate(self, classificator, train_features, test_features):
# fitted_model = classificator.fit(train_features=train_features,
# train_target=self.train_target)
# predicted_probs_labels = (classificator.predict(test_features=test_features),
# classificator.predict_proba(test_features=test_features))
# metrics = PerformanceAnalyzer().calculate_metrics(target=self.test_target,
# predicted_labels=predicted_probs_labels[0],
# predicted_probs=predicted_probs_labels[1])
# return fitted_model, metrics
#
# def get_feature_generator(self, **kwargs):
# pass
#
# def _get_feature_matrix(self, list_of_features, mode: str = 'Multi', **kwargs):
# if mode == '1D':
# feature_matrix = pd.concat(list_of_features, axis=0)
# if feature_matrix.shape[0] != len(list_of_features):
# feature_matrix = pd.concat(list_of_features, axis=1)
# elif mode == 'MultiEnsemble':
# feature_matrix = []
# for i in range(len(list_of_features[0])):
# _ = []
# for feature_set in list_of_features:
# _.append(feature_set[i])
# feature_matrix.append(pd.concat(_, axis=0))
# elif mode == 'list_of_ts':
# feature_matrix = []
# for ts in list_of_features:
# list_of_windows = []
# for step in range(0, ts.shape[1], kwargs['window_length']):
# list_of_windows.append(
# ts[:, step:step + kwargs['window_length']])
# feature_matrix.append(list_of_windows)
# else:
# feature_matrix = pd.concat(
# [pd.concat(feature_set, axis=1) for feature_set in list_of_features], axis=0)
# return feature_matrix
#
# def _init_pipeline_nodes(self, model_type: str = 'tsc', **kwargs):
# if 'feature_generator_type' not in kwargs.keys():
# generator = self.feature_generator_dict['quantile']
# else:
# generator = self.feature_generator_dict[kwargs['feature_generator_type']]
# try:
# feature_extractor = generator(params=kwargs['feature_hyperparams'])
#
# except AttributeError:
# with open(PATH_TO_DEFAULT_PARAMS, 'r') as file:
# _feature_gen_params = json.load(file)
# params = _feature_gen_params[f'{generator}_extractor']
# feature_extractor = generator(params)
# try:
# classificator = self.model_dict[model_type](model_hyperparams=kwargs['model_hyperparams'],
# generator_name=kwargs['feature_generator_type'],
# generator_runner=feature_extractor)
# except Exception:
# classificator = None
#
# lambda_func_dict = {'create_list_of_ts': lambda x: ListMonad(*x.values.tolist()),
# 'scale': lambda time_series: pd.DataFrame(MinMaxScaler().fit_transform(
# time_series.to_numpy())),
# 'transpose_matrix': lambda time_series: time_series.T,
# 'reduce_basis': lambda x: x[:, 0] if x.shape[1] == 1 else x[:, kwargs['component']],
# 'extract_features': lambda x: feature_extractor.get_features(x),
# 'fit_model': lambda x: classificator.fit(train_features=x, train_target=self.train_target),
# 'predict': lambda x: ListMonad({'predicted_labels': classificator.predict(test_features=x),
# 'predicted_probs': classificator.predict_proba(
# test_features=x)})
# }
#
# return feature_extractor, classificator, lambda_func_dict
@@ -54,12 +54,10 @@ def update_individual(cls: Type[Individual], json_obj: Dict[str, Any]):
class RenameUnpickler(pickle.Unpickler):
def find_class(self, module: str, name: str):
renamed_module = module
changed_import_list = [
'fedot_ind.core.repository.initializer_industrial_models']
changed_import_list = ['fedot_ind.core.repository.initializer_industrial_models']
if module in changed_import_list:
renamed_module = module.replace(
"golem.core.utilities",
"fedot_ind.core.repository.industrial_implementations.optimisation")
renamed_module = module.replace("golem.core.utilities",
"fedot_ind.core.repository.industrial_implementations.optimisation")
return super(RenameUnpickler, self).find_class(renamed_module, name)


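RenameUnpickler only overrides pickle.Unpickler.find_class, so it is used like the standard unpickler; a short usage sketch (the helper name and the in-memory buffer are illustrative, not part of the diff):

import io
import pickle


def renamed_load(file_obj):
    # Hypothetical helper: unpickle while remapping the renamed modules above.
    return RenameUnpickler(file_obj).load()


# In-memory pickle for demonstration; real usage would pass an opened file
# containing a serialised pipeline or optimisation history.
buffer = io.BytesIO(pickle.dumps({'answer': 42}))
restored = renamed_load(buffer)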
20 changes: 8 additions & 12 deletions fedot_ind/core/architecture/postprocessing/results_picker.py
@@ -61,22 +61,18 @@ def run(self, get_metrics_df: bool = False, add_info: bool = False):
return proba_dict, metric_dict

def _create_metrics_df(self, metric_dict):
columns = ['dataset', 'experiment']
metrics_df = pd.DataFrame()
for ds in metric_dict.keys():
for exp in metric_dict[ds].keys():
metrics = metric_dict[ds][exp].to_dict(orient='records')[0]
metrics_df = metrics_df.append({'dataset': ds,
'experiment': exp,
'f1': metrics.get('f1'),
'roc_auc': metrics.get('roc_auc'),
'accuracy': metrics.get('accuracy'),
'precision': metrics.get('precision'),
'logloss': metrics.get('logloss')},
ignore_index=True)

metrics_df = pd.concat([metrics_df[['dataset', 'experiment']], metrics_df[[
col for col in metrics_df.columns if col not in columns]]], axis=1)
df = pd.DataFrame.from_dict({'dataset': ds,
'experiment': exp,
'f1': metrics.get('f1'),
'roc_auc': metrics.get('roc_auc'),
'accuracy': metrics.get('accuracy'),
'precision': metrics.get('precision'),
'logloss': metrics.get('logloss')}, orient='index').T
metrics_df = pd.concat([metrics_df, df], axis=0)
return metrics_df

def get_metrics_and_proba(self):
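The rewrite above drops pandas DataFrame.append (removed in pandas 2.0) in favour of building one-row frames and concatenating them; a minimal sketch of the same pattern with made-up metric values:

import pandas as pd

rows = [
    {'dataset': 'Lightning7', 'experiment': 'exp_1', 'f1': 0.81, 'accuracy': 0.83},
    {'dataset': 'Lightning7', 'experiment': 'exp_2', 'f1': 0.79, 'accuracy': 0.80},
]

metrics_df = pd.DataFrame()
for row in rows:
    # from_dict(..., orient='index') puts the keys in the index; .T turns that
    # into a single-row frame with the keys as columns.
    one_row = pd.DataFrame.from_dict(row, orient='index').T
    metrics_df = pd.concat([metrics_df, one_row], axis=0)

metrics_df = metrics_df.reset_index(drop=True)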