Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix integration tests after major feature supplementation #160

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ jobs:
- name: Install dependencies
run: poetry install

- name: Bump up FEDOT to a stable revision (temporary)
run: poetry add git+https://github.com/aimclub/FEDOT.git
- name: Run tests with pytest
run: poetry run pytest --cov=fedot_ind -s tests/integration

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/poetry_unit_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
run: poetry install

- name: Bump up FEDOT to a stable revision (temporary)
run: poetry add git+https://github.com/aimclub/FEDOT.git@e0b4ee7
run: poetry add git+https://github.com/aimclub/FEDOT.git

- name: Run tests with pytest
run: poetry run pytest --cov=fedot_ind --cov-report xml:coverage.xml tests/unit
Expand Down
4 changes: 2 additions & 2 deletions fedot_ind/core/architecture/abstraction/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ def decorated_func(self, *args, **kwargs):


def use_industrial_fedot_client(func):
def decorated_func(self, *args):
def decorated_func(self, *args, **kwargs):
repo = IndustrialModels()
result = func(self, *args)
result = func(self, *args, **kwargs)
repo.setup_repository()
return result

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def convert_input_to_output(self):
def convert_to_industrial_composing_format(self, mode):
if mode == 'one_dimensional':
new_features, new_target = [
array.reshape(array.shape[0], array.shape[1] * array.shape[2])
array.reshape(array.shape[0], np.prod(array.shape[1:]))
if array is not None and len(array.shape) > 2 else array
for array in [self.input_data.features, self.input_data.target]]
input_data = InputData(
Expand Down
14 changes: 5 additions & 9 deletions fedot_ind/core/models/automl/fedot_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,10 @@ class FedotAutomlImplementation(ModelImplementation):
'classification')

def __init__(self, params: Optional[OperationParameters] = None):
if not params:
params = OperationParameters()
else:
params = params.to_dict()
if 'available_operations' not in params.keys():
params.update({'available_operations': self.AVAILABLE_OPERATIONS})
self.model = Fedot(**params)
super(FedotAutomlImplementation, self).__init__()
super().__init__(params)
if 'available_operations' not in self.params.keys():
self.params.update({'available_operations': self.AVAILABLE_OPERATIONS})
self.model = Fedot(**self.params.to_dict())

def fit(self, input_data: InputData):
self.model.fit(input_data)
Expand Down Expand Up @@ -93,4 +89,4 @@ def predict(
self,
input_data: InputData,
output_mode='default') -> OutputData:
return self.model.predict(input_data)
return self.model.predict(input_data, output_mode=output_mode)
10 changes: 5 additions & 5 deletions fedot_ind/core/models/recurrence/reccurence_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ class RecurrenceExtractor(BaseExtractor):

def __init__(self, params: Optional[OperationParameters] = None):
super().__init__(params)
self.window_size = params.get('window_size', 0)
self.stride = params.get('stride', 1)
self.window_size = self.params.get('window_size', 0)
self.stride = self.params.get('stride', 1)
# TODO add threshold for other metrics
self.rec_metric = params.get('rec_metric', 'cosine')
self.image_mode = params.get('image_mode', False)
self.rec_metric = self.params.get('rec_metric', 'cosine')
self.image_mode = self.params.get('image_mode', False)
self.transformer = TSTransformer
self.extractor = RecurrenceFeatureExtractor

Expand Down Expand Up @@ -75,7 +75,7 @@ def _generate_features_from_ts(self, ts: np.array):

predict = InputData(idx=np.arange(len(features)),
features=features,
target='no_target',
target=None,
task='no_task',
data_type=DataTypesEnum.table,
supplementary_data=col_names)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@


class RSVDDecomposition:
def __init__(self, params: Optional[OperationParameters] = {}):
def __init__(self, params: Optional[OperationParameters] = None):
params = params or {}
self.rank = params.get('rank', 1)
# Polynom degree for power iteration procedure.
self.poly_deg = params.get('power_iter', 3)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def predict(self, trained_operation, predict_data: InputData,
predict_data = self.multi_dim_dispatcher._convert_input_data(
predict_data)
return self.multi_dim_dispatcher.predict(
trained_operation, predict_data, output_mode='labels')
trained_operation, predict_data, output_mode=output_mode)

def predict_for_fit(
self,
Expand All @@ -202,7 +202,7 @@ def predict_for_fit(
predict_data = self.multi_dim_dispatcher._convert_input_data(
predict_data)
return self.multi_dim_dispatcher.predict_for_fit(
trained_operation, predict_data, output_mode='labels')
trained_operation, predict_data, output_mode=output_mode)


class IndustrialSkLearnForecastingStrategy(
Expand All @@ -228,7 +228,7 @@ def predict(self, trained_operation, predict_data: InputData,
predict_data = self.multi_dim_dispatcher._convert_input_data(
predict_data, mode=self.multi_dim_dispatcher.mode)
predict_output = self.multi_dim_dispatcher.predict(
trained_operation, predict_data, output_mode='labels')
trained_operation, predict_data, output_mode=output_mode)
predict_output.predict = self.ensemble_func(
predict_output.predict, axis=0)
return predict_output
Expand All @@ -241,7 +241,7 @@ def predict_for_fit(
predict_data = self.multi_dim_dispatcher._convert_input_data(
predict_data, mode=self.multi_dim_dispatcher.mode)
predict_output = self.multi_dim_dispatcher.predict_for_fit(
trained_operation, predict_data, output_mode='labels')
trained_operation, predict_data, output_mode=output_mode)
predict_output.predict = self.ensemble_func(
predict_output.predict, axis=0)
return predict_output
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,6 @@
},
"iforest_detector": {
"window_length": 10,
"anomaly_thr": null,
"n_jobs": 2,
"contamination": 0.0005,
"random_state": 42
Expand Down
2 changes: 1 addition & 1 deletion fedot_ind/core/tuning/search_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
'recurrence_extractor':
{'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]},
'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]},
'rec_metric': (hp.choice, [['cosine', 'euclidean']]),
'rec_metric': {'hyperopt-dist': hp.choice, 'sampling-scope': [['cosine', 'euclidean']]},
'image_mode': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}},
'minirocket_extractor':
{'num_features': {'hyperopt-dist': hp.choice,
Expand Down
67 changes: 67 additions & 0 deletions tests/integration/integration_test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from fedot_ind.api.main import FedotIndustrial

from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate

# Metric name requested from the API for each supported problem type;
# looked up as METRICS[problem] when the API configuration is built.
METRICS = dict(
    classification='f1',
    regression='mse',
    ts_forecasting='mse',
)
# Forwarded as the ``finetune`` argument of ``ApiTemplate.eval``.
FINETUNE = False


def data(name):
    """Load the train and test parts of a named dataset.

    Args:
        name: dataset name, forwarded to ``DataLoader`` as ``dataset_name``.

    Returns:
        The ``(train_data, test_data)`` pair unpacked from
        ``DataLoader(...).load_data()``.
    """
    # DataLoader is not imported at module level in this file, so it is
    # brought into scope here to keep the helper callable.
    from fedot_ind.tools.loader import DataLoader

    train_data, test_data = DataLoader(dataset_name=name).load_data()
    return train_data, test_data


def basic_launch(task, train_data, test_data):
    """Fit ``FedotIndustrial`` with minimal settings and smoke-check its output.

    Args:
        task: value passed to ``FedotIndustrial`` as ``problem``.
        train_data: data handed to ``fit``.
        test_data: data handed to ``predict`` and ``predict_proba``.

    Returns:
        Tuple ``(labels, probs)`` with the results of ``predict`` and
        ``predict_proba`` respectively.

    Raises:
        AssertionError: if either prediction is ``None``.
    """
    settings = {'problem': task, 'timeout': 0.1, 'n_jobs': -1}
    model = FedotIndustrial(**settings)
    model.fit(train_data)

    # Both predictions are computed before either is checked.
    predicted_labels = model.predict(test_data)
    predicted_probs = model.predict_proba(test_data)
    assert predicted_labels is not None
    assert predicted_probs is not None
    return predicted_labels, predicted_probs


def _build_api_config(problem, industrial_strategy, other_configs):
    """Assemble the ``FedotIndustrial`` keyword arguments shared by the launchers.

    Args:
        problem: problem type; also used as the key into ``METRICS`` and as
            the ``industrial_task`` entry of ``industrial_task_params``.
        industrial_strategy: value for the ``industrial_strategy`` setting.
        other_configs: mapping of extra settings supplied by the caller.

    Returns:
        A new dict with the default settings merged with ``other_configs``.

    Raises:
        KeyError: if ``problem`` has no entry in ``METRICS``.
    """
    defaults = dict(problem=problem,
                    metric=METRICS[problem],
                    timeout=0.1,
                    n_jobs=-1,
                    industrial_strategy=industrial_strategy,
                    industrial_task_params={'industrial_task': problem,
                                            'data_type': 'time_series'},
                    use_input_preprocessing=True,
                    industrial_strategy_params={},
                    # 20 is logging.INFO in the stdlib numbering; presumably
                    # interpreted that way by the API - TODO confirm.
                    logging_level=20)
    # Caller-supplied settings take precedence over the defaults on collisions.
    return defaults | other_configs


def launch_api_with_data(problem, industrial_strategy, train_data, test_data, **other_configs):
    """Fit ``FedotIndustrial`` on already loaded data and smoke-check predictions.

    This variant was originally also named ``launch_api`` and was shadowed by
    the dataset-name based definition below, so it could never be called; it
    carries its own name to be reachable.

    Args:
        problem: problem type (must be a key of ``METRICS``).
        industrial_strategy: value for the ``industrial_strategy`` setting.
        train_data: data handed to ``fit``.
        test_data: data handed to ``predict`` and ``predict_proba``.
        **other_configs: extra ``FedotIndustrial`` settings overriding defaults.

    Returns:
        Tuple ``(labels, probs)`` with the results of ``predict`` and
        ``predict_proba`` respectively.

    Raises:
        AssertionError: if either prediction is ``None``.
    """
    api_config = _build_api_config(problem, industrial_strategy, other_configs)
    industrial = FedotIndustrial(**api_config)

    industrial.fit(train_data)
    labels = industrial.predict(test_data)
    probs = industrial.predict_proba(test_data)
    assert labels is not None
    assert probs is not None
    return labels, probs


def launch_api(problem, industrial_strategy, dataset_name, **other_configs):
    """Evaluate the API on a named dataset through ``ApiTemplate``.

    Args:
        problem: problem type (must be a key of ``METRICS``).
        industrial_strategy: value for the ``industrial_strategy`` setting.
        dataset_name: passed to ``ApiTemplate.eval`` as ``dataset``.
        **other_configs: extra ``FedotIndustrial`` settings overriding defaults.

    Raises:
        AssertionError: if the evaluation result is ``None``.
    """
    api_config = _build_api_config(problem, industrial_strategy, other_configs)
    result_dict = ApiTemplate(api_config=api_config,
                              metric_list=METRICS[problem]
                              ).eval(dataset=dataset_name, finetune=FINETUNE)
    assert result_dict is not None
44 changes: 25 additions & 19 deletions tests/integration/repository/test_pipeline_tuning.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import pytest

from fedot.api.main import Fedot
from fedot_ind.core.architecture.settings.computational import backend_methods as np
from fedot.core.composer.metrics import F1
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
Expand All @@ -13,12 +16,16 @@ def test_fedot_multi_series():
with IndustrialModels():
train_data, test_data = initialize_multi_data()
pipeline = PipelineBuilder() \
.add_node('eigen_basis', params={'window_size': None}) \
.add_node('eigen_basis') \
.add_node('quantile_extractor') \
.add_node('rf') \
.build()
pipeline.fit(train_data)
predict = pipeline.predict(test_data, output_mode='labels')
# TODO: output_mode doesn't affect predict form
# TODO: remove temp workaround with argmax
if test_data.target.shape != predict.predict.shape:
predict.predict = np.argmax(predict.predict, axis=1)
print(F1.metric(test_data, predict))


Expand All @@ -36,25 +43,24 @@ def initialize_multi_data():
return train_input_data, test_input_data


def test_industrial_uni_series():
@pytest.mark.parametrize('extractor', (
'quantile_extractor',
'recurrence_extractor',
'topological_extractor',
# 'signal_extractor',
))
def test_industrial_uni_series(extractor):
with IndustrialModels():
train_data, test_data = initialize_uni_data()

metrics = {}
for extractor_name in ['topological_extractor',
'quantile_extractor',
# 'signal_extractor',
'recurrence_extractor']:
pipeline = PipelineBuilder() \
.add_node('eigen_basis') \
.add_node(extractor_name) \
.add_node('rf').build()
model = Fedot(problem='classification', timeout=1,
initial_assumption=pipeline, n_jobs=1)
model.fit(train_data)
model.predict(test_data)
model.get_metrics()
print(metrics)
pipeline = PipelineBuilder() \
.add_node('eigen_basis') \
.add_node(extractor) \
.add_node('rf').build()
model = Fedot(problem='classification', timeout=1,
initial_assumption=pipeline, n_jobs=1)
model.fit(train_data)
model.predict(test_data)
model.get_metrics()


def test_tuner_industrial_uni_series():
Expand All @@ -63,7 +69,7 @@ def test_tuner_industrial_uni_series():
# search_space = SearchSpace(get_industrial_search_space(1))
pipeline_builder = PipelineBuilder()
pipeline_builder.add_node('eigen_basis')
pipeline_builder.add_node('quantile_extractor')
pipeline_builder.add_node('recurrence_extractor')
pipeline_builder.add_node('rf')

pipeline = pipeline_builder.build()
Expand Down
37 changes: 19 additions & 18 deletions tests/integration/ts_anomaly_detection/test_anomaly_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
def convert_anomalies_dict_to_points(
series: np.array,
anomaly_dict: Dict) -> np.array:
points = np.array(['no_anomaly' for _ in range(len(series))], dtype=object)
points = np.zeros(len(series))
for anomaly_class in anomaly_dict:
for interval in anomaly_dict[anomaly_class]:
points[interval[0]:interval[1]] = anomaly_class
Expand Down Expand Up @@ -50,7 +50,7 @@ def generate_time_series(ts_length: int = 500,
else:
time_series = np.vstack([np.random.normal(0, 1, ts_length)
for _ in range(dimension)]).swapaxes(1, 0)
anomaly_classes = [f'anomaly{i + 1}' for i in range(num_anomaly_classes)]
anomaly_classes = [i + 1 for i in range(num_anomaly_classes)]

anomaly_intervals = {}

Expand All @@ -63,8 +63,7 @@ def generate_time_series(ts_length: int = 500,
end_idx = start_idx + np.random.randint(min_anomaly_length,
max_anomaly_length + 1)

anomaly = np.random.normal(
int(anomaly_class[-1]), 1, end_idx - start_idx)
anomaly = np.random.normal(anomaly_class, 1, end_idx - start_idx)

if dimension == 1:
time_series[start_idx:end_idx] += anomaly
Expand All @@ -83,20 +82,22 @@ def generate_time_series(ts_length: int = 500,
@pytest.mark.parametrize('dimension', [1, 3])
def test_anomaly_detection(dimension):
np.random.seed(42)
time_series, anomaly_intervals = generate_time_series(
ts_length=1000,
dimension=dimension,
num_anomaly_classes=2,
num_of_anomalies=50)

time_series, anomaly_intervals = generate_time_series(ts_length=1000,
dimension=dimension,
num_anomaly_classes=2,
num_of_anomalies=50)
series_train, anomaly_train, series_test, anomaly_test = split_series(
time_series, anomaly_intervals, test_part=300)

point_train = convert_anomalies_dict_to_points(series_train, anomaly_train)
point_test = convert_anomalies_dict_to_points(series_test, anomaly_test)

industrial = FedotIndustrial(task='anomaly_detection',
industrial = FedotIndustrial(problem='classification',
dataset='custom_dataset',
strategy='fedot_preset',
industrial_strategy='anomaly_detection',
industrial_task_params={
'detection_window': 10,
'data_type': 'time_series',
},
branch_nodes=['eigen_basis'],
tuning_timeout=2,
tuning_iterations=2,
Expand All @@ -105,14 +106,13 @@ def test_anomaly_detection(dimension):
n_jobs=-1,
logging_level=20)

model = industrial.fit(features=series_train,
anomaly_dict=anomaly_train)
model = industrial.fit(input_data=(series_train, point_train))

# industrial.solver.save('model')

# prediction before loading
labels_before = industrial.predict(features=series_test)
probs_before = industrial.predict_proba(features=series_test)
labels_before = industrial.predict(predict_data=(series_test, point_test))
probs_before = industrial.predict_proba(predict_data=(series_test, point_test))

# industrial.solver.load('model')

Expand All @@ -121,7 +121,8 @@ def test_anomaly_detection(dimension):
# probs_after = industrial.predict_proba(features=series_test)

metrics = industrial.solver.get_metrics(target=point_test,
metric_names=['f1', 'roc_auc'])
rounding_order=3,
metric_names=('f1', 'roc_auc'))

# shutil.rmtree('model')
#
Expand Down
Loading
Loading