Reproducibility fixes (#1351)
* Reproducibility fixes #1278

* fix requirements

---------

Co-authored-by: nicl-nno <[email protected]>
Co-authored-by: Vadim Akhmerov <[email protected]>
3 people authored Dec 22, 2024
1 parent b0618df commit 1510d77
Showing 13 changed files with 67 additions and 59 deletions.
4 changes: 3 additions & 1 deletion fedot/api/api_utils/api_params_repository.py
@@ -67,7 +67,8 @@ def default_params_for_task(task_type: TaskTypesEnum) -> dict:
             cache_dir=default_fedot_data_dir(),
             keep_history=True,
             history_dir=default_fedot_data_dir(),
-            with_tuning=True
+            with_tuning=True,
+            seed=None
         )
         return default_param_values_dict

@@ -118,6 +119,7 @@ def get_params_for_gp_algorithm_params(self, params: dict) -> dict:
             gp_algorithm_params['genetic_scheme_type'] = GeneticSchemeTypesEnum.steady_state

         gp_algorithm_params['mutation_types'] = ApiParamsRepository._get_default_mutations(self.task_type, params)
+        gp_algorithm_params['seed'] = params['seed']
         return gp_algorithm_params

     @staticmethod
8 changes: 7 additions & 1 deletion fedot/api/api_utils/params.py
@@ -26,14 +26,15 @@
 class ApiParams(UserDict):

     def __init__(self, input_params: Dict[str, Any], problem: str, task_params: Optional[TaskParams] = None,
-                 n_jobs: int = -1, timeout: float = 5):
+                 n_jobs: int = -1, timeout: float = 5, seed=None):
         self.log: LoggerAdapter = default_log(self)
         self.task: Task = self._get_task_with_params(problem, task_params)
         self.n_jobs: int = determine_n_jobs(n_jobs)
         self.timeout = timeout

         self._params_repository = ApiParamsRepository(self.task.task_type)
         parameters: dict = self._params_repository.check_and_set_default_params(input_params)
+        parameters['seed'] = seed
         super().__init__(parameters)
         self._check_timeout_vs_generations()

@@ -139,9 +140,14 @@ def init_optimizer_params(self, multi_objective: bool) -> GPAlgorithmParameters:
         """Method to initialize ``GPAlgorithmParameters``"""
         gp_algorithm_parameters = self._params_repository.get_params_for_gp_algorithm_params(self.data)

+        # workaround for "{TypeError}__init__() got an unexpected keyword argument 'seed'"
+        seed = gp_algorithm_parameters['seed']
+        del gp_algorithm_parameters['seed']
+
         self.optimizer_params = GPAlgorithmParameters(
             multi_objective=multi_objective, **gp_algorithm_parameters
         )
+        self.optimizer_params.seed = seed
         return self.optimizer_params

     def init_graph_generation_params(self, requirements: PipelineComposerRequirements) -> GraphGenerationParams:
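
The deletion-before-construction in the second hunk is needed because GPAlgorithmParameters.__init__ rejects unknown keyword arguments. A minimal standalone sketch of the pattern (the pop_size value is illustrative; only the seed handling mirrors the diff):

    from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters

    gp_params = {'pop_size': 5, 'seed': 42}
    # 'seed' is not an accepted __init__ kwarg, so remove it before construction
    seed = gp_params.pop('seed')
    optimizer_params = GPAlgorithmParameters(**gp_params)
    # ...and attach it afterwards, as init_optimizer_params does above
    optimizer_params.seed = seed
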
6 changes: 3 additions & 3 deletions fedot/api/main.py
@@ -33,9 +33,9 @@
 from fedot.explainability.explainers import explain_pipeline
 from fedot.preprocessing.base_preprocessing import BasePreprocessor
 from fedot.remote.remote_evaluator import RemoteEvaluator
+from fedot.utilities.composer_timer import fedot_composer_timer
 from fedot.utilities.define_metric_by_task import MetricByTask
 from fedot.utilities.memory import MemoryAnalytics
-from fedot.utilities.composer_timer import fedot_composer_timer
 from fedot.utilities.project_import_export import export_project_to_zip, import_project_from_zip

 NOT_FITTED_ERR_MSG = 'Model not fitted yet'

@@ -95,7 +95,7 @@ def __init__(self,
         self.log = self._init_logger(logging_level)

         # Attributes for dealing with metrics, data sources and hyperparameters
-        self.params = ApiParams(composer_tuner_params, problem, task_params, n_jobs, timeout)
+        self.params = ApiParams(composer_tuner_params, problem, task_params, n_jobs, timeout, seed)

         default_metrics = MetricByTask.get_default_quality_metrics(self.params.task.task_type)
         passed_metrics = self.params.get('metric')

@@ -256,7 +256,7 @@ def tune(self,
                  .with_timeout(timeout)
                  .build(input_data))

-        self.current_pipeline = pipeline_tuner.tune(self.current_pipeline, show_progress)
+        self.current_pipeline = pipeline_tuner.tune(self.current_pipeline, show_progress=show_progress)
         self.api_composer.was_tuned = pipeline_tuner.was_tuned

         # Tuner returns a not fitted pipeline, and it is required to fit on train dataset
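
With seed now forwarded from the Fedot facade into ApiParams, a reproducible run can be requested directly from the public API. A minimal sketch mirroring the parameters used in test_model_result_reproducing.py (the training data is assumed to be prepared elsewhere):

    from fedot.api.main import Fedot
    from fedot.core.repository.tasks import TsForecastingParams

    # train_data: an InputData instance for a ts-forecasting task (assumed)
    model = Fedot(problem='ts_forecasting',
                  task_params=TsForecastingParams(forecast_length=5),
                  seed=1, timeout=None, pop_size=5,
                  num_of_generations=5, with_tuning=False)
    model.fit(features=train_data)
    # a second run with the same seed should reproduce the composing history
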
5 changes: 3 additions & 2 deletions fedot/core/pipelines/tuning/tuner_builder.py
@@ -1,6 +1,7 @@
 from datetime import timedelta
 from typing import Iterable, Sequence, Type, Union

+from golem.core.tuning.iopt_tuner import IOptTuner
 from golem.core.tuning.optuna_tuner import OptunaTuner
 from golem.core.tuning.simultaneous import SimultaneousTuner
 from golem.core.tuning.tuner_interface import BaseTuner

@@ -95,10 +96,10 @@ def with_additional_params(self, **parameters):

     def build(self, data: InputData) -> BaseTuner:
         if len(self.metric) > 1:
-            if self.tuner_class is OptunaTuner:
+            if self.tuner_class in [OptunaTuner, IOptTuner]:
                 self.additional_params.update({'objectives_number': len(self.metric)})
             else:
-                raise ValueError('Multi objective tuning applicable only for OptunaTuner.')
+                raise ValueError('Multi objective tuning applicable only for OptunaTuner and IOptTuner.')
         objective = MetricsObjective(self.metric, is_multi_objective=len(self.metric) > 1)
         data_splitter = DataSourceSplitter(self.cv_folds, validation_blocks=self.validation_blocks)
         data_producer = data_splitter.build(data)
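
This extends multi-objective tuning beyond OptunaTuner to IOptTuner. A minimal sketch of requesting it through TunerBuilder, following the chain used in the tuning tests (pipeline and train_data are assumed to exist; the metric pair and the metrics-enum import path are illustrative assumptions):

    from golem.core.tuning.iopt_tuner import IOptTuner

    from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
    from fedot.core.repository.metrics_repository import RegressionMetricsEnum

    # passing two metrics makes len(self.metric) > 1, so build() now injects
    # 'objectives_number' for IOptTuner instead of raising ValueError
    pipeline_tuner = TunerBuilder(train_data.task) \
        .with_tuner(IOptTuner) \
        .with_metric([RegressionMetricsEnum.MSE, RegressionMetricsEnum.MAE]) \
        .with_iterations(5) \
        .build(train_data)
    tuned_pipeline = pipeline_tuner.tune(pipeline, show_progress=False)
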
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,7 +1,7 @@
 scipy<1.13.0

 # Base framework
-thegolem==0.4.0
+thegolem @ git+https://github.com/aimclub/GOLEM.git@45215bf#egg=thegolem

 # Data
 numpy>=1.16.0, !=1.24.0
2 changes: 1 addition & 1 deletion test/integration/api/test_main_api.py
@@ -76,7 +76,7 @@ def test_api_tune_correct(task_type, metric_name, pred_model):
         base_pipeline = deepcopy(model.fit(features=train_data, predefined_model=pred_model))
         pred_before = model.predict(features=test_data)

-        tuned_pipeline = deepcopy(model.tune(timeout=tuning_timeout))
+        tuned_pipeline = deepcopy(model.tune(timeout=tuning_timeout, n_jobs=1))
         pred_after = model.predict(features=test_data)

         assert isinstance(tuned_pipeline, Pipeline)
53 changes: 26 additions & 27 deletions test/integration/pipelines/tuning/test_pipeline_tuning.py
@@ -3,6 +3,7 @@

 import pytest

+from fedot.core.pipelines.pipeline_builder import PipelineBuilder
 from fedot.core.repository.dataset_types import DataTypesEnum
 from golem.core.tuning.hyperopt_tuner import get_node_parameters_for_hyperopt
 from golem.core.tuning.iopt_tuner import IOptTuner
@@ -14,7 +15,7 @@
 from hyperopt.pyll.stochastic import sample as hp_sample

 from examples.simple.time_series_forecasting.ts_pipelines import ts_complex_ridge_smoothing_pipeline, \
-    ts_glm_pipeline
+    ts_polyfit_ridge_pipeline
 from fedot.core.data.data import InputData
 from fedot.core.data.data_split import train_test_data_setup
 from fedot.core.operations.evaluation.operation_implementations.models.ts_implementations.statsmodels import \
@@ -128,7 +129,7 @@ def get_class_pipelines():


 def get_ts_forecasting_pipelines():
-    pipelines = [ts_glm_pipeline(), ts_complex_ridge_smoothing_pipeline()]
+    pipelines = [ts_polyfit_ridge_pipeline(2), ts_complex_ridge_smoothing_pipeline()]
     return pipelines


@@ -141,7 +142,7 @@ def get_regr_operation_types():


 def get_class_operation_types():
-    return ['dt']
+    return ['rf']


 def get_regr_losses():
@@ -169,7 +170,7 @@ def get_not_default_search_space():
         'lgbmreg': {
             'learning_rate': {
                 'hyperopt-dist': hp.loguniform,
-                'sampling-scope': [0.05, 0.1],
+                'sampling-scope': [0.03, 0.1],
                 'type': 'continuous'},
             'colsample_bytree': {
                 'hyperopt-dist': hp.uniform,
@@ -216,9 +217,8 @@ def run_pipeline_tuner(train_data,
                        tuner=SimultaneousTuner,
                        search_space=PipelineSearchSpace(),
                        cv=None,
-                       iterations=3,
+                       iterations=5,
                        early_stopping_rounds=None, **kwargs):
-
     # if data is time series then lagged window should be tuned correctly
     # because lagged window raises error if windows size is uncorrect
     # and tuner will fall
@@ -241,11 +241,12 @@
         .with_metric(loss_function) \
         .with_cv_folds(cv) \
         .with_iterations(iterations) \
+        .with_n_jobs(1) \
         .with_early_stopping_rounds(early_stopping_rounds) \
         .with_search_space(search_space) \
         .with_additional_params(**kwargs) \
         .build(train_data)
-    tuned_pipeline = pipeline_tuner.tune(pipeline)
+    tuned_pipeline = pipeline_tuner.tune(pipeline, show_progress=False)
     return pipeline_tuner, tuned_pipeline


@@ -299,6 +300,7 @@ def test_pipeline_tuner_correct(data_fixture, pipelines, loss_functions, request
     for pipeline in pipelines:
         for loss_function in loss_functions:
             for cv in cvs:
+                print(pipeline)
                 pipeline_tuner, tuned_pipeline = run_pipeline_tuner(tuner=tuner,
                                                                     train_data=data,
                                                                     pipeline=pipeline,
@@ -423,7 +425,7 @@ def test_ts_pipeline_with_stats_model(n_steps, tuner):
         .with_metric(RegressionMetricsEnum.MSE) \
         .with_iterations(3) \
         .with_search_space(search_space).build(train_data)
-    tuned_pipeline = tuner_ar.tune(ar_pipeline)
+    tuned_pipeline = tuner_ar.tune(ar_pipeline, show_progress=False)
     assert tuned_pipeline is not None
     assert tuner_ar.obtained_metric is not None
@@ -472,15 +474,15 @@ def test_search_space_correctness_after_customization():
     custom_search_space_with_replace = PipelineSearchSpace(custom_search_space=custom_search_space,
                                                            replace_default_search_space=True)

-    default_params = get_node_parameters_for_hyperopt(default_search_space,
-                                                      node_id=0,
-                                                      operation_name='gbr')
-    custom_without_replace_params = get_node_parameters_for_hyperopt(custom_search_space_without_replace,
-                                                                     node_id=0,
-                                                                     operation_name='gbr')
-    custom_with_replace_params = get_node_parameters_for_hyperopt(custom_search_space_with_replace,
-                                                                  node_id=0,
-                                                                  operation_name='gbr')
+    default_params, _ = get_node_parameters_for_hyperopt(default_search_space,
+                                                         node_id=0,
+                                                         node=PipelineNode('gbr'))
+    custom_without_replace_params, _ = get_node_parameters_for_hyperopt(custom_search_space_without_replace,
+                                                                        node_id=0,
+                                                                        node=PipelineNode('gbr'))
+    custom_with_replace_params, _ = get_node_parameters_for_hyperopt(custom_search_space_with_replace,
+                                                                     node_id=0,
+                                                                     node=PipelineNode('gbr'))

     assert default_params.keys() == custom_without_replace_params.keys()
     assert default_params.keys() != custom_with_replace_params.keys()
@@ -520,12 +522,14 @@ def test_complex_search_space():
             assert params['link'] in GLMImplementation.family_distribution[params['family']]['available_links']


-@pytest.mark.parametrize('tuner', [SimultaneousTuner, SequentialTuner, IOptTuner, OptunaTuner])
+# TODO: (YamLyubov) add IOptTuner when it will support nested parameters.
+@pytest.mark.parametrize('tuner', [SimultaneousTuner, SequentialTuner, OptunaTuner])
 def test_complex_search_space_tuning_correct(tuner):
-    """ Tests SimultaneousTuner for time series forecasting task with GLM model that has a complex glm search space"""
+    """ Tests Tuners for time series forecasting task with GLM model that has a complex glm search space"""
     train_data, test_data = get_ts_data(n_steps=700, forecast_length=20)

-    glm_pipeline = Pipeline(PipelineNode('glm'))
+    # ridge added because IOpt requires at least one continuous parameter
+    glm_pipeline = PipelineBuilder().add_sequence('glm', 'ridge', branch_idx=0).build()
     initial_parameters = glm_pipeline.nodes[0].parameters
     tuner = TunerBuilder(train_data.task) \
         .with_tuner(tuner) \
@@ -534,11 +538,7 @@ def test_complex_search_space_tuning_correct(tuner):
         .build(train_data)
     tuned_glm_pipeline = tuner.tune(glm_pipeline)
     found_parameters = tuned_glm_pipeline.nodes[0].parameters
-    if tuner.init_metric == tuner.obtained_metric:
-        # TODO: (YamLyubov) Remove the check when IOptTuner will be able to tune categorical parameters.
-        assert initial_parameters == found_parameters
-    else:
-        assert initial_parameters != found_parameters
+    assert initial_parameters != found_parameters


 @pytest.mark.parametrize('data_fixture, pipelines, loss_functions',
@@ -547,7 +547,7 @@ def test_complex_search_space_tuning_correct(tuner):
                          ('multi_classification_dataset', get_class_pipelines(), get_class_losses()),
                          ('ts_forecasting_dataset', get_ts_forecasting_pipelines(), get_regr_losses()),
                          ('multimodal_dataset', get_multimodal_pipelines(), get_class_losses())])
-@pytest.mark.parametrize('tuner', [OptunaTuner])
+@pytest.mark.parametrize('tuner', [OptunaTuner, IOptTuner])
 def test_multiobj_tuning(data_fixture, pipelines, loss_functions, request, tuner):
     """ Test multi objective tuning is correct """
     data = request.getfixturevalue(data_fixture)
@@ -559,8 +559,7 @@ def test_multiobj_tuning(data_fixture, pipelines, loss_functions, request, tuner
                                                                train_data=data,
                                                                pipeline=pipeline,
                                                                loss_function=loss_functions,
-                                                               cv=cv,
-                                                               iterations=10)
+                                                               cv=cv)
         assert tuned_pipelines is not None
         assert all([tuned_pipeline is not None for tuned_pipeline in ensure_wrapped_in_sequence(tuned_pipelines)])
         for metrics in pipeline_tuner.obtained_metric:
5 changes: 3 additions & 2 deletions test/integration/real_applications/test_examples.py
@@ -84,8 +84,9 @@ def test_api_classification_example():


 def test_api_ts_forecasting_example():
-    forecast = run_ts_forecasting_example(dataset='salaries', timeout=2, with_tuning=False)
-    assert forecast is not None
+    for _ in range(100):
+        forecast = run_ts_forecasting_example(dataset='salaries', timeout=2, with_tuning=False)
+        assert forecast is not None


 def test_api_classification_multiobj_example():
29 changes: 13 additions & 16 deletions test/integration/real_applications/test_model_result_reproducing.py
@@ -9,7 +9,7 @@
 from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams


-def get_data(data_length=500, test_length=100):
+def get_data(data_length=300, test_length=5):
     garmonics = [(0.1, 0.9), (0.1, 1), (0.1, 1.1), (0.05, 2), (0.05, 5), (1, 0.02)]
     time = np.linspace(0, 100, data_length)
     data = time * 0
@@ -30,7 +30,7 @@ def get_fitted_fedot(forecast_length, train_data, **kwargs):
               'task_params': TsForecastingParams(forecast_length=forecast_length),
               'seed': 1,
               'timeout': None,
-              'pop_size': 50,
+              'pop_size': 5,
               'num_of_generations': 5,
               'with_tuning': False}
     params.update(kwargs)
@@ -47,25 +47,22 @@ def check_fedots(fedots: List[Fedot], test_data: InputData, are_same: bool = True):
     :return: None"""
     for fedot in fedots[1:]:
         assert are_same == np.allclose(fedots[0].history.all_historical_fitness, fedot.history.all_historical_fitness)
-        # TODO return check
-        # assert are_same == np.allclose(fedots[0].forecast(test_data), fedot.forecast(test_data))
+        if are_same:
+            assert np.allclose(fedots[0].forecast(test_data), fedot.forecast(test_data))


 def test_result_reproducing():
     """ Test check that Fedot instance returns same compose result
     and makes same compose process in different run with fixed seeds """
-    # TODO: fix reproducing
-    # it is randomly unstable
-    pass
-    # train, test = get_data()
-    # old_fedot = None
-    # # try in cycle because some problems are random
-    # for _ in range(4):
-    #     fedot = get_fitted_fedot(forecast_length=test.idx.shape[0],
-    #                              train_data=train)
-    #     if old_fedot is not None:
-    #         check_fedots([fedot, old_fedot], test, are_same=True)
-    #     old_fedot = fedot
+    train, test = get_data()
+    old_fedot = None
+    # try in cycle because some problems are random
+    for _ in range(4):
+        fedot = get_fitted_fedot(forecast_length=test.idx.shape[0],
+                                 train_data=train)
+        if old_fedot is not None:
+            check_fedots([fedot, old_fedot], test, are_same=True)
+        old_fedot = fedot


 def test_result_changing():
4 changes: 3 additions & 1 deletion test/unit/api/test_api_params.py
@@ -2,7 +2,6 @@
 from typing import Optional

 import pytest
-
 from golem.core.optimisers.genetic.gp_optimizer import EvoGraphOptimizer
 from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters
 from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum

@@ -82,6 +81,9 @@ def test_correctly_sets_default_params(input_params):
                           ('gp_algo', correct_gp_algorithm_attributes)])
 def test_filter_params_correctly(input_params, case, correct_keys):
     params_repository = get_api_params_repository()
+    if case == 'gp_algo':
+        input_params['seed'] = 0
+        correct_keys.add('seed')
     input_params = params_repository.check_and_set_default_params(input_params)
     if case == 'composer':
         output_params = params_repository.get_params_for_composer_requirements(input_params)
4 changes: 2 additions & 2 deletions test/unit/data_operations/test_time_series_operations.py
@@ -358,9 +358,9 @@ def test_tuner_correctly_work_with_window_size_selector():
     tuner_tuned_window = tuned_pipeline.nodes[-1].parameters['window_size']

     assert autotuned_window != tuner_tuned_window
-    # check that WindowSizeSelector runs twice due to tuner graph copying in initialization
+    # check that WindowSizeSelector runs once
     sum_records = sum(check_window_size_selector_logging(records))
-    assert sum_records == 2 or sum_records == 3
+    assert sum_records == 1
2 changes: 1 addition & 1 deletion test/unit/optimizer/gp_operators/test_mutation.py
@@ -168,7 +168,7 @@ def test_no_opt_or_graph_nodes_after_mutation():
     graph = get_simple_linear_graph()
     mutation = get_mutation_obj()
     for mut in mutation.parameters.mutation_types:
-        graph, _ = mutation._adapt_and_apply_mutation(new_graph=graph, mutation_type=mut)
+        graph = mutation._apply_mutations(new_graph=graph, mutation_type=mut)
     new_pipeline = adapter.restore(graph)

     assert not find_first(new_pipeline, lambda n: type(n) in (GraphNode, OptNode))
2 changes: 1 addition & 1 deletion test/unit/pipelines/test_reproducibility.py
@@ -4,7 +4,7 @@
 from test.integration.quality.test_synthetic_tasks import get_regression_pipeline, get_regression_data


-def test_reproducubility():
+def test_reproducibility():
     """
     Test validates that two sequential evaluation (fit/predict) of pipelines leads with exactly same result
     if random seed is fixed via session-scoped pytest fixture
