Atomized model operation #1227

Status: Closed. Wants to merge 30 commits (branch atomized-model-operation into master).

Commits (30):
- 640fbce: move atomized to new folder (kasyanovse, Dec 14, 2023)
- d74e386: series decompose model (kasyanovse, Dec 14, 2023)
- 0a85475: wip tests (kasyanovse, Dec 14, 2023)
- 37492bd: new model (kasyanovse, Dec 15, 2023)
- 09f9828: fixes (kasyanovse, Dec 15, 2023)
- d6fdb34: delete ts decomposer (kasyanovse, Dec 15, 2023)
- db18ed9: add scaler (kasyanovse, Dec 15, 2023)
- 5d60648: add diff operation (kasyanovse, Dec 15, 2023)
- 7c2e51a: wip, start work with mutations are adapted for atomized models (kasyanovse, Dec 15, 2023)
- 0ec6851: pipeline adapter works with atomized models now (kasyanovse, Dec 18, 2023)
- 6bcdd7e: add mutations for atomized graphs (kasyanovse, Dec 18, 2023)
- d4cf768: fix problem with `functools.wraps` (kasyanovse, Dec 18, 2023)
- 57be94f: fix task type bug in atomized model (kasyanovse, Dec 18, 2023)
- 3cccc39: Merge branch 'master' into atomized-model-operation (kasyanovse, Dec 18, 2023)
- 3442a06: fix atomized mutation in fedot (kasyanovse, Dec 18, 2023)
- 1c489bf: small fixes (kasyanovse, Dec 18, 2023)
- 6645956: fix tests (kasyanovse, Dec 19, 2023)
- 3363637: pep8 (kasyanovse, Dec 19, 2023)
- a925074: fix probability in test (kasyanovse, Dec 19, 2023)
- 0cb15ec: intermediate save before create new model class (kasyanovse, Dec 19, 2023)
- 0a5defd: create new operation type - atomized (kasyanovse, Dec 21, 2023)
- 3756c63: crossover (kasyanovse, Dec 21, 2023)
- 2dbf081: new model (kasyanovse, Dec 22, 2023)
- 299af5f: some adds for testing purpose (kasyanovse, Dec 22, 2023)
- 7dfb639: small fix (kasyanovse, Dec 25, 2023)
- d232831: add window size selector (kasyanovse, Dec 27, 2023)
- 6d7a92e: add decomposer (kasyanovse, Dec 27, 2023)
- d6f48bb: fix `atomized_ts_to_time` model (kasyanovse, Dec 28, 2023)
- fcf9da5: fix some errors, disable `atomized_ts_decomposer` (kasyanovse, Dec 28, 2023)
- 713b1d5: small fix (kasyanovse, Dec 28, 2023)
Files changed:
2 changes: 1 addition & 1 deletion examples/advanced/additional_learning.py
@@ -4,7 +4,7 @@
import pandas as pd

from fedot import Fedot
from fedot.core.operations.atomized_model import AtomizedModel
from fedot.core.operations.atomized_model.atomized_model import AtomizedModel
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
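As an aside, a minimal sketch of the new import location in use; the inner pipeline here is illustrative, not taken from this example file:

```python
# Hedged sketch: AtomizedModel now lives in the atomized_model package.
from fedot.core.operations.atomized_model.atomized_model import AtomizedModel
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline

inner_pipeline = Pipeline(PipelineNode('ridge'))  # illustrative inner pipeline
atomized = AtomizedModel(inner_pipeline)          # wraps a whole pipeline as one operation
```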
42 changes: 37 additions & 5 deletions fedot/api/api_utils/api_params_repository.py
@@ -1,6 +1,10 @@
import datetime
from typing import Sequence

from fedot.core.optimisers.genetic_operators.crossover import fedot_subtree_crossover, fedot_one_point_crossover
from fedot.core.optimisers.genetic_operators.mutation import fedot_single_edge_mutation, fedot_single_add_mutation, \
fedot_single_change_mutation, fedot_single_drop_mutation, insert_atomized_operation, fedot_tree_growth
from golem.core.optimisers.genetic.operators.crossover import CrossoverTypesEnum
from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum
from golem.core.optimisers.genetic.operators.mutation import MutationTypesEnum

@@ -118,6 +122,7 @@ def get_params_for_gp_algorithm_params(self, params: dict) -> dict:
gp_algorithm_params['genetic_scheme_type'] = GeneticSchemeTypesEnum.steady_state

gp_algorithm_params['mutation_types'] = ApiParamsRepository._get_default_mutations(self.task_type, params)
gp_algorithm_params['crossover_types'] = ApiParamsRepository._get_default_crossovers(self.task_type, params)
return gp_algorithm_params

@staticmethod
@@ -128,14 +133,41 @@ def _get_default_mutations(task_type: TaskTypesEnum, params) -> Sequence[MutationTypesEnum]:
MutationTypesEnum.single_add,
MutationTypesEnum.single_edge]

        # TODO remove workaround after boosting mutation fix
        # Boosting mutation does not work due to a problem with __eq__ on its copy.
        # Refactoring ``partial`` to ``def`` does not help.
        # Boosting mutation also does not work on its own.
        if task_type == TaskTypesEnum.ts_forecasting:
            # TODO remove workaround after boosting mutation fix
            # Boosting mutation does not work due to a problem with __eq__ on its copy.
            # Refactoring ``partial`` to ``def`` does not help.
            # Boosting mutation also does not work on its own.
            # mutations.append(partial(boosting_mutation, params=params))
            pass

            # TODO remove when tests end
            # added for testing purposes
mutations = [parameter_change_mutation,
fedot_single_edge_mutation,
fedot_single_add_mutation,
fedot_single_change_mutation,
fedot_single_drop_mutation,
insert_atomized_operation]
# mutations = [fedot_single_edge_mutation,
# fedot_single_add_mutation,
# fedot_single_change_mutation,
# fedot_single_drop_mutation,
# fedot_tree_growth,
# insert_atomized_operation]
# mutations = [fedot_single_add_mutation,
# fedot_single_change_mutation,
# fedot_single_drop_mutation,
# insert_atomized_operation]
else:
mutations.append(add_resample_mutation)

return mutations

@staticmethod
    def _get_default_crossovers(task_type: TaskTypesEnum, params) -> Sequence[CrossoverTypesEnum]:
        if task_type == TaskTypesEnum.ts_forecasting:
            # crossovers = [fedot_subtree_crossover, fedot_one_point_crossover]
            crossovers = [fedot_one_point_crossover]
else:
crossovers = [CrossoverTypesEnum.one_point]
return crossovers
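A rough illustration of the new dispatch, assuming the two static methods can be called directly with an empty params dict (the surrounding API wiring is omitted; import paths are taken from this diff):

```python
# Hedged sketch: defaults now differ by task type, and for ts_forecasting the
# returned entries are FEDOT-specific operator callables, not GOLEM enum members.
from fedot.api.api_utils.api_params_repository import ApiParamsRepository
from fedot.core.optimisers.genetic_operators.crossover import fedot_one_point_crossover
from fedot.core.optimisers.genetic_operators.mutation import insert_atomized_operation
from fedot.core.repository.tasks import TaskTypesEnum

mutation_ops = ApiParamsRepository._get_default_mutations(TaskTypesEnum.ts_forecasting, params={})
crossover_ops = ApiParamsRepository._get_default_crossovers(TaskTypesEnum.ts_forecasting, params={})

assert insert_atomized_operation in mutation_ops      # atomized insertion is enabled
assert crossover_ops == [fedot_one_point_crossover]   # subtree crossover is currently disabled
```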
15 changes: 13 additions & 2 deletions fedot/api/api_utils/assumptions/task_assumptions.py
Original file line number Diff line number Diff line change
@@ -2,10 +2,13 @@
from typing import List

from fedot.api.api_utils.assumptions.operations_filter import OperationsFilter
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.repository.operation_types_repository import OperationTypesRepository
from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.utilities.custom_errors import AbstractMethodNotImplementError
from golem.core.optimisers.opt_graph_builder import OptGraphBuilder


class TaskAssumptions:
@@ -64,9 +67,17 @@ def builders(self):
.add_branch('polyfit', 'lagged')
.grow_branches(None, 'ridge')
.join_branches('ridge'),
'smoothing_ar':
'atomized_ts_differ':
PipelineBuilder()
.add_sequence('smoothing', 'ar'),
.add_branch('lagged', 'lagged')
.add_node('ridge', branch_idx=0)
.add_node('atomized_ts_differ', branch_idx=1, params={'pipeline': PipelineBuilder()
.add_node('ridge').build()})
.join_branches('ridge'),
'atomized_ts_to_time':
PipelineBuilder()
.add_sequence('lagged', 'ridge')
.add_node('atomized_ts_to_time', params={'pipeline': PipelineBuilder().add_node('ridge').build()}),
}

def ensemble_operation(self) -> str:
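For reference, the 'atomized_ts_to_time' assumption above can be reproduced standalone roughly like this (a sketch; the node names and the `params={'pipeline': ...}` convention follow the diff):

```python
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

inner = PipelineBuilder().add_node('ridge').build()  # pipeline embedded in the atomized node
assumption = (PipelineBuilder()
              .add_sequence('lagged', 'ridge')
              .add_node('atomized_ts_to_time', params={'pipeline': inner})
              .build())
```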
6 changes: 4 additions & 2 deletions fedot/core/data/supplementary_data.py
@@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import Dict, Optional
from dataclasses import dataclass, field
from typing import Dict, Optional, List

import numpy as np

@@ -31,6 +31,8 @@ class SupplementaryData:
col_type_ids: Optional[Dict[str, np.ndarray]] = None
# Was the data preprocessed before composer
is_auto_preprocessed: bool = False
# time series bias for time series forecasting problem
time_series_bias: List[np.ndarray] = field(default_factory=list)

@property
def compound_mask(self):
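An illustration of how the new field might be used; the restore step below is an assumption about the intended bias round-trip, not code from this PR:

```python
import numpy as np

from fedot.core.data.supplementary_data import SupplementaryData

sdata = SupplementaryData()
# an upstream operation (e.g. a differencing/scaling atomized model) removes a
# bias from the series and records it:
sdata.time_series_bias.append(np.array([0.1, 0.2, 0.3]))
# a downstream step could then add the accumulated bias back to its forecast:
raw_forecast = np.array([1.0, 1.1, 1.2])
restored_forecast = raw_forecast + sum(sdata.time_series_bias)
```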
14 changes: 14 additions & 0 deletions fedot/core/operations/atomized.py
@@ -0,0 +1,14 @@
from fedot.core.operations.model import Model
from fedot.core.repository.operation_types_repository import OperationTypesRepository


class Atomized(Model):
"""
Class with fit/predict methods defining the atomized strategy for the task

:param operation_type: name of the model
"""

def __init__(self, operation_type: str):
super().__init__(operation_type=operation_type)
self.operations_repo = OperationTypesRepository('atomized')
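A short usage sketch, assuming the 'atomized' repository file shipped with this PR is registered; the operation name is taken from the strategy mapping added below:

```python
from fedot.core.operations.atomized import Atomized

op = Atomized('atomized_ts_differ')  # metadata resolved from the 'atomized' repository
print(op.operation_type)             # 'atomized_ts_differ'
```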
Empty file.
fedot/core/operations/atomized_model/atomized_model.py
@@ -1,18 +1,11 @@
from collections import Counter
from datetime import timedelta
from functools import reduce
from operator import and_, or_
from typing import Any, Callable, Dict, List, Optional, Set, Union

from golem.core.tuning.simultaneous import SimultaneousTuner

from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.operation import Operation
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.metrics_repository import MetricCallable
from fedot.core.repository.operation_types_repository import OperationMetaInfo, atomized_model_type


@@ -26,6 +19,11 @@ def __init__(self, pipeline: 'Pipeline'):
super().__init__(operation_type=atomized_model_type())
self.pipeline = pipeline

@property
def acceptable_task_types(self):
root_operation = self.pipeline.root_node.operation
return root_operation.acceptable_task_types

def fit(self, params: Optional[Union[OperationParameters, dict]], data: InputData) -> ('Pipeline', OutputData):
predicted_train = self.pipeline.fit(input_data=data)
fitted_atomized_operation = self.pipeline
@@ -50,27 +48,31 @@ def predict_for_fit(self,
return self.predict(fitted_operation, data, params, output_mode)

def fine_tune(self,
metric_function: MetricCallable,
metric_function: 'MetricCallable',
input_data: Optional[InputData] = None,
iterations: int = 50,
timeout: int = 5) -> 'AtomizedModel':
""" Method for tuning hyperparameters """
tuner = TunerBuilder(input_data.task) \
.with_tuner(SimultaneousTuner) \
.with_metric(metric_function) \
.with_iterations(iterations) \
.with_timeout(timedelta(minutes=timeout)) \
.build(input_data)
tuned_pipeline = tuner.tune(self.pipeline)
tuned_atomized_model = AtomizedModel(tuned_pipeline)
return tuned_atomized_model
        # TODO fix tuner with atomized model
        # cannot be done this way due to a circular import problem
        # TODO add tests for atomized tuning
        # the original test was removed
# tuner = TunerBuilder(input_data.task) \
# .with_tuner(SimultaneousTuner) \
# .with_metric(metric_function) \
# .with_iterations(iterations) \
# .with_timeout(timedelta(minutes=timeout)) \
# .build(input_data)
# tuned_pipeline = tuner.tune(self.pipeline)
# tuned_atomized_model = AtomizedModel(tuned_pipeline)
# return tuned_atomized_model

@property
def metadata(self) -> OperationMetaInfo:
root_node = self.pipeline.root_node

def extract_metadata_from_pipeline(attr_name: str,
node_filter: Optional[Callable[[PipelineNode], bool]] = None,
node_filter: Optional[Callable[['PipelineNode'], bool]] = None,
reduce_function: Optional[Callable[[Set], Set]] = None) -> List[Any]:
""" Extract metadata from atomized pipeline
:param attr_name: extracting metadata property
@@ -110,8 +112,8 @@ def description(self, operation_params: Optional[dict] = None) -> str:
operation_types = map(lambda node: node.operation.operation_type,
self.pipeline.nodes)
operation_types_dict = dict(Counter(operation_types))
return f'{operation_type}_length:{operation_length}_depth:{operation_depth}' \
f'_types:{operation_types_dict}_id:{operation_id}'
return (f'{self.__class__}({operation_type}_length:{operation_length}_depth:{operation_depth}'
f'_types:{operation_types_dict}_id:{operation_id})')

@staticmethod
def assign_tabular_column_types(output_data: OutputData,
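A usage sketch for the updated class (construction only; tuning is omitted since fine_tune is disabled above, and the inner pipeline is illustrative):

```python
from fedot.core.operations.atomized_model.atomized_model import AtomizedModel
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline

atomized = AtomizedModel(Pipeline(PipelineNode('ridge')))
# acceptable task types are now delegated to the inner pipeline's root operation:
print(atomized.acceptable_task_types)
# description() now also reports the wrapping class:
print(atomized.description())
```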
@@ -10,7 +10,7 @@ def __init__(self, node: PipelineNode = None, operation_id: int = None, nodes_fr
# Need use the imports inside the class because of the problem of circular imports.
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.template import PipelineTemplate
from fedot.core.operations.atomized_model import AtomizedModel
from fedot.core.operations.atomized_model.atomized_model import AtomizedModel

super().__init__()
self.atomized_model_json_path = None
54 changes: 54 additions & 0 deletions fedot/core/operations/evaluation/atomized.py
@@ -0,0 +1,54 @@
import warnings
from typing import Optional

from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy, SkLearnEvaluationStrategy
from fedot.core.operations.evaluation.operation_implementations.data_operations.decompose \
import DecomposerRegImplementation
from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_filters \
import IsolationForestRegImplementation
from fedot.core.operations.evaluation.operation_implementations. \
data_operations.sklearn_filters import LinearRegRANSACImplementation, NonLinearRegRANSACImplementation
from fedot.core.operations.evaluation.operation_implementations. \
data_operations.sklearn_selectors import LinearRegFSImplementation, NonLinearRegFSImplementation
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_decompose import \
AtomizedTimeSeriesDecomposer
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_ts_differ import \
AtomizedTimeSeriesDiffer
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_ts_sampler import \
AtomizedTimeSeriesSampler
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_ts_scaler import \
AtomizedTimeSeriesScaler
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_ts_transform_to_time import \
AtomizedTimeSeriesToTime
from fedot.core.operations.evaluation.operation_implementations.models.knn import FedotKnnRegImplementation
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.utilities.random import ImplementationRandomStateHandler

warnings.filterwarnings("ignore", category=UserWarning)


class FedotAtomizedStrategy(EvaluationStrategy):
_operations_by_types = {
'atomized_ts_differ': AtomizedTimeSeriesDiffer,
'atomized_ts_scaler': AtomizedTimeSeriesScaler,
'atomized_ts_sampler': AtomizedTimeSeriesSampler,
'atomized_ts_to_time': AtomizedTimeSeriesToTime,
'atomized_ts_decomposer': AtomizedTimeSeriesDecomposer,
}

def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
self.operation_impl = self._convert_to_operation(operation_type)
super().__init__(operation_type, params)

def fit(self, train_data: InputData):
model = self.operation_impl(self.params_for_fit.get('pipeline'))
return model.fit(train_data)

def predict(self, trained_operation, predict_data: InputData) -> OutputData:
prediction = trained_operation.predict(predict_data)
return prediction

def predict_for_fit(self, trained_operation, predict_data: InputData) -> OutputData:
prediction = trained_operation.predict_for_fit(predict_data)
return prediction
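A hedged sketch of how the strategy is driven; in FEDOT it is normally constructed by the repository/strategy resolution layer rather than by hand, and this assumes OperationParameters accepts keyword parameters as elsewhere in the framework:

```python
from fedot.core.operations.evaluation.atomized import FedotAtomizedStrategy
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

inner = PipelineBuilder().add_node('ridge').build()
strategy = FedotAtomizedStrategy('atomized_ts_differ',
                                 OperationParameters(pipeline=inner))
# strategy.fit(train_data) instantiates AtomizedTimeSeriesDiffer with the inner
# pipeline and delegates fitting; predict()/predict_for_fit() delegate likewise.
```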
@@ -3,6 +3,8 @@

import numpy as np
import pandas as pd

from fedot.utilities.window_size_selector import WindowSizeSelector
from golem.core.log import default_log
from scipy.ndimage import gaussian_filter
from sklearn.decomposition import TruncatedSVD
@@ -115,6 +117,11 @@ def _check_and_correct_window_size(self, time_series: np.array, forecast_length:

"""

if self.params.get('autotune_window', 0) == 1:
new = int(WindowSizeSelector(method='hac', window_range=(5, 25))
.get_window_size(time_series) * len(time_series) / 100)
self.params.update(window_size=new, autotune_window=0)

# Maximum threshold
if self.window_size + forecast_length > len(time_series):
            raise ValueError(f"Window size is too high ({self.window_size}) for the provided data length {len(time_series)}")
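A standalone sketch of the autotune step added above; it assumes, per the arithmetic in the diff, that WindowSizeSelector returns the window as a percentage of the series length:

```python
import numpy as np

from fedot.utilities.window_size_selector import WindowSizeSelector

ts = np.sin(np.linspace(0, 20, 300)) + np.random.normal(0, 0.1, 300)
window_pct = WindowSizeSelector(method='hac', window_range=(5, 25)).get_window_size(ts)
window_size = int(window_pct * len(ts) / 100)  # rescale percentage to an absolute lag count
```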
@@ -0,0 +1,43 @@
from typing import Optional

from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_ts_mixins import \
AtomizedTimeSeriesBuildFactoriesMixin
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline


class AtomizedTimeSeriesDecomposer(AtomizedTimeSeriesBuildFactoriesMixin):
def __init__(self, pipeline: Optional['Pipeline'] = None):
if pipeline is None:
pipeline = Pipeline(PipelineNode('ridge'))
self.pipeline = pipeline

def _decompose(self, data: InputData, fit_stage: bool):
        # split the merged features into the lagged block and the upstream model forecast
forecast_length = data.task.task_params.forecast_length
data_from_lagged = data.features[:, :-forecast_length]
data_from_model = data.features[:, -forecast_length:]
new_target = data.target
if fit_stage:
new_target -= data_from_model

new_data = InputData(idx=data.idx,
features=data_from_lagged,
target=new_target,
data_type=data.data_type,
task=data.task,
supplementary_data=data.supplementary_data)
return new_data

def fit(self, data: InputData):
new_data = self._decompose(data, fit_stage=True)
self.pipeline.fit(new_data)
return self

def predict(self, data: InputData) -> OutputData:
new_data = self._decompose(data, fit_stage=False)
return self.pipeline.predict(new_data)

def predict_for_fit(self, data: InputData) -> OutputData:
return self.predict(data)
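A hypothetical smoke test for the decomposer; the table below mimics the merged lagged-plus-forecast features `_decompose` expects (here, 5 lagged columns followed by 2 forecast columns), with all shapes and values invented for illustration:

```python
import numpy as np

from fedot.core.data.data import InputData
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_decompose import \
    AtomizedTimeSeriesDecomposer
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams

task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=2))
features = np.random.rand(10, 7)   # 5 lagged columns + 2 upstream-forecast columns
target = np.random.rand(10, 2)     # one row per sample, forecast_length columns
data = InputData(idx=np.arange(10), features=features, target=target,
                 data_type=DataTypesEnum.table, task=task)

model = AtomizedTimeSeriesDecomposer().fit(data)  # fits the default 'ridge' pipeline on residuals
residual_forecast = model.predict(data)
```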