Atomized model operation #1227

Status: Closed. Wants to merge 30 commits (branch atomized-model-operation into master).

Commits (30):
- 640fbce: move atomized to new folder (kasyanovse, Dec 14, 2023)
- d74e386: series decompose model (kasyanovse, Dec 14, 2023)
- 0a85475: wip tests (kasyanovse, Dec 14, 2023)
- 37492bd: new model (kasyanovse, Dec 15, 2023)
- 09f9828: fixes (kasyanovse, Dec 15, 2023)
- d6fdb34: delete ts decomposer (kasyanovse, Dec 15, 2023)
- db18ed9: add scaler (kasyanovse, Dec 15, 2023)
- 5d60648: add diff operation (kasyanovse, Dec 15, 2023)
- 7c2e51a: wip, start work with mutations are adapted for atomized models (kasyanovse, Dec 15, 2023)
- 0ec6851: pipeline adapter works with atomized models now (kasyanovse, Dec 18, 2023)
- 6bcdd7e: add mutations for atomized graphs (kasyanovse, Dec 18, 2023)
- d4cf768: fix problem with `functools.wraps` (kasyanovse, Dec 18, 2023)
- 57be94f: fix task type bug in atomized model (kasyanovse, Dec 18, 2023)
- 3cccc39: Merge branch 'master' into atomized-model-operation (kasyanovse, Dec 18, 2023)
- 3442a06: fix atomized mutation in fedot (kasyanovse, Dec 18, 2023)
- 1c489bf: small fixes (kasyanovse, Dec 18, 2023)
- 6645956: fix tests (kasyanovse, Dec 19, 2023)
- 3363637: pep8 (kasyanovse, Dec 19, 2023)
- a925074: fix probability in test (kasyanovse, Dec 19, 2023)
- 0cb15ec: intermediate save before create new model class (kasyanovse, Dec 19, 2023)
- 0a5defd: create new operation type - atomized (kasyanovse, Dec 21, 2023)
- 3756c63: crossover (kasyanovse, Dec 21, 2023)
- 2dbf081: new model (kasyanovse, Dec 22, 2023)
- 299af5f: some adds for testing purpose (kasyanovse, Dec 22, 2023)
- 7dfb639: small fix (kasyanovse, Dec 25, 2023)
- d232831: add window size selector (kasyanovse, Dec 27, 2023)
- 6d7a92e: add decomposer (kasyanovse, Dec 27, 2023)
- d6f48bb: fix `atomized_ts_to_time` model (kasyanovse, Dec 28, 2023)
- fcf9da5: fix some errors, disable `atomized_ts_decomposer` (kasyanovse, Dec 28, 2023)
- 713b1d5: small fix (kasyanovse, Dec 28, 2023)
Files changed:
2 changes: 1 addition & 1 deletion examples/advanced/additional_learning.py
@@ -4,7 +4,7 @@
import pandas as pd

from fedot import Fedot
from fedot.core.operations.atomized_model import AtomizedModel
from fedot.core.operations.atomized_model.atomized_model import AtomizedModel
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
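As an aside, a minimal sketch of the new import location in use; the inner pipeline here is illustrative, not taken from this example file:

```python
# Hedged sketch: AtomizedModel now lives in the atomized_model package.
from fedot.core.operations.atomized_model.atomized_model import AtomizedModel
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline

inner_pipeline = Pipeline(PipelineNode('ridge'))  # illustrative inner pipeline
atomized = AtomizedModel(inner_pipeline)          # wraps a whole pipeline as one operation
```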
42 changes: 37 additions & 5 deletions fedot/api/api_utils/api_params_repository.py
@@ -1,6 +1,10 @@
import datetime
from typing import Sequence

from fedot.core.optimisers.genetic_operators.crossover import fedot_subtree_crossover, fedot_one_point_crossover
from fedot.core.optimisers.genetic_operators.mutation import fedot_single_edge_mutation, fedot_single_add_mutation, \
fedot_single_change_mutation, fedot_single_drop_mutation, insert_atomized_operation, fedot_tree_growth
from golem.core.optimisers.genetic.operators.crossover import CrossoverTypesEnum
from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum
from golem.core.optimisers.genetic.operators.mutation import MutationTypesEnum

@@ -118,6 +122,7 @@ def get_params_for_gp_algorithm_params(self, params: dict) -> dict:
gp_algorithm_params['genetic_scheme_type'] = GeneticSchemeTypesEnum.steady_state

gp_algorithm_params['mutation_types'] = ApiParamsRepository._get_default_mutations(self.task_type, params)
gp_algorithm_params['crossover_types'] = ApiParamsRepository._get_default_crossovers(self.task_type, params)
return gp_algorithm_params

@staticmethod
@@ -128,14 +133,41 @@ def _get_default_mutations(task_type: TaskTypesEnum, params) -> Sequence[MutationTypesEnum]:
MutationTypesEnum.single_add,
MutationTypesEnum.single_edge]

        # TODO remove workaround after boosting mutation fix
        # Boosting mutation does not work due to a problem with __eq__ on its copy.
        # Refactoring ``partial`` to ``def`` does not help.
        # Boosting mutation also does not work on its own.
        if task_type == TaskTypesEnum.ts_forecasting:
            # TODO remove workaround after boosting mutation fix
            # Boosting mutation does not work due to a problem with __eq__ on its copy.
            # Refactoring ``partial`` to ``def`` does not help.
            # Boosting mutation also does not work on its own.
            # mutations.append(partial(boosting_mutation, params=params))
            pass

            # TODO remove when tests end
            # added for testing purposes
mutations = [parameter_change_mutation,
fedot_single_edge_mutation,
fedot_single_add_mutation,
fedot_single_change_mutation,
fedot_single_drop_mutation,
insert_atomized_operation]
# mutations = [fedot_single_edge_mutation,
# fedot_single_add_mutation,
# fedot_single_change_mutation,
# fedot_single_drop_mutation,
# fedot_tree_growth,
# insert_atomized_operation]
# mutations = [fedot_single_add_mutation,
# fedot_single_change_mutation,
# fedot_single_drop_mutation,
# insert_atomized_operation]
else:
mutations.append(add_resample_mutation)

return mutations

@staticmethod
    def _get_default_crossovers(task_type: TaskTypesEnum, params) -> Sequence[CrossoverTypesEnum]:
        if task_type == TaskTypesEnum.ts_forecasting:
            # crossovers = [fedot_subtree_crossover, fedot_one_point_crossover]
            crossovers = [fedot_one_point_crossover]
else:
crossovers = [CrossoverTypesEnum.one_point]
return crossovers
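A rough illustration of the new dispatch, assuming the two static methods can be called directly with an empty params dict (the surrounding API wiring is omitted; import paths are taken from this diff):

```python
# Hedged sketch: defaults now differ by task type, and for ts_forecasting the
# returned entries are FEDOT-specific operator callables, not GOLEM enum members.
from fedot.api.api_utils.api_params_repository import ApiParamsRepository
from fedot.core.optimisers.genetic_operators.crossover import fedot_one_point_crossover
from fedot.core.optimisers.genetic_operators.mutation import insert_atomized_operation
from fedot.core.repository.tasks import TaskTypesEnum

mutation_ops = ApiParamsRepository._get_default_mutations(TaskTypesEnum.ts_forecasting, params={})
crossover_ops = ApiParamsRepository._get_default_crossovers(TaskTypesEnum.ts_forecasting, params={})

assert insert_atomized_operation in mutation_ops      # atomized insertion is enabled
assert crossover_ops == [fedot_one_point_crossover]   # subtree crossover is currently disabled
```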
15 changes: 13 additions & 2 deletions fedot/api/api_utils/assumptions/task_assumptions.py
Original file line number Diff line number Diff line change
@@ -2,10 +2,13 @@
from typing import List

from fedot.api.api_utils.assumptions.operations_filter import OperationsFilter
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.repository.operation_types_repository import OperationTypesRepository
from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.utilities.custom_errors import AbstractMethodNotImplementError
from golem.core.optimisers.opt_graph_builder import OptGraphBuilder


class TaskAssumptions:
@@ -64,9 +67,17 @@ def builders(self):
.add_branch('polyfit', 'lagged')
.grow_branches(None, 'ridge')
.join_branches('ridge'),
'smoothing_ar':
'atomized_ts_differ':
PipelineBuilder()
.add_sequence('smoothing', 'ar'),
.add_branch('lagged', 'lagged')
.add_node('ridge', branch_idx=0)
.add_node('atomized_ts_differ', branch_idx=1, params={'pipeline': PipelineBuilder()
.add_node('ridge').build()})
.join_branches('ridge'),
'atomized_ts_to_time':
PipelineBuilder()
.add_sequence('lagged', 'ridge')
.add_node('atomized_ts_to_time', params={'pipeline': PipelineBuilder().add_node('ridge').build()}),
}

def ensemble_operation(self) -> str:
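For reference, the 'atomized_ts_to_time' assumption above can be reproduced standalone roughly like this (a sketch; the node names and the `params={'pipeline': ...}` convention follow the diff):

```python
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

inner = PipelineBuilder().add_node('ridge').build()  # pipeline embedded in the atomized node
assumption = (PipelineBuilder()
              .add_sequence('lagged', 'ridge')
              .add_node('atomized_ts_to_time', params={'pipeline': inner})
              .build())
```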
6 changes: 4 additions & 2 deletions fedot/core/data/supplementary_data.py
@@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import Dict, Optional
from dataclasses import dataclass, field
from typing import Dict, Optional, List

import numpy as np

@@ -31,6 +31,8 @@ class SupplementaryData:
col_type_ids: Optional[Dict[str, np.ndarray]] = None
# Was the data preprocessed before composer
is_auto_preprocessed: bool = False
# time series bias for time series forecasting problem
time_series_bias: List[np.ndarray] = field(default_factory=list)

@property
def compound_mask(self):
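An illustration of how the new field might be used; the restore step below is an assumption about the intended bias round-trip, not code from this PR:

```python
import numpy as np

from fedot.core.data.supplementary_data import SupplementaryData

sdata = SupplementaryData()
# an upstream operation (e.g. a differencing/scaling atomized model) removes a
# bias from the series and records it:
sdata.time_series_bias.append(np.array([0.1, 0.2, 0.3]))
# a downstream step could then add the accumulated bias back to its forecast:
raw_forecast = np.array([1.0, 1.1, 1.2])
restored_forecast = raw_forecast + sum(sdata.time_series_bias)
```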
14 changes: 14 additions & 0 deletions fedot/core/operations/atomized.py
@@ -0,0 +1,14 @@
from fedot.core.operations.model import Model
from fedot.core.repository.operation_types_repository import OperationTypesRepository


class Atomized(Model):
"""
Class with fit/predict methods defining the atomized strategy for the task

:param operation_type: name of the model
"""

def __init__(self, operation_type: str):
super().__init__(operation_type=operation_type)
self.operations_repo = OperationTypesRepository('atomized')
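A short usage sketch, assuming the 'atomized' repository file shipped with this PR is registered; the operation name is taken from the strategy mapping added below:

```python
from fedot.core.operations.atomized import Atomized

op = Atomized('atomized_ts_differ')  # metadata resolved from the 'atomized' repository
print(op.operation_type)             # 'atomized_ts_differ'
```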
Empty file.
fedot/core/operations/atomized_model/atomized_model.py
@@ -1,18 +1,11 @@
from collections import Counter
from datetime import timedelta
from functools import reduce
from operator import and_, or_
from typing import Any, Callable, Dict, List, Optional, Set, Union

from golem.core.tuning.simultaneous import SimultaneousTuner

from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.operation import Operation
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.metrics_repository import MetricCallable
from fedot.core.repository.operation_types_repository import OperationMetaInfo, atomized_model_type


@@ -26,6 +19,11 @@ def __init__(self, pipeline: 'Pipeline'):
super().__init__(operation_type=atomized_model_type())
self.pipeline = pipeline

@property
def acceptable_task_types(self):
root_operation = self.pipeline.root_node.operation
return root_operation.acceptable_task_types

def fit(self, params: Optional[Union[OperationParameters, dict]], data: InputData) -> ('Pipeline', OutputData):
predicted_train = self.pipeline.fit(input_data=data)
fitted_atomized_operation = self.pipeline
@@ -50,27 +48,31 @@ def predict_for_fit(self,
return self.predict(fitted_operation, data, params, output_mode)

def fine_tune(self,
metric_function: MetricCallable,
metric_function: 'MetricCallable',
input_data: Optional[InputData] = None,
iterations: int = 50,
timeout: int = 5) -> 'AtomizedModel':
""" Method for tuning hyperparameters """
tuner = TunerBuilder(input_data.task) \
.with_tuner(SimultaneousTuner) \
.with_metric(metric_function) \
.with_iterations(iterations) \
.with_timeout(timedelta(minutes=timeout)) \
.build(input_data)
tuned_pipeline = tuner.tune(self.pipeline)
tuned_atomized_model = AtomizedModel(tuned_pipeline)
return tuned_atomized_model
        # TODO fix tuner with atomized model
        # cannot be done this way due to a circular import problem
        # TODO add tests for atomized tuning
        # the original test was removed
# tuner = TunerBuilder(input_data.task) \
# .with_tuner(SimultaneousTuner) \
# .with_metric(metric_function) \
# .with_iterations(iterations) \
# .with_timeout(timedelta(minutes=timeout)) \
# .build(input_data)
# tuned_pipeline = tuner.tune(self.pipeline)
# tuned_atomized_model = AtomizedModel(tuned_pipeline)
# return tuned_atomized_model

@property
def metadata(self) -> OperationMetaInfo:
root_node = self.pipeline.root_node

def extract_metadata_from_pipeline(attr_name: str,
node_filter: Optional[Callable[[PipelineNode], bool]] = None,
node_filter: Optional[Callable[['PipelineNode'], bool]] = None,
reduce_function: Optional[Callable[[Set], Set]] = None) -> List[Any]:
""" Extract metadata from atomized pipeline
:param attr_name: extracting metadata property
@@ -110,8 +112,8 @@ def description(self, operation_params: Optional[dict] = None) -> str:
operation_types = map(lambda node: node.operation.operation_type,
self.pipeline.nodes)
operation_types_dict = dict(Counter(operation_types))
return f'{operation_type}_length:{operation_length}_depth:{operation_depth}' \
f'_types:{operation_types_dict}_id:{operation_id}'
return (f'{self.__class__}({operation_type}_length:{operation_length}_depth:{operation_depth}'
f'_types:{operation_types_dict}_id:{operation_id})')

@staticmethod
def assign_tabular_column_types(output_data: OutputData,
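A usage sketch for the updated class (construction only; tuning is omitted since fine_tune is disabled above, and the inner pipeline is illustrative):

```python
from fedot.core.operations.atomized_model.atomized_model import AtomizedModel
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline

atomized = AtomizedModel(Pipeline(PipelineNode('ridge')))
# acceptable task types are now delegated to the inner pipeline's root operation:
print(atomized.acceptable_task_types)
# description() now also reports the wrapping class:
print(atomized.description())
```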
@@ -10,7 +10,7 @@ def __init__(self, node: PipelineNode = None, operation_id: int = None, nodes_fr
# Need use the imports inside the class because of the problem of circular imports.
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.template import PipelineTemplate
from fedot.core.operations.atomized_model import AtomizedModel
from fedot.core.operations.atomized_model.atomized_model import AtomizedModel

super().__init__()
self.atomized_model_json_path = None
54 changes: 54 additions & 0 deletions fedot/core/operations/evaluation/atomized.py
@@ -0,0 +1,54 @@
import warnings
from typing import Optional

from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy, SkLearnEvaluationStrategy
from fedot.core.operations.evaluation.operation_implementations.data_operations.decompose \
import DecomposerRegImplementation
from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_filters \
import IsolationForestRegImplementation
from fedot.core.operations.evaluation.operation_implementations. \
data_operations.sklearn_filters import LinearRegRANSACImplementation, NonLinearRegRANSACImplementation
from fedot.core.operations.evaluation.operation_implementations. \
data_operations.sklearn_selectors import LinearRegFSImplementation, NonLinearRegFSImplementation
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_decompose import \
AtomizedTimeSeriesDecomposer
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_ts_differ import \
AtomizedTimeSeriesDiffer
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_ts_sampler import \
AtomizedTimeSeriesSampler
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_ts_scaler import \
AtomizedTimeSeriesScaler
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_ts_transform_to_time import \
AtomizedTimeSeriesToTime
from fedot.core.operations.evaluation.operation_implementations.models.knn import FedotKnnRegImplementation
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.utilities.random import ImplementationRandomStateHandler

warnings.filterwarnings("ignore", category=UserWarning)


class FedotAtomizedStrategy(EvaluationStrategy):
_operations_by_types = {
'atomized_ts_differ': AtomizedTimeSeriesDiffer,
'atomized_ts_scaler': AtomizedTimeSeriesScaler,
'atomized_ts_sampler': AtomizedTimeSeriesSampler,
'atomized_ts_to_time': AtomizedTimeSeriesToTime,
'atomized_ts_decomposer': AtomizedTimeSeriesDecomposer,
}

def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
self.operation_impl = self._convert_to_operation(operation_type)
super().__init__(operation_type, params)

def fit(self, train_data: InputData):
model = self.operation_impl(self.params_for_fit.get('pipeline'))
return model.fit(train_data)

def predict(self, trained_operation, predict_data: InputData) -> OutputData:
prediction = trained_operation.predict(predict_data)
return prediction

def predict_for_fit(self, trained_operation, predict_data: InputData) -> OutputData:
prediction = trained_operation.predict_for_fit(predict_data)
return prediction
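A hedged sketch of how the strategy is driven; in FEDOT it is normally constructed by the repository/strategy resolution layer rather than by hand, and this assumes OperationParameters accepts keyword parameters as elsewhere in the framework:

```python
from fedot.core.operations.evaluation.atomized import FedotAtomizedStrategy
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

inner = PipelineBuilder().add_node('ridge').build()
strategy = FedotAtomizedStrategy('atomized_ts_differ',
                                 OperationParameters(pipeline=inner))
# strategy.fit(train_data) instantiates AtomizedTimeSeriesDiffer with the inner
# pipeline and delegates fitting; predict()/predict_for_fit() delegate likewise.
```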
@@ -3,6 +3,8 @@

import numpy as np
import pandas as pd

from fedot.utilities.window_size_selector import WindowSizeSelector
from golem.core.log import default_log
from scipy.ndimage import gaussian_filter
from sklearn.decomposition import TruncatedSVD
@@ -115,6 +117,11 @@ def _check_and_correct_window_size(self, time_series: np.array, forecast_length:

"""

if self.params.get('autotune_window', 0) == 1:
new = int(WindowSizeSelector(method='hac', window_range=(5, 25))
.get_window_size(time_series) * len(time_series) / 100)
self.params.update(window_size=new, autotune_window=0)

# Maximum threshold
if self.window_size + forecast_length > len(time_series):
            raise ValueError(f"Window size is too high ({self.window_size}) for the provided data length {len(time_series)}")
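A standalone sketch of the autotune step added above; it assumes, per the arithmetic in the diff, that WindowSizeSelector returns the window as a percentage of the series length:

```python
import numpy as np

from fedot.utilities.window_size_selector import WindowSizeSelector

ts = np.sin(np.linspace(0, 20, 300)) + np.random.normal(0, 0.1, 300)
window_pct = WindowSizeSelector(method='hac', window_range=(5, 25)).get_window_size(ts)
window_size = int(window_pct * len(ts) / 100)  # rescale percentage to an absolute lag count
```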
@@ -0,0 +1,43 @@
from typing import Optional

from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_ts_mixins import \
AtomizedTimeSeriesBuildFactoriesMixin
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline


class AtomizedTimeSeriesDecomposer(AtomizedTimeSeriesBuildFactoriesMixin):
def __init__(self, pipeline: Optional['Pipeline'] = None):
if pipeline is None:
pipeline = Pipeline(PipelineNode('ridge'))
self.pipeline = pipeline

def _decompose(self, data: InputData, fit_stage: bool):
        # split the merged features into the lagged block and the upstream model forecast
forecast_length = data.task.task_params.forecast_length
data_from_lagged = data.features[:, :-forecast_length]
data_from_model = data.features[:, -forecast_length:]
new_target = data.target
if fit_stage:
new_target -= data_from_model

new_data = InputData(idx=data.idx,
features=data_from_lagged,
target=new_target,
data_type=data.data_type,
task=data.task,
supplementary_data=data.supplementary_data)
return new_data

def fit(self, data: InputData):
new_data = self._decompose(data, fit_stage=True)
self.pipeline.fit(new_data)
return self

def predict(self, data: InputData) -> OutputData:
new_data = self._decompose(data, fit_stage=False)
return self.pipeline.predict(new_data)

def predict_for_fit(self, data: InputData) -> OutputData:
return self.predict(data)
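A hypothetical smoke test for the decomposer; the table below mimics the merged lagged-plus-forecast features `_decompose` expects (here, 5 lagged columns followed by 2 forecast columns), with all shapes and values invented for illustration:

```python
import numpy as np

from fedot.core.data.data import InputData
from fedot.core.operations.evaluation.operation_implementations.models.atomized.atomized_decompose import \
    AtomizedTimeSeriesDecomposer
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams

task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=2))
features = np.random.rand(10, 7)   # 5 lagged columns + 2 upstream-forecast columns
target = np.random.rand(10, 2)     # one row per sample, forecast_length columns
data = InputData(idx=np.arange(10), features=features, target=target,
                 data_type=DataTypesEnum.table, task=task)

model = AtomizedTimeSeriesDecomposer().fit(data)  # fits the default 'ridge' pipeline on residuals
residual_forecast = model.predict(data)
```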