Skip to content

Commit

Permalink
add monash example
Browse files Browse the repository at this point in the history
  • Loading branch information
v1docq committed Apr 16, 2024
1 parent 79f75bb commit e692c87
Show file tree
Hide file tree
Showing 8 changed files with 2,872 additions and 123 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
metric='rmse',
timeout=15,
with_tuning=False,
pop_size=10,
industrial_strategy='forecasting_assumptions',
n_jobs=2,
logging_level=30)
Expand Down

Large diffs are not rendered by default.

16 changes: 9 additions & 7 deletions fedot_ind/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
from fedot.core.pipelines.pipeline import Pipeline

from golem.core.optimisers.opt_history_objects.opt_history import OptHistory

from fedot.core.pipelines.adapters import PipelineAdapter
from fedot.core.utils import fedot_project_root
from fedot.core.visualisation.pipeline_specific_visuals import PipelineHistoryVisualizer
from fedot_ind.api.utils.checkers_collections import DataCheck
from fedot_ind.api.utils.industrial_strategy import IndustrialStrategy
Expand Down Expand Up @@ -107,6 +104,7 @@ def __init__(self, **kwargs):
self.predicted_probs = None
self.predict_data = None
self.target_encoder = None
self.is_finetuned = False

# map Fedot params to Industrial params
self.config_dict = kwargs
Expand Down Expand Up @@ -177,6 +175,7 @@ def fit(self,
self.solver = self.industrial_strategy_class.fit(self.train_data)
else:
self.solver.fit(self.train_data)
self.is_finetuned = False

def predict(self,
predict_data: tuple,
Expand All @@ -196,7 +195,7 @@ def predict(self,
self.predict_data = DataCheck(input_data=self.predict_data,
task=self.config_dict['problem'],
task_params=self.task_params).check_input_data()
if self.industrial_strategy is not None:
if self.industrial_strategy is not None and not self.is_finetuned:
self.predicted_labels = self.industrial_strategy_class.predict(self.predict_data, predict_mode)
else:
if self.condition_check.solver_is_fedot_class(self.solver):
Expand Down Expand Up @@ -234,7 +233,7 @@ def predict_proba(self,
self.predict_data = DataCheck(input_data=self.predict_data,
task=self.config_dict['problem'],
task_params=self.task_params).check_input_data()
if self.industrial_strategy is not None:
if self.industrial_strategy is not None and not self.is_finetuned:
self.predicted_labels = self.industrial_strategy_class.predict(self.predict_data, predict_mode)
else:
if self.condition_check.solver_is_fedot_class(self.solver):
Expand Down Expand Up @@ -263,7 +262,9 @@ def finetune(self,
"""
if not self.condition_check.input_data_is_fedot_type(train_data):
input_preproc = DataCheck(input_data=train_data, task=self.config_dict['problem'])
input_preproc = DataCheck(input_data=train_data,
task=self.config_dict['problem'],
task_params=self.task_params)
train_data = input_preproc.check_input_data()
self.target_encoder = input_preproc.get_target_encoder()
tuning_params = ApiConverter.tuning_params_is_none(tuning_params)
Expand All @@ -275,12 +276,13 @@ def finetune(self,
elif not self.condition_check.solver_is_none(model_to_tune):
model_to_tune = model_to_tune
else:
model_to_tune = deepcopy(self.config_dict['initial_assumption'])
model_to_tune = deepcopy(self.config_dict['initial_assumption']).build()
tuning_params['tuner'] = tuner_type
pipeline_tuner, model_to_tune = build_tuner(self, model_to_tune, tuning_params, train_data, mode)
if abs(pipeline_tuner.obtained_metric) > tuned_metric:
tuned_metric = abs(pipeline_tuner.obtained_metric)
self.solver = model_to_tune
self.is_finetuned = True

def get_metrics(self,
target: Union[list, np.array] = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,11 @@ def _create_channel_params(self, train_data):
family, link = self._check_glm_params(kurtosis(train_data.features), skew(train_data.features))
self.multi_dim_dispatcher.params_for_fit = {'family': family,
'link': link}
return train_data

def fit(self, train_data: InputData):
    """Fit the wrapped multi-dimensional dispatcher on converted input data.

    The input is converted with ``multi_dim_dispatcher._convert_input_data``,
    passed through ``_create_channel_params`` exactly once, and the data
    returned by that step is handed to ``multi_dim_dispatcher.fit``.

    Args:
        train_data: training data to fit on.

    Returns:
        The result of ``multi_dim_dispatcher.fit`` for the prepared data.
    """
    converted_data = self.multi_dim_dispatcher._convert_input_data(train_data)
    # Previously the parameter setup was invoked twice in a row and the first
    # result was thrown away; one call whose return value is used is enough.
    prepared_data = self._create_channel_params(converted_data)
    return self.multi_dim_dispatcher.fit(prepared_data)

def predict(self, trained_operation, predict_data: InputData, output_mode: str = 'labels') -> OutputData:
Expand Down
9 changes: 6 additions & 3 deletions fedot_ind/core/repository/constanst_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,16 +254,19 @@ class FedotOperationConstant(Enum):
'classification': PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node(
'logit'),
'regression': PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node('treg'),
'ts_forecasting': PipelineBuilder().add_node('ar')
'ts_forecasting': PipelineBuilder().add_node('eigen_basis',
params={'low_rank_approximation': False,
'rank_regularization': 'explained_dispersion'}).add_node(
'ar')
}

FEDOT_TS_FORECASTING_ASSUMPTIONS = {
'lagged_ridge': PipelineBuilder().add_node('lagged').add_node('ridge'),
'eigen_ar': PipelineBuilder().add_node('eigen_basis',
params={'low_rank_approximation': False,
'rank_regularization': 'explained_dispersion'}).add_node('ar'),
'glm': PipelineBuilder().add_node('glm')
}
'cgru': PipelineBuilder().add_node("lagged").add_node('cgru', params={'loss': 'mse',
'optimizer': 'adamw'})}

FEDOT_ENSEMBLE_ASSUMPTIONS = {
'classification': PipelineBuilder().add_node('logit'),
Expand Down
32 changes: 31 additions & 1 deletion fedot_ind/core/repository/industrial_implementations/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,33 @@ def transform_smoothing(self, input_data: InputData) -> OutputData:
return output_data


def _check_and_correct_window_size(self, time_series: np.ndarray, forecast_length: int):
    """Check the lagged window size and correct it if it does not fit the series.

    The largest admissible window is a quarter of the series length that is
    left after reserving ``forecast_length + 1`` elements (never below 1).
    When the current window size is 0 or exceeds that limit, a new one is
    drawn at random from
    ``range(3 * forecast_length, limit, round(1.5 * forecast_length))``.
    If that range is empty (the series is too short to hold a window of three
    forecast horizons) the limit itself is used, because sampling from an
    empty list makes ``np.random.choice`` raise ``ValueError``.
    Finally, a window size below ``self.window_size_minimum`` is raised to
    that minimum.

    Args:
        time_series: time series for transformation
        forecast_length: forecast length

    Returns:
        None. The corrected value is written to ``self.params['window_size']``.
    """
    max_allowed_window_size = max(1, round((len(time_series) - forecast_length - 1) * 0.25))

    if self.window_size == 0 or self.window_size > max_allowed_window_size:
        # A zero step would make ``range`` raise, so keep it at least 1.
        step = max(1, round(1.5 * forecast_length))
        window_list = list(range(3 * forecast_length, max_allowed_window_size, step))
        if window_list:
            # Store a plain int rather than a NumPy scalar in the parameters.
            window_size = int(np.random.choice(window_list))
        else:
            # No candidate fits the series: use the largest admissible window.
            window_size = max_allowed_window_size
        self.log.message((f"Window size of lagged transformation was changed "
                          f"by WindowSizeSelector from {self.params.get('window_size')} to {window_size}"))
        self.params.update(window_size=window_size)

    # Minimum threshold
    # NOTE(review): ``self.window_size`` is read again here; presumably it
    # reflects the value just written to ``self.params`` - confirm on the class.
    if self.window_size < self.window_size_minimum:
        self.log.info((f"Warning: window size of lagged transformation was changed "
                       f"from {self.params.get('window_size')} to {self.window_size_minimum}"))
        self.params.update(window_size=self.window_size_minimum)


def transform_lagged_for_fit(self, input_data: InputData) -> OutputData:
"""Method for transformation of time series to lagged form for fit stage
Expand All @@ -249,7 +276,10 @@ def transform_lagged_for_fit(self, input_data: InputData) -> OutputData:
forecast_length = new_input_data.task.task_params.forecast_length
# Correct window size parameter
self._check_and_correct_window_size(new_input_data.features, forecast_length)
window_size = 3*forecast_length
window_list = list(range(3 * forecast_length,
round(input_data.features.shape[0] * 0.25),
round(1.5 * forecast_length)))
window_size = np.random.choice(window_list)
new_idx, transformed_cols, new_target = transform_features_and_target_into_lagged(
input_data,
forecast_length,
Expand Down
4 changes: 3 additions & 1 deletion fedot_ind/core/repository/initializer_industrial_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.core.repository.industrial_implementations.abstract import merge_predicts, preprocess_predicts, \
predict_for_fit, predict, predict_operation, postprocess_predicts, update_column_types, transform_lagged, \
transform_lagged_for_fit, transform_smoothing, _build, split_any
transform_lagged_for_fit, transform_smoothing, _build, split_any, _check_and_correct_window_size
from fedot_ind.core.repository.industrial_implementations.optimisation import _get_default_industrial_mutations, \
MutationStrengthEnumIndustrial, has_no_data_flow_conflicts_in_industrial_pipeline, _crossover_by_type
from fedot_ind.core.tuning.search_space import get_industrial_search_space
Expand Down Expand Up @@ -79,6 +79,8 @@ def setup_repository(self):
setattr(LaggedImplementation, 'transform', transform_lagged)
setattr(LaggedImplementation, 'transform_for_fit',
transform_lagged_for_fit)
setattr(LaggedImplementation, '_check_and_correct_window_size',
_check_and_correct_window_size)
setattr(TsSmoothingImplementation, 'transform', transform_smoothing)

class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)
Expand Down
110 changes: 0 additions & 110 deletions fedot_ind/core/tuning/search_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,116 +519,6 @@ def get_industrial_search_space(self):
'sampling-scope': [['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']],
'type': 'categorical'}
},
'fast_ica': {
'n_components': {
'hyperopt-dist': hp.uniformint,
'sampling-scope': [1, 20],
'type': 'discrete'},
'fun': {
'hyperopt-dist': hp.choice,
'sampling-scope': [['logcosh', 'exp', 'cube']],
'type': 'categorical'}
},
'ransac_lin_reg': {
'min_samples': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.1, 0.9],
'type': 'continuous'},
'residual_threshold': {
'hyperopt-dist': hp.loguniform,
'sampling-scope': [0.1, 1000],
'type': 'continuous'},
'max_trials': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [50, 500],
'type': 'continuous'},
'max_skips': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [50, 500000],
'type': 'continuous'}
},
'ransac_non_lin_reg': {
'min_samples': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.1, 0.9],
'type': 'continuous'},
'residual_threshold': {
'hyperopt-dist': hp.loguniform,
'sampling-scope': [0.1, 1000],
'type': 'continuous'},
'max_trials': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [50, 500],
'type': 'continuous'},
'max_skips': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [50, 500000],
'type': 'continuous'}
},
'isolation_forest_reg': {
'max_samples': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.05, 0.99],
'type': 'continuous'},
'max_features': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.05, 0.99],
'type': 'continuous'},
'bootstrap': {
'hyperopt-dist': hp.choice,
'sampling-scope': [[True, False]],
'type': 'categorical'}
},
'isolation_forest_class': {
'max_samples': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.05, 0.99],
'type': 'continuous'},
'max_features': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.05, 0.99],
'type': 'continuous'},
'bootstrap': {
'hyperopt-dist': hp.choice,
'sampling-scope': [[True, False]],
'type': 'categorical'}
},
'rfe_lin_reg': {
'n_features_to_select': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.5, 0.9],
'type': 'continuous'},
'step': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.1, 0.2],
'type': 'continuous'}
},
'rfe_non_lin_reg': {
'n_features_to_select': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.5, 0.9],
'type': 'continuous'},
'step': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.1, 0.2],
'type': 'continuous'}
},
'poly_features': {
'degree': {
'hyperopt-dist': hp.uniformint,
'sampling-scope': [2, 5],
'type': 'discrete'},
'interaction_only': {
'hyperopt-dist': hp.choice,
'sampling-scope': [[True, False]],
'type': 'categorical'}
},
'polyfit': {
'degree': {
'hyperopt-dist': hp.uniformint,
'sampling-scope': [1, 6],
'type': 'discrete'}
},
'lagged': {
'window_size': {
'hyperopt-dist': hp.uniformint,
Expand Down

0 comments on commit e692c87

Please sign in to comment.