diff --git a/fedot_ind/api/exper.py b/fedot_ind/api/exper.py
index 108105b4f..bc8a5bf98 100644
--- a/fedot_ind/api/exper.py
+++ b/fedot_ind/api/exper.py
@@ -3,43 +3,49 @@ if __name__ == "__main__":
-    # datasets_bad_f1 = [
-    #     'EOGVerticalSignal',
-    #     'ScreenType',
-    #     'CricketY',
-    #     'ElectricDevices',
-    #     'Lightning7'
-    # ]
+    datasets_bad_f1 = [
+        # 'EOGVerticalSignal',
+        # 'ScreenType',
+        # 'CricketY',
+        # 'ElectricDevices',
+        'Lightning7'
+    ]
 
-    # datasets_good_f1 = [
-    #     'Car',
-    #     'ECG5000',
-    #     'Phoneme',
-    #     'Meat',
+    datasets_good_f1 = [
+        'Car',
+        'ECG5000',
+        "Beef",
+        # 'Phoneme',
+        # 'Meat',
         # 'RefrigerationDevices'
-    # ]
+    ]
 
     datasets_good_roc = [
-        'Chinatown',
-        # 'Earthquakes',
-        # 'Ham',
-        # 'ECG200',
-        # 'MiddlePhalanxOutlineCorrect',
-        # 'MoteStrain',
-        # 'TwoLeadECG'
+        # 'Chinatown',
+        'Computers',
+        # 'Earthquakes',
+        'Ham',
+        'ECG200',
+        'ECGFiveDays'
+        # 'MiddlePhalanxOutlineCorrect',
+        # 'MoteStrain',
+        # 'TwoLeadECG'
     ]
+    # node_scaling = PipelineNode('scaling')
+    # node_final = PipelineNode('rf', nodes_from=[node_scaling])
+    # rf_model = Pipeline(node_final)
 
-    # datasets_bad_roc = [
-    #     'Lightning2',
-    #     'WormsTwoClass',
-    #     'DistalPhalanxOutlineCorrect'
-    # ]
+    datasets_bad_roc = [
+        'Lightning2',
+        # 'WormsTwoClass',
+        # 'DistalPhalanxOutlineCorrect'
+    ]
 
     for group in [
-        # datasets_bad_f1,
-        # datasets_good_f1,
+        datasets_bad_f1,
+        datasets_good_f1,
         datasets_good_roc,
-        # datasets_bad_roc
+        datasets_bad_roc
    ]:
 
         for dataset_name in group:
@@ -52,15 +58,14 @@
                     # 'wavelet_basis',
                     'data_driven_basis'
                 ],
-                tuning_iterations=10,
-                tuning_timeout=2,
+                tuning_iterations=30,
+                tuning_timeout=15.0,
                 use_cache=False,
-                timeout=1,
-                n_jobs=2,
+                timeout=10,
+                n_jobs=6,
             )
 
             train_data, test_data = DataLoader(dataset_name=dataset_name).load_data()
-
             model = industrial.fit(features=train_data[0], target=train_data[1])
             labels = industrial.predict(features=test_data[0], target=test_data[1])
@@ -70,5 +75,5 @@
                                          metric_names=['f1', 'roc_auc'])
             for pred, kind in zip([labels, probs], ['labels', 'probs']):
                 industrial.save_predict(predicted_data=pred, kind=kind)
-            industrial.save_metrics(metrics=metric)
+            _ = 1
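Review note: the last hunk swaps `industrial.save_metrics(metrics=metric)` for a `_ = 1` placeholder, so the metrics dict computed just above is no longer written to disk and ends up unused. If the intent is only to silence the call during this experiment, fine; otherwise a minimal restoration, assuming `metric` is still the `{'f1': ..., 'roc_auc': ...}` dict produced a few lines earlier, would be:

```python
# Hypothetical restoration (not part of this patch): keep persisting metrics
# via the saver API that already exists in this PR (writes metrics.csv).
industrial.save_metrics(metrics=metric)
```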
diff --git a/fedot_ind/api/rank_experiment.py b/fedot_ind/api/rank_experiment.py
index d676e6607..a22d71931 100644
--- a/fedot_ind/api/rank_experiment.py
+++ b/fedot_ind/api/rank_experiment.py
@@ -1,18 +1,84 @@
 import os
 
 import numpy as np
-import pandas as pd
-from fedot.core.data.data import InputData
 import matplotlib.pyplot as plt
+from fedot.api.main import Fedot
+from sklearn.metrics import f1_score, roc_auc_score
 
 from fedot_ind.api.main import FedotIndustrial
 from fedot_ind.core.architecture.preprocessing.DatasetLoader import DataLoader
 from fedot_ind.core.models.statistical.StatsExtractor import StatsExtractor
 from fedot_ind.core.operation.transformation.basis.data_driven import DataDrivenBasisImplementation
+from sklearn.neural_network import MLPClassifier
+from sklearn.preprocessing import StandardScaler
+
+
+def extract_features(train_data, bss):
+    basis_1d_raw = bss._transform(train_data[0])
+    feature_train = stats_model.transform(basis_1d_raw)
+    return feature_train, bss
+
+
+def evaluate_model(feature_train, bss, test_data, model_type: str = 'MLP'):
+    if len(np.unique(test_data[1])) > 2:
+        metric_name = 'f1'
+    else:
+        metric_name = 'roc_auc'
+
+    if model_type == 'MLP':
+        clf = MLPClassifier(hidden_layer_sizes=(150, 100, 50), max_iter=300, activation='relu', solver='adam',
+                            random_state=42)
+    else:
+        clf = Fedot(
+            # available_operations=['fast_ica', 'scaling', 'normalization',
+            #                       'xgboost',
+            #                       'rf',
+            #                       'logit',
+            #                       'mlp',
+            #                       'knn',
+            #                       'pca'],
+            metric=metric_name, timeout=10, problem='classification', n_jobs=6)
+
+    scaler = StandardScaler()
+    scaler.fit(feature_train)
+    feature_train = scaler.transform(feature_train)
+    clf.fit(feature_train, train_data[1])
+    basis_1d_raw = bss._transform(test_data[0])
+    test_feature = stats_model.transform(basis_1d_raw)
+    test_feature = scaler.transform(test_feature)
+    if len(np.unique(test_data[1])) > 2:
+        metric = f1_score(test_data[1], clf.predict(test_feature), average='weighted')
+    else:
+        metric = roc_auc_score(test_data[1], clf.predict(test_feature), average='weighted')
+    return metric, test_feature
+
+
+# def visualise_and_save():
+#     for class_number in np.unique(train_data[1]):
+#         for basis_name, basis in zip(['basis_before_power_iterations', 'basis_after_power_iterations'],
+#                                      [basis_1d_raw, basis_1d_approx]):
+#             class_idx = np.where(train_data[1] == class_number)[0]
+#             class_slice = np.take(basis, class_idx, 0)
+#             pd.DataFrame(np.median(class_slice, axis=0)).T.plot()
+#             # plt.show()
+#             plt.savefig(f'{dataset_name}/{basis_name}_{class_number}_median_component.png', bbox_inches='tight')
+#             # plt.title(f'mean_{basis_name}_components_for_{class_number}_class')
+#     rank_distrib = pd.DataFrame([rank_distribution_befor, rank_distribution_after]).T
+#     rank_distrib.columns = ['HT_approach',
+#                             'Proposed_approach']
+#     rank_distrib.plot(kind='kde')
+#     # plt.show()
+#     rank_dispersion_ht = np.round(rank_distrib['HT_approach'].std(), 3)
+#     rank_dispersion_new = np.round(rank_distrib['Proposed_approach'].std(), 3)
+#     plt.savefig(f'{dataset_name}/rank_distrib. '
+#                 f'Classical_rank_{low_rank_befor}_std_{rank_dispersion_ht}.'
+#                 f'New_{low_rank_after}_std_{rank_dispersion_new}.png', bbox_inches='tight')
+#     rank_distrib['classes'] = train_data[1]
 
 
 if __name__ == "__main__":
     datasets_bad_f1 = [
-        # 'EOGVerticalSignal',
+        #'EOGVerticalSignal',
         # 'ScreenType',
         # 'CricketY',
         # 'ElectricDevices',
@@ -20,11 +86,11 @@
     ]
 
     datasets_good_f1 = [
-        # 'Car',
-        # 'ECG5000',
-        # "Beef"
+        'Car',
+        'ECG5000',
+        "Beef",
         # 'Phoneme',
-        # 'Meat',
+        'Meat',
         # 'RefrigerationDevices'
     ]
 
@@ -39,12 +105,16 @@
         # 'MoteStrain',
         # 'TwoLeadECG'
     ]
+    # node_scaling = PipelineNode('scaling')
+    # node_final = PipelineNode('rf', nodes_from=[node_scaling])
+    # rf_model = Pipeline(node_final)
 
     datasets_bad_roc = [
         'Lightning2',
         # 'WormsTwoClass',
         # 'DistalPhalanxOutlineCorrect'
     ]
+    stats_model = StatsExtractor({'window_mode': False, 'window_size': 5, 'use_cache': False, 'n_jobs': 4})
 
     for group in [
         datasets_bad_f1,
@@ -63,7 +133,7 @@
                     # 'wavelet_basis',
                     'data_driven_basis'
                 ],
-                tuning_iterations=10,
+                tuning_iterations=30,
                 tuning_timeout=15,
                 use_cache=False,
                 timeout=5,
@@ -73,37 +143,19 @@
                 os.makedirs(f'./{dataset_name}')
             except Exception:
                 _ = 1
+            train_data, test_data = DataLoader(dataset_name=dataset_name).load_data()
+            # bss = DataDrivenBasisImplementation({'sv_selector': 'median', 'window_size': 20})
+            # bss.low_rank_approximation = False
+            # train_feature, bss = extract_features(train_data, bss)
+            # f1_HT, test_feature = evaluate_model(train_feature, bss, test_data, model_type='Auto')
+            bss = DataDrivenBasisImplementation({'sv_selector': 'median', 'window_size': 20})
-            bss.low_rank_approximation = False
-            basis_1d_raw = bss._transform(train_data[0])
-            rank_distribution_befor = bss.rank_distribution
-            low_rank_befor = bss.SV_threshold
             bss.low_rank_approximation = True
             bss.SV_threshold = None
-            basis_1d_approx = bss._transform(train_data[0])
-            rank_distribution_after = bss.rank_distribution
-            low_rank_after = bss.SV_threshold
-
-            HT_feature = stats_model.transform(basis_1d_raw)
-            for class_number in np.unique(train_data[1]):
-                for basis_name, basis in zip(['basis_before_power_iterations', 'basis_after_power_iterations'],
-                                             [basis_1d_raw, basis_1d_approx]):
-                    class_idx = np.where(train_data[1] == class_number)[0]
-                    class_slice = np.take(basis, class_idx, 0)
-                    pd.DataFrame(np.median(class_slice, axis=0)).T.plot()
-                    # plt.show()
-                    plt.savefig(f'{dataset_name}/{basis_name}_{class_number}_median_component.png', bbox_inches='tight')
-                    # plt.title(f'mean_{basis_name}_components_for_{class_number}_class')
-            rank_distrib = pd.DataFrame([rank_distribution_befor, rank_distribution_after]).T
-            rank_distrib.columns = ['HT_approach',
-                                    'Proposed_approach']
-            rank_distrib.plot(kind='kde')
-            # plt.show()
-            rank_dispersion_ht = np.round(rank_distrib['HT_approach'].std(), 3)
-            rank_dispersion_new = np.round(rank_distrib['Proposed_approach'].std(), 3)
-            plt.savefig(f'{dataset_name}/rank_distrib. '
-                        f'Classical_rank_{low_rank_befor}_std_{rank_dispersion_ht}.'
-                        f'New_{low_rank_after}_std_{rank_dispersion_new}.png', bbox_inches='tight')
-            rank_distrib['classes'] = train_data[1]
+            train_feature, bss = extract_features(train_data, bss)
+            f1_PI, test_feature_PI = evaluate_model(train_feature, bss, test_data, model_type='Auto')
+            print(f'Dataset-{dataset_name}')
+            # print(f'HT_metric-{f1_HT}')
+            print(f'PI_metric-{f1_PI}')
             _ = 1
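Review note on the new helpers: `extract_features` and `evaluate_model` read `stats_model` and `train_data` from module scope rather than taking them as arguments, and the binary branch scores `roc_auc_score` on hard `predict` labels, which makes the AUC coarser than scoring on probabilities. A self-contained sketch of the same scale-train-score flow with everything passed explicitly (names here are illustrative, not part of the patch):

```python
import numpy as np
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler


def fit_and_score(train_x, train_y, test_x, test_y):
    # Scale on train statistics only, as evaluate_model() does above.
    scaler = StandardScaler().fit(train_x)
    clf = MLPClassifier(hidden_layer_sizes=(150, 100, 50), max_iter=300,
                        activation='relu', solver='adam', random_state=42)
    clf.fit(scaler.transform(train_x), train_y)
    if len(np.unique(test_y)) > 2:
        # Multiclass: weighted F1 over hard labels.
        return f1_score(test_y, clf.predict(scaler.transform(test_x)), average='weighted')
    # Binary: score on positive-class probabilities instead of labels.
    proba = clf.predict_proba(scaler.transform(test_x))[:, 1]
    return roc_auc_score(test_y, proba)
```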
diff --git a/fedot_ind/api/utils/saver_collections.py b/fedot_ind/api/utils/saver_collections.py
index 7bacc931f..cb2bb4118 100644
--- a/fedot_ind/api/utils/saver_collections.py
+++ b/fedot_ind/api/utils/saver_collections.py
@@ -14,7 +14,9 @@ def __init__(self, dataset_name: str, generator_name: str, output_dir: str = Non
         self.logger = logging.getLogger(self.__class__.__name__)
 
         self.save_method_dict = {'labels': self.save_labels,
                                  'probs': self.save_probs,
-                                 'metrics': self.save_metrics}
+                                 'metrics': self.save_metrics,
+                                 'baseline_metrics': self.save_baseline_metrics}
 
     def __init_save_path(self, dataset_name, generator_name, output_dir):
         if output_dir is None:
@@ -46,3 +48,6 @@ def save_metrics(self, metrics: dict):
         df = pd.DataFrame(metrics, index=[0])
         df.to_csv(os.path.join(self.path, 'metrics.csv'))
 
+    def save_baseline_metrics(self, metrics: dict):
+        df = pd.DataFrame(metrics, index=[0])
+        df.to_csv(os.path.join(self.path, 'baseline_metrics.csv'))
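The saver gains a fourth routing entry, so baseline results land in their own CSV next to `metrics.csv`. A usage sketch (constructor arguments and metric values are illustrative):

```python
from fedot_ind.api.utils.saver_collections import ResultSaver

# Illustrative values; the dict routes 'baseline_metrics' to
# save_baseline_metrics, which writes baseline_metrics.csv.
saver = ResultSaver(dataset_name='Lightning7', generator_name='data_driven_basis')
saver.save_method_dict['baseline_metrics']({'f1': 0.71, 'roc_auc': 0.78})
```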
diff --git a/fedot_ind/core/architecture/experiment/TimeSeriesClassifierPreset.py b/fedot_ind/core/architecture/experiment/TimeSeriesClassifierPreset.py
index 0665ada11..1d60efb01 100644
--- a/fedot_ind/core/architecture/experiment/TimeSeriesClassifierPreset.py
+++ b/fedot_ind/core/architecture/experiment/TimeSeriesClassifierPreset.py
@@ -15,7 +15,6 @@
 from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
 from fedot.core.repository.tasks import Task, TaskTypesEnum
 from golem.core.tuning.simultaneous import SimultaneousTuner
-
 from fedot_ind.api.utils.saver_collections import ResultSaver
 from fedot_ind.core.architecture.postprocessing.Analyzer import PerformanceAnalyzer
 from fedot_ind.core.architecture.utils.utils import default_path_to_save_results
@@ -49,7 +48,7 @@ def __init__(self, params: Optional[OperationParameters] = None):
         self.model_params = params.get('model_params')
         self.dataset_name = params.get('dataset')
         self.tuning_iters = params.get('tuning_iterations', 30)
-        self.tuning_timeout = params.get('tuning_timeout', 15)
+        self.tuning_timeout = params.get('tuning_timeout', 15.0)
 
         self.output_folder = params.get('output_folder', default_path_to_save_results())
         self.saver = ResultSaver(dataset_name=self.dataset_name,
@@ -125,7 +124,10 @@ def _build_pipeline(self):
         for index, (basis, extractor) in enumerate(zip(self.branch_nodes, self.extractors)):
             pipeline_builder.add_node(basis, branch_idx=index)
             pipeline_builder.add_node(extractor, branch_idx=index)
-        pipeline_builder.join_branches('rf')
+        pipeline_builder.join_branches('mlp', params={'hidden_layer_sizes': (150, 100, 50),
+                                                      'max_iter': 300,
+                                                      'activation': 'relu',
+                                                      'solver': 'adam'})
 
         return pipeline_builder.build()
@@ -194,7 +196,18 @@ def fit(self, features,
         metric = 'roc_auc' if train_data_preprocessed.num_classes == 2 else 'f1'
         self.model_params.update({'metric': metric})
-        self.predictor = Fedot(**self.model_params)
+        self.predictor = Fedot(available_operations=['scaling',
+                                                     'normalization',
+                                                     'fast_ica',
+                                                     'xgboost',
+                                                     'rfr',
+                                                     'rf',
+                                                     'logit',
+                                                     'mlp',
+                                                     'knn',
+                                                     'lgbm',
+                                                     'pca'],
+                               **self.model_params)
 
         self.predictor.fit(train_data_preprocessed)
@@ -218,7 +231,7 @@ def predict(self, features: pd.DataFrame, target: np.array) -> dict:
                                                  target=test_data_preprocessed.target,
                                                  data_type=test_data_preprocessed.data_type,
                                                  task=test_data_preprocessed.task)
-        self.prediction_label_baseline = self.baseline_model.predict(self.test_data_preprocessed).predict
+        self.prediction_label_baseline = self.baseline_model.predict(self.test_data_preprocessed, 'labels').predict
         self.prediction_label = self.predictor.predict(self.test_data_preprocessed)
         return self.prediction_label
@@ -228,7 +241,7 @@ def predict_proba(self, features, target) -> dict:
         test_data_preprocessed = self.preprocessing_pipeline.root_node.predict(test_data)
         self.test_data_preprocessed.predict = np.squeeze(test_data_preprocessed.predict)
-        self.prediction_proba_baseline = self.baseline_model.predict(self.test_data_preprocessed,'probs').predict
+        self.prediction_proba_baseline = self.baseline_model.predict(self.test_data_preprocessed, 'probs').predict
         self.prediction_proba = self.predictor.predict_proba(self.test_data_preprocessed)
         return self.prediction_proba
diff --git a/fedot_ind/core/operation/transformation/basis/data_driven.py b/fedot_ind/core/operation/transformation/basis/data_driven.py
index 1f81623d8..b2ae0395e 100644
--- a/fedot_ind/core/operation/transformation/basis/data_driven.py
+++ b/fedot_ind/core/operation/transformation/basis/data_driven.py
@@ -1,5 +1,4 @@
 import math
-import time
 from multiprocessing import Pool
 from typing import Tuple, TypeVar, Optional
@@ -11,8 +10,7 @@
 from sklearn.metrics import f1_score, roc_auc_score
 from tensorly.decomposition import parafac
 from tqdm import tqdm
-
-from fedot_ind.core.architecture.preprocessing import InputData
+from fedot.core.data.data import InputData
 from fedot_ind.core.operation.decomposition.matrix_decomposition.fast_svd import RSVDDecomposition
 from fedot_ind.core.operation.transformation.basis.abstract_basis import BasisDecompositionImplementation
@@ -39,7 +37,8 @@ def __init__(self, params: Optional[OperationParameters] = None):
         self.window_size = params.get('window_size')
         self.basis = None
         self.SV_threshold = None
-        self.sv_selector = params.get('sv_selector')
+        # self.sv_selector = params.get('sv_selector')
+        self.sv_selector = 'median'
         self.svd_estimator = RSVDDecomposition()
         self.low_rank_approximation = True
         self.logging_params.update({'WS': self.window_size,
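Two behavioural notes on these files: `_build_pipeline` now joins its feature branches with an `mlp` head whose parameters mirror the MLPClassifier used in rank_experiment.py, and `DataDrivenBasisImplementation` now hard-codes `sv_selector = 'median'`, silently overriding whatever `params` carries (consistent with `sv_selector` being commented out of the tuning search space below). A minimal sketch of the new pipeline topology, with an assumed import path and a single illustrative branch:

```python
# Sketch only: one basis -> extractor branch joined by the new MLP head.
# The PipelineBuilder import path is an assumption about the FEDOT layout.
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

builder = PipelineBuilder()
branches = [('data_driven_basis', 'quantile_extractor')]  # illustrative branch
for idx, (basis, extractor) in enumerate(branches):
    builder.add_node(basis, branch_idx=idx)
    builder.add_node(extractor, branch_idx=idx)
builder.join_branches('mlp', params={'hidden_layer_sizes': (150, 100, 50),
                                     'max_iter': 300,
                                     'activation': 'relu',
                                     'solver': 'adam'})
pipeline = builder.build()
```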
diff --git a/fedot_ind/core/repository/data/default_operation_params.json b/fedot_ind/core/repository/data/default_operation_params.json
index 3a14f7d8f..2943d2cd6 100644
--- a/fedot_ind/core/repository/data/default_operation_params.json
+++ b/fedot_ind/core/repository/data/default_operation_params.json
@@ -2,6 +2,12 @@
   "rf": {
     "n_jobs": 1
   },
+  "mlp": {
+    "max_iter": 300,
+    "activation": "relu",
+    "solver": "adam"
+  },
   "rfr": {
     "n_jobs": 1
   },
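The new `mlp` block carries solver defaults only; `hidden_layer_sizes` is passed explicitly at the call sites (see `_build_pipeline` above). The keys map one-to-one onto sklearn's MLPClassifier, which a quick check confirms (the file path here is illustrative):

```python
import json
from sklearn.neural_network import MLPClassifier

# Illustrative path; adjust to where the repository JSON actually lives.
with open('fedot_ind/core/repository/data/default_operation_params.json') as f:
    defaults = json.load(f)

clf = MLPClassifier(**defaults['mlp'])  # max_iter=300, activation='relu', solver='adam'
```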
diff --git a/fedot_ind/core/tuning/search_space.py b/fedot_ind/core/tuning/search_space.py
index b38e05138..858ee005d 100644
--- a/fedot_ind/core/tuning/search_space.py
+++ b/fedot_ind/core/tuning/search_space.py
@@ -4,18 +4,19 @@
 
 industrial_search_space = {
     'data_driven_basis': {
-        'sv_selector': (hp.choice, [['median', 'mean', '0.25%']]),
-        'window_size': (hp.choice, [[x for x in range(5, 50, 5)]])},
+        # 'sv_selector': {'hyperopt-dist': hp.choice, 'sampling-scope': [['median', '0.75%', '0.25%']]},
+        'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]}},
     'wavelet_basis':
-        {'n_components': (hp.uniformint, [2, 10]),
-         'wavelet': (hp.choice, [['mexh', 'shan', 'morl', 'cmor', 'fbsp', 'db5', 'sym5']])},
+        {'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 10]},
+         'wavelet': {'hyperopt-dist': hp.choice,
+                     'sampling-scope': [['mexh', 'shan', 'morl', 'cmor', 'fbsp', 'db5', 'sym5']]}},
     'fourier_basis':
-        {'spectrum': (hp.choice, [['smoothed']]),
-         'threshold': (hp.uniformint, [10000, 50000])},
+        {'spectrum': {'hyperopt-dist': hp.choice, 'sampling-scope': [['smoothed']]},
+         'threshold': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [10000, 50000]}},
     'quantile_extractor':
-        {'window_mode': (hp.choice, [[True, False]]),
-         'window_size': (hp.choice, [[x for x in range(1, 50, 3)]])},
+        {'window_mode': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, True]]},
+         'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 50, 3)]]}},
     'recurrence_extractor':
         {'win_mode': (hp.choice, [[True, False]]),
@@ -24,19 +25,20 @@
          'max_signal_ratio': (hp.uniform, [0.5, 1]),
          'rec_metric': (hp.choice, [['chebyshev', 'cosine', 'euclidean' 'mahalanobis']])},
     'signal_extractor':
-        {'wavelet': (hp.choice, [['mexh', 'shan', 'morl', 'cmor', 'fbsp', 'db5', 'sym5']])}
+        {'wavelet': {'hyperopt-dist': hp.choice,
+                     'sampling-scope': [['mexh', 'shan', 'morl', 'cmor', 'fbsp', 'db5', 'sym5']]}}
 }
 
 
 def get_industrial_search_space(self):
     parameters_per_operation = {
         'kmeans': {
-            'n_clusters': (hp.uniformint, [2, 7])
+            'n_clusters': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 7]}
         },
         'adareg': {
-            'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]),
-            'loss': (hp.choice, [["linear", "square", "exponential"]])
+            'learning_rate': {'hyperopt-dist': hp.loguniform, 'sampling-scope': [np.log(1e-3), np.log(1)]},
+            'loss': {'hyperopt-dist': hp.choice, 'sampling-scope': [["linear", "square", "exponential"]]}
         },
         'gbr': {
@@ -50,20 +52,20 @@
             'alpha': (hp.uniform, [0.75, 0.99])
         },
         'logit': {
-            'C': (hp.uniform, [1e-2, 10.0])
+            'C': {'hyperopt-dist': hp.uniform, 'sampling-scope': [1e-2, 10.0]}
         },
         'rf': {
-            'criterion': (hp.choice, [["gini", "entropy"]]),
-            'max_features': (hp.uniform, [0.05, 1.0]),
-            'min_samples_split': (hp.uniformint, [2, 10]),
-            'min_samples_leaf': (hp.uniformint, [1, 15]),
-            'bootstrap': (hp.choice, [[True, False]])
+            'criterion': {'hyperopt-dist': hp.choice, 'sampling-scope': [["gini", "entropy"]]},
+            'max_features': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.05, 1.0]},
+            'min_samples_split': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 10]},
+            'min_samples_leaf': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 15]},
+            'bootstrap': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}
         },
         'lasso': {
-            'alpha': (hp.uniform, [0.01, 10.0])
+            'alpha': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.01, 10.0]}
         },
         'ridge': {
-            'alpha': (hp.uniform, [0.01, 10.0])
+            'alpha': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.01, 10.0]}
         },
         'rfr': {
@@ -82,21 +84,22 @@
         },
         'xgboost': {
-            'max_depth': (hp.uniformint, [1, 7]),
-            'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]),
-            'subsample': (hp.uniform, [0.05, 0.99]),
-            'min_child_weight': (hp.uniform, [1, 21])
+            'max_depth': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 7]},
+            'learning_rate': {'hyperopt-dist': hp.loguniform, 'sampling-scope': [np.log(1e-3), np.log(1)]},
+            'subsample': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.05, 0.99]},
+            'min_child_weight': {'hyperopt-dist': hp.uniform, 'sampling-scope': [1, 21]}
         },
         'svr': {
-            'loss': (hp.choice, [["epsilon_insensitive", "squared_epsilon_insensitive"]]),
-            'tol': (hp.loguniform, [np.log(1e-5), np.log(1e-1)]),
-            'C': (hp.uniform, [1e-4, 25.0]),
-            'epsilon': (hp.uniform, [1e-4, 1.0])
+            'loss': {'hyperopt-dist': hp.choice,
+                     'sampling-scope': [["epsilon_insensitive", "squared_epsilon_insensitive"]]},
+            'tol': {'hyperopt-dist': hp.loguniform, 'sampling-scope': [np.log(1e-5), np.log(1e-1)]},
+            'C': {'hyperopt-dist': hp.uniform, 'sampling-scope': [1e-4, 25.0]},
+            'epsilon': {'hyperopt-dist': hp.uniform, 'sampling-scope': [1e-4, 1.0]}
         },
         'dtreg': {
-            'max_depth': (hp.uniformint, [1, 11]),
-            'min_samples_split': (hp.uniformint, [2, 21]),
-            'min_samples_leaf': (hp.uniformint, [1, 21])
+            'max_depth': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 11]},
+            'min_samples_split': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 21]},
+            'min_samples_leaf': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 21]}
         },
         'treg': {
@@ -106,19 +109,19 @@
             'bootstrap': (hp.choice, [[True, False]])
         },
         'dt': {
-            'max_depth': (hp.uniformint, [1, 11]),
-            'min_samples_split': (hp.uniformint, [2, 21]),
-            'min_samples_leaf': (hp.uniformint, [1, 21])
+            'max_depth': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 11]},
+            'min_samples_split': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 21]},
+            'min_samples_leaf': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 21]}
         },
         'knnreg': {
-            'n_neighbors': (hp.uniformint, [1, 50]),
-            'weights': (hp.choice, [["uniform", "distance"]]),
-            'p': (hp.choice, [[1, 2]])
+            'n_neighbors': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 50]},
+            'weights': {'hyperopt-dist': hp.choice, 'sampling-scope': [["uniform", "distance"]]},
+            'p': {'hyperopt-dist': hp.choice, 'sampling-scope': [[1, 2]]}
         },
         'knn': {
-            'n_neighbors': (hp.uniformint, [1, 50]),
-            'weights': (hp.choice, [["uniform", "distance"]]),
-            'p': (hp.choice, [[1, 2]])
+            'n_neighbors': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 50]},
+            'weights': {'hyperopt-dist': hp.choice, 'sampling-scope': [["uniform", "distance"]]},
+            'p': {'hyperopt-dist': hp.choice, 'sampling-scope': [[1, 2]]}
         },
         'arima': {
             'p': (hp.uniformint, [1, 7]),
@@ -177,16 +180,17 @@
             'loss': (hp.choice, [['mae', 'mse']])
         },
         'pca': {
-            'n_components': (hp.uniform, [0.1, 0.99]),
-            'svd_solver': (hp.choice, [['full']])
+            'n_components': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.1, 0.99]},
+            'svd_solver': {'hyperopt-dist': hp.choice, 'sampling-scope': [['full']]}
         },
         'kernel_pca': {
-            'n_components': (hp.uniformint, [1, 20]),
-            'kernel': (hp.choice, [['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']])
+            'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 20]},
+            'kernel': {'hyperopt-dist': hp.choice,
+                       'sampling-scope': [['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']]}
         },
         'fast_ica': {
-            'n_components': (hp.uniformint, [1, 20]),
-            'fun': (hp.choice, [['logcosh', 'exp', 'cube']])
+            'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 20]},
+            'fun': {'hyperopt-dist': hp.choice, 'sampling-scope': [['logcosh', 'exp', 'cube']]}
         },
         'ransac_lin_reg': {
             'min_samples': (hp.uniform, [0.1, 0.9]),
@@ -206,9 +210,9 @@
             'bootstrap': (hp.choice, [[True, False]])
         },
         'isolation_forest_class': {
-            'max_samples': (hp.uniform, [0.05, 0.99]),
-            'max_features': (hp.uniform, [0.05, 0.99]),
-            'bootstrap': (hp.choice, [[True, False]])
+            'max_samples': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.05, 0.99]},
+            'max_features': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.05, 0.99]},
+            'bootstrap': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}
         },
         'rfe_lin_reg': {
             'n_features_to_select': (hp.uniform, [0.5, 0.9]),
@@ -219,11 +223,11 @@
             'step': (hp.uniform, [0.1, 0.2])
         },
         'poly_features': {
-            'degree': (hp.uniformint, [2, 5]),
-            'interaction_only': (hp.choice, [[True, False]])
+            'degree': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 5]},
+            'interaction_only': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}
         },
         'polyfit': {
-            'degree': (hp.uniformint, [1, 6])
+            'degree': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 6]}
         },
         'lagged': {
             'window_size': (hp.uniformint, [5, 500])
@@ -248,13 +252,13 @@
             'cut_part': (hp.uniform, [0, 0.9])
         },
         'lgbm': {
-            'class_weight': (hp.choice, [[None, 'balanced']]),
-            'num_leaves': (hp.uniformint, [2, 256]),
-            'learning_rate': (hp.loguniform, [np.log(0.01), np.log(0.2)]),
-            'colsample_bytree': (hp.uniform, [0.4, 1]),
-            'subsample': (hp.uniform, [0.4, 1]),
-            'reg_alpha': (hp.loguniform, [np.log(1e-8), np.log(10)]),
-            'reg_lambda': (hp.loguniform, [np.log(1e-8), np.log(10)])
+            'class_weight': {'hyperopt-dist': hp.choice, 'sampling-scope': [[None, 'balanced']]},
+            'num_leaves': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 256]},
+            'learning_rate': {'hyperopt-dist': hp.loguniform, 'sampling-scope': [np.log(0.01), np.log(0.2)]},
+            'colsample_bytree': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.4, 1]},
+            'subsample': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.4, 1]},
+            'reg_alpha': {'hyperopt-dist': hp.loguniform, 'sampling-scope': [np.log(1e-8), np.log(10)]},
+            'reg_lambda': {'hyperopt-dist': hp.loguniform, 'sampling-scope': [np.log(1e-8), np.log(10)]}
         },
         'lgbmreg': {
             'num_leaves': (hp.uniformint, [2, 256]),
@@ -265,12 +269,12 @@
             'reg_lambda': (hp.loguniform, [np.log(1e-8), np.log(10)])
         },
         'catboost': {
-            'max_depth': (hp.uniformint, [1, 11]),
-            'learning_rate': (hp.loguniform, [np.log(0.01), np.log(0.2)]),
-            'min_data_in_leaf': (hp.qloguniform, [0, 6, 1]),
-            'border_count': (hp.uniformint, [2, 255]),
-            'l2_leaf_reg': (hp.loguniform, [np.log(1e-8), np.log(10)]),
-            'loss_function': (hp.choice, [['Logloss', 'CrossEntropy']])
+            'max_depth': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 11]},
+            'learning_rate': {'hyperopt-dist': hp.loguniform, 'sampling-scope': [np.log(0.01), np.log(0.2)]},
+            'min_data_in_leaf': {'hyperopt-dist': hp.qloguniform, 'sampling-scope': [0, 6, 1]},
+            'border_count': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 255]},
+            'l2_leaf_reg': {'hyperopt-dist': hp.loguniform, 'sampling-scope': [np.log(1e-8), np.log(10)]},
+            'loss_function': {'hyperopt-dist': hp.choice, 'sampling-scope': [['Logloss', 'CrossEntropy']]}
         },
         'catboostreg': {
             'max_depth': (hp.uniformint, [1, 11]),
@@ -280,9 +284,9 @@
             'l2_leaf_reg': (hp.loguniform, [np.log(1e-8), np.log(10)])
         },
         'resample': {
-            'balance': (hp.choice, [['expand_minority', 'reduce_majority']]),
-            'replace': (hp.choice, [[True, False]]),
-            'balance_ratio': (hp.uniform, [0.3, 1])
+            'balance': {'hyperopt-dist': hp.choice, 'sampling-scope': [['expand_minority', 'reduce_majority']]},
+            'replace': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]},
+            'balance_ratio': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.3, 1]}
         },
         'lda': {
             'solver': (hp.choice, [['svd', 'lsqr', 'eigen']]),
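Migration note: each converted entry moves from the old `(hp.<dist>, [args])` tuple to a dict with explicit `'hyperopt-dist'` and `'sampling-scope'` keys, the format expected by the newer tuner; entries left as tuples above (gbr, rfr, treg, arima, lgbmreg, catboostreg, lda, ...) still await conversion. Two things worth a second look: `quantile_extractor`'s `window_mode` now samples from `[[True, True]]`, which effectively pins it to `True` (if unintentional, `[True, False]` restores the old behaviour), and the untouched `rec_metric` list is missing a comma, so `'euclidean' 'mahalanobis'` concatenates into one invalid metric name. For reference, a hand-rolled illustration of what the new format encodes, sampled with hyperopt directly:

```python
# Illustration only: draw one configuration from a new-format search space.
import hyperopt.pyll.stochastic as stochastic
from hyperopt import hp

rf_space = {
    'criterion': {'hyperopt-dist': hp.choice, 'sampling-scope': [["gini", "entropy"]]},
    'max_features': {'hyperopt-dist': hp.uniform, 'sampling-scope': [0.05, 1.0]},
}
sample = {name: stochastic.sample(spec['hyperopt-dist'](name, *spec['sampling-scope']))
          for name, spec in rf_space.items()}
print(sample)  # e.g. {'criterion': 'entropy', 'max_features': 0.42}
```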