From b7f852cd57fbed92a23bbac90d67e009e5fa83a7 Mon Sep 17 00:00:00 2001 From: Caparrini Date: Fri, 23 Feb 2024 18:18:06 +0100 Subject: [PATCH] Modified: values of hyperparams passed as dict in tree algorithms, allows to use more params, TODO input hyperparam validation. --- mloptimizer/genoptimizer/trees.py | 82 +++---------------------------- mloptimizer/genoptimizer/xgb.py | 21 ++------ 2 files changed, 11 insertions(+), 92 deletions(-) diff --git a/mloptimizer/genoptimizer/trees.py b/mloptimizer/genoptimizer/trees.py index 7168b3e..d258f4b 100644 --- a/mloptimizer/genoptimizer/trees.py +++ b/mloptimizer/genoptimizer/trees.py @@ -15,23 +15,8 @@ class TreeOptimizer(BaseOptimizer, ABC): def get_clf(self, individual): individual_dict = self.individual2dict(individual) - if "scale_pos_weight" in individual_dict.keys(): - class_weight = {0: 1, 1: individual_dict["scale_pos_weight"]} - else: - class_weight = "balanced" - - clf = DecisionTreeClassifier(criterion="gini", - class_weight=class_weight, - splitter="best", - max_features=None, - max_depth=individual_dict['max_depth'], - min_samples_split=individual_dict['min_samples_split'], - min_samples_leaf=individual_dict['min_samples_leaf'], - min_impurity_decrease=individual_dict['min_impurity_decrease'], - # min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], - ccp_alpha=individual_dict['ccp_alpha'], - max_leaf_nodes=None, - random_state=None) + clf = DecisionTreeClassifier(random_state=self.mlopt_seed, + **individual_dict) return clf @staticmethod @@ -56,21 +41,8 @@ class ForestOptimizer(TreeOptimizer, ABC): def get_clf(self, individual): individual_dict = self.individual2dict(individual) - clf = RandomForestClassifier(n_estimators=individual_dict['n_estimators'], - criterion="gini", - max_depth=individual_dict['max_depth'], - max_samples=individual_dict['max_samples'], - min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], - min_impurity_decrease=individual_dict['min_impurity_decrease'], - max_features=individual_dict['max_features'], - max_leaf_nodes=None, - bootstrap=True, - oob_score=True, - n_jobs=-1, - random_state=None, - verbose=0, - warm_start=False, - class_weight="balanced" + clf = RandomForestClassifier(random_state=self.mlopt_seed, + **individual_dict ) return clf @@ -97,34 +69,8 @@ class ExtraTreesOptimizer(ForestOptimizer, ABC): def get_clf(self, individual): individual_dict = self.individual2dict(individual) - class_weight = "balanced" - - if "scale_pos_weight" in individual_dict.keys(): - perc_class_one = individual_dict["scale_pos_weight"] - total = 10 - class_one = total * perc_class_one - class_zero = total - class_one - real_weight_zero = total / (2 * class_zero) - real_weight_one = total / (2 * class_one) - class_weight = {0: real_weight_zero, 1: real_weight_one} - - clf = ExtraTreesClassifier(n_estimators=individual_dict['n_estimators'], - criterion="gini", - max_depth=individual_dict['max_depth'], - # min_samples_split=individual_dict['min_samples_split'], - # min_samples_leaf=individual_dict['min_samples_leaf'], - min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], - min_impurity_decrease=individual_dict['min_impurity_decrease'], - max_features=individual_dict['max_features'], - max_samples=individual_dict['max_samples'], - max_leaf_nodes=None, - bootstrap=True, - oob_score=False, - n_jobs=-1, - random_state=None, - verbose=0, - warm_start=False, - class_weight=class_weight + clf = ExtraTreesClassifier(random_state=self.mlopt_seed, + **individual_dict ) return clf @@ -154,18 +100,6 @@ def get_hyperparams(self): def get_clf(self, individual): individual_dict = self.individual2dict(individual) - clf = GradientBoostingClassifier(n_estimators=individual_dict['n_estimators'], - criterion="friedman_mse", - max_depth=individual_dict['max_depth'], - # min_samples_split=individual_dict['min_samples_split'], - # min_samples_leaf=individual_dict['min_samples_leaf'], - min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], - min_impurity_decrease=individual_dict['min_impurity_decrease'], - max_features=individual_dict['max_features'], - max_leaf_nodes=None, - random_state=None, - verbose=0, - warm_start=False, - learning_rate=individual_dict['learning_rate'], - subsample=individual_dict['subsample']) + clf = GradientBoostingClassifier(random_state=self.mlopt_seed, + **individual_dict) return clf diff --git a/mloptimizer/genoptimizer/xgb.py b/mloptimizer/genoptimizer/xgb.py index 4338e6f..e4a098c 100644 --- a/mloptimizer/genoptimizer/xgb.py +++ b/mloptimizer/genoptimizer/xgb.py @@ -26,24 +26,9 @@ def get_default_hyperparams(): def get_clf(self, individual): individual_dict = self.individual2dict(individual) - clf = xgb.XGBClassifier(base_score=0.5, - booster='gbtree', - colsample_bytree=individual_dict['colsample_bytree'], - colsample_bylevel=1, - eval_metric='logloss', - gamma=individual_dict['gamma'], - learning_rate=individual_dict['learning_rate'], - max_depth=individual_dict['max_depth'], - n_estimators=individual_dict['n_estimators'], - n_jobs=-1, - objective='binary:logistic', - random_state=0, - # reg_alpha=0, - # reg_lambda=1, - scale_pos_weight=individual_dict['scale_pos_weight'], - seed=self.mlopt_seed, - subsample=individual_dict['subsample'], - # tree_method="gpu_hist" + clf = xgb.XGBClassifier(seed=self.mlopt_seed, + random_state=self.mlopt_seed, + **individual_dict ) return clf