Skip to content

Commit

Permalink
Modified: values of hyperparams are now passed as a dict in tree algorithms, which allows the use of more params. TODO: input hyperparam validation.
Browse files Browse the repository at this point in the history
  • Loading branch information
Caparrini committed Feb 23, 2024
1 parent 6118538 commit b7f852c
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 92 deletions.
82 changes: 8 additions & 74 deletions mloptimizer/genoptimizer/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,8 @@ class TreeOptimizer(BaseOptimizer, ABC):
def get_clf(self, individual):
    """Build an unfitted ``DecisionTreeClassifier`` from a GA individual.

    Parameters
    ----------
    individual : deap individual
        Encoded hyperparameter values produced by the genetic optimizer.

    Returns
    -------
    DecisionTreeClassifier
        Classifier configured with the decoded hyperparameters and the
        optimizer's seed for reproducibility.
    """
    # Decode the individual's genes into a {hyperparam_name: value} dict.
    individual_dict = self.individual2dict(individual)

    # Forward every tuned hyperparameter as a keyword argument.
    # NOTE(review): values are not validated here (known TODO); sklearn
    # will validate them at fit time. Assumes the hyperparam space never
    # contains 'random_state' (a duplicate keyword would raise TypeError)
    # — confirm against the hyperparam definitions.
    clf = DecisionTreeClassifier(random_state=self.mlopt_seed,
                                 **individual_dict)
    return clf

@staticmethod
Expand All @@ -56,21 +41,8 @@ class ForestOptimizer(TreeOptimizer, ABC):
def get_clf(self, individual):
    """Build an unfitted ``RandomForestClassifier`` from a GA individual.

    Parameters
    ----------
    individual : deap individual
        Encoded hyperparameter values produced by the genetic optimizer.

    Returns
    -------
    RandomForestClassifier
        Classifier configured with the decoded hyperparameters and the
        optimizer's seed for reproducibility.
    """
    # Decode the individual's genes into a {hyperparam_name: value} dict.
    individual_dict = self.individual2dict(individual)

    # All tuned hyperparameters are forwarded as keyword arguments;
    # everything else keeps sklearn's defaults.
    # NOTE(review): no input validation here (known TODO) — sklearn
    # validates at fit time.
    clf = RandomForestClassifier(random_state=self.mlopt_seed,
                                 **individual_dict)
    return clf

Expand All @@ -97,34 +69,8 @@ class ExtraTreesOptimizer(ForestOptimizer, ABC):
def get_clf(self, individual):
    """Build an unfitted ``ExtraTreesClassifier`` from a GA individual.

    Parameters
    ----------
    individual : deap individual
        Encoded hyperparameter values produced by the genetic optimizer.

    Returns
    -------
    ExtraTreesClassifier
        Classifier configured with the decoded hyperparameters and the
        optimizer's seed for reproducibility.
    """
    # Decode the individual's genes into a {hyperparam_name: value} dict.
    individual_dict = self.individual2dict(individual)

    # The former 'scale_pos_weight' -> class_weight translation was
    # removed by this change; hyperparameters now map 1:1 onto the
    # estimator's keyword arguments.
    # NOTE(review): no input validation here (known TODO) — sklearn
    # validates at fit time.
    clf = ExtraTreesClassifier(random_state=self.mlopt_seed,
                               **individual_dict)
    return clf

Expand Down Expand Up @@ -154,18 +100,6 @@ def get_hyperparams(self):

def get_clf(self, individual):
    """Build an unfitted ``GradientBoostingClassifier`` from a GA individual.

    Parameters
    ----------
    individual : deap individual
        Encoded hyperparameter values produced by the genetic optimizer.

    Returns
    -------
    GradientBoostingClassifier
        Classifier configured with the decoded hyperparameters and the
        optimizer's seed for reproducibility.
    """
    # Decode the individual's genes into a {hyperparam_name: value} dict.
    individual_dict = self.individual2dict(individual)

    # Forward every tuned hyperparameter as a keyword argument.
    # NOTE(review): no input validation here (known TODO) — sklearn
    # validates at fit time.
    clf = GradientBoostingClassifier(random_state=self.mlopt_seed,
                                     **individual_dict)
    return clf
21 changes: 3 additions & 18 deletions mloptimizer/genoptimizer/xgb.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,9 @@ def get_default_hyperparams():

def get_clf(self, individual):
    """Build an unfitted ``xgb.XGBClassifier`` from a GA individual.

    Parameters
    ----------
    individual : deap individual
        Encoded hyperparameter values produced by the genetic optimizer.

    Returns
    -------
    xgb.XGBClassifier
        Classifier configured with the decoded hyperparameters and the
        optimizer's seed for reproducibility.
    """
    # Decode the individual's genes into a {hyperparam_name: value} dict.
    individual_dict = self.individual2dict(individual)

    # NOTE(review): 'seed' is a deprecated alias of 'random_state' in
    # xgboost; passing both (with the same value) is redundant but
    # harmless. Consider dropping 'seed' once the supported xgboost
    # versions are confirmed.
    clf = xgb.XGBClassifier(seed=self.mlopt_seed,
                            random_state=self.mlopt_seed,
                            **individual_dict)
    return clf

Expand Down

0 comments on commit b7f852c

Please sign in to comment.