
Commit 6f5c697

fixed formatting
deadsoul44 committed Jul 17, 2024
1 parent bc099dd commit 6f5c697
Showing 3 changed files with 80 additions and 28 deletions.
89 changes: 66 additions & 23 deletions python-package/examples/benchmark_lgbm.py
@@ -21,51 +21,92 @@ def prepare_data(cal_housing, seed):
         metric_function = log_loss
         metric_name = "log_loss"
         LGBMBooster = LGBMClassifier
-    X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2248, random_state=seed)
-    return X_train, X_test, y_train, y_test, scoring, metric_function, metric_name, LGBMBooster
+    X_train, X_test, y_train, y_test = train_test_split(
+        data, target, test_size=0.2248, random_state=seed
+    )
+    return (
+        X_train,
+        X_test,
+        y_train,
+        y_test,
+        scoring,
+        metric_function,
+        metric_name,
+        LGBMBooster,
+    )
 
+
 best_cv_results = None
 cv_results = None
 
+
 def save_best_cv_results(study, trial):
     global best_cv_results
     if study.best_trial.number == trial.number:
         best_cv_results = cv_results
 
-def objective_function(trial, seed, n_estimators, LGBMBooster, X_train, y_train, scoring):
+
+def objective_function(
+    trial, seed, n_estimators, LGBMBooster, X_train, y_train, scoring
+):
     global cv_results
     params = {
-        'seed': seed,
-        'verbosity': -1,
-        'n_estimators': n_estimators,
-        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.5, log=True),
-        'min_split_gain': trial.suggest_float('min_split_gain', 1e-6, 1.0, log=True),
-        'reg_alpha': trial.suggest_float('reg_alpha', 1e-6, 1.0, log=True),
-        'reg_lambda': trial.suggest_float('reg_lambda', 1e-6, 1.0, log=True),
-        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 1.0),
-        'subsample': trial.suggest_float('subsample', 0.2, 1.0),
-        'subsample_freq': trial.suggest_int('subsample_freq', 1, 10),
-        'max_depth': trial.suggest_int('max_depth', 3, 33),
-        'num_leaves': trial.suggest_int('num_leaves', 2, 1024),
-        'min_child_samples': trial.suggest_int('min_child_samples', 1, 100),
+        "seed": seed,
+        "verbosity": -1,
+        "n_estimators": n_estimators,
+        "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.5, log=True),
+        "min_split_gain": trial.suggest_float("min_split_gain", 1e-6, 1.0, log=True),
+        "reg_alpha": trial.suggest_float("reg_alpha", 1e-6, 1.0, log=True),
+        "reg_lambda": trial.suggest_float("reg_lambda", 1e-6, 1.0, log=True),
+        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
+        "subsample": trial.suggest_float("subsample", 0.2, 1.0),
+        "subsample_freq": trial.suggest_int("subsample_freq", 1, 10),
+        "max_depth": trial.suggest_int("max_depth", 3, 33),
+        "num_leaves": trial.suggest_int("num_leaves", 2, 1024),
+        "min_child_samples": trial.suggest_int("min_child_samples", 1, 100),
     }
     model = LGBMBooster(**params)
-    cv_results = cross_validate(model, X_train, y_train, cv=5, scoring=scoring, return_train_score=True, return_estimator=True)
-    return -1 * np.mean(cv_results['test_score'])
+    cv_results = cross_validate(
+        model,
+        X_train,
+        y_train,
+        cv=5,
+        scoring=scoring,
+        return_train_score=True,
+        return_estimator=True,
+    )
+    return -1 * np.mean(cv_results["test_score"])
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     optuna.logging.set_verbosity(optuna.logging.WARNING)
     cal_housing = True
     n_estimators = 300
     n_trials = 100
     cpu_times = []
     metrics = []
     for seed in [0, 1]:
-        X_train, X_test, y_train, y_test, scoring, metric_function, metric_name, LGBMBooster = prepare_data(cal_housing, seed)
+        (
+            X_train,
+            X_test,
+            y_train,
+            y_test,
+            scoring,
+            metric_function,
+            metric_name,
+            LGBMBooster,
+        ) = prepare_data(cal_housing, seed)
         sampler = optuna.samplers.TPESampler(seed=seed)
-        study = optuna.create_study(direction='minimize', sampler=sampler)
-        obj = partial(objective_function, seed=seed, n_estimators=n_estimators, LGBMBooster=LGBMBooster, X_train=X_train, y_train=y_train, scoring=scoring)
+        study = optuna.create_study(direction="minimize", sampler=sampler)
+        obj = partial(
+            objective_function,
+            seed=seed,
+            n_estimators=n_estimators,
+            LGBMBooster=LGBMBooster,
+            X_train=X_train,
+            y_train=y_train,
+            scoring=scoring,
+        )
         start = process_time()
         study.optimize(obj, n_trials=n_trials, callbacks=[save_best_cv_results])
         stop = process_time()
@@ -81,4 +122,6 @@ def objective_function(trial, seed, n_estimators, LGBMBooster, X_train, y_train,
 
         print(f"seed: {seed}, cpu time: {stop - start}, {metric_name}: {metric}")
 
-    print(f"average cpu time: {np.mean(cpu_times)}, average {metric_name}: {np.mean(metrics)}")
+    print(
+        f"average cpu time: {np.mean(cpu_times)}, average {metric_name}: {np.mean(metrics)}"
+    )
18 changes: 13 additions & 5 deletions python-package/examples/benchmark_perpetual.py
@@ -17,17 +17,21 @@ def prepare_data(cal_housing, seed):
         metric_function = log_loss
         metric_name = "log_loss"
         objective = "LogLoss"
-    X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2248, random_state=seed)
+    X_train, X_test, y_train, y_test = train_test_split(
+        data, target, test_size=0.2248, random_state=seed
+    )
     return X_train, X_test, y_train, y_test, metric_function, metric_name, objective
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     budget = 1.5
     cal_housing = True
     cpu_times = []
     metrics = []
     for seed in [0, 1, 2, 3, 4]:
-        X_train, X_test, y_train, y_test, metric_function, metric_name, objective = prepare_data(cal_housing, seed)
+        X_train, X_test, y_train, y_test, metric_function, metric_name, objective = (
+            prepare_data(cal_housing, seed)
+        )
         model = PerpetualBooster(objective=objective)
         start = process_time()
         model.fit(X_train, y_train, budget=budget)
@@ -41,6 +45,10 @@ def prepare_data(cal_housing, seed):
         metric = metric_function(y_test, y_pred)
         metrics.append(metric)
 
-        print(f"seed: {seed}, cpu time: {stop - start}, {metric_name}: {metric}, n_trees: {model.number_of_trees}")
+        print(
+            f"seed: {seed}, cpu time: {stop - start}, {metric_name}: {metric}, n_trees: {model.number_of_trees}"
+        )
 
-    print(f"average cpu time: {np.mean(cpu_times)}, average {metric_name}: {np.mean(metrics)}")
+    print(
+        f"average cpu time: {np.mean(cpu_times)}, average {metric_name}: {np.mean(metrics)}"
+    )
1 change: 1 addition & 0 deletions scripts/run-python-tests.ps1
@@ -1,6 +1,7 @@
 Set-Location python-package
 python -m black python/perpetual/
 python -m black tests/
+python -m black examples/
 maturin develop --release
 pytest .
 Set-Location ..
