
Commit 6f5c697

fixed formatting
deadsoul44 committed Jul 17, 2024
1 parent bc099dd commit 6f5c697
Showing 3 changed files with 80 additions and 28 deletions.
89 changes: 66 additions & 23 deletions python-package/examples/benchmark_lgbm.py
@@ -21,51 +21,92 @@ def prepare_data(cal_housing, seed):
         metric_function = log_loss
         metric_name = "log_loss"
         LGBMBooster = LGBMClassifier
-    X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2248, random_state=seed)
-    return X_train, X_test, y_train, y_test, scoring, metric_function, metric_name, LGBMBooster
+    X_train, X_test, y_train, y_test = train_test_split(
+        data, target, test_size=0.2248, random_state=seed
+    )
+    return (
+        X_train,
+        X_test,
+        y_train,
+        y_test,
+        scoring,
+        metric_function,
+        metric_name,
+        LGBMBooster,
+    )
 
+
 best_cv_results = None
 cv_results = None
 
+
 def save_best_cv_results(study, trial):
     global best_cv_results
     if study.best_trial.number == trial.number:
         best_cv_results = cv_results
 
-def objective_function(trial, seed, n_estimators, LGBMBooster, X_train, y_train, scoring):
+
+def objective_function(
+    trial, seed, n_estimators, LGBMBooster, X_train, y_train, scoring
+):
     global cv_results
     params = {
-        'seed': seed,
-        'verbosity': -1,
-        'n_estimators': n_estimators,
-        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.5, log=True),
-        'min_split_gain': trial.suggest_float('min_split_gain', 1e-6, 1.0, log=True),
-        'reg_alpha': trial.suggest_float('reg_alpha', 1e-6, 1.0, log=True),
-        'reg_lambda': trial.suggest_float('reg_lambda', 1e-6, 1.0, log=True),
-        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 1.0),
-        'subsample': trial.suggest_float('subsample', 0.2, 1.0),
-        'subsample_freq': trial.suggest_int('subsample_freq', 1, 10),
-        'max_depth': trial.suggest_int('max_depth', 3, 33),
-        'num_leaves': trial.suggest_int('num_leaves', 2, 1024),
-        'min_child_samples': trial.suggest_int('min_child_samples', 1, 100),
+        "seed": seed,
+        "verbosity": -1,
+        "n_estimators": n_estimators,
+        "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.5, log=True),
+        "min_split_gain": trial.suggest_float("min_split_gain", 1e-6, 1.0, log=True),
+        "reg_alpha": trial.suggest_float("reg_alpha", 1e-6, 1.0, log=True),
+        "reg_lambda": trial.suggest_float("reg_lambda", 1e-6, 1.0, log=True),
+        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
+        "subsample": trial.suggest_float("subsample", 0.2, 1.0),
+        "subsample_freq": trial.suggest_int("subsample_freq", 1, 10),
+        "max_depth": trial.suggest_int("max_depth", 3, 33),
+        "num_leaves": trial.suggest_int("num_leaves", 2, 1024),
+        "min_child_samples": trial.suggest_int("min_child_samples", 1, 100),
     }
     model = LGBMBooster(**params)
-    cv_results = cross_validate(model, X_train, y_train, cv=5, scoring=scoring, return_train_score=True, return_estimator=True)
-    return -1 * np.mean(cv_results['test_score'])
+    cv_results = cross_validate(
+        model,
+        X_train,
+        y_train,
+        cv=5,
+        scoring=scoring,
+        return_train_score=True,
+        return_estimator=True,
+    )
+    return -1 * np.mean(cv_results["test_score"])
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     optuna.logging.set_verbosity(optuna.logging.WARNING)
     cal_housing = True
     n_estimators = 300
     n_trials = 100
     cpu_times = []
     metrics = []
     for seed in [0, 1]:
-        X_train, X_test, y_train, y_test, scoring, metric_function, metric_name, LGBMBooster = prepare_data(cal_housing, seed)
+        (
+            X_train,
+            X_test,
+            y_train,
+            y_test,
+            scoring,
+            metric_function,
+            metric_name,
+            LGBMBooster,
+        ) = prepare_data(cal_housing, seed)
         sampler = optuna.samplers.TPESampler(seed=seed)
-        study = optuna.create_study(direction='minimize', sampler=sampler)
-        obj = partial(objective_function, seed=seed, n_estimators=n_estimators, LGBMBooster=LGBMBooster, X_train=X_train, y_train=y_train, scoring=scoring)
+        study = optuna.create_study(direction="minimize", sampler=sampler)
+        obj = partial(
+            objective_function,
+            seed=seed,
+            n_estimators=n_estimators,
+            LGBMBooster=LGBMBooster,
+            X_train=X_train,
+            y_train=y_train,
+            scoring=scoring,
+        )
         start = process_time()
         study.optimize(obj, n_trials=n_trials, callbacks=[save_best_cv_results])
         stop = process_time()
@@ -81,4 +122,6 @@ def objective_function(trial, seed, n_estimators, LGBMBooster, X_train, y_train,
 
         print(f"seed: {seed}, cpu time: {stop - start}, {metric_name}: {metric}")
 
-    print(f"average cpu time: {np.mean(cpu_times)}, average {metric_name}: {np.mean(metrics)}")
+    print(
+        f"average cpu time: {np.mean(cpu_times)}, average {metric_name}: {np.mean(metrics)}"
+    )
18 changes: 13 additions & 5 deletions python-package/examples/benchmark_perpetual.py
@@ -17,17 +17,21 @@ def prepare_data(cal_housing, seed):
         metric_function = log_loss
         metric_name = "log_loss"
         objective = "LogLoss"
-    X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2248, random_state=seed)
+    X_train, X_test, y_train, y_test = train_test_split(
+        data, target, test_size=0.2248, random_state=seed
+    )
     return X_train, X_test, y_train, y_test, metric_function, metric_name, objective
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     budget = 1.5
     cal_housing = True
     cpu_times = []
     metrics = []
     for seed in [0, 1, 2, 3, 4]:
-        X_train, X_test, y_train, y_test, metric_function, metric_name, objective = prepare_data(cal_housing, seed)
+        X_train, X_test, y_train, y_test, metric_function, metric_name, objective = (
+            prepare_data(cal_housing, seed)
+        )
         model = PerpetualBooster(objective=objective)
         start = process_time()
         model.fit(X_train, y_train, budget=budget)
@@ -41,6 +45,10 @@ def prepare_data(cal_housing, seed):
         metric = metric_function(y_test, y_pred)
         metrics.append(metric)
 
-        print(f"seed: {seed}, cpu time: {stop - start}, {metric_name}: {metric}, n_trees: {model.number_of_trees}")
+        print(
+            f"seed: {seed}, cpu time: {stop - start}, {metric_name}: {metric}, n_trees: {model.number_of_trees}"
+        )
 
-    print(f"average cpu time: {np.mean(cpu_times)}, average {metric_name}: {np.mean(metrics)}")
+    print(
+        f"average cpu time: {np.mean(cpu_times)}, average {metric_name}: {np.mean(metrics)}"
+    )
1 change: 1 addition & 0 deletions scripts/run-python-tests.ps1
@@ -1,6 +1,7 @@
 Set-Location python-package
 python -m black python/perpetual/
 python -m black tests/
+python -m black examples/
 maturin develop --release
 pytest .
 Set-Location ..
