updated notebook

perpetual-ml · Jun 15, 2024 · cccaf54 · cccaf54
1 parent a6a3954
commit cccaf54
Showing 1 changed file with 16 additions and 82 deletions.
diff --git a/python-package/examples/performance_benchmark.ipynb b/python-package/examples/performance_benchmark.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -17,38 +17,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Python 3.10.13\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "!python --version"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "numpy: 1.26.4\n",
-      "optuna: 3.5.0\n",
-      "lightgbm: 4.1.0\n",
-      "scikit-learn: 1.3.0\n",
-      "perpetual: 0.1.57\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from importlib.metadata import version\n",
     "\n",
@@ -61,7 +41,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -70,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -81,7 +61,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -103,18 +83,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "len(X_train): 450400\n",
-      "len(X_test): 130612\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2248, random_state=seed)\n",
     "\n",
@@ -124,7 +95,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -139,7 +110,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -167,56 +138,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[I 2024-03-16 08:25:08,915] A new study created in memory with name: no-name-8d93836c-7983-4085-903b-786201844dba\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "sampler = optuna.samplers.TPESampler(seed=seed)\n",
     "study = optuna.create_study(direction='minimize', sampler=sampler)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[I 2024-03-16 08:43:17,963] Trial 0 finished with value: 0.22175740241526296 and parameters: {'learning_rate': 0.03028525153605885, 'min_split_gain': 0.019549524484259877, 'reg_alpha': 0.00413599739383989, 'reg_lambda': 0.0018590843630169633, 'colsample_bytree': 0.5389238394711238, 'subsample': 0.7167152904533249, 'subsample_freq': 5, 'max_depth': 30, 'num_leaves': 987, 'min_child_samples': 39}. Best is trial 0 with value: 0.22175740241526296.\n",
-      "[I 2024-03-16 08:57:24,209] Trial 1 finished with value: 0.2732769181539135 and parameters: {'learning_rate': 0.13703835270362635, 'min_split_gain': 0.0014906288366101645, 'reg_alpha': 0.002560161525002871, 'reg_lambda': 0.35775015430826956, 'colsample_bytree': 0.25682884655830956, 'subsample': 0.26970343976123257, 'subsample_freq': 1, 'max_depth': 28, 'num_leaves': 798, 'min_child_samples': 88}. Best is trial 0 with value: 0.22175740241526296.\n",
-      "[I 2024-03-16 09:06:31,544] Trial 2 finished with value: 0.20403118094430436 and parameters: {'learning_rate': 0.43778567971542487, 'min_split_gain': 0.06236650188352654, 'reg_alpha': 0.0005873218708481509, 'reg_lambda': 0.048214210285277594, 'colsample_bytree': 0.2946195406951466, 'subsample': 0.7119368170620191, 'subsample_freq': 2, 'max_depth': 32, 'num_leaves': 535, 'min_child_samples': 42}. Best is trial 2 with value: 0.20403118094430436.\n",
-      "[I 2024-03-16 09:21:13,258] Trial 3 finished with value: 0.902680648226068 and parameters: {'learning_rate': 0.00517639800969051, 'min_split_gain': 0.04419795105236875, 'reg_alpha': 0.0005456347144827907, 'reg_lambda': 0.002573971258732489, 'colsample_bytree': 0.21503184034908412, 'subsample': 0.6941083976607016, 'subsample_freq': 7, 'max_depth': 22, 'num_leaves': 967, 'min_child_samples': 69}. Best is trial 2 with value: 0.20403118094430436.\n",
-      "[I 2024-03-16 09:26:04,018] Trial 4 finished with value: 0.6725231387560552 and parameters: {'learning_rate': 0.009339014290782485, 'min_split_gain': 0.00041897848572377823, 'reg_alpha': 0.01533865133545861, 'reg_lambda': 2.298014844498276e-06, 'colsample_bytree': 0.7334133723565341, 'subsample': 0.7365102956945275, 'subsample_freq': 3, 'max_depth': 6, 'num_leaves': 324, 'min_child_samples': 37}. Best is trial 2 with value: 0.20403118094430436.\n",
-      "[I 2024-03-16 09:32:54,091] Trial 5 finished with value: 0.4381322311311667 and parameters: {'learning_rate': 0.034589475764299146, 'min_split_gain': 0.00042816294681191587, 'reg_alpha': 0.8516144627787162, 'reg_lambda': 4.095141046390408e-06, 'colsample_bytree': 0.3671014048758678, 'subsample': 0.329047614307997, 'subsample_freq': 7, 'max_depth': 10, 'num_leaves': 479, 'min_child_samples': 25}. Best is trial 2 with value: 0.20403118094430436.\n",
-      "[I 2024-03-16 09:37:17,436] Trial 6 finished with value: 0.9912250386311733 and parameters: {'learning_rate': 0.002685679213591633, 'min_split_gain': 4.594633206288104e-06, 'reg_alpha': 0.00866917034659781, 'reg_lambda': 6.746798008923238e-06, 'colsample_bytree': 0.3572658893440428, 'subsample': 0.4949801365287713, 'subsample_freq': 9, 'max_depth': 6, 'num_leaves': 859, 'min_child_samples': 10}. Best is trial 2 with value: 0.20403118094430436.\n",
-      "[I 2024-03-16 09:41:13,676] Trial 7 finished with value: 4.301360752854002 and parameters: {'learning_rate': 0.4319513210123975, 'min_split_gain': 0.0006484958629787591, 'reg_alpha': 0.7253815900686046, 'reg_lambda': 0.004256700735530979, 'colsample_bytree': 0.7914108635186414, 'subsample': 0.23135023380345654, 'subsample_freq': 3, 'max_depth': 6, 'num_leaves': 304, 'min_child_samples': 12}. Best is trial 2 with value: 0.20403118094430436.\n",
-      "[I 2024-03-16 09:44:29,287] Trial 8 finished with value: 0.7725708699897976 and parameters: {'learning_rate': 0.007214845958494043, 'min_split_gain': 0.00030589893648986084, 'reg_alpha': 2.425967498008261e-06, 'reg_lambda': 0.014283436757818842, 'colsample_bytree': 0.6532811633652602, 'subsample': 0.4123115927515564, 'subsample_freq': 6, 'max_depth': 5, 'num_leaves': 591, 'min_child_samples': 93}. Best is trial 2 with value: 0.20403118094430436.\n",
-      "[I 2024-03-16 09:46:35,507] Trial 9 finished with value: 0.8651757974932586 and parameters: {'learning_rate': 0.007241158387044701, 'min_split_gain': 0.010103277456997328, 'reg_alpha': 6.177136160192784e-06, 'reg_lambda': 0.019859267454728526, 'colsample_bytree': 0.4315248743577609, 'subsample': 0.3465530896056935, 'subsample_freq': 6, 'max_depth': 3, 'num_leaves': 850, 'min_child_samples': 1}. Best is trial 2 with value: 0.20403118094430436.\n",
-      "[I 2024-03-16 09:49:49,290] Trial 10 finished with value: 5.203927828480977 and parameters: {'learning_rate': 0.4058390418065474, 'min_split_gain': 0.7963468644533023, 'reg_alpha': 7.083552500496203e-05, 'reg_lambda': 8.147468889675739e-05, 'colsample_bytree': 0.9594241151524179, 'subsample': 0.9824992872909852, 'subsample_freq': 1, 'max_depth': 17, 'num_leaves': 15, 'min_child_samples': 60}. Best is trial 2 with value: 0.20403118094430436.\n",
-      "[I 2024-03-16 10:04:19,509] Trial 11 finished with value: 0.19968516511674084 and parameters: {'learning_rate': 0.048186763588324306, 'min_split_gain': 0.26345360071711127, 'reg_alpha': 0.00018233680558677173, 'reg_lambda': 0.7411958132121903, 'colsample_bytree': 0.53148805532753, 'subsample': 0.8795334379697572, 'subsample_freq': 4, 'max_depth': 33, 'num_leaves': 642, 'min_child_samples': 42}. Best is trial 11 with value: 0.19968516511674084.\n",
-      "[I 2024-03-16 10:12:57,177] Trial 12 finished with value: 0.17781146059113012 and parameters: {'learning_rate': 0.09942534771027574, 'min_split_gain': 0.6319281151521206, 'reg_alpha': 0.00011035812323342188, 'reg_lambda': 0.8159491598398828, 'colsample_bytree': 0.5224937700533134, 'subsample': 0.9356286123721699, 'subsample_freq': 3, 'max_depth': 33, 'num_leaves': 623, 'min_child_samples': 54}. Best is trial 12 with value: 0.17781146059113012.\n",
-      "[I 2024-03-16 10:21:59,071] Trial 13 finished with value: 0.19812344013955413 and parameters: {'learning_rate': 0.0827221158535836, 'min_split_gain': 0.9194197463736601, 'reg_alpha': 3.9126073200279555e-05, 'reg_lambda': 0.9139156345722601, 'colsample_bytree': 0.5206259982929461, 'subsample': 0.982194814042813, 'subsample_freq': 4, 'max_depth': 26, 'num_leaves': 672, 'min_child_samples': 68}. Best is trial 12 with value: 0.17781146059113012.\n",
-      "[I 2024-03-16 10:33:12,009] Trial 14 finished with value: 0.154021295336553 and parameters: {'learning_rate': 0.10697311044459532, 'min_split_gain': 0.28590072462857175, 'reg_alpha': 2.147440649733628e-05, 'reg_lambda': 0.1459259484783647, 'colsample_bytree': 0.4984191731421451, 'subsample': 0.9896725069698811, 'subsample_freq': 4, 'max_depth': 25, 'num_leaves': 720, 'min_child_samples': 74}. Best is trial 14 with value: 0.154021295336553.\n",
-      "[I 2024-03-16 10:44:32,380] Trial 15 finished with value: 0.1322957705205645 and parameters: {'learning_rate': 0.1464179609395655, 'min_split_gain': 1.2546142912631768e-05, 'reg_alpha': 1.5581435919054263e-05, 'reg_lambda': 0.10011897856040776, 'colsample_bytree': 0.6565146641861263, 'subsample': 0.853975187889126, 'subsample_freq': 3, 'max_depth': 23, 'num_leaves': 358, 'min_child_samples': 80}. Best is trial 15 with value: 0.1322957705205645.\n",
-      "[I 2024-03-16 10:55:20,023] Trial 16 finished with value: 0.15113304866245672 and parameters: {'learning_rate': 0.1969172544821798, 'min_split_gain': 1.3625816038901367e-06, 'reg_alpha': 1.4563678685891703e-05, 'reg_lambda': 0.0823147094657594, 'colsample_bytree': 0.851594982577979, 'subsample': 0.7995752864846458, 'subsample_freq': 5, 'max_depth': 18, 'num_leaves': 301, 'min_child_samples': 85}. Best is trial 15 with value: 0.1322957705205645.\n",
-      "[I 2024-03-16 11:06:07,124] Trial 17 finished with value: 0.9181742806849817 and parameters: {'learning_rate': 0.20039036411477432, 'min_split_gain': 1.0259185090983993e-06, 'reg_alpha': 1.3898944062292137e-06, 'reg_lambda': 0.00033028158891536025, 'colsample_bytree': 0.8958165941765961, 'subsample': 0.8257761087195659, 'subsample_freq': 10, 'max_depth': 17, 'num_leaves': 184, 'min_child_samples': 81}. Best is trial 15 with value: 0.1322957705205645.\n",
-      "[I 2024-03-16 11:21:10,556] Trial 18 finished with value: 0.39887736503756505 and parameters: {'learning_rate': 0.016186981788602176, 'min_split_gain': 3.603165387246647e-05, 'reg_alpha': 1.0055727242380978e-05, 'reg_lambda': 0.0839610794405858, 'colsample_bytree': 0.8266684543242371, 'subsample': 0.5850792596639499, 'subsample_freq': 8, 'max_depth': 13, 'num_leaves': 378, 'min_child_samples': 100}. Best is trial 15 with value: 0.1322957705205645.\n",
-      "[I 2024-03-16 11:33:58,440] Trial 19 finished with value: 2.3559130255306355 and parameters: {'learning_rate': 0.22927235503820384, 'min_split_gain': 2.9664234758636775e-05, 'reg_alpha': 5.772990765841611e-06, 'reg_lambda': 0.0003155931086002135, 'colsample_bytree': 0.6598383104029715, 'subsample': 0.7972782985165164, 'subsample_freq': 5, 'max_depth': 22, 'num_leaves': 180, 'min_child_samples': 80}. Best is trial 15 with value: 0.1322957705205645.\n",
-      "[I 2024-03-16 15:15:34,273] Trial 20 finished with value: 0.9847133576175395 and parameters: {'learning_rate': 0.0012216074641774246, 'min_split_gain': 1.2612051796453974e-05, 'reg_alpha': 0.1469532985096933, 'reg_lambda': 0.010844683334181114, 'colsample_bytree': 0.7206720142428236, 'subsample': 0.6106889449505086, 'subsample_freq': 2, 'max_depth': 20, 'num_leaves': 443, 'min_child_samples': 99}. Best is trial 15 with value: 0.1322957705205645.\n",
-      "[I 2024-03-16 15:24:35,183] Trial 21 finished with value: 0.22597728828112848 and parameters: {'learning_rate': 0.056809509499213276, 'min_split_gain': 1.3976296147413307e-06, 'reg_alpha': 2.1903513958748333e-05, 'reg_lambda': 0.14404308061387716, 'colsample_bytree': 0.8732170809791944, 'subsample': 0.8795766416061285, 'subsample_freq': 4, 'max_depth': 25, 'num_leaves': 188, 'min_child_samples': 74}. Best is trial 15 with value: 0.1322957705205645.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%time\n",
     "study.optimize(objective_function, n_trials=n_trials, callbacks=[save_best_cv_results])"