[Feature] introduce binary focal objective, eval (#4)
RektPunk authored Sep 15, 2024
1 parent 6745204 commit 91bcdf7
Showing 4 changed files with 828 additions and 9 deletions.
68 changes: 68 additions & 0 deletions experiments/basic.py
@@ -0,0 +1,68 @@
import lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score
from sklearn.model_selection import train_test_split

import imlightgbm as imlgb

# Load breast cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create LightGBM datasets
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# Parameters for standard LightGBM model
params_standard = {
    "objective": "binary",
    "metric": "binary_logloss",
    "learning_rate": 0.05,
    "num_leaves": 31,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 5,
    "seed": 42,
    "early_stopping_rounds": 10,
}

# Train standard LightGBM model
bst_standard = lgb.train(
    params_standard, train_data, num_boost_round=100, valid_sets=[test_data]
)


# Train LightGBM with the focal objective provided by imlightgbm
bst_focal = imlgb.train(
    params_standard, train_data, num_boost_round=100, valid_sets=[test_data]
)

# Predict using the standard LightGBM model
y_pred_standard = bst_standard.predict(X_test)
y_pred_standard_binary = (y_pred_standard > 0.5).astype(int)

# Predict using the focal loss model
y_pred_focal = bst_focal.predict(X_test)
y_pred_focal_binary = (y_pred_focal > 0.5).astype(int)

# Evaluate models
accuracy_standard = accuracy_score(y_test, y_pred_standard_binary)
logloss_standard = log_loss(y_test, y_pred_standard)
rocauc_standard = roc_auc_score(y_test, y_pred_standard)


accuracy_focal = accuracy_score(y_test, y_pred_focal_binary)
logloss_focal = log_loss(y_test, y_pred_focal)
rocauc_focal = roc_auc_score(y_test, y_pred_focal)
print(
    f"Standard LightGBM - Accuracy: {accuracy_standard:.4f}, Log Loss: {logloss_standard:.4f}, rocauc: {rocauc_standard:.4f}"
)
print(
    f"LightGBM with Focal Loss - Accuracy: {accuracy_focal:.4f}, Log Loss: {logloss_focal:.4f}, rocauc: {rocauc_focal:.4f}"
)
# Standard LightGBM - Accuracy: 0.9737, Log Loss: 0.1029, rocauc: 0.9931
# LightGBM with Focal Loss - Accuracy: 0.8158, Log Loss: 0.6955, rocauc: 0.9843
52 changes: 44 additions & 8 deletions imlightgbm/objective.py
@@ -10,23 +10,59 @@

EvalLike = Callable[[np.ndarray, Dataset], tuple[str, float, bool]]
ObjLike = Callable[[np.ndarray, Dataset], tuple[np.ndarray, np.ndarray]]
-ALPHA_DEFAULT: float = 0.05
-GAMMA_DEFAULT: float = 0.05
+ALPHA_DEFAULT: float = 0.25
+GAMMA_DEFAULT: float = 2.0
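# For reference: alpha = 0.25 and gamma = 2.0 are the defaults recommended in
# the original focal loss paper (Lin et al., 2017), replacing the earlier
# placeholder values of 0.05.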
OBJECTIVE_STR: str = "objective"
IS_HIGHER_BETTER = False


def _power(num_base: np.ndarray, num_pow: float):
    """Elementwise signed power: sign(x) * |x| ** num_pow."""
    return np.sign(num_base) * (np.abs(num_base)) ** (num_pow)


def _log(array: np.ndarray, is_prob: bool = False) -> np.ndarray:
    """Numerically safe log: clips input to [1e-6, 1] for probabilities, [1e-6, inf) otherwise."""
    _upper = 1 if is_prob else None
    return np.log(np.clip(array, 1e-6, _upper))


def _sigmoid(x: np.ndarray) -> np.ndarray:
    """Convert raw predictions to probabilities in a binary task."""
    return 1 / (1 + np.exp(-x))


def binary_focal_eval(
    pred: np.ndarray, train_data: Dataset, alpha: float, gamma: float
) -> tuple[str, float, bool]:
-    is_higher_better = False
-    return "binary_focal", ..., is_higher_better
+    label = train_data.get_label()
+    pred_prob = _sigmoid(pred)
+    p_t = np.where(label == 1, pred_prob, 1 - pred_prob)
+    loss = -alpha * ((1 - p_t) ** gamma) * _log(p_t, True)
+
+    focal_loss = np.mean(loss)
+    return "binary_focal", focal_loss, IS_HIGHER_BETTER


def binary_focal_objective(
-    pred: np.ndarray, train_data: Dataset, alpha: float, gamma: float
+    pred: np.ndarray, train_data: Dataset, gamma: float
) -> tuple[np.ndarray, np.ndarray]:
-    # TODO
-    return ...
+    label = train_data.get_label()
+    pred_prob = _sigmoid(pred)
+
+    # gradient
+    g1 = pred_prob * (1 - pred_prob)
+    g2 = label + ((-1) ** label) * pred_prob
+    g3 = pred_prob + label - 1
+    g4 = 1 - label - ((-1) ** label) * pred_prob
+    g5 = label + ((-1) ** label) * pred_prob
+    grad = gamma * g3 * _power(g2, gamma) * _log(g4) + ((-1) ** label) * _power(
+        g5, (gamma + 1)
+    )
+
+    # hess
+    h1 = _power(g2, gamma) + gamma * ((-1) ** label) * g3 * _power(g2, (gamma - 1))
+    h2 = ((-1) ** label) * g3 * _power(g2, gamma) / g4
+    hess = ((h1 * _log(g4) - h2) * gamma + (gamma + 1) * _power(g5, gamma)) * g1
+    return grad, hess
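# For reference (not part of the diff): as LightGBM custom objectives require,
# grad and hess are intended as the first and second derivatives of the
# per-sample focal loss with respect to the raw score, with pred_prob =
# sigmoid(pred). Note that alpha enters only the eval metric; the objective
# uses gamma alone. A small numerical cross-check of grad is sketched after
# this diff.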


def multiclass_focal_eval(
@@ -52,7 +88,7 @@ def set_fobj_feval(
f"Invalid target type: {inferred_task}. Supported types are 'binary' or 'multiclass'."
)
    objective_mapper: dict[str, ObjLike] = {
-        "binary": partial(binary_focal_objective, alpha=alpha, gamma=gamma),
+        "binary": partial(binary_focal_objective, gamma=gamma),
        "multiclass": partial(multiclass_focal_objective, alpha=alpha, gamma=gamma),
    }
    eval_mapper: dict[str, EvalLike] = {
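
One way to sanity-check the closed-form gradient in binary_focal_objective is to compare it against a central finite difference of the per-sample focal loss (with alpha omitted, as in the objective). The sketch below is an illustration rather than part of the commit: the names focal_loss, focal_grad, and GAMMA are placeholders, and the loss and gradient are re-implemented standalone so the check runs without a lightgbm Dataset.

import numpy as np

GAMMA = 2.0  # same value as GAMMA_DEFAULT above


def focal_loss(x: np.ndarray, y: np.ndarray, gamma: float) -> np.ndarray:
    # Per-sample focal loss on raw scores x and labels y in {0, 1}, alpha omitted.
    p = 1 / (1 + np.exp(-x))
    p_t = np.where(y == 1, p, 1 - p)
    return -((1 - p_t) ** gamma) * np.log(p_t)


def focal_grad(x: np.ndarray, y: np.ndarray, gamma: float) -> np.ndarray:
    # Same algebra as binary_focal_objective: g2 = 1 - p_t, g4 = p_t.
    p = 1 / (1 + np.exp(-x))
    g2 = y + ((-1.0) ** y) * p
    g3 = p + y - 1
    g4 = 1 - y - ((-1.0) ** y) * p
    return gamma * g3 * (g2 ** gamma) * np.log(g4) + ((-1.0) ** y) * g2 ** (gamma + 1)


rng = np.random.default_rng(0)
x = rng.normal(size=1000)
y = rng.integers(0, 2, size=1000)

eps = 1e-6
num_grad = (focal_loss(x + eps, y, GAMMA) - focal_loss(x - eps, y, GAMMA)) / (2 * eps)
assert np.allclose(num_grad, focal_grad(x, y, GAMMA), atol=1e-5)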
