Skip to content

Commit

Permalink
[Feature] initially fitting to mean of label (#39)
Browse files Browse the repository at this point in the history
Set the mean of the label as the initial model fit.
  • Loading branch information
RektPunk authored Sep 30, 2024
1 parent 6201c47 commit aa2befa
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repos:
- id: check-merge-conflict

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.4
rev: v0.6.8
hooks:
- id: ruff
args: [ --fix ]
Expand Down
14 changes: 10 additions & 4 deletions mqboost/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def __init__(
self._data = prepare_x(x=_data, alphas=self._alphas)
self._columns = self._data.columns
if label is not None:
self._label = prepare_y(y=label, alphas=self._alphas)
self._label_mean = label.mean()
self._label = prepare_y(y=label - self._label_mean, alphas=self._alphas)
self._is_none_label = False

@property
Expand Down Expand Up @@ -109,16 +110,21 @@ def data(self) -> pd.DataFrame:
"""Get the raw input features."""
return self._data

@property
def alphas(self) -> list[float]:
"""Get the list of quantile levels."""
return self._alphas

@property
def label(self) -> pd.DataFrame:
"""Get the raw target labels."""
self.__label_available()
return self._label

@property
def alphas(self) -> list[float]:
"""Get the list of quantile levels."""
return self._alphas
def label_mean(self) -> float:
self.__label_available()
return self._label_mean

@property
def dtrain(self) -> DtrainLike:
Expand Down
8 changes: 6 additions & 2 deletions mqboost/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def get_params(trial: Trial) -> dict[str, Any]:
dict[str, Any]: The best hyperparameters found by the optimization process.
"""
self._dataset = dataset
self._label_mean = dataset.label_mean
self._MQObj = MQObjective(
alphas=dataset.alphas,
objective=self._objective,
Expand Down Expand Up @@ -189,7 +190,10 @@ def __optuna_objective(
valid_sets=dvalid,
)
_gbm = lgb.train(**model_params)
_preds = _gbm.predict(data=deval, num_iteration=_gbm.best_iteration)
_preds = (
_gbm.predict(data=deval, num_iteration=_gbm.best_iteration)
+ self._label_mean
)
_, loss, _ = self._MQObj.feval(y_pred=_preds, dtrain=dvalid)
elif self.__is_xgb:
model_params = dict(
Expand All @@ -198,7 +202,7 @@ def __optuna_objective(
evals=[(dvalid, "valid")],
)
_gbm = xgb.train(**model_params)
_preds = _gbm.predict(data=deval)
_preds = _gbm.predict(data=deval) + self._label_mean
_, loss = self._MQObj.feval(y_pred=_preds, dtrain=dvalid)
else:
raise FittingException("Model name is invalid")
Expand Down
4 changes: 3 additions & 1 deletion mqboost/regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def fit(
else:
_eval_set = dataset.dtrain

self._label_mean = dataset.label_mean

params = set_monotone_constraints(
params=self._params,
columns=dataset.columns,
Expand Down Expand Up @@ -115,7 +117,7 @@ def predict(
np.ndarray: The predicted quantiles.
"""
self.__predict_available()
_pred = self.model.predict(data=dataset.dpredict)
_pred = self.model.predict(data=dataset.dpredict) + self._label_mean
_pred = _pred.reshape(len(dataset.alphas), dataset.nrow)
return _pred

Expand Down
6 changes: 4 additions & 2 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def test_mqdataset_initialization_with_lgb():
dataset.data,
_concat(data, 3).assign(_tau=[0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3]),
)
np.testing.assert_array_equal(dataset.label, np.concatenate([label] * len(alphas)))
np.testing.assert_array_equal(
dataset.label, np.array([-1, 0, 1, -1, 0, 1, -1, 0, 1])
)


def test_mqdataset_initialization_with_xgb():
Expand All @@ -42,7 +44,7 @@ def test_mqdataset_initialization_with_xgb():
pd.testing.assert_frame_equal(
dataset.data, _concat(data, 2).assign(_tau=[0.1, 0.1, 0.1, 0.2, 0.2, 0.2])
)
np.testing.assert_array_equal(dataset.label, np.concatenate([label] * len(alphas)))
np.testing.assert_array_equal(dataset.label, np.array([-1, 0, 1, -1, 0, 1]))


def test_mqdataset_initialization_with_invalid_alpha():
Expand Down

1 comment on commit aa2befa

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests Skipped Failures Errors Time
91 0 💤 0 ❌ 0 🔥 6.035s ⏱️

Please sign in to comment.