Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python-package] stop relying on string concatenation / splitting for cv() eval results #6761

Merged
merged 14 commits into from
Dec 22, 2024
72 changes: 38 additions & 34 deletions python-package/lightgbm/callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ class CallbackEnv:
evaluation_result_list: Optional[_ListOfEvalResultTuples]


def _using_cv(env: CallbackEnv) -> bool:
StrikerRUS marked this conversation as resolved.
Show resolved Hide resolved
"""Check if model in callback env is a CVBooster."""
# this string-matching is used instead of isinstance() to avoid a circular import
StrikerRUS marked this conversation as resolved.
Show resolved Hide resolved
return env.model.__class__.__name__ == "CVBooster" or any(
c.__name__ == "CVBooster" for c in env.model.__class__.__bases__
)


def _format_eval_result(value: _EvalResultTuple, show_stdv: bool) -> str:
"""Format metric string."""
dataset_name, metric_name, metric_value, *_ = value
Expand Down Expand Up @@ -143,16 +151,13 @@ def _init(self, env: CallbackEnv) -> None:
)
self.eval_result.clear()
for item in env.evaluation_result_list:
if len(item) == 4: # regular train
data_name, eval_name = item[:2]
else: # cv
data_name, eval_name = item[1].split()
self.eval_result.setdefault(data_name, OrderedDict())
dataset_name, metric_name, *_ = item
self.eval_result.setdefault(dataset_name, OrderedDict())
if len(item) == 4:
self.eval_result[data_name].setdefault(eval_name, [])
self.eval_result[dataset_name].setdefault(metric_name, [])
else:
self.eval_result[data_name].setdefault(f"{eval_name}-mean", [])
self.eval_result[data_name].setdefault(f"{eval_name}-stdv", [])
self.eval_result[dataset_name].setdefault(f"{metric_name}-mean", [])
self.eval_result[dataset_name].setdefault(f"{metric_name}-stdv", [])

def __call__(self, env: CallbackEnv) -> None:
if env.iteration == env.begin_iteration:
Expand All @@ -163,15 +168,16 @@ def __call__(self, env: CallbackEnv) -> None:
"Please report it at https://github.com/microsoft/LightGBM/issues"
)
for item in env.evaluation_result_list:
# for cv(), 'metric_value' is actually a mean of metric values over all CV folds
dataset_name, metric_name, metric_value, *_ = item
if len(item) == 4:
data_name, eval_name, result = item[:3]
self.eval_result[data_name][eval_name].append(result)
# train()
self.eval_result[dataset_name][metric_name].append(metric_value)
else:
data_name, eval_name = item[1].split()
res_mean = item[2]
res_stdv = item[4] # type: ignore[misc]
self.eval_result[data_name][f"{eval_name}-mean"].append(res_mean)
self.eval_result[data_name][f"{eval_name}-stdv"].append(res_stdv)
# cv()
metric_std_dev = item[4] # type: ignore[misc]
self.eval_result[dataset_name][f"{metric_name}-mean"].append(metric_value)
self.eval_result[dataset_name][f"{metric_name}-stdv"].append(metric_std_dev)


def record_evaluation(eval_result: Dict[str, Dict[str, List[Any]]]) -> Callable:
Expand Down Expand Up @@ -304,15 +310,15 @@ def _gt_delta(self, curr_score: float, best_score: float, delta: float) -> bool:
def _lt_delta(self, curr_score: float, best_score: float, delta: float) -> bool:
return curr_score < best_score - delta

def _is_train_set(self, ds_name: str, eval_name: str, env: CallbackEnv) -> bool:
def _is_train_set(self, dataset_name: str, env: CallbackEnv) -> bool:
"""Check, by name, if a given Dataset is the training data."""
# for lgb.cv() with eval_train_metric=True, evaluation is also done on the training set
# and those metrics are considered for early stopping
if ds_name == "cv_agg" and eval_name == "train":
if _using_cv(env) and dataset_name == "train":
return True

# for lgb.train(), it's possible to pass the training data via valid_sets with any eval_name
if isinstance(env.model, Booster) and ds_name == env.model._train_data_name:
if isinstance(env.model, Booster) and dataset_name == env.model._train_data_name:
return True

return False
Expand All @@ -327,11 +333,13 @@ def _init(self, env: CallbackEnv) -> None:
_log_warning("Early stopping is not available in dart mode")
return

# get details of the first dataset
first_dataset_name, first_metric_name, *_ = env.evaluation_result_list[0]

# validation sets are guaranteed to not be identical to the training data in cv()
if isinstance(env.model, Booster):
only_train_set = len(env.evaluation_result_list) == 1 and self._is_train_set(
ds_name=env.evaluation_result_list[0][0],
eval_name=env.evaluation_result_list[0][1].split(" ")[0],
dataset_name=first_dataset_name,
env=env,
)
if only_train_set:
Expand Down Expand Up @@ -370,8 +378,7 @@ def _init(self, env: CallbackEnv) -> None:
_log_info(f"Using {self.min_delta} as min_delta for all metrics.")
deltas = [self.min_delta] * n_datasets * n_metrics

# split is needed for "<dataset type> <metric>" case (e.g. "train l1")
self.first_metric = env.evaluation_result_list[0][1].split(" ")[-1]
self.first_metric = first_metric_name
for eval_ret, delta in zip(env.evaluation_result_list, deltas):
self.best_iter.append(0)
if eval_ret[3]: # greater is better
Expand All @@ -381,15 +388,15 @@ def _init(self, env: CallbackEnv) -> None:
self.best_score.append(float("inf"))
self.cmp_op.append(partial(self._lt_delta, delta=delta))

def _final_iteration_check(self, env: CallbackEnv, eval_name_splitted: List[str], i: int) -> None:
def _final_iteration_check(self, *, env: CallbackEnv, metric_name: str, i: int) -> None:
if env.iteration == env.end_iteration - 1:
if self.verbose:
best_score_str = "\t".join([_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]])
_log_info(
"Did not meet early stopping. " f"Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}"
)
if self.first_metric_only:
_log_info(f"Evaluated only: {eval_name_splitted[-1]}")
_log_info(f"Evaluated only: {metric_name}")
raise EarlyStopException(self.best_iter[i], self.best_score_list[i])

def __call__(self, env: CallbackEnv) -> None:
Expand All @@ -405,21 +412,18 @@ def __call__(self, env: CallbackEnv) -> None:
# self.best_score_list is initialized to an empty list
first_time_updating_best_score_list = self.best_score_list == []
for i in range(len(env.evaluation_result_list)):
score = env.evaluation_result_list[i][2]
if first_time_updating_best_score_list or self.cmp_op[i](score, self.best_score[i]):
self.best_score[i] = score
dataset_name, metric_name, metric_value, *_ = env.evaluation_result_list[i]
if first_time_updating_best_score_list or self.cmp_op[i](metric_value, self.best_score[i]):
self.best_score[i] = metric_value
self.best_iter[i] = env.iteration
if first_time_updating_best_score_list:
self.best_score_list.append(env.evaluation_result_list)
else:
self.best_score_list[i] = env.evaluation_result_list
# split is needed for "<dataset type> <metric>" case (e.g. "train l1")
eval_name_splitted = env.evaluation_result_list[i][1].split(" ")
if self.first_metric_only and self.first_metric != eval_name_splitted[-1]:
if self.first_metric_only and self.first_metric != metric_name:
continue # use only the first metric for early stopping
if self._is_train_set(
ds_name=env.evaluation_result_list[i][0],
eval_name=eval_name_splitted[0],
dataset_name=dataset_name,
env=env,
):
continue # train data for lgb.cv or sklearn wrapper (underlying lgb.train)
Expand All @@ -430,9 +434,9 @@ def __call__(self, env: CallbackEnv) -> None:
)
_log_info(f"Early stopping, best iteration is:\n[{self.best_iter[i] + 1}]\t{eval_result_str}")
if self.first_metric_only:
_log_info(f"Evaluated only: {eval_name_splitted[-1]}")
_log_info(f"Evaluated only: {metric_name}")
raise EarlyStopException(self.best_iter[i], self.best_score_list[i])
self._final_iteration_check(env, eval_name_splitted, i)
self._final_iteration_check(env=env, metric_name=metric_name, i=i)


def _should_enable_early_stopping(stopping_rounds: Any) -> bool:
Expand Down
40 changes: 30 additions & 10 deletions python-package/lightgbm/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,15 +581,35 @@ def _agg_cv_result(
raw_results: List[List[_LGBM_BoosterEvalMethodResultType]],
) -> List[_LGBM_BoosterEvalMethodResultWithStandardDeviationType]:
"""Aggregate cross-validation results."""
cvmap: Dict[str, List[float]] = OrderedDict()
metric_type: Dict[str, bool] = {}
# build up 2 maps, of the form:
#
# OrderedDict{
# (<dataset_name>, <metric_name>): <is_higher_better>
# }
#
# OrderedDict{
# (<dataset_name>, <metric_name>): list[<metric_value>]
# }
#
metric_types: Dict[Tuple[str, str], bool] = OrderedDict()
metric_values: Dict[Tuple[str, str], List[float]] = OrderedDict()
for one_result in raw_results:
for one_line in one_result:
StrikerRUS marked this conversation as resolved.
Show resolved Hide resolved
key = f"{one_line[0]} {one_line[1]}"
metric_type[key] = one_line[3]
cvmap.setdefault(key, [])
cvmap[key].append(one_line[2])
return [("cv_agg", k, float(np.mean(v)), metric_type[k], float(np.std(v))) for k, v in cvmap.items()]
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This, removing this "cv_agg" string literal, is the key change... everything else flows from that.

dataset_name, metric_name, metric_value, is_higher_better = one_line
key = (dataset_name, metric_name)
metric_types[key] = is_higher_better
metric_values.setdefault(key, [])
metric_values[key].append(metric_value)

# turn that into a list of tuples of the form:
#
# [
# (<dataset_name>, <metric_name>, mean(<values>), <is_higher_better>, std_dev(<values>))
# ]
return [
(k[0], k[1], float(np.mean(metric_values[k])), metric_types[k], float(np.std(metric_values[k])))
jameslamb marked this conversation as resolved.
Show resolved Hide resolved
for k, v in metric_values.items()
]


def cv(
Expand Down Expand Up @@ -812,9 +832,9 @@ def cv(
)
cvfolds.update(fobj=fobj) # type: ignore[call-arg]
res = _agg_cv_result(cvfolds.eval_valid(feval)) # type: ignore[call-arg]
for _, key, mean, _, std in res:
results[f"{key}-mean"].append(mean)
results[f"{key}-stdv"].append(std)
for dataset_name, metric_name, metric_mean, _, metric_std_dev in res:
results[f"{dataset_name} {metric_name}-mean"].append(metric_mean)
results[f"{dataset_name} {metric_name}-stdv"].append(metric_std_dev)
try:
for cb in callbacks_after_iter:
cb(
Expand Down
Loading