
Commit

Merge branch 'master' of github.com:microsoft/LightGBM into googletest-version
jameslamb committed Sep 25, 2023
2 parents 432e156 + 60a4a13 commit b07ad20
Showing 4 changed files with 48 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .ci/test.sh
@@ -124,7 +124,7 @@ fi
# ref: https://github.com/microsoft/LightGBM/issues/6030
CONSTRAINED_DEPENDENCIES="'dask-core>=2023.5.0' 'distributed>=2023.5.0' 'pandas>=2.0'"
if [[ $PYTHON_VERSION == "3.7" ]]; then
CONSTRAINED_DEPENDENCIES="'dask-core' 'distributed 'pandas<2.0'"
CONSTRAINED_DEPENDENCIES="'dask-core' 'distributed' 'pandas<2.0'"
fi

# including python=version[build=*cpython] to ensure that conda doesn't fall back to pypy
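A quick way to see what the restored quote changes: Python's shlex, used here purely as an illustration (the CI script itself is bash), applies POSIX-style splitting rules to both forms of the constraint list.

import shlex

broken = "'dask-core' 'distributed 'pandas<2.0'"
fixed = "'dask-core' 'distributed' 'pandas<2.0'"

print(shlex.split(fixed))   # ['dask-core', 'distributed', 'pandas<2.0']
try:
    shlex.split(broken)     # the unbalanced quote leaves the last token unterminated
except ValueError as err:
    print(err)              # "No closing quotation"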
13 changes: 13 additions & 0 deletions .ci/test_r_package_windows.ps1
@@ -203,6 +203,19 @@ if ($env:COMPILER -ne "MSVC") {
}
}

# Checking that the correct R version was used
if ($env:TOOLCHAIN -ne "MSVC") {
$checks = Select-String -Path "${LOG_FILE_NAME}" -Pattern "using R version $env:R_WINDOWS_VERSION"
$checks_cnt = $checks.Matches.length
} else {
$checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "R version passed into FindLibR.* $env:R_WINDOWS_VERSION"
$checks_cnt = $checks.Matches.length
}
if ($checks_cnt -eq 0) {
Write-Output "Wrong R version was found (expected '$env:R_WINDOWS_VERSION'). Check the build logs."
Check-Output $False
}
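The added check greps the build log for the expected R version string and fails the job if it is absent. A minimal Python sketch of the same idea, assuming a hypothetical log path and version (the CI job itself runs the Select-String version above, driven by $LOG_FILE_NAME, $INSTALL_LOG_FILE_NAME and $env:R_WINDOWS_VERSION):

import re
from pathlib import Path

# Hypothetical inputs, for illustration only.
log_file = Path("R_CMD_check.log")
expected_r_version = "4.3.1"

log_text = log_file.read_text(encoding="utf-8", errors="replace")
if not re.search(rf"using R version {re.escape(expected_r_version)}", log_text):
    raise SystemExit(
        f"Wrong R version was found (expected '{expected_r_version}'). Check the build logs."
    )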

# Checking that we actually got the expected compiler. The R package has some logic
# to fail back to MinGW if MSVC fails, but for CI builds we need to check that the correct
# compiler was used.
45 changes: 33 additions & 12 deletions python-package/lightgbm/basic.py
@@ -24,6 +24,13 @@
if TYPE_CHECKING:
from typing import Literal

# typing.TypeGuard was only introduced in Python 3.10
try:
from typing import TypeGuard
except ImportError:
from typing_extensions import TypeGuard


__all__ = [
'Booster',
'Dataset',
@@ -279,6 +286,20 @@ def _is_1d_list(data: Any) -> bool:
return isinstance(data, list) and (not data or _is_numeric(data[0]))


def _is_list_of_numpy_arrays(data: Any) -> "TypeGuard[List[np.ndarray]]":
return (
isinstance(data, list)
and all(isinstance(x, np.ndarray) for x in data)
)


def _is_list_of_sequences(data: Any) -> "TypeGuard[List[Sequence]]":
return (
isinstance(data, list)
and all(isinstance(x, Sequence) for x in data)
)
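These helpers return typing.TypeGuard values, so a passing check narrows data from Any to the guarded list type inside that branch, which is what lets the _lazy_init dispatch later in this diff type-check. A self-contained sketch of the narrowing; the stack() function and the local copy of the guard are illustrative, not part of LightGBM:

import sys
from typing import Any, List

import numpy as np

if sys.version_info >= (3, 10):   # typing.TypeGuard exists from 3.10 onward
    from typing import TypeGuard
else:                             # older interpreters rely on the backport
    from typing_extensions import TypeGuard


def _is_list_of_numpy_arrays(data: Any) -> "TypeGuard[List[np.ndarray]]":
    return isinstance(data, list) and all(isinstance(x, np.ndarray) for x in data)


def stack(data: Any) -> np.ndarray:
    if _is_list_of_numpy_arrays(data):
        # mypy now treats `data` as List[np.ndarray], so ndarray-only operations type-check
        return np.vstack(data)
    raise TypeError("expected a list of numpy arrays")


print(stack([np.zeros((1, 2)), np.ones((1, 2))]).shape)  # (2, 2)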


def _is_1d_collection(data: Any) -> bool:
"""Check whether data is a 1-D collection."""
return (
@@ -458,7 +479,7 @@ def _get_all_param_aliases() -> Dict[str, List[str]]:
buffer_len = 1 << 20
tmp_out_len = ctypes.c_int64(0)
string_buffer = ctypes.create_string_buffer(buffer_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
ptr_string_buffer = ctypes.c_char_p(ctypes.addressof(string_buffer))
_safe_call(_LIB.LGBM_DumpParamAliases(
ctypes.c_int64(buffer_len),
ctypes.byref(tmp_out_len),
@@ -467,7 +488,7 @@ def _get_all_param_aliases() -> Dict[str, List[str]]:
# if buffer length is not long enough, re-allocate a buffer
if actual_len > buffer_len:
string_buffer = ctypes.create_string_buffer(actual_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
ptr_string_buffer = ctypes.c_char_p(ctypes.addressof(string_buffer))
_safe_call(_LIB.LGBM_DumpParamAliases(
ctypes.c_int64(actual_len),
ctypes.byref(tmp_out_len),
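The ptr_string_buffer changes throughout this file are all the same cleanup: ctypes.c_char_p(*[ctypes.addressof(buf)]) and ctypes.c_char_p(ctypes.addressof(buf)) build an identical pointer, so the list-unpacking is dropped without changing behavior. A standalone check of that equivalence (no LightGBM C call involved):

import ctypes

string_buffer = ctypes.create_string_buffer(1 << 20)
old_style = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
new_style = ctypes.c_char_p(ctypes.addressof(string_buffer))

# Both objects wrap the very same address, namely that of string_buffer.
old_addr = ctypes.cast(old_style, ctypes.c_void_p).value
new_addr = ctypes.cast(new_style, ctypes.c_void_p).value
assert old_addr == new_addr == ctypes.addressof(string_buffer)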
@@ -1593,7 +1614,7 @@ def __init__(
self.used_indices: Optional[List[int]] = None
self._need_slice = True
self._predictor: Optional[_InnerPredictor] = None
self.pandas_categorical = None
self.pandas_categorical: Optional[List[List]] = None
self._params_back_up = None
self.version = 0
self._start_row = 0 # Used when pushing rows one by one.
@@ -1918,9 +1939,9 @@ def _lazy_init(
elif isinstance(data, np.ndarray):
self.__init_from_np2d(data, params_str, ref_dataset)
elif isinstance(data, list) and len(data) > 0:
if all(isinstance(x, np.ndarray) for x in data):
if _is_list_of_numpy_arrays(data):
self.__init_from_list_np2d(data, params_str, ref_dataset)
elif all(isinstance(x, Sequence) for x in data):
elif _is_list_of_sequences(data):
self.__init_from_seqs(data, ref_dataset)
else:
raise TypeError('Data list can only be of ndarray or Sequence')
@@ -2870,7 +2891,7 @@ def get_data(self) -> Optional[_LGBM_TrainDataType]:
self.data = self.data[self.used_indices, :]
elif isinstance(self.data, Sequence):
self.data = self.data[self.used_indices]
elif isinstance(self.data, list) and len(self.data) > 0 and all(isinstance(x, Sequence) for x in self.data):
elif _is_list_of_sequences(self.data) and len(self.data) > 0:
self.data = np.array(list(self._yield_row_from_seqlist(self.data, self.used_indices)))
else:
_log_warning(f"Cannot subset {type(self.data).__name__} type of raw data.\n"
@@ -3294,7 +3315,7 @@ def _get_loaded_param(self) -> Dict[str, Any]:
buffer_len = 1 << 20
tmp_out_len = ctypes.c_int64(0)
string_buffer = ctypes.create_string_buffer(buffer_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
ptr_string_buffer = ctypes.c_char_p(ctypes.addressof(string_buffer))
_safe_call(_LIB.LGBM_BoosterGetLoadedParam(
self._handle,
ctypes.c_int64(buffer_len),
@@ -3304,7 +3325,7 @@ def _get_loaded_param(self) -> Dict[str, Any]:
# if buffer length is not long enough, re-allocate a buffer
if actual_len > buffer_len:
string_buffer = ctypes.create_string_buffer(actual_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
ptr_string_buffer = ctypes.c_char_p(ctypes.addressof(string_buffer))
_safe_call(_LIB.LGBM_BoosterGetLoadedParam(
self._handle,
ctypes.c_int64(actual_len),
@@ -4057,7 +4078,7 @@ def model_to_string(
buffer_len = 1 << 20
tmp_out_len = ctypes.c_int64(0)
string_buffer = ctypes.create_string_buffer(buffer_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
ptr_string_buffer = ctypes.c_char_p(ctypes.addressof(string_buffer))
_safe_call(_LIB.LGBM_BoosterSaveModelToString(
self._handle,
ctypes.c_int(start_iteration),
@@ -4070,7 +4091,7 @@ def model_to_string(
# if buffer length is not long enough, re-allocate a buffer
if actual_len > buffer_len:
string_buffer = ctypes.create_string_buffer(actual_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
ptr_string_buffer = ctypes.c_char_p(ctypes.addressof(string_buffer))
_safe_call(_LIB.LGBM_BoosterSaveModelToString(
self._handle,
ctypes.c_int(start_iteration),
@@ -4125,7 +4146,7 @@ def dump_model(
buffer_len = 1 << 20
tmp_out_len = ctypes.c_int64(0)
string_buffer = ctypes.create_string_buffer(buffer_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
ptr_string_buffer = ctypes.c_char_p(ctypes.addressof(string_buffer))
_safe_call(_LIB.LGBM_BoosterDumpModel(
self._handle,
ctypes.c_int(start_iteration),
@@ -4138,7 +4159,7 @@ def dump_model(
# if buffer length is not long enough, reallocate a buffer
if actual_len > buffer_len:
string_buffer = ctypes.create_string_buffer(actual_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
ptr_string_buffer = ctypes.c_char_p(ctypes.addressof(string_buffer))
_safe_call(_LIB.LGBM_BoosterDumpModel(
self._handle,
ctypes.c_int(start_iteration),
2 changes: 1 addition & 1 deletion python-package/lightgbm/callback.py
@@ -164,7 +164,7 @@ def __call__(self, env: CallbackEnv) -> None:
else:
data_name, eval_name = item[1].split()
res_mean = item[2]
res_stdv = item[4]
res_stdv = item[4] # type: ignore[misc]
self.eval_result[data_name][f'{eval_name}-mean'].append(res_mean)
self.eval_result[data_name][f'{eval_name}-stdv'].append(res_stdv)
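The # type: ignore[misc] keeps mypy from flagging item[4], which is out of range for the declared tuple type even though cross-validation results do carry a fifth (standard deviation) element at runtime. A minimal sketch of that kind of situation, using an illustrative alias rather than the callback's real annotation:

from typing import Tuple, Union

# Illustrative alias only: one variant has no standard-deviation element,
# so item[4] is out of range for it as far as mypy is concerned.
_EvalItem = Union[
    Tuple[str, str, float, bool],
    Tuple[str, str, float, bool, float],
]


def _stdv(item: _EvalItem) -> float:
    # Reached only for the 5-tuple case at runtime, hence the targeted ignore.
    return item[4]  # type: ignore[misc]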

