Skip to content

Commit

Permalink
remove pin on scikit-learn and skip all the load_boston() tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb committed Dec 29, 2022
1 parent b95c865 commit 8431c38
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 25 deletions.
7 changes: 3 additions & 4 deletions .ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,10 @@ fi

# hack around https://github.com/microsoft/LightGBM/pull/5619#issuecomment-1341935203 just to produce
# a releasable artifact on Ubuntu 14.04
PACKAGE_CONSTRAINTS="dask-core distributed pandas numpy scipy"
PACKAGE_CONSTRAINTS="dask-core distributed pandas numpy scikit-learn scipy"
if [[ $OS_NAME == "linux" ]] && [[ $COMPILER == "gcc" ]] && [[ $ARCH != "aarch64" ]]; then
if [[ $TASK == "bdist" ]] || [[ $TASK == "regular" ]] || [[ $TASK == "mpi" ]]; then
PACKAGE_CONSTRAINTS="dask-core<=2022.7.1 distributed<=2022.7.1 libstdcxx-ng<12.0 numpy<=1.20.0 pandas<=1.4.1 scipy<=1.8.0"
PACKAGE_CONSTRAINTS="dask-core<=2022.7.1 distributed<=2022.7.1 libstdcxx-ng<12.0 numpy<=1.20.0 pandas<=1.4.1 scikit-learn<=1.1.0 scipy<=1.8.0"
fi
fi

Expand All @@ -136,8 +136,7 @@ conda install -q -y -n $CONDA_ENV \
pytest \
${PACKAGE_CONSTRAINTS} \
"python=$PYTHON_VERSION[build=*cpython]" \
python-graphviz \
'scikit-learn<1.2.0' || exit -1
python-graphviz || exit -1

if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then
# fix "OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized." (OpenMP library conflict due to conda's MKL)
Expand Down
2 changes: 1 addition & 1 deletion .ci/test_windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ if ($env:TASK -eq "swig") {
}

# re-including python=version[build=*cpython] to ensure that conda doesn't fall back to pypy
conda install -q -y -n $env:CONDA_ENV cloudpickle joblib matplotlib numpy pandas psutil pytest "python=$env:PYTHON_VERSION[build=*cpython]" python-graphviz 'scikit-learn<=1.1' scipy ; Check-Output $?
conda install -q -y -n $env:CONDA_ENV cloudpickle joblib matplotlib numpy pandas psutil pytest "python=$env:PYTHON_VERSION[build=*cpython]" python-graphviz scikit-learn scipy ; Check-Output $?

if ($env:TASK -eq "regular") {
mkdir $env:BUILD_SOURCESDIRECTORY/build; cd $env:BUILD_SOURCESDIRECTORY/build
Expand Down
18 changes: 17 additions & 1 deletion tests/python_package_test/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import lightgbm as lgb

from .utils import load_boston, load_breast_cancer, load_digits, load_iris
from .utils import load_breast_cancer, load_digits, load_iris

decreasing_generator = itertools.count(0, -1)

Expand Down Expand Up @@ -99,6 +99,7 @@ def test_rf():


def test_regression():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
params = {
Expand Down Expand Up @@ -643,6 +644,7 @@ def test_early_stopping():


def test_continue_train():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
params = {
Expand Down Expand Up @@ -671,6 +673,7 @@ def test_continue_train():


def test_continue_train_reused_dataset():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
params = {
'objective': 'regression',
Expand All @@ -685,6 +688,7 @@ def test_continue_train_reused_dataset():


def test_continue_train_dart():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
params = {
Expand Down Expand Up @@ -733,6 +737,7 @@ def test_continue_train_multiclass():


def test_cv():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X_train, y_train = load_boston(return_X_y=True)
params = {'verbose': -1}
lgb_train = lgb.Dataset(X_train, y_train)
Expand Down Expand Up @@ -837,6 +842,7 @@ def test_cvbooster():


def test_feature_name():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X_train, y_train = load_boston(return_X_y=True)
params = {'verbose': -1}
lgb_train = lgb.Dataset(X_train, y_train)
Expand Down Expand Up @@ -866,6 +872,7 @@ def test_feature_name_with_non_ascii():


def test_save_load_copy_pickle():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
def train_and_predict(init_model=None, return_model=False):
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
Expand Down Expand Up @@ -1496,6 +1503,7 @@ def test_refit():


def test_mape_rf():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
params = {
'boosting_type': 'rf',
Expand All @@ -1514,6 +1522,7 @@ def test_mape_rf():


def test_mape_dart():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
params = {
'boosting_type': 'dart',
Expand Down Expand Up @@ -2052,6 +2061,7 @@ def test_default_objective_and_metric():

@pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason='not enough RAM')
def test_model_size():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
data = lgb.Dataset(X, y)
bst = lgb.train({'verbose': -1}, data, num_boost_round=2)
Expand Down Expand Up @@ -2079,6 +2089,7 @@ def test_model_size():


def test_get_split_value_histogram():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
lgb_train = lgb.Dataset(X, y, categorical_feature=[2])
gbm = lgb.train({'verbose': -1}, lgb_train, num_boost_round=20)
Expand Down Expand Up @@ -2159,6 +2170,7 @@ def test_get_split_value_histogram():


def test_early_stopping_for_only_first_metric():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")

def metrics_combination_train_regression(valid_sets, metric_list, assumed_iteration,
first_metric_only, feval=None):
Expand Down Expand Up @@ -2465,6 +2477,7 @@ def test_dataset_params_with_reference():


def test_extra_trees():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
# check extra trees increases regularization
X, y = load_boston(return_X_y=True)
lgb_x = lgb.Dataset(X, label=y)
Expand All @@ -2484,6 +2497,7 @@ def test_extra_trees():


def test_path_smoothing():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
# check path smoothing increases regularization
X, y = load_boston(return_X_y=True)
lgb_x = lgb.Dataset(X, label=y)
Expand Down Expand Up @@ -2554,6 +2568,7 @@ def _imptcs_to_numpy(X, impcts_dict):


def test_interaction_constraints():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
num_features = X.shape[1]
train_data = lgb.Dataset(X, label=y)
Expand Down Expand Up @@ -2709,6 +2724,7 @@ def test_linear_single_leaf():


def test_predict_with_start_iteration():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
def inner_test(X, y, params, early_stopping_rounds):
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
train_data = lgb.Dataset(X_train, label=y_train)
Expand Down
24 changes: 10 additions & 14 deletions tests/python_package_test/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import lightgbm as lgb

from .utils import load_boston, load_breast_cancer, load_digits, load_iris, load_linnerud, make_ranking
from .utils import load_breast_cancer, load_digits, load_iris, load_linnerud, make_ranking

sk_version = parse_version(sk_version)
if sk_version < parse_version("0.23"):
Expand Down Expand Up @@ -91,6 +91,7 @@ def test_binary():


def test_regression():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=50, silent=True)
Expand Down Expand Up @@ -157,6 +158,7 @@ def test_eval_at_aliases():


def test_regression_with_custom_objective():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=50, silent=True, objective=objective_ls)
Expand All @@ -180,6 +182,7 @@ def test_binary_classification_with_custom_objective():


def test_dart():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(boosting_type='dart', n_estimators=50)
Expand Down Expand Up @@ -217,6 +220,7 @@ def test_stacking_classifier():
# sklearn <0.23 does not have a stacking regressor and n_features_in_ property
@pytest.mark.skipif(sk_version < parse_version('0.23'), reason='scikit-learn version is less than 0.23')
def test_stacking_regressor():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
from sklearn.ensemble import StackingRegressor

X, y = load_boston(return_X_y=True)
Expand Down Expand Up @@ -384,14 +388,6 @@ def test_regressor_chain():


def test_clone_and_property():
X, y = load_boston(return_X_y=True)
gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
gbm.fit(X, y, verbose=False)

gbm_clone = clone(gbm)
assert isinstance(gbm.booster_, lgb.Booster)
assert isinstance(gbm.feature_importances_, np.ndarray)

X, y = load_digits(n_class=2, return_X_y=True)
clf = lgb.LGBMClassifier(n_estimators=10, silent=True)
clf.fit(X, y, verbose=False)
Expand All @@ -402,6 +398,7 @@ def test_clone_and_property():


def test_joblib():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj,
Expand Down Expand Up @@ -644,6 +641,7 @@ def test_predict():


def test_evaluate_train_set():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
Expand All @@ -658,6 +656,7 @@ def test_evaluate_train_set():


def test_metrics():
pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
X, y = load_boston(return_X_y=True)
params = {'n_estimators': 2, 'verbose': -1}
params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
Expand Down Expand Up @@ -989,7 +988,7 @@ def test_nan_handle():


def test_first_metric_only():

pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
def fit_and_check(eval_set_names, metric_names, assumed_iteration, first_metric_only):
params['first_metric_only'] = first_metric_only
gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
Expand Down Expand Up @@ -1204,7 +1203,7 @@ def test_parameters_default_constructible(estimator):
check_parameters_default_constructible(name, Estimator)


@pytest.mark.parametrize('task', ['classification', 'ranking', 'regression'])
@pytest.mark.parametrize('task', ['classification', 'ranking'])
def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task):
pd = pytest.importorskip("pandas")
if task == 'ranking':
Expand All @@ -1214,9 +1213,6 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task
elif task == 'classification':
X, y = load_iris(return_X_y=True)
model_factory = lgb.LGBMClassifier
elif task == 'regression':
X, y = load_boston(return_X_y=True)
model_factory = lgb.LGBMRegressor
X = pd.DataFrame(X)
y_col_array = y.reshape(-1, 1)
params = {
Expand Down
5 changes: 0 additions & 5 deletions tests/python_package_test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@
from sklearn.utils import check_random_state


@lru_cache(maxsize=None)
def load_boston(**kwargs):
return sklearn.datasets.load_boston(**kwargs)


@lru_cache(maxsize=None)
def load_breast_cancer(**kwargs):
return sklearn.datasets.load_breast_cancer(**kwargs)
Expand Down

0 comments on commit 8431c38

Please sign in to comment.