remove pin on scikit-learn and skip all the load_boston() tests

microsoft · Dec 29, 2022 · 8431c38 · 8431c38
1 parent b95c865
commit 8431c38
Show file tree

Hide file tree

Showing 5 changed files with 31 additions and 25 deletions.
diff --git a/.ci/test.sh b/.ci/test.sh
@@ -120,10 +120,10 @@ fi
 
 # hack around https://github.com/microsoft/LightGBM/pull/5619#issuecomment-1341935203 just to produce
 # a releasable artifact on Ubuntu 14.04
-PACKAGE_CONSTRAINTS="dask-core distributed pandas numpy scipy"
+PACKAGE_CONSTRAINTS="dask-core distributed pandas numpy scikit-learn scipy"
 if [[ $OS_NAME == "linux" ]] && [[ $COMPILER == "gcc" ]] && [[ $ARCH != "aarch64" ]]; then
     if [[ $TASK == "bdist" ]] || [[ $TASK == "regular" ]] || [[ $TASK == "mpi" ]]; then
-        PACKAGE_CONSTRAINTS="dask-core<=2022.7.1 distributed<=2022.7.1 libstdcxx-ng<12.0 numpy<=1.20.0 pandas<=1.4.1 scipy<=1.8.0"
+        PACKAGE_CONSTRAINTS="dask-core<=2022.7.1 distributed<=2022.7.1 libstdcxx-ng<12.0 numpy<=1.20.0 pandas<=1.4.1 scikit-learn<=1.1.0 scipy<=1.8.0"
     fi
 fi
 
@@ -136,8 +136,7 @@ conda install -q -y -n $CONDA_ENV \
     pytest \
     ${PACKAGE_CONSTRAINTS} \
     "python=$PYTHON_VERSION[build=*cpython]" \
-    python-graphviz \
-    'scikit-learn<1.2.0' || exit -1
+    python-graphviz || exit -1
 
 if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then
     # fix "OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized." (OpenMP library conflict due to conda's MKL)

diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1
@@ -51,7 +51,7 @@ if ($env:TASK -eq "swig") {
 }
 
 # re-including python=version[build=*cpython] to ensure that conda doesn't fall back to pypy
-conda install -q -y -n $env:CONDA_ENV cloudpickle joblib matplotlib numpy pandas psutil pytest "python=$env:PYTHON_VERSION[build=*cpython]" python-graphviz 'scikit-learn<=1.1' scipy ; Check-Output $?
+conda install -q -y -n $env:CONDA_ENV cloudpickle joblib matplotlib numpy pandas psutil pytest "python=$env:PYTHON_VERSION[build=*cpython]" python-graphviz scikit-learn scipy ; Check-Output $?
 
 if ($env:TASK -eq "regular") {
   mkdir $env:BUILD_SOURCESDIRECTORY/build; cd $env:BUILD_SOURCESDIRECTORY/build

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
@@ -17,7 +17,7 @@
 
 import lightgbm as lgb
 
-from .utils import load_boston, load_breast_cancer, load_digits, load_iris
+from .utils import load_breast_cancer, load_digits, load_iris
 
 decreasing_generator = itertools.count(0, -1)
 
@@ -99,6 +99,7 @@ def test_rf():
 
 
 def test_regression():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     params = {
@@ -643,6 +644,7 @@ def test_early_stopping():
 
 
 def test_continue_train():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     params = {
@@ -671,6 +673,7 @@ def test_continue_train():
 
 
 def test_continue_train_reused_dataset():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     params = {
         'objective': 'regression',
@@ -685,6 +688,7 @@ def test_continue_train_reused_dataset():
 
 
 def test_continue_train_dart():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     params = {
@@ -733,6 +737,7 @@ def test_continue_train_multiclass():
 
 
 def test_cv():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X_train, y_train = load_boston(return_X_y=True)
     params = {'verbose': -1}
     lgb_train = lgb.Dataset(X_train, y_train)
@@ -837,6 +842,7 @@ def test_cvbooster():
 
 
 def test_feature_name():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X_train, y_train = load_boston(return_X_y=True)
     params = {'verbose': -1}
     lgb_train = lgb.Dataset(X_train, y_train)
@@ -866,6 +872,7 @@ def test_feature_name_with_non_ascii():
 
 
 def test_save_load_copy_pickle():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     def train_and_predict(init_model=None, return_model=False):
         X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
@@ -1496,6 +1503,7 @@ def test_refit():
 
 
 def test_mape_rf():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     params = {
         'boosting_type': 'rf',
@@ -1514,6 +1522,7 @@ def test_mape_rf():
 
 
 def test_mape_dart():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     params = {
         'boosting_type': 'dart',
@@ -2052,6 +2061,7 @@ def test_default_objective_and_metric():
 
 @pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason='not enough RAM')
 def test_model_size():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     data = lgb.Dataset(X, y)
     bst = lgb.train({'verbose': -1}, data, num_boost_round=2)
@@ -2079,6 +2089,7 @@ def test_model_size():
 
 
 def test_get_split_value_histogram():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     lgb_train = lgb.Dataset(X, y, categorical_feature=[2])
     gbm = lgb.train({'verbose': -1}, lgb_train, num_boost_round=20)
@@ -2159,6 +2170,7 @@ def test_get_split_value_histogram():
 
 
 def test_early_stopping_for_only_first_metric():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
 
     def metrics_combination_train_regression(valid_sets, metric_list, assumed_iteration,
                                              first_metric_only, feval=None):
@@ -2465,6 +2477,7 @@ def test_dataset_params_with_reference():
 
 
 def test_extra_trees():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     # check extra trees increases regularization
     X, y = load_boston(return_X_y=True)
     lgb_x = lgb.Dataset(X, label=y)
@@ -2484,6 +2497,7 @@ def test_extra_trees():
 
 
 def test_path_smoothing():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     # check path smoothing increases regularization
     X, y = load_boston(return_X_y=True)
     lgb_x = lgb.Dataset(X, label=y)
@@ -2554,6 +2568,7 @@ def _imptcs_to_numpy(X, impcts_dict):
 
 
 def test_interaction_constraints():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     num_features = X.shape[1]
     train_data = lgb.Dataset(X, label=y)
@@ -2709,6 +2724,7 @@ def test_linear_single_leaf():
 
 
 def test_predict_with_start_iteration():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     def inner_test(X, y, params, early_stopping_rounds):
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         train_data = lgb.Dataset(X_train, label=y_train)

diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
@@ -18,7 +18,7 @@
 
 import lightgbm as lgb
 
-from .utils import load_boston, load_breast_cancer, load_digits, load_iris, load_linnerud, make_ranking
+from .utils import load_breast_cancer, load_digits, load_iris, load_linnerud, make_ranking
 
 sk_version = parse_version(sk_version)
 if sk_version < parse_version("0.23"):
@@ -91,6 +91,7 @@ def test_binary():
 
 
 def test_regression():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(n_estimators=50, silent=True)
@@ -157,6 +158,7 @@ def test_eval_at_aliases():
 
 
 def test_regression_with_custom_objective():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(n_estimators=50, silent=True, objective=objective_ls)
@@ -180,6 +182,7 @@ def test_binary_classification_with_custom_objective():
 
 
 def test_dart():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(boosting_type='dart', n_estimators=50)
@@ -217,6 +220,7 @@ def test_stacking_classifier():
 # sklearn <0.23 does not have a stacking regressor and n_features_in_ property
 @pytest.mark.skipif(sk_version < parse_version('0.23'), reason='scikit-learn version is less than 0.23')
 def test_stacking_regressor():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     from sklearn.ensemble import StackingRegressor
 
     X, y = load_boston(return_X_y=True)
@@ -384,14 +388,6 @@ def test_regressor_chain():
 
 
 def test_clone_and_property():
-    X, y = load_boston(return_X_y=True)
-    gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
-    gbm.fit(X, y, verbose=False)
-
-    gbm_clone = clone(gbm)
-    assert isinstance(gbm.booster_, lgb.Booster)
-    assert isinstance(gbm.feature_importances_, np.ndarray)
-
     X, y = load_digits(n_class=2, return_X_y=True)
     clf = lgb.LGBMClassifier(n_estimators=10, silent=True)
     clf.fit(X, y, verbose=False)
@@ -402,6 +398,7 @@ def test_clone_and_property():
 
 
 def test_joblib():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj,
@@ -644,6 +641,7 @@ def test_predict():
 
 
 def test_evaluate_train_set():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
@@ -658,6 +656,7 @@ def test_evaluate_train_set():
 
 
 def test_metrics():
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     X, y = load_boston(return_X_y=True)
     params = {'n_estimators': 2, 'verbose': -1}
     params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
@@ -989,7 +988,7 @@ def test_nan_handle():
 
 
 def test_first_metric_only():
-
+    pytest.skip("load_boston() was removed in scikit-learn 1.2.0")
     def fit_and_check(eval_set_names, metric_names, assumed_iteration, first_metric_only):
         params['first_metric_only'] = first_metric_only
         gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
@@ -1204,7 +1203,7 @@ def test_parameters_default_constructible(estimator):
     check_parameters_default_constructible(name, Estimator)
 
 
-@pytest.mark.parametrize('task', ['classification', 'ranking', 'regression'])
+@pytest.mark.parametrize('task', ['classification', 'ranking'])
 def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task):
     pd = pytest.importorskip("pandas")
     if task == 'ranking':
@@ -1214,9 +1213,6 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task
     elif task == 'classification':
         X, y = load_iris(return_X_y=True)
         model_factory = lgb.LGBMClassifier
-    elif task == 'regression':
-        X, y = load_boston(return_X_y=True)
-        model_factory = lgb.LGBMRegressor
     X = pd.DataFrame(X)
     y_col_array = y.reshape(-1, 1)
     params = {

diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
@@ -6,11 +6,6 @@
 from sklearn.utils import check_random_state
 
 
-@lru_cache(maxsize=None)
-def load_boston(**kwargs):
-    return sklearn.datasets.load_boston(**kwargs)
-
-
 @lru_cache(maxsize=None)
 def load_breast_cancer(**kwargs):
     return sklearn.datasets.load_breast_cancer(**kwargs)