From deb70773b87a494f626c5b2c088f372f116c6559 Mon Sep 17 00:00:00 2001 From: Oliver Borchert Date: Mon, 13 Nov 2023 19:03:58 +0100 Subject: [PATCH 01/19] [python-package] Allow to pass Arrow array as weights (#6164) --- include/LightGBM/c_api.h | 4 +- include/LightGBM/dataset.h | 4 ++ python-package/lightgbm/basic.py | 29 +++++++---- python-package/lightgbm/compat.py | 7 +++ src/io/dataset.cpp | 2 + src/io/metadata.cpp | 28 ++++++++--- tests/python_package_test/test_arrow.py | 66 ++++++++++++++++++++----- 7 files changed, 108 insertions(+), 32 deletions(-) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index a46f8332811a..fd337cbc7cbe 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -558,9 +558,9 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetSetField(DatasetHandle handle, /*! * \brief Set vector to a content in info. * \note - * - \a label convert input datatype into ``float32``. + * - \a label and \a weight convert input datatype into ``float32``. * \param handle Handle of dataset - * \param field_name Field name, can be \a label + * \param field_name Field name, can be \a label, \a weight * \param n_chunks The number of Arrow arrays passed to this function * \param chunks Pointer to the list of Arrow arrays * \param schema Pointer to the schema of all Arrow arrays diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index 56bc7b841dc3..48c1bee804d7 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -113,6 +113,7 @@ class Metadata { void SetLabel(const ArrowChunkedArray& array); void SetWeights(const label_t* weights, data_size_t len); + void SetWeights(const ArrowChunkedArray& array); void SetQuery(const data_size_t* query, data_size_t len); @@ -340,6 +341,9 @@ class Metadata { void SetLabelsFromIterator(It first, It last); /*! \brief Insert weights at the given index */ void InsertWeights(const label_t* weights, data_size_t start_index, data_size_t len); + /*! \brief Set weights from pointers to the first element and the end of an iterator. */ + template + void SetWeightsFromIterator(It first, It last); /*! \brief Insert initial scores at the given index */ void InsertInitScores(const double* init_scores, data_size_t start_index, data_size_t len, data_size_t source_size); /*! \brief Insert queries at the given index */ diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index e8d8bd84cbe7..939842df3389 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -19,7 +19,8 @@ import scipy.sparse from .compat import (PANDAS_INSTALLED, PYARROW_INSTALLED, arrow_cffi, arrow_is_floating, arrow_is_integer, concat, - dt_DataTable, pa_Array, pa_ChunkedArray, pa_Table, pd_CategoricalDtype, pd_DataFrame, pd_Series) + dt_DataTable, pa_Array, pa_ChunkedArray, pa_compute, pa_Table, pd_CategoricalDtype, pd_DataFrame, + pd_Series) from .libpath import find_lib_path if TYPE_CHECKING: @@ -115,7 +116,9 @@ List[float], List[int], np.ndarray, - pd_Series + pd_Series, + pa_Array, + pa_ChunkedArray, ] ZERO_THRESHOLD = 1e-35 @@ -1635,7 +1638,7 @@ def __init__( Label of the data. reference : Dataset or None, optional (default=None) If this is Dataset for validation, training data should be used as reference. - weight : list, numpy 1-D array, pandas Series or None, optional (default=None) + weight : list, numpy 1-D array, pandas Series, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) Weight for each instance. Weights should be non-negative. 
group : list, numpy 1-D array, pandas Series or None, optional (default=None) Group/query data. @@ -2415,7 +2418,7 @@ def create_valid( If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file. label : list, numpy 1-D array, pandas Series / one-column DataFrame, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) Label of the data. - weight : list, numpy 1-D array, pandas Series or None, optional (default=None) + weight : list, numpy 1-D array, pandas Series, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) Weight for each instance. Weights should be non-negative. group : list, numpy 1-D array, pandas Series or None, optional (default=None) Group/query data. @@ -2830,7 +2833,7 @@ def set_weight( Parameters ---------- - weight : list, numpy 1-D array, pandas Series or None + weight : list, numpy 1-D array, pandas Series, pyarrow Array, pyarrow ChunkedArray or None Weight to be set for each data point. Weights should be non-negative. Returns @@ -2838,11 +2841,19 @@ def set_weight( self : Dataset Dataset with set weight. """ - if weight is not None and np.all(weight == 1): - weight = None + # Check if the weight contains values other than one + if weight is not None: + if _is_pyarrow_array(weight): + if pa_compute.all(pa_compute.equal(weight, 1)).as_py(): + weight = None + elif np.all(weight == 1): + weight = None self.weight = weight + + # Set field if self._handle is not None and weight is not None: - weight = _list_to_1d_numpy(weight, dtype=np.float32, name='weight') + if not _is_pyarrow_array(weight): + weight = _list_to_1d_numpy(weight, dtype=np.float32, name='weight') self.set_field('weight', weight) self.weight = self.get_field('weight') # original values can be modified at cpp side return self @@ -4414,7 +4425,7 @@ def refit( .. versionadded:: 4.0.0 - weight : list, numpy 1-D array, pandas Series or None, optional (default=None) + weight : list, numpy 1-D array, pandas Series, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) Weight for each ``data`` instance. Weights should be non-negative. .. 
versionadded:: 4.0.0 diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py index 984972ed1ae3..dc48dbf792cf 100644 --- a/python-package/lightgbm/compat.py +++ b/python-package/lightgbm/compat.py @@ -197,6 +197,7 @@ def __init__(self, *args, **kwargs): """pyarrow""" try: + import pyarrow.compute as pa_compute from pyarrow import Array as pa_Array from pyarrow import ChunkedArray as pa_ChunkedArray from pyarrow import Table as pa_Table @@ -236,6 +237,12 @@ class arrow_cffi: # type: ignore def __init__(self, *args, **kwargs): pass + class pa_compute: # type: ignore + """Dummy class for pyarrow.compute.""" + + all = None + equal = None + arrow_is_integer = None arrow_is_floating = None diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp index e78f8a6b696c..01eb41b71367 100644 --- a/src/io/dataset.cpp +++ b/src/io/dataset.cpp @@ -902,6 +902,8 @@ bool Dataset::SetFieldFromArrow(const char* field_name, const ArrowChunkedArray name = Common::Trim(name); if (name == std::string("label") || name == std::string("target")) { metadata_.SetLabel(ca); + } else if (name == std::string("weight") || name == std::string("weights")) { + metadata_.SetWeights(ca); } else { return false; } diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp index 41f9e3bf43c6..ed4fb135e62a 100644 --- a/src/io/metadata.cpp +++ b/src/io/metadata.cpp @@ -450,33 +450,45 @@ void Metadata::InsertLabels(const label_t* labels, data_size_t start_index, data // CUDA is handled after all insertions are complete } -void Metadata::SetWeights(const label_t* weights, data_size_t len) { +template +void Metadata::SetWeightsFromIterator(It first, It last) { std::lock_guard lock(mutex_); - // save to nullptr - if (weights == nullptr || len == 0) { + // Clear weights on empty input + if (last - first == 0) { weights_.clear(); num_weights_ = 0; return; } - if (num_data_ != len) { - Log::Fatal("Length of weights is not same with #data"); + if (num_data_ != last - first) { + Log::Fatal("Length of weights differs from the length of #data"); + } + if (weights_.empty()) { + weights_.resize(num_data_); } - if (weights_.empty()) { weights_.resize(num_data_); } num_weights_ = num_data_; #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_weights_ >= 1024) for (data_size_t i = 0; i < num_weights_; ++i) { - weights_[i] = Common::AvoidInf(weights[i]); + weights_[i] = Common::AvoidInf(first[i]); } CalculateQueryWeights(); weight_load_from_file_ = false; + #ifdef USE_CUDA if (cuda_metadata_ != nullptr) { - cuda_metadata_->SetWeights(weights_.data(), len); + cuda_metadata_->SetWeights(weights_.data(), weights_.size()); } #endif // USE_CUDA } +void Metadata::SetWeights(const label_t* weights, data_size_t len) { + SetWeightsFromIterator(weights, weights + len); +} + +void Metadata::SetWeights(const ArrowChunkedArray& array) { + SetWeightsFromIterator(array.begin(), array.end()); +} + void Metadata::InsertWeights(const label_t* weights, data_size_t start_index, data_size_t len) { if (!weights) { Log::Fatal("Passed null weights"); diff --git a/tests/python_package_test/test_arrow.py b/tests/python_package_test/test_arrow.py index 1dd270c8ec53..40482a904a62 100644 --- a/tests/python_package_test/test_arrow.py +++ b/tests/python_package_test/test_arrow.py @@ -9,6 +9,8 @@ import lightgbm as lgb +from .utils import np_assert_array_equal + # ----------------------------------------------------------------------------------------------- # # UTILITIES # # 
----------------------------------------------------------------------------------------------- # @@ -67,10 +69,6 @@ def dummy_dataset_params() -> Dict[str, Any]: } -def assert_arrays_equal(lhs: np.ndarray, rhs: np.ndarray): - assert lhs.dtype == rhs.dtype and np.array_equal(lhs, rhs) - - # ----------------------------------------------------------------------------------------------- # # UNIT TESTS # # ----------------------------------------------------------------------------------------------- # @@ -103,6 +101,34 @@ def test_dataset_construct_fuzzy( assert filecmp.cmp(tmp_path / "arrow.txt", tmp_path / "pandas.txt") +# -------------------------------------------- FIELDS ------------------------------------------- # + + +def test_dataset_construct_fields_fuzzy(): + arrow_table = generate_random_arrow_table(3, 1000, 42) + arrow_labels = generate_random_arrow_array(1000, 42) + arrow_weights = generate_random_arrow_array(1000, 42) + + arrow_dataset = lgb.Dataset(arrow_table, label=arrow_labels, weight=arrow_weights) + arrow_dataset.construct() + + pandas_dataset = lgb.Dataset( + arrow_table.to_pandas(), label=arrow_labels.to_numpy(), weight=arrow_weights.to_numpy() + ) + pandas_dataset.construct() + + # Check for equality + for field in ("label", "weight"): + np_assert_array_equal( + arrow_dataset.get_field(field), pandas_dataset.get_field(field), strict=True + ) + np_assert_array_equal(arrow_dataset.get_label(), pandas_dataset.get_label(), strict=True) + np_assert_array_equal(arrow_dataset.get_weight(), pandas_dataset.get_weight(), strict=True) + + +# -------------------------------------------- LABELS ------------------------------------------- # + + @pytest.mark.parametrize( ["array_type", "label_data"], [(pa.array, [0, 1, 0, 0, 1]), (pa.chunked_array, [[0], [1, 0, 0, 1]])], @@ -129,17 +155,31 @@ def test_dataset_construct_labels(array_type: Any, label_data: Any, arrow_type: dataset.construct() expected = np.array([0, 1, 0, 0, 1], dtype=np.float32) - assert_arrays_equal(expected, dataset.get_label()) + np_assert_array_equal(expected, dataset.get_label(), strict=True) -def test_dataset_construct_labels_fuzzy(): - arrow_table = generate_random_arrow_table(3, 1000, 42) - arrow_array = generate_random_arrow_array(1000, 42) +# ------------------------------------------- WEIGHTS ------------------------------------------- # - arrow_dataset = lgb.Dataset(arrow_table, label=arrow_array) - arrow_dataset.construct() - pandas_dataset = lgb.Dataset(arrow_table.to_pandas(), label=arrow_array.to_numpy()) - pandas_dataset.construct() +def test_dataset_construct_weights_none(): + data = generate_dummy_arrow_table() + weight = pa.array([1, 1, 1, 1, 1]) + dataset = lgb.Dataset(data, weight=weight, params=dummy_dataset_params()) + dataset.construct() + assert dataset.get_weight() is None + assert dataset.get_field("weight") is None + + +@pytest.mark.parametrize( + ["array_type", "weight_data"], + [(pa.array, [3, 0.7, 1.5, 0.5, 0.1]), (pa.chunked_array, [[3], [0.7, 1.5, 0.5, 0.1]])], +) +@pytest.mark.parametrize("arrow_type", [pa.float32(), pa.float64()]) +def test_dataset_construct_weights(array_type: Any, weight_data: Any, arrow_type: Any): + data = generate_dummy_arrow_table() + weights = array_type(weight_data, type=arrow_type) + dataset = lgb.Dataset(data, weight=weights, params=dummy_dataset_params()) + dataset.construct() - assert_arrays_equal(arrow_dataset.get_label(), pandas_dataset.get_label()) + expected = np.array([3, 0.7, 1.5, 0.5, 0.1], dtype=np.float32) + np_assert_array_equal(expected, 
dataset.get_weight(), strict=True) From 694e41e4a3ec44987667755dd989f83f0ecd4311 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 13 Nov 2023 12:26:44 -0600 Subject: [PATCH 02/19] [R-package] standardize naming of internal functions (#6179) --- R-package/R/callback.R | 8 +-- R-package/R/lgb.Booster.R | 34 +++++----- R-package/R/lgb.DataProcessor.R | 2 +- R-package/R/lgb.Dataset.R | 54 +++++++-------- R-package/R/lgb.Predictor.R | 10 +-- R-package/R/lgb.cv.R | 38 +++++------ R-package/R/lgb.drop_serialized.R | 2 +- R-package/R/lgb.importance.R | 2 +- R-package/R/lgb.interprete.R | 8 +-- R-package/R/lgb.make_serializable.R | 2 +- R-package/R/lgb.model.dt.tree.R | 7 +- R-package/R/lgb.plot.interpretation.R | 6 +- R-package/R/lgb.restore_handle.R | 2 +- R-package/R/lgb.train.R | 34 ++++++---- R-package/R/lightgbm.R | 10 +-- R-package/R/saveRDS.lgb.Booster.R | 2 +- R-package/R/utils.R | 20 +++--- R-package/tests/testthat/test_Predictor.R | 10 +-- R-package/tests/testthat/test_basic.R | 22 +++---- R-package/tests/testthat/test_dataset.R | 16 ++--- .../tests/testthat/test_learning_to_rank.R | 2 +- R-package/tests/testthat/test_lgb.Booster.R | 40 +++++------ R-package/tests/testthat/test_utils.R | 66 +++++++++---------- 23 files changed, 203 insertions(+), 194 deletions(-) diff --git a/R-package/R/callback.R b/R-package/R/callback.R index 3569b47f5b14..c436409ddafb 100644 --- a/R-package/R/callback.R +++ b/R-package/R/callback.R @@ -323,17 +323,17 @@ cb_early_stop <- function(stopping_rounds, first_metric_only, verbose) { } # Extract callback names from the list of callbacks -callback.names <- function(cb_list) { +.callback_names <- function(cb_list) { return(unlist(lapply(cb_list, attr, "name"))) } -add.cb <- function(cb_list, cb) { +.add_cb <- function(cb_list, cb) { # Combine two elements cb_list <- c(cb_list, cb) # Set names of elements - names(cb_list) <- callback.names(cb_list = cb_list) + names(cb_list) <- .callback_names(cb_list = cb_list) if ("cb_early_stop" %in% names(cb_list)) { @@ -349,7 +349,7 @@ add.cb <- function(cb_list, cb) { } -categorize.callbacks <- function(cb_list) { +.categorize_callbacks <- function(cb_list) { # Check for pre-iteration or post-iteration return( diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 2256a250b131..17da9545ae19 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -31,12 +31,12 @@ Booster <- R6::R6Class( if (!is.null(train_set)) { - if (!lgb.is.Dataset(train_set)) { + if (!.is_Dataset(train_set)) { stop("lgb.Booster: Can only use lgb.Dataset as training data") } train_set_handle <- train_set$.__enclos_env__$private$get_handle() params <- utils::modifyList(params, train_set$get_params()) - params_str <- lgb.params2str(params = params) + params_str <- .params2str(params = params) # Store booster handle handle <- .Call( LGBM_BoosterCreate_R @@ -130,7 +130,7 @@ Booster <- R6::R6Class( # Add validation data add_valid = function(data, name) { - if (!lgb.is.Dataset(data)) { + if (!.is_Dataset(data)) { stop("lgb.Booster.add_valid: Can only use lgb.Dataset as validation data") } @@ -167,7 +167,7 @@ Booster <- R6::R6Class( params <- utils::modifyList(self$params, params) } - params_str <- lgb.params2str(params = params) + params_str <- .params2str(params = params) self$restore_handle() @@ -193,7 +193,7 @@ Booster <- R6::R6Class( if (!is.null(train_set)) { - if (!lgb.is.Dataset(train_set)) { + if (!.is_Dataset(train_set)) { stop("lgb.Booster.update: Only can use lgb.Dataset as training data") } @@ -340,7 +340,7 
@@ Booster <- R6::R6Class( # Evaluate data on metrics eval = function(data, name, feval = NULL) { - if (!lgb.is.Dataset(data)) { + if (!.is_Dataset(data)) { stop("lgb.Booster.eval: Can only use lgb.Dataset to eval") } @@ -508,17 +508,17 @@ Booster <- R6::R6Class( # NOTE: doing this here instead of in Predictor$predict() to keep # Predictor$predict() as fast as possible if (length(params) > 0L) { - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "predict_raw_score" , params = params , alternative_kwarg_value = rawscore ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "predict_leaf_index" , params = params , alternative_kwarg_value = predleaf ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "predict_contrib" , params = params , alternative_kwarg_value = predcontrib @@ -586,7 +586,7 @@ Booster <- R6::R6Class( , predcontrib , start_iteration , num_iteration - , lgb.params2str(params = params) + , .params2str(params = params) ) private$fast_predict_config <- list( @@ -622,7 +622,7 @@ Booster <- R6::R6Class( }, check_null_handle = function() { - return(lgb.is.null.handle(private$handle)) + return(.is_null_handle(private$handle)) }, restore_handle = function() { @@ -959,7 +959,7 @@ predict.lgb.Booster <- function(object, params = list(), ...) { - if (!lgb.is.Booster(x = object)) { + if (!.is_Booster(x = object)) { stop("predict.lgb.Booster: object should be an ", sQuote("lgb.Booster")) } @@ -1114,7 +1114,7 @@ lgb.configure_fast_predict <- function(model, num_iteration = NULL, type = "response", params = list()) { - if (!lgb.is.Booster(x = model)) { + if (!.is_Booster(x = model)) { stop("lgb.configure_fast_predict: model should be an ", sQuote("lgb.Booster")) } if (type == "class") { @@ -1160,7 +1160,7 @@ lgb.configure_fast_predict <- function(model, print.lgb.Booster <- function(x, ...) 
{ # nolint start handle <- x$.__enclos_env__$private$handle - handle_is_null <- lgb.is.null.handle(handle) + handle_is_null <- .is_null_handle(handle) if (!handle_is_null) { ntrees <- x$current_iter() @@ -1316,7 +1316,7 @@ lgb.load <- function(filename = NULL, model_str = NULL) { #' @export lgb.save <- function(booster, filename, num_iteration = NULL) { - if (!lgb.is.Booster(x = booster)) { + if (!.is_Booster(x = booster)) { stop("lgb.save: booster should be an ", sQuote("lgb.Booster")) } @@ -1372,7 +1372,7 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { #' @export lgb.dump <- function(booster, num_iteration = NULL) { - if (!lgb.is.Booster(x = booster)) { + if (!.is_Booster(x = booster)) { stop("lgb.dump: booster should be an ", sQuote("lgb.Booster")) } @@ -1430,7 +1430,7 @@ lgb.dump <- function(booster, num_iteration = NULL) { #' @export lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_err = FALSE) { - if (!lgb.is.Booster(x = booster)) { + if (!.is_Booster(x = booster)) { stop("lgb.get.eval.result: Can only use ", sQuote("lgb.Booster"), " to get eval result") } diff --git a/R-package/R/lgb.DataProcessor.R b/R-package/R/lgb.DataProcessor.R index fc7061945b62..c35ce4f93bd3 100644 --- a/R-package/R/lgb.DataProcessor.R +++ b/R-package/R/lgb.DataProcessor.R @@ -39,7 +39,7 @@ DataProcessor <- R6::R6Class( ) } data_num_class <- length(self$factor_levels) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "num_class" , params = params , alternative_kwarg_value = data_num_class diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index e2892ea4bae0..ddc338d2cae3 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -55,10 +55,10 @@ Dataset <- R6::R6Class( init_score = NULL) { # validate inputs early to avoid unnecessary computation - if (!(is.null(reference) || lgb.is.Dataset(reference))) { + if (!(is.null(reference) || .is_Dataset(reference))) { stop("lgb.Dataset: If provided, reference must be a ", sQuote("lgb.Dataset")) } - if (!(is.null(predictor) || lgb.is.Predictor(predictor))) { + if (!(is.null(predictor) || .is_Predictor(predictor))) { stop("lgb.Dataset: If provided, predictor must be a ", sQuote("lgb.Predictor")) } @@ -135,7 +135,7 @@ Dataset <- R6::R6Class( construct = function() { # Check for handle null - if (!lgb.is.null.handle(x = private$handle)) { + if (!.is_null_handle(x = private$handle)) { return(invisible(self)) } @@ -191,7 +191,7 @@ Dataset <- R6::R6Class( } # Generate parameter str - params_str <- lgb.params2str(params = private$params) + params_str <- .params2str(params = private$params) # Get handle of reference dataset ref_handle <- NULL @@ -277,7 +277,7 @@ Dataset <- R6::R6Class( ) } - if (lgb.is.null.handle(x = handle)) { + if (.is_null_handle(x = handle)) { stop("lgb.Dataset.construct: cannot create Dataset handle") } # Setup class and private type @@ -345,7 +345,7 @@ Dataset <- R6::R6Class( dim = function() { # Check for handle - if (!lgb.is.null.handle(x = private$handle)) { + if (!.is_null_handle(x = private$handle)) { num_row <- 0L num_col <- 0L @@ -385,7 +385,7 @@ Dataset <- R6::R6Class( # Get number of bins for feature get_feature_num_bin = function(feature) { - if (lgb.is.null.handle(x = private$handle)) { + if (.is_null_handle(x = private$handle)) { stop("Cannot get number of bins in feature before constructing Dataset.") } if (is.character(feature)) { @@ -409,7 +409,7 @@ Dataset <- R6::R6Class( get_colnames = function() { # Check for handle 
- if (!lgb.is.null.handle(x = private$handle)) { + if (!.is_null_handle(x = private$handle)) { private$colnames <- .Call( LGBM_DatasetGetFeatureNames_R , private$handle @@ -449,7 +449,7 @@ Dataset <- R6::R6Class( # Write column names private$colnames <- colnames - if (!lgb.is.null.handle(x = private$handle)) { + if (!.is_null_handle(x = private$handle)) { # Merge names with tab separation merged_name <- paste0(as.list(private$colnames), collapse = "\t") @@ -478,7 +478,7 @@ Dataset <- R6::R6Class( # Check for info name and handle if (is.null(private$info[[field_name]])) { - if (lgb.is.null.handle(x = private$handle)) { + if (.is_null_handle(x = private$handle)) { stop("Cannot perform Dataset$get_field() before constructing Dataset.") } @@ -536,7 +536,7 @@ Dataset <- R6::R6Class( # Store information privately private$info[[field_name]] <- data - if (!lgb.is.null.handle(x = private$handle) && !is.null(data)) { + if (!.is_null_handle(x = private$handle) && !is.null(data)) { if (length(data) > 0L) { @@ -583,14 +583,14 @@ Dataset <- R6::R6Class( return(invisible(self)) } new_params <- utils::modifyList(private$params, params) - if (lgb.is.null.handle(x = private$handle)) { + if (.is_null_handle(x = private$handle)) { private$params <- new_params } else { tryCatch({ .Call( LGBM_DatasetUpdateParamChecking_R - , lgb.params2str(params = private$params) - , lgb.params2str(params = new_params) + , .params2str(params = private$params) + , .params2str(params = new_params) ) private$params <- new_params }, error = function(e) { @@ -663,7 +663,7 @@ Dataset <- R6::R6Class( please set ", sQuote("free_raw_data = FALSE"), " when you construct lgb.Dataset") } - if (!lgb.is.Dataset(reference)) { + if (!.is_Dataset(reference)) { stop("set_reference: Can only use lgb.Dataset as a reference") } @@ -711,7 +711,7 @@ Dataset <- R6::R6Class( get_handle = function() { # Get handle and construct if needed - if (lgb.is.null.handle(x = private$handle)) { + if (.is_null_handle(x = private$handle)) { self$construct() } return(private$handle) @@ -734,7 +734,7 @@ Dataset <- R6::R6Class( if (!is.null(predictor)) { # Predictor is unknown - if (!lgb.is.Predictor(predictor)) { + if (!.is_Predictor(predictor)) { stop("set_predictor: Can only use lgb.Predictor as predictor") } @@ -888,7 +888,7 @@ lgb.Dataset.create.valid <- function(dataset, init_score = NULL, params = list()) { - if (!lgb.is.Dataset(x = dataset)) { + if (!.is_Dataset(x = dataset)) { stop("lgb.Dataset.create.valid: input data should be an lgb.Dataset object") } @@ -922,7 +922,7 @@ lgb.Dataset.create.valid <- function(dataset, #' @export lgb.Dataset.construct <- function(dataset) { - if (!lgb.is.Dataset(x = dataset)) { + if (!.is_Dataset(x = dataset)) { stop("lgb.Dataset.construct: input data should be an lgb.Dataset object") } @@ -954,7 +954,7 @@ lgb.Dataset.construct <- function(dataset) { #' @export dim.lgb.Dataset <- function(x) { - if (!lgb.is.Dataset(x = x)) { + if (!.is_Dataset(x = x)) { stop("dim.lgb.Dataset: input data should be an lgb.Dataset object") } @@ -989,7 +989,7 @@ dim.lgb.Dataset <- function(x) { #' @export dimnames.lgb.Dataset <- function(x) { - if (!lgb.is.Dataset(x = x)) { + if (!.is_Dataset(x = x)) { stop("dimnames.lgb.Dataset: input data should be an lgb.Dataset object") } @@ -1062,7 +1062,7 @@ slice <- function(dataset, idxset) { #' @export slice.lgb.Dataset <- function(dataset, idxset) { - if (!lgb.is.Dataset(x = dataset)) { + if (!.is_Dataset(x = dataset)) { stop("slice.lgb.Dataset: input dataset should be an lgb.Dataset object") } @@ 
-1110,7 +1110,7 @@ get_field <- function(dataset, field_name) { get_field.lgb.Dataset <- function(dataset, field_name) { # Check if dataset is not a dataset - if (!lgb.is.Dataset(x = dataset)) { + if (!.is_Dataset(x = dataset)) { stop("get_field.lgb.Dataset(): input dataset should be an lgb.Dataset object") } @@ -1158,7 +1158,7 @@ set_field <- function(dataset, field_name, data) { #' @export set_field.lgb.Dataset <- function(dataset, field_name, data) { - if (!lgb.is.Dataset(x = dataset)) { + if (!.is_Dataset(x = dataset)) { stop("set_field.lgb.Dataset: input dataset should be an lgb.Dataset object") } @@ -1189,7 +1189,7 @@ set_field.lgb.Dataset <- function(dataset, field_name, data) { #' @export lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { - if (!lgb.is.Dataset(x = dataset)) { + if (!.is_Dataset(x = dataset)) { stop("lgb.Dataset.set.categorical: input dataset should be an lgb.Dataset object") } @@ -1222,7 +1222,7 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { #' @export lgb.Dataset.set.reference <- function(dataset, reference) { - if (!lgb.is.Dataset(x = dataset)) { + if (!.is_Dataset(x = dataset)) { stop("lgb.Dataset.set.reference: input dataset should be an lgb.Dataset object") } @@ -1248,7 +1248,7 @@ lgb.Dataset.set.reference <- function(dataset, reference) { #' @export lgb.Dataset.save <- function(dataset, fname) { - if (!lgb.is.Dataset(x = dataset)) { + if (!.is_Dataset(x = dataset)) { stop("lgb.Dataset.save: input dataset should be an lgb.Dataset object") } diff --git a/R-package/R/lgb.Predictor.R b/R-package/R/lgb.Predictor.R index 0b7b39e2d8c2..3a411efd75ba 100644 --- a/R-package/R/lgb.Predictor.R +++ b/R-package/R/lgb.Predictor.R @@ -28,7 +28,7 @@ Predictor <- R6::R6Class( # Initialize will create a starter model initialize = function(modelfile, params = list(), fast_predict_config = list()) { - private$params <- lgb.params2str(params = params) + private$params <- .params2str(params = params) handle <- NULL if (is.character(modelfile)) { @@ -46,7 +46,7 @@ Predictor <- R6::R6Class( handle <- modelfile private$need_free_handle <- FALSE - } else if (lgb.is.Booster(modelfile)) { + } else if (.is_Booster(modelfile)) { handle <- modelfile$get_handle() private$need_free_handle <- FALSE @@ -512,7 +512,7 @@ Predictor <- R6::R6Class( return(FALSE) } - if (lgb.is.null.handle(private$fast_predict_config$handle)) { + if (.is_null_handle(private$fast_predict_config$handle)) { warning(paste0("Model had fast CSR predict configuration, but it is inactive." 
, " Try re-generating it through 'lgb.configure_fast_predict'.")) return(FALSE) @@ -527,8 +527,8 @@ Predictor <- R6::R6Class( private$fast_predict_config$rawscore == rawscore && private$fast_predict_config$predleaf == predleaf && private$fast_predict_config$predcontrib == predcontrib && - lgb.equal.or.both.null(private$fast_predict_config$start_iteration, start_iteration) && - lgb.equal.or.both.null(private$fast_predict_config$num_iteration, num_iteration) + .equal_or_both_null(private$fast_predict_config$start_iteration, start_iteration) && + .equal_or_both_null(private$fast_predict_config$num_iteration, num_iteration) ) } ) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index f81026fe673f..11768c5bfa0b 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -99,7 +99,7 @@ lgb.cv <- function(params = list() } # If 'data' is not an lgb.Dataset, try to construct one using 'label' - if (!lgb.is.Dataset(x = data)) { + if (!.is_Dataset(x = data)) { if (is.null(label)) { stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'") } @@ -110,27 +110,27 @@ lgb.cv <- function(params = list() # in `params`. # this ensures that the model stored with Booster$save() correctly represents # what was passed in - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "verbosity" , params = params , alternative_kwarg_value = verbose ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "num_iterations" , params = params , alternative_kwarg_value = nrounds ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "metric" , params = params , alternative_kwarg_value = NULL ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "objective" , params = params , alternative_kwarg_value = obj ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "early_stopping_round" , params = params , alternative_kwarg_value = early_stopping_rounds @@ -148,7 +148,7 @@ lgb.cv <- function(params = list() # (for backwards compatibility). If it is a list of functions, store # all of them. 
This makes it possible to pass any mix of strings like "auc" # and custom functions to eval - params <- lgb.check.eval(params = params, eval = eval) + params <- .check_eval(params = params, eval = eval) eval_functions <- list(NULL) if (is.function(eval)) { eval_functions <- list(eval) @@ -166,7 +166,7 @@ lgb.cv <- function(params = list() # Check for boosting from a trained model if (is.character(init_model)) { predictor <- Predictor$new(modelfile = init_model) - } else if (lgb.is.Booster(x = init_model)) { + } else if (.is_Booster(x = init_model)) { predictor <- init_model$to_predictor() } @@ -193,7 +193,7 @@ lgb.cv <- function(params = list() } else if (!is.null(data$get_colnames())) { cnames <- data$get_colnames() } - params[["interaction_constraints"]] <- lgb.check_interaction_constraints( + params[["interaction_constraints"]] <- .check_interaction_constraints( interaction_constraints = interaction_constraints , column_names = cnames ) @@ -232,7 +232,7 @@ lgb.cv <- function(params = list() } # Create folds - folds <- generate.cv.folds( + folds <- .generate_cv_folds( nfold = nfold , nrows = nrow(data) , stratified = stratified @@ -245,12 +245,12 @@ lgb.cv <- function(params = list() # Add printing log callback if (params[["verbosity"]] > 0L && eval_freq > 0L) { - callbacks <- add.cb(cb_list = callbacks, cb = cb_print_evaluation(period = eval_freq)) + callbacks <- .add_cb(cb_list = callbacks, cb = cb_print_evaluation(period = eval_freq)) } # Add evaluation log callback if (record) { - callbacks <- add.cb(cb_list = callbacks, cb = cb_record_evaluation()) + callbacks <- .add_cb(cb_list = callbacks, cb = cb_record_evaluation()) } # Did user pass parameters that indicate they want to use early stopping? @@ -282,7 +282,7 @@ lgb.cv <- function(params = list() # If user supplied early_stopping_rounds, add the early stopping callback if (using_early_stopping) { - callbacks <- add.cb( + callbacks <- .add_cb( cb_list = callbacks , cb = cb_early_stop( stopping_rounds = early_stopping_rounds @@ -292,7 +292,7 @@ lgb.cv <- function(params = list() ) } - cb <- categorize.callbacks(cb_list = callbacks) + cb <- .categorize_callbacks(cb_list = callbacks) # Construct booster for each fold. The data.table() code below is used to # guarantee that indices are sorted while keeping init_score and weight together @@ -387,7 +387,7 @@ lgb.cv <- function(params = list() }) # Prepare collection of evaluation results - merged_msg <- lgb.merge.cv.result( + merged_msg <- .merge_cv_result( msg = msg , showsd = showsd ) @@ -463,7 +463,7 @@ lgb.cv <- function(params = list() } # Generates random (stratified if needed) CV folds -generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) { +.generate_cv_folds <- function(nfold, nrows, stratified, label, group, params) { # Check for group existence if (is.null(group)) { @@ -476,7 +476,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) { y <- label[rnd_idx] y <- as.factor(y) - folds <- lgb.stratified.folds(y = y, k = nfold) + folds <- .stratified_folds(y = y, k = nfold) } else { @@ -528,7 +528,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) { # It was borrowed from caret::createFolds and simplified # by always returning an unnamed list of fold indices. #' @importFrom stats quantile -lgb.stratified.folds <- function(y, k) { +.stratified_folds <- function(y, k) { # Group the numeric data based on their magnitudes # and sample within those groups. 
@@ -594,7 +594,7 @@ lgb.stratified.folds <- function(y, k) { return(out) } -lgb.merge.cv.result <- function(msg, showsd) { +.merge_cv_result <- function(msg, showsd) { if (length(msg) == 0L) { stop("lgb.cv: size of cv result error") diff --git a/R-package/R/lgb.drop_serialized.R b/R-package/R/lgb.drop_serialized.R index bcc2480e8ccc..e53f2cafac11 100644 --- a/R-package/R/lgb.drop_serialized.R +++ b/R-package/R/lgb.drop_serialized.R @@ -13,7 +13,7 @@ #' @seealso \link{lgb.restore_handle}, \link{lgb.make_serializable}. #' @export lgb.drop_serialized <- function(model) { - if (!lgb.is.Booster(x = model)) { + if (!.is_Booster(x = model)) { stop("lgb.drop_serialized: model should be an ", sQuote("lgb.Booster")) } model$drop_raw() diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index 5a58770553f9..27efb17392df 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -39,7 +39,7 @@ #' @export lgb.importance <- function(model, percentage = TRUE) { - if (!lgb.is.Booster(x = model)) { + if (!.is_Booster(x = model)) { stop("'model' has to be an object of class lgb.Booster") } diff --git a/R-package/R/lgb.interprete.R b/R-package/R/lgb.interprete.R index 7de772664d8b..976315262792 100644 --- a/R-package/R/lgb.interprete.R +++ b/R-package/R/lgb.interprete.R @@ -86,7 +86,7 @@ lgb.interprete <- function(model, ) for (i in seq_along(idxset)) { - tree_interpretation_dt_list[[i]] <- single.row.interprete( + tree_interpretation_dt_list[[i]] <- .single_row_interprete( tree_dt = tree_dt , num_class = num_class , tree_index_mat = tree_index_mat_list[[i]] @@ -151,7 +151,7 @@ single.tree.interprete <- function(tree_dt, } #' @importFrom data.table := rbindlist setorder -multiple.tree.interprete <- function(tree_dt, +.multiple_tree_interprete <- function(tree_dt, tree_index, leaf_index) { @@ -186,7 +186,7 @@ multiple.tree.interprete <- function(tree_dt, } #' @importFrom data.table set setnames -single.row.interprete <- function(tree_dt, num_class, tree_index_mat, leaf_index_mat) { +.single_row_interprete <- function(tree_dt, num_class, tree_index_mat, leaf_index_mat) { # Prepare vector list tree_interpretation <- vector(mode = "list", length = num_class) @@ -194,7 +194,7 @@ single.row.interprete <- function(tree_dt, num_class, tree_index_mat, leaf_index # Loop throughout each class for (i in seq_len(num_class)) { - next_interp_dt <- multiple.tree.interprete( + next_interp_dt <- .multiple_tree_interprete( tree_dt = tree_dt , tree_index = tree_index_mat[, i] , leaf_index = leaf_index_mat[, i] diff --git a/R-package/R/lgb.make_serializable.R b/R-package/R/lgb.make_serializable.R index 58bdd194df4d..5a639aacb2b5 100644 --- a/R-package/R/lgb.make_serializable.R +++ b/R-package/R/lgb.make_serializable.R @@ -13,7 +13,7 @@ #' @seealso \link{lgb.restore_handle}, \link{lgb.drop_serialized}. 
#' @export lgb.make_serializable <- function(model) { - if (!lgb.is.Booster(x = model)) { + if (!.is_Booster(x = model)) { stop("lgb.make_serializable: model should be an ", sQuote("lgb.Booster")) } model$save_raw() diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index 8b0d8d81e2e8..5d994accfa7f 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -62,7 +62,10 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) { ) # Parse tree model - tree_list <- lapply(parsed_json_model$tree_info, single.tree.parse) + tree_list <- lapply( + X = parsed_json_model$tree_info + , FUN = .single_tree_parse + ) # Combine into single data.table tree_dt <- data.table::rbindlist(l = tree_list, use.names = TRUE) @@ -84,7 +87,7 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) { #' @importFrom data.table := data.table rbindlist -single.tree.parse <- function(lgb_tree) { +.single_tree_parse <- function(lgb_tree) { # Traverse tree function pre_order_traversal <- function(env = NULL, tree_node_leaf, current_depth = 0L, parent_index = NA_integer_) { diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index a88f14bf83f0..8b95371eb3c2 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ b/R-package/R/lgb.plot.interpretation.R @@ -89,7 +89,7 @@ lgb.plot.interpretation <- function(tree_interpretation_dt, if (num_class == 1L) { # Only one class, plot straight away - multiple.tree.plot.interpretation( + .multiple_tree_plot_interpretation( tree_interpretation = tree_interpretation_dt , top_n = top_n , title = NULL @@ -118,7 +118,7 @@ lgb.plot.interpretation <- function(tree_interpretation_dt, , old = names(plot_dt) , new = c("Feature", "Contribution") ) - multiple.tree.plot.interpretation( + .multiple_tree_plot_interpretation( tree_interpretation = plot_dt , top_n = top_n , title = paste("Class", i - 1L) @@ -131,7 +131,7 @@ lgb.plot.interpretation <- function(tree_interpretation_dt, } #' @importFrom graphics barplot -multiple.tree.plot.interpretation <- function(tree_interpretation, +.multiple_tree_plot_interpretation <- function(tree_interpretation, top_n, title, cex) { diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index 4de93d46c96a..0ed25ef26f3d 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -35,7 +35,7 @@ #' model_new$check_null_handle() #' @export lgb.restore_handle <- function(model) { - if (!lgb.is.Booster(x = model)) { + if (!.is_Booster(x = model)) { stop("lgb.restore_handle: model should be an ", sQuote("lgb.Booster")) } model$restore_handle() diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index 20916c9844b5..6979558d22cd 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -63,11 +63,11 @@ lgb.train <- function(params = list(), if (nrounds <= 0L) { stop("nrounds should be greater than zero") } - if (!lgb.is.Dataset(x = data)) { + if (!.is_Dataset(x = data)) { stop("lgb.train: data must be an lgb.Dataset instance") } if (length(valids) > 0L) { - if (!identical(class(valids), "list") || !all(vapply(valids, lgb.is.Dataset, logical(1L)))) { + if (!identical(class(valids), "list") || !all(vapply(valids, .is_Dataset, logical(1L)))) { stop("lgb.train: valids must be a list of lgb.Dataset elements") } evnames <- names(valids) @@ -80,27 +80,27 @@ lgb.train <- function(params = list(), # in `params`. 
# this ensures that the model stored with Booster$save() correctly represents # what was passed in - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "verbosity" , params = params , alternative_kwarg_value = verbose ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "num_iterations" , params = params , alternative_kwarg_value = nrounds ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "metric" , params = params , alternative_kwarg_value = NULL ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "objective" , params = params , alternative_kwarg_value = obj ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "early_stopping_round" , params = params , alternative_kwarg_value = early_stopping_rounds @@ -118,7 +118,7 @@ lgb.train <- function(params = list(), # (for backwards compatibility). If it is a list of functions, store # all of them. This makes it possible to pass any mix of strings like "auc" # and custom functions to eval - params <- lgb.check.eval(params = params, eval = eval) + params <- .check_eval(params = params, eval = eval) eval_functions <- list(NULL) if (is.function(eval)) { eval_functions <- list(eval) @@ -136,7 +136,7 @@ lgb.train <- function(params = list(), # Check for boosting from a trained model if (is.character(init_model)) { predictor <- Predictor$new(modelfile = init_model) - } else if (lgb.is.Booster(x = init_model)) { + } else if (.is_Booster(x = init_model)) { predictor <- init_model$to_predictor() } @@ -166,7 +166,7 @@ lgb.train <- function(params = list(), } else if (!is.null(data$get_colnames())) { cnames <- data$get_colnames() } - params[["interaction_constraints"]] <- lgb.check_interaction_constraints( + params[["interaction_constraints"]] <- .check_interaction_constraints( interaction_constraints = interaction_constraints , column_names = cnames ) @@ -212,12 +212,18 @@ lgb.train <- function(params = list(), # Add printing log callback if (params[["verbosity"]] > 0L && eval_freq > 0L) { - callbacks <- add.cb(cb_list = callbacks, cb = cb_print_evaluation(period = eval_freq)) + callbacks <- .add_cb( + cb_list = callbacks + , cb = cb_print_evaluation(period = eval_freq) + ) } # Add evaluation log callback if (record && length(valids) > 0L) { - callbacks <- add.cb(cb_list = callbacks, cb = cb_record_evaluation()) + callbacks <- .add_cb( + cb_list = callbacks + , cb = cb_record_evaluation() + ) } # Did user pass parameters that indicate they want to use early stopping? 
@@ -249,7 +255,7 @@ lgb.train <- function(params = list(), # If user supplied early_stopping_rounds, add the early stopping callback if (using_early_stopping) { - callbacks <- add.cb( + callbacks <- .add_cb( cb_list = callbacks , cb = cb_early_stop( stopping_rounds = early_stopping_rounds @@ -259,7 +265,7 @@ lgb.train <- function(params = list(), ) } - cb <- categorize.callbacks(cb_list = callbacks) + cb <- .categorize_callbacks(cb_list = callbacks) # Construct booster with datasets booster <- Booster$new(params = params, train_set = data) diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index 711b3ef0dc38..e5df7a93fc97 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -184,21 +184,21 @@ lightgbm <- function(data, } if (is.null(num_threads)) { - num_threads <- lgb.get.default.num.threads() + num_threads <- .get_default_num_threads() } - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "num_threads" , params = params , alternative_kwarg_value = num_threads ) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "verbosity" , params = params , alternative_kwarg_value = verbose ) # Process factors as labels and auto-determine objective - if (!lgb.is.Dataset(data)) { + if (!.is_Dataset(data)) { data_processor <- DataProcessor$new() temp <- data_processor$process_label( label = label @@ -220,7 +220,7 @@ lightgbm <- function(data, dtrain <- data # Check whether data is lgb.Dataset, if not then create lgb.Dataset manually - if (!lgb.is.Dataset(x = dtrain)) { + if (!.is_Dataset(x = dtrain)) { dtrain <- lgb.Dataset(data = data, label = label, weight = weights, init_score = init_score) } diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index 5d3af097301f..d75056e69734 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -57,7 +57,7 @@ saveRDS.lgb.Booster <- function(object, warning("'saveRDS.lgb.Booster' is deprecated and will be removed in a future release. 
Use saveRDS() instead.") - if (!lgb.is.Booster(x = object)) { + if (!.is_Booster(x = object)) { stop("saveRDS.lgb.Booster: object should be an ", sQuote("lgb.Booster")) } diff --git a/R-package/R/utils.R b/R-package/R/utils.R index c9ba780316df..1ac6f197ca77 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -1,16 +1,16 @@ -lgb.is.Booster <- function(x) { +.is_Booster <- function(x) { return(all(c("R6", "lgb.Booster") %in% class(x))) # nolint: class_equals } -lgb.is.Dataset <- function(x) { +.is_Dataset <- function(x) { return(all(c("R6", "lgb.Dataset") %in% class(x))) # nolint: class_equals } -lgb.is.Predictor <- function(x) { +.is_Predictor <- function(x) { return(all(c("R6", "lgb.Predictor") %in% class(x))) # nolint: class_equals } -lgb.is.null.handle <- function(x) { +.is_null_handle <- function(x) { if (is.null(x)) { return(TRUE) } @@ -19,7 +19,7 @@ lgb.is.null.handle <- function(x) { ) } -lgb.params2str <- function(params) { +.params2str <- function(params) { if (!identical(class(params), "list")) { stop("params must be a list") @@ -59,7 +59,7 @@ lgb.params2str <- function(params) { } -lgb.check_interaction_constraints <- function(interaction_constraints, column_names) { +.check_interaction_constraints <- function(interaction_constraints, column_names) { # Convert interaction constraints to feature numbers string_constraints <- list() @@ -129,7 +129,7 @@ lgb.check_interaction_constraints <- function(interaction_constraints, column_na # This has to account for the fact that `eval` could be a character vector, # a function, a list of functions, or a list with a mix of strings and # functions -lgb.check.eval <- function(params, eval) { +.check_eval <- function(params, eval) { if (is.null(params$metric)) { params$metric <- list() @@ -194,7 +194,7 @@ lgb.check.eval <- function(params, eval) { # [return] # params with num_iterations set to the chosen value, and other aliases # of num_iterations removed -lgb.check.wrapper_param <- function(main_param_name, params, alternative_kwarg_value) { +.check_wrapper_param <- function(main_param_name, params, alternative_kwarg_value) { aliases <- .PARAMETER_ALIASES()[[main_param_name]] aliases_provided <- aliases[aliases %in% names(params)] @@ -225,7 +225,7 @@ lgb.check.wrapper_param <- function(main_param_name, params, alternative_kwarg_v } #' @importFrom parallel detectCores -lgb.get.default.num.threads <- function() { +.get_default_num_threads <- function() { if (requireNamespace("RhpcBLASctl", quietly = TRUE)) { # nolint: undesirable_function return(RhpcBLASctl::get_num_cores()) } else { @@ -247,7 +247,7 @@ lgb.get.default.num.threads <- function() { } } -lgb.equal.or.both.null <- function(a, b) { +.equal_or_both_null <- function(a, b) { if (is.null(a)) { if (!is.null(b)) { return(FALSE) diff --git a/R-package/tests/testthat/test_Predictor.R b/R-package/tests/testthat/test_Predictor.R index 90be1d08cf67..192171c915bf 100644 --- a/R-package/tests/testthat/test_Predictor.R +++ b/R-package/tests/testthat/test_Predictor.R @@ -17,16 +17,16 @@ test_that("Predictor$finalize() should not fail", { bst$save_model(filename = model_file) predictor <- Predictor$new(modelfile = model_file) - expect_true(lgb.is.Predictor(predictor)) + expect_true(.is_Predictor(predictor)) - expect_false(lgb.is.null.handle(predictor$.__enclos_env__$private$handle)) + expect_false(.is_null_handle(predictor$.__enclos_env__$private$handle)) predictor$finalize() - expect_true(lgb.is.null.handle(predictor$.__enclos_env__$private$handle)) + 
expect_true(.is_null_handle(predictor$.__enclos_env__$private$handle)) # calling finalize() a second time shouldn't cause any issues predictor$finalize() - expect_true(lgb.is.null.handle(predictor$.__enclos_env__$private$handle)) + expect_true(.is_null_handle(predictor$.__enclos_env__$private$handle)) }) test_that("predictions do not fail for integer input", { @@ -79,7 +79,7 @@ test_that("start_iteration works correctly", { , valids = list("test" = dtest) , early_stopping_rounds = 2L ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) pred1 <- predict(bst, newdata = test$data, type = "raw") pred_contrib1 <- predict(bst, test$data, type = "contrib") pred2 <- rep(0.0, length(pred1)) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 9b84017476a7..75abd26dd152 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -1094,7 +1094,7 @@ test_that("lgb.train() works as expected with sparse features", { , nrounds = nrounds ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) expect_equal(bst$current_iter(), nrounds) parsed_model <- jsonlite::fromJSON(bst$dump_model()) expect_equal(parsed_model$objective, "binary sigmoid:1") @@ -1816,7 +1816,7 @@ test_that("lgb.train() supports non-ASCII feature names", { ) , colnames = feature_names ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) dumped_model <- jsonlite::fromJSON(bst$dump_model()) # UTF-8 strings are not well-supported on Windows @@ -2522,7 +2522,7 @@ test_that("lgb.train() fit on linearly-relatead data improves when using linear , params = params , valids = list("train" = dtrain) ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) dtrain <- .new_dataset() bst_linear <- lgb.train( @@ -2531,7 +2531,7 @@ test_that("lgb.train() fit on linearly-relatead data improves when using linear , params = utils::modifyList(params, list(linear_tree = TRUE)) , valids = list("train" = dtrain) ) - expect_true(lgb.is.Booster(bst_linear)) + expect_true(.is_Booster(bst_linear)) bst_last_mse <- bst$record_evals[["train"]][["l2"]][["eval"]][[10L]] bst_lin_last_mse <- bst_linear$record_evals[["train"]][["l2"]][["eval"]][[10L]] @@ -2599,7 +2599,7 @@ test_that("lgb.train() works with linear learners even if Dataset has missing va , params = params , valids = list("train" = dtrain) ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) dtrain <- .new_dataset() bst_linear <- lgb.train( @@ -2608,7 +2608,7 @@ test_that("lgb.train() works with linear learners even if Dataset has missing va , params = utils::modifyList(params, list(linear_tree = TRUE)) , valids = list("train" = dtrain) ) - expect_true(lgb.is.Booster(bst_linear)) + expect_true(.is_Booster(bst_linear)) bst_last_mse <- bst$record_evals[["train"]][["l2"]][["eval"]][[10L]] bst_lin_last_mse <- bst_linear$record_evals[["train"]][["l2"]][["eval"]][[10L]] @@ -2649,7 +2649,7 @@ test_that("lgb.train() works with linear learners, bagging, and a Dataset that h , params = params , valids = list("train" = dtrain) ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) dtrain <- .new_dataset() bst_linear <- lgb.train( @@ -2658,7 +2658,7 @@ test_that("lgb.train() works with linear learners, bagging, and a Dataset that h , params = utils::modifyList(params, list(linear_tree = TRUE)) , valids = list("train" = dtrain) ) - expect_true(lgb.is.Booster(bst_linear)) + expect_true(.is_Booster(bst_linear)) bst_last_mse <- 
bst$record_evals[["train"]][["l2"]][["eval"]][[10L]] bst_lin_last_mse <- bst_linear$record_evals[["train"]][["l2"]][["eval"]][[10L]] @@ -2699,7 +2699,7 @@ test_that("lgb.train() works with linear learners and data where a feature has o , nrounds = 10L , params = utils::modifyList(params, list(linear_tree = TRUE)) ) - expect_true(lgb.is.Booster(bst_linear)) + expect_true(.is_Booster(bst_linear)) }) test_that("lgb.train() works with linear learners when Dataset has categorical features", { @@ -2732,7 +2732,7 @@ test_that("lgb.train() works with linear learners when Dataset has categorical f , params = params , valids = list("train" = dtrain) ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) dtrain <- .new_dataset() bst_linear <- lgb.train( @@ -2741,7 +2741,7 @@ test_that("lgb.train() works with linear learners when Dataset has categorical f , params = utils::modifyList(params, list(linear_tree = TRUE)) , valids = list("train" = dtrain) ) - expect_true(lgb.is.Booster(bst_linear)) + expect_true(.is_Booster(bst_linear)) bst_last_mse <- bst$record_evals[["train"]][["l2"]][["eval"]][[10L]] bst_lin_last_mse <- bst_linear$record_evals[["train"]][["l2"]][["eval"]][[10L]] diff --git a/R-package/tests/testthat/test_dataset.R b/R-package/tests/testthat/test_dataset.R index 401d1babf5e9..a8585baa2621 100644 --- a/R-package/tests/testthat/test_dataset.R +++ b/R-package/tests/testthat/test_dataset.R @@ -206,7 +206,7 @@ test_that("lgb.Dataset: Dataset should be able to construct from matrix and retu , rawData , nrow(rawData) , ncol(rawData) - , lightgbm:::lgb.params2str(params = list()) + , lightgbm:::.params2str(params = list()) , ref_handle ) expect_true(methods::is(handle, "externalptr")) @@ -322,7 +322,7 @@ test_that("Dataset$update_parameters() does nothing for empty inputs", { res <- ds$update_params( params = list() ) - expect_true(lgb.is.Dataset(res)) + expect_true(.is_Dataset(res)) new_params <- ds$get_params() expect_identical(new_params, initial_params) @@ -343,7 +343,7 @@ test_that("Dataset$update_params() works correctly for recognized Dataset parame res <- ds$update_params( params = new_params ) - expect_true(lgb.is.Dataset(res)) + expect_true(.is_Dataset(res)) updated_params <- ds$get_params() for (param_name in names(new_params)) { @@ -356,17 +356,17 @@ test_that("Dataset$finalize() should not fail on an already-finalized Dataset", data = test_data , label = test_label ) - expect_true(lgb.is.null.handle(dtest$.__enclos_env__$private$handle)) + expect_true(.is_null_handle(dtest$.__enclos_env__$private$handle)) dtest$construct() - expect_false(lgb.is.null.handle(dtest$.__enclos_env__$private$handle)) + expect_false(.is_null_handle(dtest$.__enclos_env__$private$handle)) dtest$finalize() - expect_true(lgb.is.null.handle(dtest$.__enclos_env__$private$handle)) + expect_true(.is_null_handle(dtest$.__enclos_env__$private$handle)) # calling finalize() a second time shouldn't cause any issues dtest$finalize() - expect_true(lgb.is.null.handle(dtest$.__enclos_env__$private$handle)) + expect_true(.is_null_handle(dtest$.__enclos_env__$private$handle)) }) test_that("lgb.Dataset: should be able to run lgb.train() immediately after using lgb.Dataset() on a file", { @@ -401,7 +401,7 @@ test_that("lgb.Dataset: should be able to run lgb.train() immediately after usin , data = dtest_read_in ) - expect_true(lgb.is.Booster(x = bst)) + expect_true(.is_Booster(x = bst)) }) test_that("lgb.Dataset: should be able to run lgb.cv() immediately after using lgb.Dataset() on a file", { diff --git 
a/R-package/tests/testthat/test_learning_to_rank.R b/R-package/tests/testthat/test_learning_to_rank.R index b4ebe7bd67c3..e99aff44ceb3 100644 --- a/R-package/tests/testthat/test_learning_to_rank.R +++ b/R-package/tests/testthat/test_learning_to_rank.R @@ -25,7 +25,7 @@ test_that("learning-to-rank with lgb.train() works as expected", { , data = dtrain , nrounds = 10L ) - expect_true(lgb.is.Booster(model)) + expect_true(.is_Booster(model)) dumped_model <- jsonlite::fromJSON( model$dump_model() diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index 7ebb236cd923..c1fc02630c13 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -11,16 +11,16 @@ test_that("Booster$finalize() should not fail", { , verbose = .LGB_VERBOSITY , nrounds = 3L ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) - expect_false(lgb.is.null.handle(bst$.__enclos_env__$private$handle)) + expect_false(.is_null_handle(bst$.__enclos_env__$private$handle)) bst$finalize() - expect_true(lgb.is.null.handle(bst$.__enclos_env__$private$handle)) + expect_true(.is_null_handle(bst$.__enclos_env__$private$handle)) # calling finalize() a second time shouldn't cause any issues bst$finalize() - expect_true(lgb.is.null.handle(bst$.__enclos_env__$private$handle)) + expect_true(.is_null_handle(bst$.__enclos_env__$private$handle)) }) test_that("lgb.get.eval.result() should throw an informative error if booster is not an lgb.Booster", { @@ -188,7 +188,7 @@ test_that("Loading a Booster from a text file works", { , params = params , nrounds = 2L ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) pred <- predict(bst, test$data) model_file <- tempfile(fileext = ".model") @@ -232,7 +232,7 @@ test_that("boosters with linear models at leaves can be written to text file and , params = params , verbose = .LGB_VERBOSITY ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) # save predictions, then write the model to a file and destroy it in R preds <- predict(bst, X) @@ -269,7 +269,7 @@ test_that("Loading a Booster from a string works", { ) , nrounds = 2L ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) pred <- predict(bst, test$data) model_string <- bst$save_model_to_string() @@ -376,7 +376,7 @@ test_that("If a string and a file are both passed to lgb.load() the file is used ) , nrounds = 2L ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) pred <- predict(bst, test$data) model_file <- tempfile(fileext = ".model") @@ -411,7 +411,7 @@ test_that("Creating a Booster from a Dataset should work", { ), train_set = dtrain ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) expect_equal(bst$current_iter(), 0L) expect_true(is.na(bst$best_score)) expect_true(all(bst$predict(agaricus.train$data) == 0.5)) @@ -446,10 +446,10 @@ test_that("Creating a Booster from a Dataset with an existing predictor should w , num_threads = .LGB_MAX_THREADS ) ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) expect_equal(bst$current_iter(), nrounds) expect_equal(bst$eval_train()[[1L]][["value"]], 0.1115352) - expect_true(lgb.is.Booster(bst_from_ds)) + expect_true(.is_Booster(bst_from_ds)) expect_equal(bst_from_ds$current_iter(), nrounds) expect_equal(bst_from_ds$eval_train()[[1L]][["value"]], 5.65704892) dumped_model <- jsonlite::fromJSON(bst$dump_model()) @@ -531,7 +531,7 @@ test_that("Booster$rollback_one_iter() should work as expected", { , nrounds 
= nrounds ) expect_equal(bst$current_iter(), nrounds) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) logloss <- bst$eval_train()[[1L]][["value"]] expect_equal(logloss, 0.01904786) @@ -539,7 +539,7 @@ test_that("Booster$rollback_one_iter() should work as expected", { # rollback_one_iter() should return a booster and modify the original # booster in place - expect_true(lgb.is.Booster(x)) + expect_true(.is_Booster(x)) expect_equal(bst$current_iter(), nrounds - 1L) # score should now come from the model as of 4 iterations @@ -565,7 +565,7 @@ test_that("Booster$update() passing a train_set works as expected", { ) , nrounds = nrounds ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) expect_equal(bst$current_iter(), nrounds) bst$update( train_set = Dataset$new( @@ -574,7 +574,7 @@ test_that("Booster$update() passing a train_set works as expected", { , params = list(verbose = .LGB_VERBOSITY) ) ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) expect_equal(bst$current_iter(), nrounds + 1L) # train with 3 rounds directly @@ -590,7 +590,7 @@ test_that("Booster$update() passing a train_set works as expected", { ) , nrounds = nrounds + 1L ) - expect_true(lgb.is.Booster(bst2)) + expect_true(.is_Booster(bst2)) expect_equal(bst2$current_iter(), nrounds + 1L) # model with 2 rounds + 1 update should be identical to 3 rounds @@ -716,7 +716,7 @@ test_that("Saving a model with different feature importance types works", { ) , nrounds = 2L ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) .feat_importance_from_string <- function(model_string) { file_lines <- strsplit(model_string, "\n", fixed = TRUE)[[1L]] @@ -772,7 +772,7 @@ test_that("Saving a model with unknown importance type fails", { ) , nrounds = 2L ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) UNSUPPORTED_IMPORTANCE <- 2L expect_error({ @@ -1372,7 +1372,7 @@ test_that("boosters with linear models at leaves work with saveRDS.lgb.Booster a , nrounds = 10L , params = params ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) # save predictions, then write the model to a file and destroy it in R preds <- predict(bst, X) @@ -1412,7 +1412,7 @@ test_that("boosters with linear models at leaves can be written to RDS and re-lo , nrounds = 10L , params = params ) - expect_true(lgb.is.Booster(bst)) + expect_true(.is_Booster(bst)) # save predictions, then write the model to a file and destroy it in R preds <- predict(bst, X) diff --git a/R-package/tests/testthat/test_utils.R b/R-package/tests/testthat/test_utils.R index 4ab05e075ae3..898aed9b0915 100644 --- a/R-package/tests/testthat/test_utils.R +++ b/R-package/tests/testthat/test_utils.R @@ -1,12 +1,12 @@ -test_that("lgb.params2str() works as expected for empty lists", { - out_str <- lgb.params2str( +test_that(".params2str() works as expected for empty lists", { + out_str <- .params2str( params = list() ) expect_identical(class(out_str), "character") expect_equal(out_str, "") }) -test_that("lgb.params2str() works as expected for a key in params with multiple different-length elements", { +test_that(".params2str() works as expected for a key in params with multiple different-length elements", { metrics <- c("a", "ab", "abc", "abcdefg") params <- list( objective = "magic" @@ -14,7 +14,7 @@ test_that("lgb.params2str() works as expected for a key in params with multiple , nrounds = 10L , learning_rate = 0.0000001 ) - out_str <- lgb.params2str( + out_str <- .params2str( params = params ) 
expect_identical(class(out_str), "character") @@ -24,8 +24,8 @@ test_that("lgb.params2str() works as expected for a key in params with multiple ) }) -test_that("lgb.params2str() passes through duplicated params", { - out_str <- lgb.params2str( +test_that(".params2str() passes through duplicated params", { + out_str <- .params2str( params = list( objective = "regression" , bagging_fraction = 0.8 @@ -35,8 +35,8 @@ test_that("lgb.params2str() passes through duplicated params", { expect_equal(out_str, "objective=regression bagging_fraction=0.8 bagging_fraction=0.5") }) -test_that("lgb.check.eval works as expected with no metric", { - params <- lgb.check.eval( +test_that(".check_eval works as expected with no metric", { + params <- .check_eval( params = list(device = "cpu") , eval = "binary_error" ) @@ -44,8 +44,8 @@ test_that("lgb.check.eval works as expected with no metric", { expect_identical(params[["metric"]], list("binary_error")) }) -test_that("lgb.check.eval adds eval to metric in params", { - params <- lgb.check.eval( +test_that(".check_eval adds eval to metric in params", { + params <- .check_eval( params = list(metric = "auc") , eval = "binary_error" ) @@ -53,8 +53,8 @@ test_that("lgb.check.eval adds eval to metric in params", { expect_identical(params[["metric"]], list("auc", "binary_error")) }) -test_that("lgb.check.eval adds eval to metric in params if two evaluation names are provided", { - params <- lgb.check.eval( +test_that(".check_eval adds eval to metric in params if two evaluation names are provided", { + params <- .check_eval( params = list(metric = "auc") , eval = c("binary_error", "binary_logloss") ) @@ -62,8 +62,8 @@ test_that("lgb.check.eval adds eval to metric in params if two evaluation names expect_identical(params[["metric"]], list("auc", "binary_error", "binary_logloss")) }) -test_that("lgb.check.eval adds eval to metric in params if a list is provided", { - params <- lgb.check.eval( +test_that(".check_eval adds eval to metric in params if a list is provided", { + params <- .check_eval( params = list(metric = "auc") , eval = list("binary_error", "binary_logloss") ) @@ -71,8 +71,8 @@ test_that("lgb.check.eval adds eval to metric in params if a list is provided", expect_identical(params[["metric"]], list("auc", "binary_error", "binary_logloss")) }) -test_that("lgb.check.eval drops duplicate metrics and preserves order", { - params <- lgb.check.eval( +test_that(".check_eval drops duplicate metrics and preserves order", { + params <- .check_eval( params = list(metric = "l1") , eval = list("l2", "rmse", "l1", "rmse") ) @@ -80,9 +80,9 @@ test_that("lgb.check.eval drops duplicate metrics and preserves order", { expect_identical(params[["metric"]], list("l1", "l2", "rmse")) }) -test_that("lgb.check.wrapper_param() uses passed-in keyword arg if no alias found in params", { +test_that(".check_wrapper_param() uses passed-in keyword arg if no alias found in params", { kwarg_val <- sample(seq_len(100L), size = 1L) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "num_iterations" , params = list() , alternative_kwarg_value = kwarg_val @@ -90,10 +90,10 @@ test_that("lgb.check.wrapper_param() uses passed-in keyword arg if no alias foun expect_equal(params[["num_iterations"]], kwarg_val) }) -test_that("lgb.check.wrapper_param() prefers main parameter to alias and keyword arg", { +test_that(".check_wrapper_param() prefers main parameter to alias and keyword arg", { num_iterations <- sample(seq_len(100L), size = 1L) kwarg_val <- 
sample(seq_len(100L), size = 1L) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "num_iterations" , params = list( num_iterations = num_iterations @@ -108,11 +108,11 @@ test_that("lgb.check.wrapper_param() prefers main parameter to alias and keyword expect_identical(params, list(num_iterations = num_iterations)) }) -test_that("lgb.check.wrapper_param() prefers alias to keyword arg", { +test_that(".check_wrapper_param() prefers alias to keyword arg", { n_estimators <- sample(seq_len(100L), size = 1L) num_tree <- sample(seq_len(100L), size = 1L) kwarg_val <- sample(seq_len(100L), size = 1L) - params <- lgb.check.wrapper_param( + params <- .check_wrapper_param( main_param_name = "num_iterations" , params = list( num_tree = num_tree @@ -124,7 +124,7 @@ test_that("lgb.check.wrapper_param() prefers alias to keyword arg", { expect_identical(params, list(num_iterations = num_tree)) # switching the order shouldn't switch which one is chosen - params2 <- lgb.check.wrapper_param( + params2 <- .check_wrapper_param( main_param_name = "num_iterations" , params = list( n_estimators = n_estimators @@ -136,14 +136,14 @@ test_that("lgb.check.wrapper_param() prefers alias to keyword arg", { expect_identical(params2, list(num_iterations = num_tree)) }) -test_that("lgb.equal.or.both.null produces expected results", { - expect_true(lgb.equal.or.both.null(NULL, NULL)) - expect_false(lgb.equal.or.both.null(1.0, NULL)) - expect_false(lgb.equal.or.both.null(NULL, 1.0)) - expect_true(lgb.equal.or.both.null(1.0, 1.0)) - expect_true(lgb.equal.or.both.null(1.0, 1L)) - expect_false(lgb.equal.or.both.null(NA, NULL)) - expect_false(lgb.equal.or.both.null(NULL, NA)) - expect_false(lgb.equal.or.both.null(10.0, 1L)) - expect_true(lgb.equal.or.both.null(0L, 0L)) +test_that(".equal_or_both_null produces expected results", { + expect_true(.equal_or_both_null(NULL, NULL)) + expect_false(.equal_or_both_null(1.0, NULL)) + expect_false(.equal_or_both_null(NULL, 1.0)) + expect_true(.equal_or_both_null(1.0, 1.0)) + expect_true(.equal_or_both_null(1.0, 1L)) + expect_false(.equal_or_both_null(NA, NULL)) + expect_false(.equal_or_both_null(NULL, NA)) + expect_false(.equal_or_both_null(10.0, 1L)) + expect_true(.equal_or_both_null(0L, 0L)) }) From e63e54ace02afbc1c1f27505edf65c92733ac50b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 13 Nov 2023 20:26:35 -0600 Subject: [PATCH 03/19] [docs] reduce redirects in docs links (#6181) --- docs/Experiments.rst | 26 +++++++++++++------------- docs/Features.rst | 2 +- docs/GPU-Performance.rst | 6 +++--- docs/Installation-Guide.rst | 2 +- docs/Parallel-Learning-Guide.rst | 4 ++-- docs/Parameters.rst | 16 ++++++++-------- docs/Quick-Start.rst | 2 +- include/LightGBM/config.h | 16 ++++++++-------- 8 files changed, 37 insertions(+), 37 deletions(-) diff --git a/docs/Experiments.rst b/docs/Experiments.rst index c314321e7a3a..4440a2c0ccae 100644 --- a/docs/Experiments.rst +++ b/docs/Experiments.rst @@ -18,19 +18,19 @@ Data We used 5 datasets to conduct our comparison experiments. 
Details of data are listed in the following table: -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ -| Data | Task | Link | #Train\_Set | #Feature | Comments | -+===========+=======================+========================================================================+=============+==========+==============================================+ -| Higgs | Binary classification | `link `__ | 10,500,000 | 28 | last 500,000 samples were used as test set | -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ -| Yahoo LTR | Learning to rank | `link `__ | 473,134 | 700 | set1.train as train, set1.test as test | -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ -| MS LTR | Learning to rank | `link `__ | 2,270,296 | 137 | {S1,S2,S3} as train set, {S5} as test set | -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ -| Expo | Binary classification | `link `__ | 11,000,000 | 700 | last 1,000,000 samples were used as test set | -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ -| Allstate | Binary classification | `link `__ | 13,184,290 | 4228 | last 1,000,000 samples were used as test set | -+-----------+-----------------------+------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ ++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ +| Data | Task | Link | #Train\_Set | #Feature | Comments | ++===========+=======================+=================================================================================+=============+==========+==============================================+ +| Higgs | Binary classification | `link `__ | 10,500,000 | 28 | last 500,000 samples were used as test set | ++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ +| Yahoo LTR | Learning to rank | `link `__ | 473,134 | 700 | set1.train as train, set1.test as test | ++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ +| MS LTR | Learning to rank | `link `__ | 2,270,296 | 137 | {S1,S2,S3} as train set, {S5} as test set | ++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ +| Expo | Binary classification | `link `__ | 11,000,000 | 700 | last 1,000,000 samples were used as test set | 
++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ +| Allstate | Binary classification | `link `__ | 13,184,290 | 4228 | last 1,000,000 samples were used as test set | ++-----------+-----------------------+---------------------------------------------------------------------------------+-------------+----------+----------------------------------------------+ Environment ^^^^^^^^^^^ diff --git a/docs/Features.rst b/docs/Features.rst index a7db86ec2935..89b56646588f 100644 --- a/docs/Features.rst +++ b/docs/Features.rst @@ -291,7 +291,7 @@ References .. _On Grouping for Maximum Homogeneity: https://www.tandfonline.com/doi/abs/10.1080/01621459.1958.10501479 -.. _Optimization of collective communication operations in MPICH: https://www.mcs.anl.gov/~thakur/papers/ijhpca-coll.pdf +.. _Optimization of collective communication operations in MPICH: https://web.cels.anl.gov/~thakur/papers/ijhpca-coll.pdf .. _A Communication-Efficient Parallel Algorithm for Decision Tree: http://papers.nips.cc/paper/6381-a-communication-efficient-parallel-algorithm-for-decision-tree diff --git a/docs/GPU-Performance.rst b/docs/GPU-Performance.rst index be1c1051bb28..64cd78eb4202 100644 --- a/docs/GPU-Performance.rst +++ b/docs/GPU-Performance.rst @@ -194,7 +194,7 @@ following article: Huan Zhang, Si Si and Cho-Jui Hsieh. `GPU Acceleration for Large-scale Tree Boosting`_. SysML Conference, 2018. -.. _link1: https://archive.ics.uci.edu/ml/datasets/HIGGS +.. _link1: https://archive.ics.uci.edu/dataset/280/higgs .. _link2: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html @@ -202,9 +202,9 @@ Huan Zhang, Si Si and Cho-Jui Hsieh. `GPU Acceleration for Large-scale Tree Boos .. _link4: https://webscope.sandbox.yahoo.com/catalog.php?datatype=c -.. _link5: http://research.microsoft.com/en-us/projects/mslr/ +.. _link5: https://www.microsoft.com/en-us/research/project/mslr/ -.. _link6: http://stat-computing.org/dataexpo/2009/ +.. _link6: https://community.amstat.org/jointscsg-section/dataexpo/dataexpo2009 .. _0bb4a82: https://github.com/microsoft/LightGBM/commit/0bb4a82 diff --git a/docs/Installation-Guide.rst b/docs/Installation-Guide.rst index 1acfbcefa711..564fa7304902 100644 --- a/docs/Installation-Guide.rst +++ b/docs/Installation-Guide.rst @@ -950,7 +950,7 @@ gcc .. _RDMA: https://en.wikipedia.org/wiki/Remote_direct_memory_access -.. _MS MPI: https://docs.microsoft.com/en-us/message-passing-interface/microsoft-mpi-release-notes +.. _MS MPI: https://learn.microsoft.com/en-us/message-passing-interface/microsoft-mpi-release-notes .. _Open MPI: https://www.open-mpi.org/ diff --git a/docs/Parallel-Learning-Guide.rst b/docs/Parallel-Learning-Guide.rst index a347be942570..cbc7b1012b98 100644 --- a/docs/Parallel-Learning-Guide.rst +++ b/docs/Parallel-Learning-Guide.rst @@ -518,7 +518,7 @@ See `the mars documentation`_ for usage examples. .. _the Dask DataFrame documentation: https://docs.dask.org/en/latest/dataframe.html -.. _the Dask prediction example: https://github.com/microsoft/lightgbm/tree/master/examples/python-guide/dask/prediction.py +.. _the Dask prediction example: https://github.com/microsoft/LightGBM/blob/master/examples/python-guide/dask/prediction.py .. _the Dask worker documentation: https://distributed.dask.org/en/stable/worker-memory.html @@ -536,7 +536,7 @@ See `the mars documentation`_ for usage examples. .. 
_lightgbm_ray: https://github.com/ray-project/lightgbm_ray -.. _Ray: https://ray.io/ +.. _Ray: https://www.ray.io/ .. _the lightgbm_ray documentation: https://docs.ray.io/en/latest/tune/api_docs/integration.html#lightgbm-tune-integration-lightgbm diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 86104ba5be55..329f9c38656e 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -119,7 +119,7 @@ Core Parameters - ranking application - - ``lambdarank``, `lambdarank `__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain`` + - ``lambdarank``, `lambdarank `__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain`` - ``rank_xendcg``, `XE_NDCG_MART `__ ranking objective function, aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart`` @@ -536,15 +536,15 @@ Learning Control Parameters - ``basic``, the most basic monotone constraints method. It does not slow the library at all, but over-constrains the predictions - - ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results + - ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results - - ``advanced``, an `even more advanced method `__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results + - ``advanced``, an `even more advanced method `__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results - ``monotone_penalty`` :raw-html:`🔗︎`, default = ``0.0``, type = double, aliases: ``monotone_splits_penalty``, ``ms_penalty``, ``mc_penalty``, constraints: ``monotone_penalty >= 0.0`` - used only if ``monotone_constraints`` is set - - `monotone penalty `__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. The penalty applied to monotone splits on a given depth is a continuous, increasing function the penalization parameter + - `monotone penalty `__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. 
The penalty applied to monotone splits on a given depth is a continuous, increasing function the penalization parameter - if ``0.0`` (the default), no penalization is applied @@ -564,7 +564,7 @@ Learning Control Parameters - **Note**: the forced split logic will be ignored, if the split makes gain worse - - see `this file `__ as an example + - see `this file `__ as an example - ``refit_decay_rate`` :raw-html:`🔗︎`, default = ``0.9``, type = double, constraints: ``0.0 <= refit_decay_rate <= 1.0`` @@ -770,7 +770,7 @@ Dataset Parameters - ``enable_bundle`` :raw-html:`🔗︎`, default = ``true``, type = bool, aliases: ``is_enable_bundle``, ``bundle`` - - set this to ``false`` to disable Exclusive Feature Bundling (EFB), which is described in `LightGBM: A Highly Efficient Gradient Boosting Decision Tree `__ + - set this to ``false`` to disable Exclusive Feature Bundling (EFB), which is described in `LightGBM: A Highly Efficient Gradient Boosting Decision Tree `__ - **Note**: disabling this may cause the slow training speed for sparse datasets @@ -894,7 +894,7 @@ Dataset Parameters - ``.json`` file should contain an array of objects, each containing the word ``feature`` (integer feature index) and ``bin_upper_bound`` (array of thresholds for binning) - - see `this file `__ as an example + - see `this file `__ as an example - ``save_binary`` :raw-html:`🔗︎`, default = ``false``, type = bool, aliases: ``is_save_binary``, ``is_save_binary_file`` @@ -961,7 +961,7 @@ Predict Parameters - produces ``#features + 1`` values where the last value is the expected value of the model output over the training data - - **Note**: if you want to get more explanation for your model's predictions using SHAP values like SHAP interaction values, you can install `shap package `__ + - **Note**: if you want to get more explanation for your model's predictions using SHAP values like SHAP interaction values, you can install `shap package `__ - **Note**: unlike the shap package, with ``predict_contrib`` we return a matrix with an extra column, where the last column is the expected value diff --git a/docs/Quick-Start.rst b/docs/Quick-Start.rst index 04e64beb1281..30b0b3c228a2 100644 --- a/docs/Quick-Start.rst +++ b/docs/Quick-Start.rst @@ -85,4 +85,4 @@ Examples .. _LibSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ -.. _Expo data: http://stat-computing.org/dataexpo/2009/ +.. _Expo data: https://community.amstat.org/jointscsg-section/dataexpo/dataexpo2009 diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 6d61bc764924..6500cb77272d 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -150,7 +150,7 @@ struct Config { // descl2 = ``cross_entropy_lambda``, alternative parameterization of cross-entropy, aliases: ``xentlambda`` // descl2 = label is anything in interval [0, 1] // desc = ranking application - // descl2 = ``lambdarank``, `lambdarank `__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain`` + // descl2 = ``lambdarank``, `lambdarank `__ objective. 
`label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain`` // descl2 = ``rank_xendcg``, `XE_NDCG_MART `__ ranking objective function, aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart`` // descl2 = ``rank_xendcg`` is faster than and achieves the similar performance as ``lambdarank`` // descl2 = label should be ``int`` type, and larger number represents the higher relevance (e.g. 0:bad, 1:fair, 2:good, 3:perfect) @@ -501,14 +501,14 @@ struct Config { // desc = used only if ``monotone_constraints`` is set // desc = monotone constraints method // descl2 = ``basic``, the most basic monotone constraints method. It does not slow the library at all, but over-constrains the predictions - // descl2 = ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results - // descl2 = ``advanced``, an `even more advanced method `__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results + // descl2 = ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results + // descl2 = ``advanced``, an `even more advanced method `__, which may slow the library. However, this method is even less constraining than the intermediate method and should again significantly improve the results std::string monotone_constraints_method = "basic"; // alias = monotone_splits_penalty, ms_penalty, mc_penalty // check = >=0.0 // desc = used only if ``monotone_constraints`` is set - // desc = `monotone penalty `__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. The penalty applied to monotone splits on a given depth is a continuous, increasing function the penalization parameter + // desc = `monotone penalty `__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. 
The penalty applied to monotone splits on a given depth is a continuous, increasing function the penalization parameter // desc = if ``0.0`` (the default), no penalization is applied double monotone_penalty = 0.0; @@ -524,7 +524,7 @@ struct Config { // desc = ``.json`` file can be arbitrarily nested, and each split contains ``feature``, ``threshold`` fields, as well as ``left`` and ``right`` fields representing subsplits // desc = categorical splits are forced in a one-hot fashion, with ``left`` representing the split containing the feature value and ``right`` representing other values // desc = **Note**: the forced split logic will be ignored, if the split makes gain worse - // desc = see `this file `__ as an example + // desc = see `this file `__ as an example std::string forcedsplits_filename = ""; // check = >=0.0 @@ -683,7 +683,7 @@ struct Config { bool is_enable_sparse = true; // alias = is_enable_bundle, bundle - // desc = set this to ``false`` to disable Exclusive Feature Bundling (EFB), which is described in `LightGBM: A Highly Efficient Gradient Boosting Decision Tree `__ + // desc = set this to ``false`` to disable Exclusive Feature Bundling (EFB), which is described in `LightGBM: A Highly Efficient Gradient Boosting Decision Tree `__ // desc = **Note**: disabling this may cause the slow training speed for sparse datasets bool enable_bundle = true; @@ -770,7 +770,7 @@ struct Config { // desc = path to a ``.json`` file that specifies bin upper bounds for some or all features // desc = ``.json`` file should contain an array of objects, each containing the word ``feature`` (integer feature index) and ``bin_upper_bound`` (array of thresholds for binning) - // desc = see `this file `__ as an example + // desc = see `this file `__ as an example std::string forcedbins_filename = ""; // [no-save] @@ -826,7 +826,7 @@ struct Config { // desc = used only in ``prediction`` task // desc = set this to ``true`` to estimate `SHAP values `__, which represent how each feature contributes to each prediction // desc = produces ``#features + 1`` values where the last value is the expected value of the model output over the training data - // desc = **Note**: if you want to get more explanation for your model's predictions using SHAP values like SHAP interaction values, you can install `shap package `__ + // desc = **Note**: if you want to get more explanation for your model's predictions using SHAP values like SHAP interaction values, you can install `shap package `__ // desc = **Note**: unlike the shap package, with ``predict_contrib`` we return a matrix with an extra column, where the last column is the expected value // desc = **Note**: this feature is not implemented for linear trees bool predict_contrib = false; From 18dbd65e57995618ee2a8b1f7e4cb0df1f9c6333 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 15 Nov 2023 22:10:54 -0600 Subject: [PATCH 04/19] [python-package] consolidate pandas-to-numpy conversion code (#6156) --- python-package/lightgbm/basic.py | 53 ++++++++++++++++---------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 939842df3389..b085e6fe8d36 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -758,6 +758,23 @@ def _check_for_bad_pandas_dtypes(pandas_dtypes_series: pd_Series) -> None: f'Fields with bad pandas dtypes: {", ".join(bad_pandas_dtypes)}') +def _pandas_to_numpy( + data: pd_DataFrame, + target_dtype: "np.typing.DTypeLike" +) -> 
np.ndarray: + _check_for_bad_pandas_dtypes(data.dtypes) + try: + # most common case (no nullable dtypes) + return data.to_numpy(dtype=target_dtype, copy=False) + except TypeError: + # 1.0 <= pd version < 1.1 and nullable dtypes, least common case + # raises error because array is casted to type(pd.NA) and there's no na_value argument + return data.astype(target_dtype, copy=False).values + except ValueError: + # data has nullable dtypes, but we can specify na_value argument and copy will be made + return data.to_numpy(dtype=target_dtype, na_value=np.nan) + + def _data_from_pandas( data: pd_DataFrame, feature_name: _LGBM_FeatureNameConfiguration, @@ -790,22 +807,17 @@ def _data_from_pandas( else: # use cat cols specified by user categorical_feature = list(categorical_feature) # type: ignore[assignment] - # get numpy representation of the data - _check_for_bad_pandas_dtypes(data.dtypes) df_dtypes = [dtype.type for dtype in data.dtypes] - df_dtypes.append(np.float32) # so that the target dtype considers floats + # so that the target dtype considers floats + df_dtypes.append(np.float32) target_dtype = np.result_type(*df_dtypes) - try: - # most common case (no nullable dtypes) - data = data.to_numpy(dtype=target_dtype, copy=False) - except TypeError: - # 1.0 <= pd version < 1.1 and nullable dtypes, least common case - # raises error because array is casted to type(pd.NA) and there's no na_value argument - data = data.astype(target_dtype, copy=False).values - except ValueError: - # data has nullable dtypes, but we can specify na_value argument and copy will be made - data = data.to_numpy(dtype=target_dtype, na_value=np.nan) - return data, feature_name, categorical_feature, pandas_categorical + + return ( + _pandas_to_numpy(data, target_dtype=target_dtype), + feature_name, + categorical_feature, + pandas_categorical + ) def _dump_pandas_categorical( @@ -2805,18 +2817,7 @@ def set_label(self, label: Optional[_LGBM_LabelType]) -> "Dataset": if isinstance(label, pd_DataFrame): if len(label.columns) > 1: raise ValueError('DataFrame for label cannot have multiple columns') - _check_for_bad_pandas_dtypes(label.dtypes) - try: - # most common case (no nullable dtypes) - label = label.to_numpy(dtype=np.float32, copy=False) - except TypeError: - # 1.0 <= pd version < 1.1 and nullable dtypes, least common case - # raises error because array is casted to type(pd.NA) and there's no na_value argument - label = label.astype(np.float32, copy=False).values - except ValueError: - # data has nullable dtypes, but we can specify na_value argument and copy will be made - label = label.to_numpy(dtype=np.float32, na_value=np.nan) - label_array = np.ravel(label) + label_array = np.ravel(_pandas_to_numpy(label, target_dtype=np.float32)) elif _is_pyarrow_array(label): label_array = label else: From d9a4cf498c336d8ca7803f9da3320a36080398af Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 22 Nov 2023 12:36:21 -0600 Subject: [PATCH 05/19] [python-package] ignore mypy errors related to ctypes string buffers (#6198) --- python-package/lightgbm/basic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index b085e6fe8d36..008ff1727d78 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -2948,7 +2948,7 @@ def get_feature_name(self) -> List[str]: reserved_string_buffer_size = 255 required_string_buffer_size = ctypes.c_size_t(0) string_buffers = 
[ctypes.create_string_buffer(reserved_string_buffer_size) for _ in range(num_feature)] - ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) + ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) # type: ignore[misc] _safe_call(_LIB.LGBM_DatasetGetFeatureNames( self._handle, ctypes.c_int(num_feature), @@ -2962,7 +2962,7 @@ def get_feature_name(self) -> List[str]: # if buffer length is not long enough, reallocate buffers if reserved_string_buffer_size < actual_string_buffer_size: string_buffers = [ctypes.create_string_buffer(actual_string_buffer_size) for _ in range(num_feature)] - ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) + ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) # type: ignore[misc] _safe_call(_LIB.LGBM_DatasetGetFeatureNames( self._handle, ctypes.c_int(num_feature), @@ -4628,7 +4628,7 @@ def feature_name(self) -> List[str]: reserved_string_buffer_size = 255 required_string_buffer_size = ctypes.c_size_t(0) string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for _ in range(num_feature)] - ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) + ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) # type: ignore[misc] _safe_call(_LIB.LGBM_BoosterGetFeatureNames( self._handle, ctypes.c_int(num_feature), @@ -4642,7 +4642,7 @@ def feature_name(self) -> List[str]: # if buffer length is not long enough, reallocate buffers if reserved_string_buffer_size < actual_string_buffer_size: string_buffers = [ctypes.create_string_buffer(actual_string_buffer_size) for _ in range(num_feature)] - ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) + ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) # type: ignore[misc] _safe_call(_LIB.LGBM_BoosterGetFeatureNames( self._handle, ctypes.c_int(num_feature), @@ -4852,7 +4852,7 @@ def __get_eval_info(self) -> None: string_buffers = [ ctypes.create_string_buffer(reserved_string_buffer_size) for _ in range(self.__num_inner_eval) ] - ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) + ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) # type: ignore[misc] _safe_call(_LIB.LGBM_BoosterGetEvalNames( self._handle, ctypes.c_int(self.__num_inner_eval), @@ -4868,7 +4868,7 @@ def __get_eval_info(self) -> None: string_buffers = [ ctypes.create_string_buffer(actual_string_buffer_size) for _ in range(self.__num_inner_eval) ] - ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) + ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) # type: ignore[misc] _safe_call(_LIB.LGBM_BoosterGetEvalNames( self._handle, ctypes.c_int(self.__num_inner_eval), From bc6942226e475be048dd0aac7f2cc6334f849aef Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 22 Nov 2023 14:15:11 -0600 Subject: [PATCH 06/19] [CUDA] fix typo in error message (#6207) --- src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp index fa782ebaad25..a1ea79efa1a1 100644 --- 
a/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp +++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp @@ -155,7 +155,7 @@ class CUDASingleGPUTreeLearner: public SerialTreeLearner { #pragma warning(disable : 4702) explicit CUDASingleGPUTreeLearner(const Config* tree_config, const bool /*boosting_on_cuda*/) : SerialTreeLearner(tree_config) { Log::Fatal("CUDA Tree Learner was not enabled in this build.\n" - "Please recompile with CMake option -DUSE_CUDAP=1"); + "Please recompile with CMake option -DUSE_CUDA=1"); } }; From 516bde95015b05e57ff41b19d9bec19b0c48d7e6 Mon Sep 17 00:00:00 2001 From: Oliver Borchert Date: Wed, 22 Nov 2023 22:50:31 +0100 Subject: [PATCH 07/19] [python-package] Allow to pass Arrow array as groups (#6166) --- include/LightGBM/c_api.h | 3 +- include/LightGBM/dataset.h | 4 ++ python-package/lightgbm/basic.py | 15 +++-- src/io/dataset.cpp | 2 + src/io/metadata.cpp | 28 ++++++--- tests/python_package_test/test_arrow.py | 77 +++++++++++++++++-------- 6 files changed, 89 insertions(+), 40 deletions(-) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index fd337cbc7cbe..eafe6fab7825 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -558,9 +558,10 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetSetField(DatasetHandle handle, /*! * \brief Set vector to a content in info. * \note + * - \a group converts input datatype into ``int32``; * - \a label and \a weight convert input datatype into ``float32``. * \param handle Handle of dataset - * \param field_name Field name, can be \a label, \a weight + * \param field_name Field name, can be \a label, \a weight, \a group * \param n_chunks The number of Arrow arrays passed to this function * \param chunks Pointer to the list of Arrow arrays * \param schema Pointer to the schema of all Arrow arrays diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index 48c1bee804d7..bf8264276a5f 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -116,6 +116,7 @@ class Metadata { void SetWeights(const ArrowChunkedArray& array); void SetQuery(const data_size_t* query, data_size_t len); + void SetQuery(const ArrowChunkedArray& array); void SetPosition(const data_size_t* position, data_size_t len); @@ -348,6 +349,9 @@ class Metadata { void InsertInitScores(const double* init_scores, data_size_t start_index, data_size_t len, data_size_t source_size); /*! \brief Insert queries at the given index */ void InsertQueries(const data_size_t* queries, data_size_t start_index, data_size_t len); + /*! \brief Set queries from pointers to the first element and the end of an iterator. */ + template + void SetQueriesFromIterator(It first, It last); /*! \brief Filename of current data */ std::string data_filename_; /*! \brief Number of data */ diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 008ff1727d78..b55546941f77 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -70,7 +70,9 @@ List[float], List[int], np.ndarray, - pd_Series + pd_Series, + pa_Array, + pa_ChunkedArray, ] _LGBM_PositionType = Union[ np.ndarray, @@ -1652,7 +1654,7 @@ def __init__( If this is Dataset for validation, training data should be used as reference. weight : list, numpy 1-D array, pandas Series, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) Weight for each instance. Weights should be non-negative. 
- group : list, numpy 1-D array, pandas Series or None, optional (default=None) + group : list, numpy 1-D array, pandas Series, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) Group/query data. Only used in the learning-to-rank task. sum(group) = n_samples. @@ -2432,7 +2434,7 @@ def create_valid( Label of the data. weight : list, numpy 1-D array, pandas Series, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) Weight for each instance. Weights should be non-negative. - group : list, numpy 1-D array, pandas Series or None, optional (default=None) + group : list, numpy 1-D array, pandas Series, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) Group/query data. Only used in the learning-to-rank task. sum(group) = n_samples. @@ -2889,7 +2891,7 @@ def set_group( Parameters ---------- - group : list, numpy 1-D array, pandas Series or None + group : list, numpy 1-D array, pandas Series, pyarrow Array, pyarrow ChunkedArray or None Group/query data. Only used in the learning-to-rank task. sum(group) = n_samples. @@ -2903,7 +2905,8 @@ def set_group( """ self.group = group if self._handle is not None and group is not None: - group = _list_to_1d_numpy(group, dtype=np.int32, name='group') + if not _is_pyarrow_array(group): + group = _list_to_1d_numpy(group, dtype=np.int32, name='group') self.set_field('group', group) # original values can be modified at cpp side constructed_group = self.get_field('group') @@ -4431,7 +4434,7 @@ def refit( .. versionadded:: 4.0.0 - group : list, numpy 1-D array, pandas Series or None, optional (default=None) + group : list, numpy 1-D array, pandas Series, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) Group/query size for ``data``. Only used in the learning-to-rank task. sum(group) = n_samples. 
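With the changes above, ranking group sizes can be handed to ``Dataset`` directly as Arrow data, just like labels and weights. A minimal sketch of the resulting usage (synthetic data, illustrative only; it mirrors the new tests further below):

    import numpy as np
    import pyarrow as pa
    import lightgbm as lgb

    # 1,000 documents with 5 features and integer relevance labels
    X = np.random.rand(1000, 5)
    y = np.random.randint(0, 4, size=1000)

    # group sizes may arrive as a chunked array (e.g. from a partitioned Arrow
    # table); their sum must equal the number of rows: 300 + 400 + 50 + 250 = 1000
    groups = pa.chunked_array([[300, 400, 50], [250]], type=pa.int32())

    dtrain = lgb.Dataset(X, label=y, group=groups)
    dtrain.construct()  # group sizes become int32 query boundaries on the C++ side

As with numpy inputs, LightGBM stores cumulative query boundaries internally, so ``dtrain.get_field("group")`` on the constructed dataset returns the boundaries ``[0, 300, 700, 750, 1000]`` rather than the sizes themselves.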
diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp
index 01eb41b71367..78dd5e4319a5 100644
--- a/src/io/dataset.cpp
+++ b/src/io/dataset.cpp
@@ -904,6 +904,8 @@ bool Dataset::SetFieldFromArrow(const char* field_name, const ArrowChunkedArray
     metadata_.SetLabel(ca);
   } else if (name == std::string("weight") || name == std::string("weights")) {
     metadata_.SetWeights(ca);
+  } else if (name == std::string("query") || name == std::string("group")) {
+    metadata_.SetQuery(ca);
   } else {
     return false;
   }
diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp
index ed4fb135e62a..d94b0ed3f2f7 100644
--- a/src/io/metadata.cpp
+++ b/src/io/metadata.cpp
@@ -507,30 +507,34 @@ void Metadata::InsertWeights(const label_t* weights, data_size_t start_index, da
   // CUDA is handled after all insertions are complete
 }
 
-void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
+template <typename It>
+void Metadata::SetQueriesFromIterator(It first, It last) {
   std::lock_guard<std::mutex> lock(mutex_);
-  // save to nullptr
-  if (query == nullptr || len == 0) {
+  // Clear query boundaries on empty input
+  if (last - first == 0) {
     query_boundaries_.clear();
     num_queries_ = 0;
     return;
   }
+
   data_size_t sum = 0;
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:sum)
-  for (data_size_t i = 0; i < len; ++i) {
-    sum += query[i];
+  for (data_size_t i = 0; i < last - first; ++i) {
+    sum += first[i];
   }
   if (num_data_ != sum) {
-    Log::Fatal("Sum of query counts is not same with #data");
+    Log::Fatal("Sum of query counts (%i) differs from the length of #data (%i)", sum, num_data_);
   }
-  num_queries_ = len;
+  num_queries_ = last - first;
+
   query_boundaries_.resize(num_queries_ + 1);
   query_boundaries_[0] = 0;
   for (data_size_t i = 0; i < num_queries_; ++i) {
-    query_boundaries_[i + 1] = query_boundaries_[i] + query[i];
+    query_boundaries_[i + 1] = query_boundaries_[i] + first[i];
   }
   CalculateQueryWeights();
   query_load_from_file_ = false;
+
 #ifdef USE_CUDA
   if (cuda_metadata_ != nullptr) {
     if (query_weights_.size() > 0) {
@@ -543,6 +547,14 @@ void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
   #endif  // USE_CUDA
 }
 
+void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
+  SetQueriesFromIterator(query, query + len);
+}
+
+void Metadata::SetQuery(const ArrowChunkedArray& array) {
+  SetQueriesFromIterator(array.begin<data_size_t>(), array.end<data_size_t>());
+}
+
 void Metadata::SetPosition(const data_size_t* positions, data_size_t len) {
   std::lock_guard<std::mutex> lock(mutex_);
   // save to nullptr
diff --git a/tests/python_package_test/test_arrow.py b/tests/python_package_test/test_arrow.py
index 40482a904a62..38b053e94fd5 100644
--- a/tests/python_package_test/test_arrow.py
+++ b/tests/python_package_test/test_arrow.py
@@ -1,7 +1,6 @@
 # coding: utf-8
 import filecmp
-from pathlib import Path
-from typing import Any, Callable, Dict
+from typing import Any, Dict
 
 import numpy as np
 import pyarrow as pa
@@ -15,6 +14,21 @@
 # UTILITIES #
 # ----------------------------------------------------------------------------------------------- #
 
+_INTEGER_TYPES = [
+    pa.int8(),
+    pa.int16(),
+    pa.int32(),
+    pa.int64(),
+    pa.uint8(),
+    pa.uint16(),
+    pa.uint32(),
+    pa.uint64(),
+]
+_FLOAT_TYPES = [
+    pa.float32(),
+    pa.float64(),
+]
+
 
 def generate_simple_arrow_table() -> pa.Table:
     columns = [
@@ -85,9 +99,7 @@ def dummy_dataset_params() -> Dict[str, Any]:
         (lambda: generate_random_arrow_table(100, 10000, 43), {}),
     ],
 )
-def test_dataset_construct_fuzzy(
-    tmp_path: Path, arrow_table_fn: Callable[[], pa.Table], dataset_params: Dict[str, Any]
-):
+def test_dataset_construct_fuzzy(tmp_path, arrow_table_fn, dataset_params): arrow_table = arrow_table_fn() arrow_dataset = lgb.Dataset(arrow_table, params=dataset_params) @@ -108,17 +120,23 @@ def test_dataset_construct_fields_fuzzy(): arrow_table = generate_random_arrow_table(3, 1000, 42) arrow_labels = generate_random_arrow_array(1000, 42) arrow_weights = generate_random_arrow_array(1000, 42) + arrow_groups = pa.chunked_array([[300, 400, 50], [250]], type=pa.int32()) - arrow_dataset = lgb.Dataset(arrow_table, label=arrow_labels, weight=arrow_weights) + arrow_dataset = lgb.Dataset( + arrow_table, label=arrow_labels, weight=arrow_weights, group=arrow_groups + ) arrow_dataset.construct() pandas_dataset = lgb.Dataset( - arrow_table.to_pandas(), label=arrow_labels.to_numpy(), weight=arrow_weights.to_numpy() + arrow_table.to_pandas(), + label=arrow_labels.to_numpy(), + weight=arrow_weights.to_numpy(), + group=arrow_groups.to_numpy(), ) pandas_dataset.construct() # Check for equality - for field in ("label", "weight"): + for field in ("label", "weight", "group"): np_assert_array_equal( arrow_dataset.get_field(field), pandas_dataset.get_field(field), strict=True ) @@ -133,22 +151,8 @@ def test_dataset_construct_fields_fuzzy(): ["array_type", "label_data"], [(pa.array, [0, 1, 0, 0, 1]), (pa.chunked_array, [[0], [1, 0, 0, 1]])], ) -@pytest.mark.parametrize( - "arrow_type", - [ - pa.int8(), - pa.int16(), - pa.int32(), - pa.int64(), - pa.uint8(), - pa.uint16(), - pa.uint32(), - pa.uint64(), - pa.float32(), - pa.float64(), - ], -) -def test_dataset_construct_labels(array_type: Any, label_data: Any, arrow_type: Any): +@pytest.mark.parametrize("arrow_type", _INTEGER_TYPES + _FLOAT_TYPES) +def test_dataset_construct_labels(array_type, label_data, arrow_type): data = generate_dummy_arrow_table() labels = array_type(label_data, type=arrow_type) dataset = lgb.Dataset(data, label=labels, params=dummy_dataset_params()) @@ -175,7 +179,7 @@ def test_dataset_construct_weights_none(): [(pa.array, [3, 0.7, 1.5, 0.5, 0.1]), (pa.chunked_array, [[3], [0.7, 1.5, 0.5, 0.1]])], ) @pytest.mark.parametrize("arrow_type", [pa.float32(), pa.float64()]) -def test_dataset_construct_weights(array_type: Any, weight_data: Any, arrow_type: Any): +def test_dataset_construct_weights(array_type, weight_data, arrow_type): data = generate_dummy_arrow_table() weights = array_type(weight_data, type=arrow_type) dataset = lgb.Dataset(data, weight=weights, params=dummy_dataset_params()) @@ -183,3 +187,26 @@ def test_dataset_construct_weights(array_type: Any, weight_data: Any, arrow_type expected = np.array([3, 0.7, 1.5, 0.5, 0.1], dtype=np.float32) np_assert_array_equal(expected, dataset.get_weight(), strict=True) + + +# -------------------------------------------- GROUPS ------------------------------------------- # + + +@pytest.mark.parametrize( + ["array_type", "group_data"], + [ + (pa.array, [2, 3]), + (pa.chunked_array, [[2], [3]]), + (pa.chunked_array, [[], [2, 3]]), + (pa.chunked_array, [[2], [], [3], []]), + ], +) +@pytest.mark.parametrize("arrow_type", _INTEGER_TYPES) +def test_dataset_construct_groups(array_type, group_data, arrow_type): + data = generate_dummy_arrow_table() + groups = array_type(group_data, type=arrow_type) + dataset = lgb.Dataset(data, group=groups, params=dummy_dataset_params()) + dataset.construct() + + expected = np.array([0, 2, 5], dtype=np.int32) + np_assert_array_equal(expected, dataset.get_field("group"), strict=True) From cd36ffeaedd948330b5512aa7e3b58e9e6a9220c Mon Sep 17 00:00:00 2001 From: 
david-cortes
Date: Sat, 25 Nov 2023 05:48:52 +0100
Subject: [PATCH 08/19] [R-package] Fix inefficiency in retrieving pointers
 (#6208)

---
 R-package/src/lightgbm_R.cpp | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp
index 21ba801a3a60..270f2a2d54d5 100644
--- a/R-package/src/lightgbm_R.cpp
+++ b/R-package/src/lightgbm_R.cpp
@@ -226,9 +226,10 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
   int32_t len = static_cast<int32_t>(Rf_asInteger(len_used_row_indices));
   std::vector<int32_t> idxvec(len);
   // convert from one-based to zero-based index
+  const int *used_row_indices_ = INTEGER(used_row_indices);
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (len >= 1024)
   for (int32_t i = 0; i < len; ++i) {
-    idxvec[i] = static_cast<int32_t>(INTEGER(used_row_indices)[i] - 1);
+    idxvec[i] = static_cast<int32_t>(used_row_indices_[i] - 1);
   }
   const char* parameters_ptr = CHAR(PROTECT(Rf_asChar(parameters)));
   DatasetHandle res = nullptr;
@@ -339,18 +340,20 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
   const char* name = CHAR(PROTECT(Rf_asChar(field_name)));
   if (!strcmp("group", name) || !strcmp("query", name)) {
     std::vector<int32_t> vec(len);
+    const int *field_data_ = INTEGER(field_data);
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (len >= 1024)
     for (int i = 0; i < len; ++i) {
-      vec[i] = static_cast<int32_t>(INTEGER(field_data)[i]);
+      vec[i] = static_cast<int32_t>(field_data_[i]);
     }
     CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, vec.data(), len, C_API_DTYPE_INT32));
   } else if (!strcmp("init_score", name)) {
     CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, REAL(field_data), len, C_API_DTYPE_FLOAT64));
   } else {
     std::vector<float> vec(len);
+    const double *field_data_ = REAL(field_data);
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (len >= 1024)
     for (int i = 0; i < len; ++i) {
-      vec[i] = static_cast<float>(REAL(field_data)[i]);
+      vec[i] = static_cast<float>(field_data_[i]);
     }
     CHECK_CALL(LGBM_DatasetSetField(R_ExternalPtrAddr(handle), name, vec.data(), len, C_API_DTYPE_FLOAT32));
   }
@@ -372,21 +375,24 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
   if (!strcmp("group", name) || !strcmp("query", name)) {
     auto p_data = reinterpret_cast<const int32_t*>(res);
     // convert from boundaries to size
+    int *field_data_ = INTEGER(field_data);
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (out_len >= 1024)
     for (int i = 0; i < out_len - 1; ++i) {
-      INTEGER(field_data)[i] = p_data[i + 1] - p_data[i];
+      field_data_[i] = p_data[i + 1] - p_data[i];
     }
   } else if (!strcmp("init_score", name)) {
     auto p_data = reinterpret_cast<const double*>(res);
+    double *field_data_ = REAL(field_data);
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (out_len >= 1024)
     for (int i = 0; i < out_len; ++i) {
-      REAL(field_data)[i] = p_data[i];
+      field_data_[i] = p_data[i];
     }
   } else {
     auto p_data = reinterpret_cast<const float*>(res);
+    double *field_data_ = REAL(field_data);
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (out_len >= 1024)
     for (int i = 0; i < out_len; ++i) {
-      REAL(field_data)[i] = p_data[i];
+      field_data_[i] = p_data[i];
     }
   }
   UNPROTECT(1);
@@ -611,10 +617,12 @@ SEXP LGBM_BoosterUpdateOneIterCustom_R(SEXP handle,
   int is_finished = 0;
   int int_len = Rf_asInteger(len);
   std::vector<float> tgrad(int_len), thess(int_len);
+  const double *grad_ = REAL(grad);
+  const double *hess_ = REAL(hess);
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (int_len >= 1024)
   for (int j = 0; j < int_len; ++j) {
-    tgrad[j] = static_cast<float>(REAL(grad)[j]);
-    thess[j] = static_cast<float>(REAL(hess)[j]);
+    tgrad[j] = static_cast<float>(grad_[j]);
+    thess[j] = static_cast<float>(hess_[j]);
   }
   CHECK_CALL(LGBM_BoosterUpdateOneIterCustom(R_ExternalPtrAddr(handle), tgrad.data(), thess.data(), &is_finished));
   return R_NilValue;

From 2ee3ec84b70df1a9e249d3b3bff9458fe3726cd4 Mon Sep 17 00:00:00 2001
From: shiyu1994
Date: Sat, 25 Nov 2023 13:24:44 +0800
Subject: [PATCH 09/19] [python-package] fix libpath.py (#6192)

---
 python-package/lightgbm/libpath.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python-package/lightgbm/libpath.py b/python-package/lightgbm/libpath.py
index c096a6f1b5e2..21222228b0c2 100644
--- a/python-package/lightgbm/libpath.py
+++ b/python-package/lightgbm/libpath.py
@@ -16,8 +16,7 @@ def find_lib_path() -> List[str]:
         List of all found library paths to LightGBM.
     """
     curr_path = Path(__file__).absolute()
-    dll_path = [curr_path,
-                curr_path.parents[1],
+    dll_path = [curr_path.parents[1],
                 curr_path.parents[0] / 'bin',
                 curr_path.parents[0] / 'lib']
     if system() in ('Windows', 'Microsoft'):

From 848e76c3d690b3806a1e44809434d34aebad734a Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 29 Nov 2023 22:33:46 -0600
Subject: [PATCH 10/19] [R-package] use safer pattern for error formatting
 (fixes #6212) (#6216)

---
 R-package/src/lightgbm_R.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp
index 270f2a2d54d5..3ae7a98d8537 100644
--- a/R-package/src/lightgbm_R.cpp
+++ b/R-package/src/lightgbm_R.cpp
@@ -40,7 +40,7 @@ void LGBM_R_save_exception_msg(const std::string &err);
   catch(std::exception& ex) { LGBM_R_save_exception_msg(ex); } \
   catch(std::string& ex) { LGBM_R_save_exception_msg(ex); } \
   catch(...) { Rf_error("unknown exception"); } \
-  Rf_error(R_errmsg_buffer); \
+  Rf_error("%s", R_errmsg_buffer); \
   return R_NilValue; /* <- won't be reached */

 #define CHECK_CALL(x) \

From 5083df15c6866a78704787ca942931feaa096a76 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Thu, 30 Nov 2023 19:07:33 -0600
Subject: [PATCH 11/19] [docs] remove links to Laurae++ site (#6193)

---
 README.md           | 1 -
 docs/FAQ.rst        | 2 +-
 docs/Parameters.rst | 6 ------
 3 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/README.md b/README.md
index f6f4e8c570e0..f3f63404b399 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,6 @@ Next you may want to read:
 - [**Features**](https://github.com/microsoft/LightGBM/blob/master/docs/Features.rst) and algorithms supported by LightGBM.
 - [**Parameters**](https://github.com/microsoft/LightGBM/blob/master/docs/Parameters.rst) is an exhaustive list of customization you can make.
 - [**Distributed Learning**](https://github.com/microsoft/LightGBM/blob/master/docs/Parallel-Learning-Guide.rst) and [**GPU Learning**](https://github.com/microsoft/LightGBM/blob/master/docs/GPU-Tutorial.rst) can speed up computation.
-- [**Laurae++ interactive documentation**](https://sites.google.com/view/lauraepp/parameters) is a detailed guide for hyperparameters.
 - [**FLAML**](https://www.microsoft.com/en-us/research/project/fast-and-lightweight-automl-for-large-scale-data/articles/flaml-a-fast-and-lightweight-automl-library/) provides automated tuning for LightGBM ([code examples](https://microsoft.github.io/FLAML/docs/Examples/AutoML-for-LightGBM/)).
- [**Optuna Hyperparameter Tuner**](https://medium.com/optuna/lightgbm-tuner-new-optuna-integration-for-hyperparameter-optimization-8b7095e99258) provides automated tuning for LightGBM hyperparameters ([code examples](https://github.com/optuna/optuna-examples/blob/main/lightgbm/lightgbm_tuner_simple.py)). - [**Understanding LightGBM Parameters (and How to Tune Them using Neptune)**](https://neptune.ai/blog/lightgbm-parameters-guide). diff --git a/docs/FAQ.rst b/docs/FAQ.rst index 2e0002cb6bc1..31b35e4867d4 100644 --- a/docs/FAQ.rst +++ b/docs/FAQ.rst @@ -62,7 +62,7 @@ General LightGBM Questions 1. Where do I find more details about LightGBM parameters? ---------------------------------------------------------- -Take a look at `Parameters <./Parameters.rst>`__ and the `Laurae++/Parameters `__ website. +Take a look at `Parameters <./Parameters.rst>`__. 2. On datasets with millions of features, training does not start (or starts after a very long time). ----------------------------------------------------------------------------------------------------- diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 329f9c38656e..341cdd487c71 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -14,10 +14,6 @@ This page contains descriptions of all parameters in LightGBM. - `Parameters Tuning <./Parameters-Tuning.rst>`__ -**External Links** - -- `Laurae++ Interactive Documentation`_ - Parameters Format ----------------- @@ -1380,5 +1376,3 @@ If the name of data file is ``train.txt``, the query file should be named as ``t In this case, LightGBM will load the query file automatically if it exists. Also, you can include query/group id column in your data file. Please refer to the ``group_column`` `parameter <#group_column>`__ in above. - -.. _Laurae++ Interactive Documentation: https://sites.google.com/view/lauraepp/parameters From f5b6bd60d9d752c8e5a75b11ab771d0422214bb4 Mon Sep 17 00:00:00 2001 From: Oliver Borchert Date: Mon, 4 Dec 2023 19:26:55 +0000 Subject: [PATCH 12/19] [python-package] Allow to pass Arrow table and array as init scores (#6167) --- include/LightGBM/c_api.h | 5 +-- include/LightGBM/dataset.h | 4 +++ python-package/lightgbm/basic.py | 28 ++++++++++----- python-package/lightgbm/compat.py | 2 ++ src/io/dataset.cpp | 2 ++ src/io/metadata.cpp | 28 ++++++++++----- tests/python_package_test/test_arrow.py | 45 ++++++++++++++++++++++++- 7 files changed, 95 insertions(+), 19 deletions(-) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index eafe6fab7825..ada2e4109638 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -559,9 +559,10 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetSetField(DatasetHandle handle, * \brief Set vector to a content in info. * \note * - \a group converts input datatype into ``int32``; - * - \a label and \a weight convert input datatype into ``float32``. + * - \a label and \a weight convert input datatype into ``float32``; + * - \a init_score converts input datatype into ``float64``. 
* \param handle Handle of dataset - * \param field_name Field name, can be \a label, \a weight, \a group + * \param field_name Field name, can be \a label, \a weight, \a init_score, \a group * \param n_chunks The number of Arrow arrays passed to this function * \param chunks Pointer to the list of Arrow arrays * \param schema Pointer to the schema of all Arrow arrays diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index bf8264276a5f..220a1f9f009c 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -125,6 +125,7 @@ class Metadata { * \param init_score Initial scores, this class will manage memory for init_score. */ void SetInitScore(const double* init_score, data_size_t len); + void SetInitScore(const ArrowChunkedArray& array); /*! @@ -347,6 +348,9 @@ class Metadata { void SetWeightsFromIterator(It first, It last); /*! \brief Insert initial scores at the given index */ void InsertInitScores(const double* init_scores, data_size_t start_index, data_size_t len, data_size_t source_size); + /*! \brief Set init scores from pointers to the first element and the end of an iterator. */ + template + void SetInitScoresFromIterator(It first, It last); /*! \brief Insert queries at the given index */ void InsertQueries(const data_size_t* queries, data_size_t start_index, data_size_t len); /*! \brief Set queries from pointers to the first element and the end of an iterator. */ diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index b55546941f77..31ae5182ee9e 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -19,8 +19,8 @@ import scipy.sparse from .compat import (PANDAS_INSTALLED, PYARROW_INSTALLED, arrow_cffi, arrow_is_floating, arrow_is_integer, concat, - dt_DataTable, pa_Array, pa_ChunkedArray, pa_compute, pa_Table, pd_CategoricalDtype, pd_DataFrame, - pd_Series) + dt_DataTable, pa_Array, pa_chunked_array, pa_ChunkedArray, pa_compute, pa_Table, + pd_CategoricalDtype, pd_DataFrame, pd_Series) from .libpath import find_lib_path if TYPE_CHECKING: @@ -84,6 +84,9 @@ np.ndarray, pd_Series, pd_DataFrame, + pa_Table, + pa_Array, + pa_ChunkedArray, ] _LGBM_TrainDataType = Union[ str, @@ -1660,7 +1663,7 @@ def __init__( sum(group) = n_samples. For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc. - init_score : list, list of lists (for multi-class task), numpy array, pandas Series, pandas DataFrame (for multi-class task), or None, optional (default=None) + init_score : list, list of lists (for multi-class task), numpy array, pandas Series, pandas DataFrame (for multi-class task), pyarrow Array, pyarrow ChunkedArray, pyarrow Table (for multi-class task) or None, optional (default=None) Init score for Dataset. feature_name : list of str, or 'auto', optional (default="auto") Feature names. @@ -2440,7 +2443,7 @@ def create_valid( sum(group) = n_samples. For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc. 
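
(A Table, as opposed to an Array, is accepted for init_score so that multi-class tasks can pass one column of initial scores per class; the basic.py change in this patch concatenates those columns before handing them to the C API. A hedged sketch on synthetic data, with arbitrary column names:)

    import numpy as np
    import pyarrow as pa
    import lightgbm as lgb

    X = pa.table({"x": [1.0, 2.0, 3.0, 4.0, 5.0]})
    y = pa.array([0, 1, 2, 0, 1], type=pa.int32())

    # one column of initial scores per class; column names are arbitrary
    rng = np.random.default_rng(0)
    init_scores = pa.Table.from_arrays(
        [pa.array(rng.random(5)) for _ in range(3)],
        names=["class_0", "class_1", "class_2"],
    )

    params = {"min_data_in_bin": 1, "min_data_in_leaf": 1}
    ds = lgb.Dataset(X, label=y, init_score=init_scores, params=params).construct()
    print(np.asarray(ds.get_init_score()).shape)  # (5, 3), stored as float64
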
- init_score : list, list of lists (for multi-class task), numpy array, pandas Series, pandas DataFrame (for multi-class task), or None, optional (default=None) + init_score : list, list of lists (for multi-class task), numpy array, pandas Series, pandas DataFrame (for multi-class task), pyarrow Array, pyarrow ChunkedArray, pyarrow Table (for multi-class task) or None, optional (default=None) Init score for Dataset. params : dict or None, optional (default=None) Other parameters for validation Dataset. @@ -2547,7 +2550,7 @@ def _reverse_update_params(self) -> "Dataset": def set_field( self, field_name: str, - data: Optional[Union[List[List[float]], List[List[int]], List[float], List[int], np.ndarray, pd_Series, pd_DataFrame, pa_Array, pa_ChunkedArray]] + data: Optional[Union[List[List[float]], List[List[int]], List[float], List[int], np.ndarray, pd_Series, pd_DataFrame, pa_Table, pa_Array, pa_ChunkedArray]] ) -> "Dataset": """Set property into the Dataset. @@ -2576,7 +2579,16 @@ def set_field( return self # If the data is a arrow data, we can just pass it to C - if _is_pyarrow_array(data): + if _is_pyarrow_array(data) or _is_pyarrow_table(data): + # If a table is being passed, we concatenate the columns. This is only valid for + # 'init_score'. + if _is_pyarrow_table(data): + if field_name != "init_score": + raise ValueError(f"pyarrow tables are not supported for field '{field_name}'") + data = pa_chunked_array([ + chunk for array in data.columns for chunk in array.chunks # type: ignore + ]) + c_array = _export_arrow_to_c(data) _safe_call(_LIB.LGBM_DatasetSetFieldFromArrow( self._handle, @@ -2869,7 +2881,7 @@ def set_init_score( Parameters ---------- - init_score : list, list of lists (for multi-class task), numpy array, pandas Series, pandas DataFrame (for multi-class task), or None + init_score : list, list of lists (for multi-class task), numpy array, pandas Series, pandas DataFrame (for multi-class task), pyarrow Array, pyarrow ChunkedArray, pyarrow Table (for multi-class task) or None Init score for Booster. Returns @@ -4443,7 +4455,7 @@ def refit( .. versionadded:: 4.0.0 - init_score : list, list of lists (for multi-class task), numpy array, pandas Series, pandas DataFrame (for multi-class task), or None, optional (default=None) + init_score : list, list of lists (for multi-class task), numpy array, pandas Series, pandas DataFrame (for multi-class task), pyarrow Array, pyarrow ChunkedArray, pyarrow Table (for multi-class task) or None, optional (default=None) Init score for ``data``. .. 
versionadded:: 4.0.0 diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py index dc48dbf792cf..bd1b29a1e802 100644 --- a/python-package/lightgbm/compat.py +++ b/python-package/lightgbm/compat.py @@ -201,6 +201,7 @@ def __init__(self, *args, **kwargs): from pyarrow import Array as pa_Array from pyarrow import ChunkedArray as pa_ChunkedArray from pyarrow import Table as pa_Table + from pyarrow import chunked_array as pa_chunked_array from pyarrow.cffi import ffi as arrow_cffi from pyarrow.types import is_floating as arrow_is_floating from pyarrow.types import is_integer as arrow_is_integer @@ -243,6 +244,7 @@ class pa_compute: # type: ignore all = None equal = None + pa_chunked_array = None arrow_is_integer = None arrow_is_floating = None diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp index 78dd5e4319a5..058d7bd328ad 100644 --- a/src/io/dataset.cpp +++ b/src/io/dataset.cpp @@ -904,6 +904,8 @@ bool Dataset::SetFieldFromArrow(const char* field_name, const ArrowChunkedArray metadata_.SetLabel(ca); } else if (name == std::string("weight") || name == std::string("weights")) { metadata_.SetWeights(ca); + } else if (name == std::string("init_score")) { + metadata_.SetInitScore(ca); } else if (name == std::string("query") || name == std::string("group")) { metadata_.SetQuery(ca); } else { diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp index d94b0ed3f2f7..55440649f55e 100644 --- a/src/io/metadata.cpp +++ b/src/io/metadata.cpp @@ -355,32 +355,44 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector +void Metadata::SetInitScoresFromIterator(It first, It last) { std::lock_guard lock(mutex_); - // save to nullptr - if (init_score == nullptr || len == 0) { + // Clear init scores on empty input + if (last - first == 0) { init_score_.clear(); num_init_score_ = 0; return; } - if ((len % num_data_) != 0) { + if (((last - first) % num_data_) != 0) { Log::Fatal("Initial score size doesn't match data size"); } - if (init_score_.empty()) { init_score_.resize(len); } - num_init_score_ = len; + if (init_score_.empty()) { + init_score_.resize(last - first); + } + num_init_score_ = last - first; #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_init_score_ >= 1024) for (int64_t i = 0; i < num_init_score_; ++i) { - init_score_[i] = Common::AvoidInf(init_score[i]); + init_score_[i] = Common::AvoidInf(first[i]); } init_score_load_from_file_ = false; + #ifdef USE_CUDA if (cuda_metadata_ != nullptr) { - cuda_metadata_->SetInitScore(init_score_.data(), len); + cuda_metadata_->SetInitScore(init_score_.data(), init_score_.size()); } #endif // USE_CUDA } +void Metadata::SetInitScore(const double* init_score, data_size_t len) { + SetInitScoresFromIterator(init_score, init_score + len); +} + +void Metadata::SetInitScore(const ArrowChunkedArray& array) { + SetInitScoresFromIterator(array.begin(), array.end()); +} + void Metadata::InsertInitScores(const double* init_scores, data_size_t start_index, data_size_t len, data_size_t source_size) { if (num_init_score_ <= 0) { Log::Fatal("Inserting initial score data into dataset with no initial scores"); diff --git a/tests/python_package_test/test_arrow.py b/tests/python_package_test/test_arrow.py index 38b053e94fd5..fd20df25dd87 100644 --- a/tests/python_package_test/test_arrow.py +++ b/tests/python_package_test/test_arrow.py @@ -178,7 +178,7 @@ def test_dataset_construct_weights_none(): ["array_type", "weight_data"], [(pa.array, [3, 0.7, 1.5, 0.5, 0.1]), (pa.chunked_array, [[3], 
[0.7, 1.5, 0.5, 0.1]])], ) -@pytest.mark.parametrize("arrow_type", [pa.float32(), pa.float64()]) +@pytest.mark.parametrize("arrow_type", _FLOAT_TYPES) def test_dataset_construct_weights(array_type, weight_data, arrow_type): data = generate_dummy_arrow_table() weights = array_type(weight_data, type=arrow_type) @@ -210,3 +210,46 @@ def test_dataset_construct_groups(array_type, group_data, arrow_type): expected = np.array([0, 2, 5], dtype=np.int32) np_assert_array_equal(expected, dataset.get_field("group"), strict=True) + + +# ----------------------------------------- INIT SCORES ----------------------------------------- # + + +@pytest.mark.parametrize( + ["array_type", "init_score_data"], + [ + (pa.array, [0, 1, 2, 3, 3]), + (pa.chunked_array, [[0, 1, 2], [3, 3]]), + (pa.chunked_array, [[], [0, 1, 2], [3, 3]]), + (pa.chunked_array, [[0, 1], [], [], [2], [3, 3], []]), + ], +) +@pytest.mark.parametrize("arrow_type", _INTEGER_TYPES + _FLOAT_TYPES) +def test_dataset_construct_init_scores_array( + array_type: Any, init_score_data: Any, arrow_type: Any +): + data = generate_dummy_arrow_table() + init_scores = array_type(init_score_data, type=arrow_type) + dataset = lgb.Dataset(data, init_score=init_scores, params=dummy_dataset_params()) + dataset.construct() + + expected = np.array([0, 1, 2, 3, 3], dtype=np.float64) + np_assert_array_equal(expected, dataset.get_init_score(), strict=True) + + +def test_dataset_construct_init_scores_table(): + data = generate_dummy_arrow_table() + init_scores = pa.Table.from_arrays( + [ + generate_random_arrow_array(5, seed=1), + generate_random_arrow_array(5, seed=2), + generate_random_arrow_array(5, seed=3), + ], + names=["a", "b", "c"], + ) + dataset = lgb.Dataset(data, init_score=init_scores, params=dummy_dataset_params()) + dataset.construct() + + actual = dataset.get_init_score() + expected = init_scores.to_pandas().to_numpy().astype(np.float64) + np_assert_array_equal(expected, actual, strict=True) From d84582b746500237c52701975e006ba8a813d229 Mon Sep 17 00:00:00 2001 From: Oliver Borchert Date: Wed, 6 Dec 2023 16:18:28 +0000 Subject: [PATCH 13/19] Fix null handling for Arrow data (#6227) --- include/LightGBM/arrow.tpp | 2 +- tests/cpp_tests/test_arrow.cpp | 6 ++++-- tests/python_package_test/test_arrow.py | 11 +++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/LightGBM/arrow.tpp b/include/LightGBM/arrow.tpp index 67b481c9497e..8d1ce4f4c0c1 100644 --- a/include/LightGBM/arrow.tpp +++ b/include/LightGBM/arrow.tpp @@ -144,7 +144,7 @@ struct ArrayIndexAccessor { // - The structure of validity bitmasks is taken from here: // https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps // - If the bitmask is NULL, all indices are valid - if (validity == nullptr || !(validity[buffer_idx / 8] & (1 << (buffer_idx % 8)))) { + if (validity == nullptr || (validity[buffer_idx / 8] & (1 << (buffer_idx % 8)))) { // In case the index is valid, we take it from the data buffer auto data = static_cast(array->buffers[1]); return static_cast(data[buffer_idx]); diff --git a/tests/cpp_tests/test_arrow.cpp b/tests/cpp_tests/test_arrow.cpp index 7e3c57c401f4..e975b6ba374b 100644 --- a/tests/cpp_tests/test_arrow.cpp +++ b/tests/cpp_tests/test_arrow.cpp @@ -41,10 +41,12 @@ class ArrowChunkedArrayTest : public testing::Test { // 1) Create validity bitmap char* validity = nullptr; if (!null_indices.empty()) { - validity = static_cast(calloc(values.size() + sizeof(char) - 1, sizeof(char))); + auto num_bytes = (values.size() + 7) / 8; + validity = 
static_cast<char*>(calloc(num_bytes, sizeof(char)));
+      memset(validity, 0xff, num_bytes * sizeof(char));
       for (size_t i = 0; i < values.size(); ++i) {
         if (std::find(null_indices.begin(), null_indices.end(), i) != null_indices.end()) {
-          validity[i / 8] |= (1 << (i % 8));
+          validity[i / 8] &= ~(1 << (i % 8));
         }
       }
     }
diff --git a/tests/python_package_test/test_arrow.py b/tests/python_package_test/test_arrow.py
index fd20df25dd87..5e09465e34b3 100644
--- a/tests/python_package_test/test_arrow.py
+++ b/tests/python_package_test/test_arrow.py
@@ -46,6 +46,16 @@ def generate_simple_arrow_table() -> pa.Table:
     return pa.Table.from_arrays(columns, names=[f"col_{i}" for i in range(len(columns))])


+def generate_nullable_arrow_table() -> pa.Table:
+    columns = [
+        pa.chunked_array([[1, None, 3, 4, 5]], type=pa.float32()),
+        pa.chunked_array([[None, 2, 3, 4, 5]], type=pa.float32()),
+        pa.chunked_array([[1, 2, 3, 4, None]], type=pa.float32()),
+        pa.chunked_array([[None, None, None, None, None]], type=pa.float32()),
+    ]
+    return pa.Table.from_arrays(columns, names=[f"col_{i}" for i in range(len(columns))])
+
+
 def generate_dummy_arrow_table() -> pa.Table:
     col1 = pa.chunked_array([[1, 2, 3], [4, 5]], type=pa.uint8())
     col2 = pa.chunked_array([[0.5, 0.6], [0.1, 0.8, 1.5]], type=pa.float32())
@@ -95,6 +105,7 @@ def dummy_dataset_params() -> Dict[str, Any]:
     [  # Use lambda functions here to minimize memory consumption
         (lambda: generate_simple_arrow_table(), dummy_dataset_params()),
         (lambda: generate_dummy_arrow_table(), dummy_dataset_params()),
+        (lambda: generate_nullable_arrow_table(), dummy_dataset_params()),
         (lambda: generate_random_arrow_table(3, 1000, 42), {}),
         (lambda: generate_random_arrow_table(100, 10000, 43), {}),
     ],

From 4aba4fc1326210a1501f144bd54d77a64d127362 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 6 Dec 2023 12:56:27 -0600
Subject: [PATCH 14/19] [R-package] change CRAN maintainer (#6224)

---
 R-package/DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 1193c0d463b9..62b479530b4a 100755
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -4,10 +4,10 @@ Title: Light Gradient Boosting Machine
 Version: ~~VERSION~~
 Date: ~~DATE~~
 Authors@R: c(
-    person("Yu", "Shi", email = "yushi2@microsoft.com", role = c("aut", "cre")),
+    person("Yu", "Shi", email = "yushi2@microsoft.com", role = c("aut")),
     person("Guolin", "Ke", email = "guolin.ke@outlook.com", role = c("aut")),
     person("Damien", "Soukhavong", email = "damien.soukhavong@skema.edu", role = c("aut")),
-    person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")),
+    person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut", "cre")),
     person("Qi", "Meng", role = c("aut")),
     person("Thomas", "Finley", role = c("aut")),
     person("Taifeng", "Wang", role = c("aut")),

From e797985227a012a837c20eddc457de6b7fc7aeaa Mon Sep 17 00:00:00 2001
From: José Morales
Date: Thu, 7 Dec 2023 08:54:18 -0600
Subject: [PATCH 15/19] [python-package] take shallow copy of dataframe in
 predict (fixes #6195) (#6218)

---
 python-package/lightgbm/basic.py        |  5 ++++-
 tests/python_package_test/test_basic.py | 19 ++++++++++++++++---
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 31ae5182ee9e..c4022e7fdd9a 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -789,6 +789,10 @@ def _data_from_pandas(
     if len(data.shape) != 2 or data.shape[0] <
1: raise ValueError('Input data must be 2 dimensional and non empty.') + # take shallow copy in case we modify categorical columns + # whole column modifications don't change the original df + data = data.copy(deep=False) + # determine feature names if feature_name == 'auto': feature_name = [str(col) for col in data.columns] @@ -805,7 +809,6 @@ def _data_from_pandas( if list(data[col].cat.categories) != list(category): data[col] = data[col].cat.set_categories(category) if len(cat_cols): # cat_cols is list - data = data.copy(deep=False) # not alter origin DataFrame data[cat_cols] = data[cat_cols].apply(lambda x: x.cat.codes).replace({-1: np.nan}) if categorical_feature == 'auto': # use cat cols from DataFrame categorical_feature = cat_cols_not_ordered diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 2f6b07e7a77f..b8ef43e41397 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -822,21 +822,34 @@ def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name): @pytest.mark.parametrize('feature_name', [['x1'], [42], 'auto']) -def test_categorical_code_conversion_doesnt_modify_original_data(feature_name): +@pytest.mark.parametrize('categories', ['seen', 'unseen']) +def test_categorical_code_conversion_doesnt_modify_original_data(feature_name, categories): pd = pytest.importorskip('pandas') X = np.random.choice(['a', 'b'], 100).reshape(-1, 1) column_name = 'a' if feature_name == 'auto' else feature_name[0] df = pd.DataFrame(X.copy(), columns=[column_name], dtype='category') + if categories == 'seen': + pandas_categorical = [['a', 'b']] + else: + pandas_categorical = [['a']] data = lgb.basic._data_from_pandas( data=df, feature_name=feature_name, categorical_feature="auto", - pandas_categorical=None + pandas_categorical=pandas_categorical, )[0] # check that the original data wasn't modified np.testing.assert_equal(df[column_name], X[:, 0]) # check that the built data has the codes - np.testing.assert_equal(df[column_name].cat.codes, data[:, 0]) + if categories == 'seen': + # if all categories were seen during training we just take the codes + codes = df[column_name].cat.codes + else: + # if we only saw 'a' during training we just replace its code + # and leave the rest as nan + a_code = df[column_name].cat.categories.get_loc('a') + codes = np.where(df[column_name] == 'a', a_code, np.nan) + np.testing.assert_equal(codes, data[:, 0]) @pytest.mark.parametrize('min_data_in_bin', [2, 10]) From 1548b42bac5d5b7c295ba4d3132e8bda47e34fd1 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 7 Dec 2023 17:03:16 -0600 Subject: [PATCH 16/19] [R-package] [c++] add tighter multithreading control, avoid global OpenMP side effects (fixes #4705, fixes #5102) (#6226) --- .ci/lint-cpp.sh | 3 +- CMakeLists.txt | 1 + R-package/NAMESPACE | 2 + R-package/R/lgb.Booster.R | 12 +++++ R-package/R/lgb.Dataset.R | 22 ++++++++ R-package/R/lgb.cv.R | 2 + R-package/R/lgb.importance.R | 2 + R-package/R/lgb.interprete.R | 2 + R-package/R/lgb.model.dt.tree.R | 2 + R-package/R/lgb.plot.importance.R | 2 + R-package/R/lgb.plot.interpretation.R | 2 + R-package/R/lgb.restore_handle.R | 4 ++ R-package/R/lgb.train.R | 2 + R-package/R/multithreading.R | 51 +++++++++++++++++++ R-package/R/readRDS.lgb.Booster.R | 2 + R-package/R/saveRDS.lgb.Booster.R | 2 + R-package/man/dim.Rd | 2 + R-package/man/dimnames.lgb.Dataset.Rd | 2 + R-package/man/getLGBMThreads.Rd | 26 ++++++++++ R-package/man/get_field.Rd | 2 + R-package/man/lgb.Dataset.Rd 
| 2 + R-package/man/lgb.Dataset.construct.Rd | 2 + R-package/man/lgb.Dataset.create.valid.Rd | 2 + R-package/man/lgb.Dataset.save.Rd | 2 + R-package/man/lgb.Dataset.set.categorical.Rd | 2 + R-package/man/lgb.Dataset.set.reference.Rd | 2 + R-package/man/lgb.configure_fast_predict.Rd | 2 + R-package/man/lgb.cv.Rd | 2 + R-package/man/lgb.dump.Rd | 2 + R-package/man/lgb.get.eval.result.Rd | 2 + R-package/man/lgb.importance.Rd | 2 + R-package/man/lgb.interprete.Rd | 2 + R-package/man/lgb.load.Rd | 2 + R-package/man/lgb.model.dt.tree.Rd | 2 + R-package/man/lgb.plot.importance.Rd | 2 + R-package/man/lgb.plot.interpretation.Rd | 2 + R-package/man/lgb.restore_handle.Rd | 4 ++ R-package/man/lgb.save.Rd | 2 + R-package/man/lgb.train.Rd | 2 + R-package/man/predict.lgb.Booster.Rd | 2 + R-package/man/readRDS.lgb.Booster.Rd | 2 + R-package/man/saveRDS.lgb.Booster.Rd | 2 + R-package/man/setLGBMThreads.Rd | 32 ++++++++++++ R-package/man/set_field.Rd | 2 + R-package/man/slice.Rd | 2 + R-package/src/Makevars.in | 1 + R-package/src/Makevars.win.in | 1 + R-package/src/lightgbm_R.cpp | 19 +++++++ R-package/src/lightgbm_R.h | 19 +++++++ R-package/tests/testthat/helper.R | 5 ++ .../tests/testthat/test_multithreading.R | 16 ++++++ R-package/vignettes/basic_walkthrough.Rmd | 6 +++ build-cran-package.sh | 1 + include/LightGBM/c_api.h | 14 +++++ include/LightGBM/utils/openmp_wrapper.h | 47 ++++++++++------- src/c_api.cpp | 17 +++++++ src/utils/openmp_wrapper.cpp | 44 ++++++++++++++++ tests/c_api_test/test_.py | 33 ++++++++++++ 58 files changed, 429 insertions(+), 21 deletions(-) create mode 100644 R-package/R/multithreading.R create mode 100644 R-package/man/getLGBMThreads.Rd create mode 100644 R-package/man/setLGBMThreads.Rd create mode 100644 R-package/tests/testthat/test_multithreading.R create mode 100644 src/utils/openmp_wrapper.cpp diff --git a/.ci/lint-cpp.sh b/.ci/lint-cpp.sh index 56489ecf3325..2d91f8e85f00 100755 --- a/.ci/lint-cpp.sh +++ b/.ci/lint-cpp.sh @@ -30,8 +30,7 @@ get_omp_pragmas_without_num_threads() { --include='*.h' \ --include='*.hpp' \ 'pragma omp parallel' \ - | grep -v ' num_threads' \ - | grep -v 'openmp_wrapper.h' + | grep -v ' num_threads' } PROBLEMATIC_LINES=$( get_omp_pragmas_without_num_threads diff --git a/CMakeLists.txt b/CMakeLists.txt index 50b3cbaaf189..aef95871e4cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -432,6 +432,7 @@ file( src/objective/*.cpp src/network/*.cpp src/treelearner/*.cpp + src/utils/*.cpp if(USE_CUDA) src/treelearner/*.cu src/boosting/cuda/*.cpp diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index e07af84d8824..ab987d0593eb 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -9,6 +9,7 @@ S3method(print,lgb.Booster) S3method(set_field,lgb.Dataset) S3method(slice,lgb.Dataset) S3method(summary,lgb.Booster) +export(getLGBMthreads) export(get_field) export(lgb.Dataset) export(lgb.Dataset.construct) @@ -35,6 +36,7 @@ export(lgb.train) export(lightgbm) export(readRDS.lgb.Booster) export(saveRDS.lgb.Booster) +export(setLGBMthreads) export(set_field) export(slice) import(methods) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 17da9545ae19..4437c6fa552e 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -917,6 +917,8 @@ NULL #' the factor levels not being present in the output. 
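
(Returning to the null-handling fix in patch 13: Arrow validity bitmaps mark element i as valid when bit i is set, and the old accessor tested the negation. A small pure-Python model of the corrected check; illustrative only, not LightGBM API.)

    from typing import Optional

    def is_valid(validity: Optional[bytes], idx: int) -> bool:
        # a missing validity buffer means every element is valid
        if validity is None:
            return True
        # bit idx of the bitmap is 1 for valid (non-null) elements
        return bool(validity[idx // 8] & (1 << (idx % 8)))

    # bits 1, 2 and 4 set -> elements 1, 2 and 4 are valid, 0 and 3 are null
    bitmap = bytes([0b00010110])
    print([is_valid(bitmap, i) for i in range(5)])  # [False, True, True, False, True]
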
#' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1082,6 +1084,8 @@ predict.lgb.Booster <- function(object, #' \link{predict.lgb.Booster}. #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' library(lightgbm) #' data(mtcars) #' X <- as.matrix(mtcars[, -1L]) @@ -1224,6 +1228,8 @@ summary.lgb.Booster <- function(object, ...) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1289,6 +1295,8 @@ lgb.load <- function(filename = NULL, model_str = NULL) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -1346,6 +1354,8 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { #' @examples #' \donttest{ #' library(lightgbm) +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1396,6 +1406,8 @@ lgb.dump <- function(booster, num_iteration = NULL) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' # train a regression model #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index ddc338d2cae3..ff9b0b4fa38a 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -780,6 +780,8 @@ Dataset <- R6::R6Class( #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -837,6 +839,8 @@ lgb.Dataset <- function(data, #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -913,6 +917,8 @@ lgb.Dataset.create.valid <- function(dataset, #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -942,6 +948,8 @@ lgb.Dataset.construct <- function(dataset) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -975,6 +983,8 @@ dim.lgb.Dataset <- function(x) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1045,6 +1055,8 @@ dimnames.lgb.Dataset <- function(x) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' 
data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1089,6 +1101,8 @@ slice.lgb.Dataset <- function(dataset, idxset) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1138,6 +1152,8 @@ get_field.lgb.Dataset <- function(dataset, field_name) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1177,6 +1193,8 @@ set_field.lgb.Dataset <- function(dataset, field_name, data) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1207,6 +1225,8 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' # create training Dataset #' data(agaricus.train, package ="lightgbm") #' train <- agaricus.train @@ -1240,6 +1260,8 @@ lgb.Dataset.set.reference <- function(dataset, reference) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 11768c5bfa0b..0545fbf71899 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -51,6 +51,8 @@ CVBooster <- R6::R6Class( #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index 27efb17392df..7c76131f4f53 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -14,6 +14,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.interprete.R b/R-package/R/lgb.interprete.R index 976315262792..8f93d45429f1 100644 --- a/R-package/R/lgb.interprete.R +++ b/R-package/R/lgb.interprete.R @@ -17,6 +17,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' Logit <- function(x) log(x / (1.0 - x)) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index 5d994accfa7f..bf4562e41018 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -29,6 +29,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.plot.importance.R b/R-package/R/lgb.plot.importance.R index 
fc59ebd0efec..b8a90ca158ae 100644 --- a/R-package/R/lgb.plot.importance.R +++ b/R-package/R/lgb.plot.importance.R @@ -19,6 +19,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index 8b95371eb3c2..97650f30a7d3 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ b/R-package/R/lgb.plot.interpretation.R @@ -16,6 +16,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' Logit <- function(x) { #' log(x / (1.0 - x)) #' } diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index 0ed25ef26f3d..8a24cc628ca9 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -16,7 +16,10 @@ #' @return \code{lgb.Booster} (the same `model` object that was passed as input, invisibly). #' @seealso \link{lgb.make_serializable}, \link{lgb.drop_serialized}. #' @examples +#' \donttest{ #' library(lightgbm) +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data("agaricus.train") #' model <- lightgbm( #' agaricus.train$data @@ -33,6 +36,7 @@ #' model_new$check_null_handle() #' lgb.restore_handle(model_new) #' model_new$check_null_handle() +#' } #' @export lgb.restore_handle <- function(model) { if (!.is_Booster(x = model)) { diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index 6979558d22cd..8a299fb6b8ac 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -19,6 +19,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/multithreading.R b/R-package/R/multithreading.R new file mode 100644 index 000000000000..a8d6b51a8968 --- /dev/null +++ b/R-package/R/multithreading.R @@ -0,0 +1,51 @@ +#' @name setLGBMThreads +#' @title Set maximum number of threads used by LightGBM +#' @description LightGBM attempts to speed up many operations by using multi-threading. +#' The number of threads used in those operations can be controlled via the +#' \code{num_threads} parameter passed through \code{params} to functions like +#' \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing +#' a model from a text file) are done via code paths that don't explicitly accept thread-control +#' configuration. +#' +#' Use this function to set the maximum number of threads LightGBM will use for such operations. +#' +#' This function affects all LightGBM operations in the same process. +#' +#' So, for example, if you call \code{setLGBMthreads(4)}, no other multi-threaded LightGBM +#' operation in the same process will use more than 4 threads. +#' +#' Call \code{setLGBMthreads(-1)} to remove this limitation. 
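
(These R functions are thin wrappers over the process-wide C API functions added later in this patch, LGBM_SetMaxThreads() and LGBM_GetMaxThreads(). A minimal ctypes sketch of the same semantics, assuming a shared library built from this branch; the library filename is platform-dependent.)

    import ctypes

    # lib_lightgbm.so / .dylib / .dll depending on platform
    lib = ctypes.cdll.LoadLibrary("lib_lightgbm.so")

    # cap LightGBM at 4 threads, process-wide
    assert lib.LGBM_SetMaxThreads(ctypes.c_int(4)) == 0

    n = ctypes.c_int(0)
    assert lib.LGBM_GetMaxThreads(ctypes.byref(n)) == 0
    print(n.value)  # 4

    # any non-positive value removes the cap (stored as -1)
    assert lib.LGBM_SetMaxThreads(ctypes.c_int(-1)) == 0
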
+#' @param num_threads maximum number of threads to be used by LightGBM in multi-threaded operations +#' @return NULL +#' @seealso \link{getLGBMthreads} +#' @export +setLGBMthreads <- function(num_threads) { + .Call( + LGBM_SetMaxThreads_R, + num_threads + ) + return(invisible(NULL)) +} + +#' @name getLGBMThreads +#' @title Get default number of threads used by LightGBM +#' @description LightGBM attempts to speed up many operations by using multi-threading. +#' The number of threads used in those operations can be controlled via the +#' \code{num_threads} parameter passed through \code{params} to functions like +#' \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing +#' a model from a text file) are done via code paths that don't explicitly accept thread-control +#' configuration. +#' +#' Use this function to see the default number of threads LightGBM will use for such operations. +#' @return number of threads as an integer. \code{-1} means that in situations where parameter \code{num_threads} is +#' not explicitly supplied, LightGBM will choose a number of threads to use automatically. +#' @seealso \link{setLGBMthreads} +#' @export +getLGBMthreads <- function() { + out <- 0L + .Call( + LGBM_GetMaxThreads_R, + out + ) + return(out) +} diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index a8abac642c24..69e954fc75f1 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -12,6 +12,8 @@ #' @examples #' \donttest{ #' library(lightgbm) +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index d75056e69734..d227d75eb90d 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -22,6 +22,8 @@ #' @examples #' \donttest{ #' library(lightgbm) +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/dim.Rd b/R-package/man/dim.Rd index 94ca192d8291..69332d0ec397 100644 --- a/R-package/man/dim.Rd +++ b/R-package/man/dim.Rd @@ -21,6 +21,8 @@ be directly used with an \code{lgb.Dataset} object. 
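
(Aside for Python users, not part of this patch: the closest analogue of dim() on a constructed Dataset.)

    import numpy as np
    import lightgbm as lgb

    ds = lgb.Dataset(np.random.rand(100, 5)).construct()
    print(ds.num_data(), ds.num_feature())  # 100 5
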
} \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/dimnames.lgb.Dataset.Rd b/R-package/man/dimnames.lgb.Dataset.Rd index ec01a04f607b..85f2085f1d77 100644 --- a/R-package/man/dimnames.lgb.Dataset.Rd +++ b/R-package/man/dimnames.lgb.Dataset.Rd @@ -28,6 +28,8 @@ Since row names are irrelevant, it is recommended to use \code{colnames} directl } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/getLGBMThreads.Rd b/R-package/man/getLGBMThreads.Rd new file mode 100644 index 000000000000..21af4f4849d4 --- /dev/null +++ b/R-package/man/getLGBMThreads.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/multithreading.R +\name{getLGBMThreads} +\alias{getLGBMThreads} +\alias{getLGBMthreads} +\title{Get default number of threads used by LightGBM} +\usage{ +getLGBMthreads() +} +\value{ +number of threads as an integer. \code{-1} means that in situations where parameter \code{num_threads} is + not explicitly supplied, LightGBM will choose a number of threads to use automatically. +} +\description{ +LightGBM attempts to speed up many operations by using multi-threading. + The number of threads used in those operations can be controlled via the + \code{num_threads} parameter passed through \code{params} to functions like + \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing + a model from a text file) are done via code paths that don't explicitly accept thread-control + configuration. + + Use this function to see the default number of threads LightGBM will use for such operations. 
+} +\seealso{ +\link{setLGBMthreads} +} diff --git a/R-package/man/get_field.Rd b/R-package/man/get_field.Rd index 1b6692fcf807..e2562cc21364 100644 --- a/R-package/man/get_field.Rd +++ b/R-package/man/get_field.Rd @@ -32,6 +32,8 @@ Get one attribute of a \code{lgb.Dataset} } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.Rd b/R-package/man/lgb.Dataset.Rd index 4895600ff922..2605657b060a 100644 --- a/R-package/man/lgb.Dataset.Rd +++ b/R-package/man/lgb.Dataset.Rd @@ -65,6 +65,8 @@ Construct \code{lgb.Dataset} object from dense matrix, sparse matrix } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.construct.Rd b/R-package/man/lgb.Dataset.construct.Rd index 97c9e7887602..e400e0a5f8d5 100644 --- a/R-package/man/lgb.Dataset.construct.Rd +++ b/R-package/man/lgb.Dataset.construct.Rd @@ -17,6 +17,8 @@ Construct Dataset explicitly } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.create.valid.Rd b/R-package/man/lgb.Dataset.create.valid.Rd index ab8ca753c2b9..fc50dff19986 100644 --- a/R-package/man/lgb.Dataset.create.valid.Rd +++ b/R-package/man/lgb.Dataset.create.valid.Rd @@ -48,6 +48,8 @@ Construct validation data according to training data } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.save.Rd b/R-package/man/lgb.Dataset.save.Rd index 5ea38227ba66..b03c2c5e0ac5 100644 --- a/R-package/man/lgb.Dataset.save.Rd +++ b/R-package/man/lgb.Dataset.save.Rd @@ -20,6 +20,8 @@ Please note that \code{init_score} is not saved in binary file. } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.set.categorical.Rd b/R-package/man/lgb.Dataset.set.categorical.Rd index 26eb10770e47..5dfcc9a771e8 100644 --- a/R-package/man/lgb.Dataset.set.categorical.Rd +++ b/R-package/man/lgb.Dataset.set.categorical.Rd @@ -22,6 +22,8 @@ Set the categorical features of an \code{lgb.Dataset} object. 
Use this function } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.set.reference.Rd b/R-package/man/lgb.Dataset.set.reference.Rd index 349b0b22913e..a4efbfac5962 100644 --- a/R-package/man/lgb.Dataset.set.reference.Rd +++ b/R-package/man/lgb.Dataset.set.reference.Rd @@ -19,6 +19,8 @@ If you want to use validation data, you should set reference to training data } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} # create training Dataset data(agaricus.train, package ="lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.configure_fast_predict.Rd b/R-package/man/lgb.configure_fast_predict.Rd index 39fe6afa6b18..e02600451df5 100644 --- a/R-package/man/lgb.configure_fast_predict.Rd +++ b/R-package/man/lgb.configure_fast_predict.Rd @@ -114,6 +114,8 @@ Calling this function multiple times with different parameters might not overrid } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} library(lightgbm) data(mtcars) X <- as.matrix(mtcars[, -1L]) diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index 555cb11c7bb3..7ea2928c6166 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -152,6 +152,8 @@ Cross validation logic used by LightGBM \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.dump.Rd b/R-package/man/lgb.dump.Rd index f4e90242fd75..39f0e3018ac7 100644 --- a/R-package/man/lgb.dump.Rd +++ b/R-package/man/lgb.dump.Rd @@ -20,6 +20,8 @@ Dump LightGBM model to json \examples{ \donttest{ library(lightgbm) +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.get.eval.result.Rd b/R-package/man/lgb.get.eval.result.Rd index 9c2293a0f909..0dc7eb0845c3 100644 --- a/R-package/man/lgb.get.eval.result.Rd +++ b/R-package/man/lgb.get.eval.result.Rd @@ -33,6 +33,8 @@ Given a \code{lgb.Booster}, return evaluation results for a } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} # train a regression model data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd index 89a3d4e6b5b7..79cb82f5d8ef 100644 --- a/R-package/man/lgb.importance.Rd +++ b/R-package/man/lgb.importance.Rd @@ -25,6 +25,8 @@ Creates a \code{data.table} of feature importances in a model. } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.interprete.Rd b/R-package/man/lgb.interprete.Rd index c1166b2c1cc9..3acc27955c46 100644 --- a/R-package/man/lgb.interprete.Rd +++ b/R-package/man/lgb.interprete.Rd @@ -30,6 +30,8 @@ Computes feature contribution components of rawscore prediction. 
} \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} Logit <- function(x) log(x / (1.0 - x)) data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.load.Rd b/R-package/man/lgb.load.Rd index c1a00a20974b..f145db5a245e 100644 --- a/R-package/man/lgb.load.Rd +++ b/R-package/man/lgb.load.Rd @@ -20,6 +20,8 @@ Load LightGBM takes in either a file path or model string. } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index 4d02ede9a001..60ef8cdac133 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -40,6 +40,8 @@ Parse a LightGBM model json dump into a \code{data.table} structure. } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.plot.importance.Rd b/R-package/man/lgb.plot.importance.Rd index 302f46460e3f..bdf354da0385 100644 --- a/R-package/man/lgb.plot.importance.Rd +++ b/R-package/man/lgb.plot.importance.Rd @@ -38,6 +38,8 @@ Features are shown ranked in a decreasing importance order. } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.plot.interpretation.Rd b/R-package/man/lgb.plot.interpretation.Rd index a914071e896f..6f168e120a4e 100644 --- a/R-package/man/lgb.plot.interpretation.Rd +++ b/R-package/man/lgb.plot.interpretation.Rd @@ -35,6 +35,8 @@ contribution of a feature. Features are shown ranked in a decreasing contributio } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} Logit <- function(x) { log(x / (1.0 - x)) } diff --git a/R-package/man/lgb.restore_handle.Rd b/R-package/man/lgb.restore_handle.Rd index 95cbdc64485d..37922c077642 100644 --- a/R-package/man/lgb.restore_handle.Rd +++ b/R-package/man/lgb.restore_handle.Rd @@ -27,7 +27,10 @@ function. If you wish to make fast single-row predictions using a \code{lgb.Boos call \link{lgb.configure_fast_predict} on the loaded \code{lgb.Booster} object. } \examples{ +\donttest{ library(lightgbm) +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data("agaricus.train") model <- lightgbm( agaricus.train$data @@ -45,6 +48,7 @@ model_new$check_null_handle() lgb.restore_handle(model_new) model_new$check_null_handle() } +} \seealso{ \link{lgb.make_serializable}, \link{lgb.drop_serialized}. } diff --git a/R-package/man/lgb.save.Rd b/R-package/man/lgb.save.Rd index efd110c7d816..62ec0ed462f6 100644 --- a/R-package/man/lgb.save.Rd +++ b/R-package/man/lgb.save.Rd @@ -21,6 +21,8 @@ Save LightGBM model } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 0f2961edc415..557c85b7f9dc 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -130,6 +130,8 @@ Low-level R interface to train a LightGBM model. 
Unlike \code{\link{lightgbm}}, \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/predict.lgb.Booster.Rd b/R-package/man/predict.lgb.Booster.Rd index 2df13b9bc374..bcb2f3f980fb 100644 --- a/R-package/man/predict.lgb.Booster.Rd +++ b/R-package/man/predict.lgb.Booster.Rd @@ -121,6 +121,8 @@ If the model object has been configured for fast single-row predictions through } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd index 6a8e4c80ca91..0a144434cd36 100644 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -23,6 +23,8 @@ Calls \code{readRDS} in what is expected to be a serialized \code{lgb.Booster} o \examples{ \donttest{ library(lightgbm) +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd index a8664243dce2..b9b34e1fd021 100644 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -46,6 +46,8 @@ Calls \code{saveRDS} on an \code{lgb.Booster} object, making it serializable bef \examples{ \donttest{ library(lightgbm) +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/setLGBMThreads.Rd b/R-package/man/setLGBMThreads.Rd new file mode 100644 index 000000000000..53336fc2548e --- /dev/null +++ b/R-package/man/setLGBMThreads.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/multithreading.R +\name{setLGBMThreads} +\alias{setLGBMThreads} +\alias{setLGBMthreads} +\title{Set maximum number of threads used by LightGBM} +\usage{ +setLGBMthreads(num_threads) +} +\arguments{ +\item{num_threads}{maximum number of threads to be used by LightGBM in multi-threaded operations} +} +\description{ +LightGBM attempts to speed up many operations by using multi-threading. + The number of threads used in those operations can be controlled via the + \code{num_threads} parameter passed through \code{params} to functions like + \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing + a model from a text file) are done via code paths that don't explicitly accept thread-control + configuration. + + Use this function to set the maximum number of threads LightGBM will use for such operations. + + This function affects all LightGBM operations in the same process. + + So, for example, if you call \code{setLGBMthreads(4)}, no other multi-threaded LightGBM + operation in the same process will use more than 4 threads. + + Call \code{setLGBMthreads(-1)} to remove this limitation. 
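
(The resolution order this implies, per-call num_threads if set, otherwise the OpenMP default, always clipped by the process-wide cap, is implemented in src/utils/openmp_wrapper.cpp further below. A pure-Python model of that logic; illustrative only.)

    def resolve_num_threads(lgbm_default: int, lgbm_max: int, omp_max: int) -> int:
        # a per-call default (num_threads param) beats the OpenMP-global default...
        n = lgbm_default if lgbm_default > 0 else omp_max
        # ...but the process-wide cap from setLGBMthreads() always wins
        if lgbm_max > 0 and n > lgbm_max:
            return lgbm_max
        return n

    assert resolve_num_threads(lgbm_default=-1, lgbm_max=-1, omp_max=8) == 8
    assert resolve_num_threads(lgbm_default=2, lgbm_max=-1, omp_max=8) == 2
    assert resolve_num_threads(lgbm_default=6, lgbm_max=4, omp_max=8) == 4
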
+} +\seealso{ +\link{getLGBMthreads} +} diff --git a/R-package/man/set_field.Rd b/R-package/man/set_field.Rd index f9901e27eefd..2ceebfb87753 100644 --- a/R-package/man/set_field.Rd +++ b/R-package/man/set_field.Rd @@ -34,6 +34,8 @@ Set one attribute of a \code{lgb.Dataset} } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd index 1d7bec08de0f..a65809a239d8 100644 --- a/R-package/man/slice.Rd +++ b/R-package/man/slice.Rd @@ -23,6 +23,8 @@ Get a new \code{lgb.Dataset} containing the specified rows of } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index ba9ef054bfab..c04263f62c1c 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -53,5 +53,6 @@ OBJECTS = \ treelearner/serial_tree_learner.o \ treelearner/tree_learner.o \ treelearner/voting_parallel_tree_learner.o \ + utils/openmp_wrapper.o \ c_api.o \ lightgbm_R.o diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in index 14f5afde002f..86d56fecdf34 100644 --- a/R-package/src/Makevars.win.in +++ b/R-package/src/Makevars.win.in @@ -54,5 +54,6 @@ OBJECTS = \ treelearner/serial_tree_learner.o \ treelearner/tree_learner.o \ treelearner/voting_parallel_tree_learner.o \ + utils/openmp_wrapper.o \ c_api.o \ lightgbm_R.o diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index 3ae7a98d8537..4799f8540497 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -1212,6 +1212,23 @@ SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) { R_API_END(); } +SEXP LGBM_GetMaxThreads_R(SEXP out) { + R_API_BEGIN(); + int num_threads; + CHECK_CALL(LGBM_GetMaxThreads(&num_threads)); + INTEGER(out)[0] = num_threads; + return R_NilValue; + R_API_END(); +} + +SEXP LGBM_SetMaxThreads_R(SEXP num_threads) { + R_API_BEGIN(); + int new_num_threads = Rf_asInteger(num_threads); + CHECK_CALL(LGBM_SetMaxThreads(new_num_threads)); + return R_NilValue; + R_API_END(); +} + // .Call() calls static const R_CallMethodDef CallEntries[] = { {"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1}, @@ -1268,6 +1285,8 @@ static const R_CallMethodDef CallEntries[] = { {"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3}, {"LGBM_NullBoosterHandleError_R" , (DL_FUNC) &LGBM_NullBoosterHandleError_R , 0}, {"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0}, + {"LGBM_GetMaxThreads_R" , (DL_FUNC) &LGBM_GetMaxThreads_R , 1}, + {"LGBM_SetMaxThreads_R" , (DL_FUNC) &LGBM_SetMaxThreads_R , 1}, {NULL, NULL, 0} }; diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h index 7141a06a207c..4f0407e8f2ec 100644 --- a/R-package/src/lightgbm_R.h +++ b/R-package/src/lightgbm_R.h @@ -850,4 +850,23 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R( */ LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R(); +/*! +* \brief Get current maximum number of threads used by LightGBM routines in this process. +* \param[out] out current maximum number of threads used by LightGBM. -1 means defaulting to omp_get_num_threads(). +* \return R NULL value +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_GetMaxThreads_R( + SEXP out +); + + +/*! 
+* \brief Set maximum number of threads used by LightGBM routines in this process. +* \param num_threads maximum number of threads used by LightGBM. -1 means defaulting to omp_get_num_threads(). +* \return R NULL value +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_SetMaxThreads_R( + SEXP num_threads +); + #endif // LIGHTGBM_R_H_ diff --git a/R-package/tests/testthat/helper.R b/R-package/tests/testthat/helper.R index 9c928c1f71d1..45edf40efbeb 100644 --- a/R-package/tests/testthat/helper.R +++ b/R-package/tests/testthat/helper.R @@ -11,6 +11,11 @@ # the check farm is a shared resource and will typically be running many checks simultaneously. # .LGB_MAX_THREADS <- 2L +setLGBMthreads(.LGB_MAX_THREADS) + +# control data.table parallelism +# ref: https://github.com/Rdatatable/data.table/issues/5658 +data.table::setDTthreads(1L) # by default, how much should results in tests be allowed to differ from hard-coded expected numbers? .LGB_NUMERIC_TOLERANCE <- 1e-6 diff --git a/R-package/tests/testthat/test_multithreading.R b/R-package/tests/testthat/test_multithreading.R new file mode 100644 index 000000000000..e2f3169627a2 --- /dev/null +++ b/R-package/tests/testthat/test_multithreading.R @@ -0,0 +1,16 @@ +test_that("getLGBMthreads() and setLGBMthreads() work as expected", { + # works with integer input + ret <- setLGBMthreads(2L) + expect_null(ret) + expect_equal(getLGBMthreads(), 2L) + + # works with float input + ret <- setLGBMthreads(1.0) + expect_null(ret) + expect_equal(getLGBMthreads(), 1L) + + # setting to any negative number sets max threads to -1 + ret <- setLGBMthreads(-312L) + expect_null(ret) + expect_equal(getLGBMthreads(), -1L) +}) diff --git a/R-package/vignettes/basic_walkthrough.Rmd b/R-package/vignettes/basic_walkthrough.Rmd index d7aaf676f386..82bd6957640c 100644 --- a/R-package/vignettes/basic_walkthrough.Rmd +++ b/R-package/vignettes/basic_walkthrough.Rmd @@ -27,6 +27,12 @@ Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), library(lightgbm) ``` +```{r, include=FALSE} +# limit number of threads used, to be respectful of CRAN's resources when it checks this vignette +data.table::setDTthreads(1L) +setLGBMthreads(2L) +``` + This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. ## The dataset diff --git a/build-cran-package.sh b/build-cran-package.sh index 1c8a5dfbdc48..9fa0c5877085 100755 --- a/build-cran-package.sh +++ b/build-cran-package.sh @@ -227,6 +227,7 @@ if ${BUILD_VIGNETTES} ; then rm -f ./lightgbm/src/network/*.o rm -f ./lightgbm/src/objective/*.o rm -f ./lightgbm/src/treelearner/*.o + rm -f ./lightgbm/src/utils/*.o echo "re-tarring ${TARBALL_NAME}" tar \ diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index ada2e4109638..397005477a5c 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -1561,6 +1561,20 @@ LIGHTGBM_C_EXPORT int LGBM_NetworkInitWithFunctions(int num_machines, void* reduce_scatter_ext_fun, void* allgather_ext_fun); +/*! + * \brief Set maximum number of threads used by LightGBM routines in this process. + * \param num_threads maximum number of threads used by LightGBM. -1 means defaulting to omp_get_num_threads(). + * \return 0 when succeed, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_SetMaxThreads(int num_threads); + +/*! 
+ * \brief Get current maximum number of threads used by LightGBM routines in this process.
+ * \param[out] out current maximum number of threads used by LightGBM. -1 means defaulting to omp_get_num_threads().
+ * \return 0 when succeed, -1 when failure happens
+ */
+LIGHTGBM_C_EXPORT int LGBM_GetMaxThreads(int* out);
+
 #if !defined(__cplusplus) && (!defined(__STDC__) || (__STDC_VERSION__ < 199901L))
 /*! \brief Inline specifier no-op in C using standards before C99. */
 #define INLINE_FUNCTION
diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h
index a337fc353b75..b9a8ea2982fc 100644
--- a/include/LightGBM/utils/openmp_wrapper.h
+++ b/include/LightGBM/utils/openmp_wrapper.h
@@ -5,6 +5,15 @@
 #ifndef LIGHTGBM_OPENMP_WRAPPER_H_
 #define LIGHTGBM_OPENMP_WRAPPER_H_
 
+#include <LightGBM/export.h>
+
+// this can only be changed by LGBM_SetMaxThreads()
+LIGHTGBM_EXTERN_C int LGBM_MAX_NUM_THREADS;
+
+// this is modified by OMP_SET_NUM_THREADS(), for example
+// by passing num_thread through params
+LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS;
+
 #ifdef _OPENMP
 
 #include <LightGBM/utils/log.h>
@@ -17,22 +26,25 @@
 #include <stdexcept>
 #include <vector>
 
-inline int OMP_NUM_THREADS() {
-  int ret = 1;
-#pragma omp parallel
-#pragma omp master
-  { ret = omp_get_num_threads(); }
-  return ret;
-}
-
-inline void OMP_SET_NUM_THREADS(int num_threads) {
-  static const int default_omp_num_threads = OMP_NUM_THREADS();
-  if (num_threads > 0) {
-    omp_set_num_threads(num_threads);
-  } else {
-    omp_set_num_threads(default_omp_num_threads);
-  }
-}
+/*
+  Get number of threads to use in OpenMP parallel regions.
+
+  By default, this will return the result of omp_get_max_threads(),
+  which is OpenMP-implementation dependent but generally can be controlled
+  by environment variable OMP_NUM_THREADS.
+
+  ref:
+  - https://www.openmp.org/spec-html/5.0/openmpsu112.html
+  - https://gcc.gnu.org/onlinedocs/libgomp/omp_005fget_005fmax_005fthreads.html
+*/
+LIGHTGBM_EXTERN_C int OMP_NUM_THREADS();
+
+/*
+  Update the default number of threads that'll be used in OpenMP parallel
+  regions for LightGBM routines where the number of threads isn't directly
+  supplied.
+*/
+LIGHTGBM_EXTERN_C void OMP_SET_NUM_THREADS(int num_threads);
 
 class ThreadExceptionHelper {
  public:
@@ -102,10 +114,7 @@ class ThreadExceptionHelper {
 /** Fall here if no OPENMP support, so just simulate a single thread running.
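+    (in this build, OMP_NUM_THREADS() always reports a single thread and
+    OMP_SET_NUM_THREADS() is a no-op, so the thread caps declared above
+    compile down to nothing.)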
    All #pragma omp should be ignored by the compiler **/
-  inline void omp_set_num_threads(int) __GOMP_NOTHROW {}  // NOLINT (no cast done here)
   inline void OMP_SET_NUM_THREADS(int) __GOMP_NOTHROW {}
-  inline int omp_get_num_threads() __GOMP_NOTHROW {return 1;}
-  inline int omp_get_max_threads() __GOMP_NOTHROW {return 1;}
   inline int omp_get_thread_num() __GOMP_NOTHROW {return 0;}
   inline int OMP_NUM_THREADS() __GOMP_NOTHROW { return 1; }
 #ifdef __cplusplus
diff --git a/src/c_api.cpp b/src/c_api.cpp
index baf934db42b1..dbe5425bd0aa 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -2699,6 +2699,23 @@ int LGBM_NetworkInitWithFunctions(int num_machines, int rank,
   API_END();
 }
 
+int LGBM_SetMaxThreads(int num_threads) {
+  API_BEGIN();
+  if (num_threads <= 0) {
+    LGBM_MAX_NUM_THREADS = -1;
+  } else {
+    LGBM_MAX_NUM_THREADS = num_threads;
+  }
+  API_END();
+}
+
+int LGBM_GetMaxThreads(int* out) {
+  API_BEGIN();
+  *out = LGBM_MAX_NUM_THREADS;
+  API_END();
+}
+
+
 // ---- start of some help functions

diff --git a/src/utils/openmp_wrapper.cpp b/src/utils/openmp_wrapper.cpp
new file mode 100644
index 000000000000..fb6e661eb67c
--- /dev/null
+++ b/src/utils/openmp_wrapper.cpp
@@ -0,0 +1,44 @@
+/*!
+ * Copyright (c) 2023 Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See LICENSE file in the project root for license information.
+ */
+#include <LightGBM/utils/openmp_wrapper.h>
+
+int LGBM_MAX_NUM_THREADS = -1;
+
+int LGBM_DEFAULT_NUM_THREADS = -1;
+
+#ifdef _OPENMP
+
+#include <omp.h>
+
+int OMP_NUM_THREADS() {
+  int default_num_threads = 1;
+
+  if (LGBM_DEFAULT_NUM_THREADS > 0) {
+    // if LightGBM-specific default has been set, ignore OpenMP-global config
+    default_num_threads = LGBM_DEFAULT_NUM_THREADS;
+  } else {
+    // otherwise, default to OpenMP-global config
+    #pragma omp single
+    { default_num_threads = omp_get_max_threads(); }
+  }
+
+  // ensure that if LGBM_SetMaxThreads() was ever called, LightGBM doesn't
+  // use more than that many threads
+  if (LGBM_MAX_NUM_THREADS > 0 && default_num_threads > LGBM_MAX_NUM_THREADS) {
+    return LGBM_MAX_NUM_THREADS;
+  }
+
+  return default_num_threads;
+}
+
+void OMP_SET_NUM_THREADS(int num_threads) {
+  if (num_threads <= 0) {
+    LGBM_DEFAULT_NUM_THREADS = -1;
+  } else {
+    LGBM_DEFAULT_NUM_THREADS = num_threads;
+  }
+}
+
+#endif  // _OPENMP
diff --git a/tests/c_api_test/test_.py b/tests/c_api_test/test_.py
index 4bb76e4aba19..6cfec1c445fc 100644
--- a/tests/c_api_test/test_.py
+++ b/tests/c_api_test/test_.py
@@ -247,3 +247,36 @@ def test_booster():
                           c_str(''),
                           c_str('preb.txt'))
     LIB.LGBM_BoosterFree(booster2)
+
+
+def test_max_thread_control():
+    # at initialization, should be -1
+    num_threads = ctypes.c_int(0)
+    ret = LIB.LGBM_GetMaxThreads(
+        ctypes.byref(num_threads)
+    )
+    assert ret == 0
+    assert num_threads.value == -1
+
+    # updating that value through the C API should work
+    ret = LIB.LGBM_SetMaxThreads(
+        ctypes.c_int(6)
+    )
+    assert ret == 0
+
+    ret = LIB.LGBM_GetMaxThreads(
+        ctypes.byref(num_threads)
+    )
+    assert ret == 0
+    assert num_threads.value == 6
+
+    # resetting to any negative number should set it to -1
+    ret = LIB.LGBM_SetMaxThreads(
+        ctypes.c_int(-123)
+    )
+    assert ret == 0
+    ret = LIB.LGBM_GetMaxThreads(
+        ctypes.byref(num_threads)
+    )
+    assert ret == 0
+    assert num_threads.value == -1

From 522f0f07b0eba0e3190c3e5c8e149a205bd20bf3 Mon Sep 17 00:00:00 2001
From: Oliver Borchert
Date: Sun, 10 Dec 2023 18:29:04 +0100
Subject: [PATCH 17/19] [python-package] Add tests for passing Arrow arrays
 with empty chunks (#6210)

---
 include/LightGBM/arrow.h                |  2 ++
 tests/python_package_test/test_arrow.py | 38 ++++++++++++++++---------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/include/LightGBM/arrow.h b/include/LightGBM/arrow.h
index 3d1c74713bd3..75511e17e72a 100644
--- a/include/LightGBM/arrow.h
+++ b/include/LightGBM/arrow.h
@@ -117,6 +117,7 @@ class ArrowChunkedArray {
                     const struct ArrowSchema* schema) {
     chunks_.reserve(n_chunks);
     for (auto k = 0; k < n_chunks; ++k) {
+      if (chunks[k].length == 0) continue;
       chunks_.push_back(&chunks[k]);
     }
     schema_ = schema;
@@ -220,6 +221,7 @@ class ArrowTable {
       std::vector<const ArrowArray*> children_chunks;
       children_chunks.reserve(n_chunks);
       for (int64_t k = 0; k < n_chunks; ++k) {
+        if (chunks[k].length == 0) continue;
        children_chunks.push_back(chunks[k].children[j]);
       }
       columns_.emplace_back(children_chunks, schema->children[j]);
diff --git a/tests/python_package_test/test_arrow.py b/tests/python_package_test/test_arrow.py
index 5e09465e34b3..7542368dcd63 100644
--- a/tests/python_package_test/test_arrow.py
+++ b/tests/python_package_test/test_arrow.py
@@ -30,18 +30,19 @@
 ]
 
 
-def generate_simple_arrow_table() -> pa.Table:
+def generate_simple_arrow_table(empty_chunks: bool = False) -> pa.Table:
+    c: list[list[int]] = [[]] if empty_chunks else []
     columns = [
-        pa.chunked_array([[1, 2, 3, 4, 5]], type=pa.uint8()),
-        pa.chunked_array([[1, 2, 3, 4, 5]], type=pa.int8()),
-        pa.chunked_array([[1, 2, 3, 4, 5]], type=pa.uint16()),
-        pa.chunked_array([[1, 2, 3, 4, 5]], type=pa.int16()),
-        pa.chunked_array([[1, 2, 3, 4, 5]], type=pa.uint32()),
-        pa.chunked_array([[1, 2, 3, 4, 5]], type=pa.int32()),
-        pa.chunked_array([[1, 2, 3, 4, 5]], type=pa.uint64()),
-        pa.chunked_array([[1, 2, 3, 4, 5]], type=pa.int64()),
-        pa.chunked_array([[1, 2, 3, 4, 5]], type=pa.float32()),
-        pa.chunked_array([[1, 2, 3, 4, 5]], type=pa.float64()),
+        pa.chunked_array(c + [[1, 2, 3]] + c + [[4, 5]] + c, type=pa.uint8()),
+        pa.chunked_array(c + [[1, 2, 3]] + c + [[4, 5]] + c, type=pa.int8()),
+        pa.chunked_array(c + [[1, 2, 3]] + c + [[4, 5]] + c, type=pa.uint16()),
+        pa.chunked_array(c + [[1, 2, 3]] + c + [[4, 5]] + c, type=pa.int16()),
+        pa.chunked_array(c + [[1, 2, 3]] + c + [[4, 5]] + c, type=pa.uint32()),
+        pa.chunked_array(c + [[1, 2, 3]] + c + [[4, 5]] + c, type=pa.int32()),
+        pa.chunked_array(c + [[1, 2, 3]] + c + [[4, 5]] + c, type=pa.uint64()),
+        pa.chunked_array(c + [[1, 2, 3]] + c + [[4, 5]] + c, type=pa.int64()),
+        pa.chunked_array(c + [[1, 2, 3]] + c + [[4, 5]] + c, type=pa.float32()),
+        pa.chunked_array(c + [[1, 2, 3]] + c + [[4, 5]] + c, type=pa.float64()),
     ]
     return pa.Table.from_arrays(columns, names=[f"col_{i}" for i in range(len(columns))])
 
@@ -104,6 +105,7 @@ def dummy_dataset_params() -> Dict[str, Any]:
     ("arrow_table_fn", "dataset_params"),
     [  # Use lambda functions here to minimize memory consumption
         (lambda: generate_simple_arrow_table(), dummy_dataset_params()),
+        (lambda: generate_simple_arrow_table(empty_chunks=True), dummy_dataset_params()),
         (lambda: generate_dummy_arrow_table(), dummy_dataset_params()),
         (lambda: generate_nullable_arrow_table(), dummy_dataset_params()),
         (lambda: generate_random_arrow_table(3, 1000, 42), {}),
@@ -160,7 +162,12 @@ def test_dataset_construct_fields_fuzzy():
 
 @pytest.mark.parametrize(
     ["array_type", "label_data"],
-    [(pa.array, [0, 1, 0, 0, 1]), (pa.chunked_array, [[0], [1, 0, 0, 1]])],
+    [
+        (pa.array, [0, 1, 0, 0, 1]),
+        (pa.chunked_array, [[0], [1, 0, 0, 1]]),
+        (pa.chunked_array, [[], [0], [1, 0, 0, 1]]),
+        (pa.chunked_array, [[0], [], [1, 0], [], [], [0, 1], []]),
+    ],
 )
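+# The two cases with [] entries place empty chunks at the start, middle, and
+# end of a chunked array; per the arrow.h guards above, zero-length chunks are
+# skipped at construction time, so the labels must round-trip unchanged.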
@pytest.mark.parametrize("arrow_type", _INTEGER_TYPES + _FLOAT_TYPES) def test_dataset_construct_labels(array_type, label_data, arrow_type): @@ -187,7 +194,12 @@ def test_dataset_construct_weights_none(): @pytest.mark.parametrize( ["array_type", "weight_data"], - [(pa.array, [3, 0.7, 1.5, 0.5, 0.1]), (pa.chunked_array, [[3], [0.7, 1.5, 0.5, 0.1]])], + [ + (pa.array, [3, 0.7, 1.5, 0.5, 0.1]), + (pa.chunked_array, [[3], [0.7, 1.5, 0.5, 0.1]]), + (pa.chunked_array, [[], [3], [0.7, 1.5, 0.5, 0.1]]), + (pa.chunked_array, [[3], [0.7], [], [], [1.5, 0.5, 0.1], []]), + ], ) @pytest.mark.parametrize("arrow_type", _FLOAT_TYPES) def test_dataset_construct_weights(array_type, weight_data, arrow_type): From 6fc80528f15b92921ecffaaa14b6bddaa0de3404 Mon Sep 17 00:00:00 2001 From: June Liu <103498042+Zhaojun-Liu@users.noreply.github.com> Date: Wed, 13 Dec 2023 12:06:28 +0800 Subject: [PATCH 18/19] fix errors from MSVC '/permissive-' mode (fixes #6230) (#6232) --- include/LightGBM/arrow.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/LightGBM/arrow.h b/include/LightGBM/arrow.h index 75511e17e72a..767da12a9809 100644 --- a/include/LightGBM/arrow.h +++ b/include/LightGBM/arrow.h @@ -16,6 +16,7 @@ #include #include #include +#include /* -------------------------------------- C DATA INTERFACE ------------------------------------- */ // The C data interface is taken from From 2dfb9a40478b965db8325baa21a63d9281f96b7c Mon Sep 17 00:00:00 2001 From: Oliver Borchert Date: Thu, 14 Dec 2023 04:35:46 +0100 Subject: [PATCH 19/19] [python-package] Allow to pass Arrow table for prediction (#6168) --- include/LightGBM/c_api.h | 34 ++++++ python-package/lightgbm/basic.py | 56 +++++++++- src/c_api.cpp | 51 +++++++++ tests/python_package_test/test_arrow.py | 133 +++++++++++++++++++++--- 4 files changed, 259 insertions(+), 15 deletions(-) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index 397005477a5c..b43f096c31ee 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -1417,6 +1417,40 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMats(BoosterHandle handle, int64_t* out_len, double* out_result); +/*! + * \brief Make prediction for a new dataset. + * \note + * You should pre-allocate memory for ``out_result``: + * - for normal and raw score, its length is equal to ``num_class * num_data``; + * - for leaf index, its length is equal to ``num_class * num_data * num_iteration``; + * - for feature contributions, its length is equal to ``num_class * num_data * (num_feature + 1)``. + * \param handle Handle of booster + * \param n_chunks The number of Arrow arrays passed to this function + * \param chunks Pointer to the list of Arrow arrays + * \param schema Pointer to the schema of all Arrow arrays + * \param predict_type What should be predicted + * - ``C_API_PREDICT_NORMAL``: normal prediction, with transform (if needed); + * - ``C_API_PREDICT_RAW_SCORE``: raw score; + * - ``C_API_PREDICT_LEAF_INDEX``: leaf index; + * - ``C_API_PREDICT_CONTRIB``: feature contributions (SHAP values) + * \param start_iteration Start index of the iteration to predict + * \param num_iteration Number of iteration for prediction, <= 0 means no limit + * \param parameter Other parameters for prediction, e.g. 
early stopping for prediction + * \param[out] out_len Length of output result + * \param[out] out_result Pointer to array with predictions + * \return 0 when succeed, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForArrow(BoosterHandle handle, + int64_t n_chunks, + const ArrowArray* chunks, + const ArrowSchema* schema, + int predict_type, + int start_iteration, + int num_iteration, + const char* parameter, + int64_t* out_len, + double* out_result); + /*! * \brief Save model into file. * \param handle Handle of booster diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index c4022e7fdd9a..560a9a438872 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -115,7 +115,8 @@ np.ndarray, pd_DataFrame, dt_DataTable, - scipy.sparse.spmatrix + scipy.sparse.spmatrix, + pa_Table, ] _LGBM_WeightType = Union[ List[float], @@ -1069,7 +1070,7 @@ def predict( Parameters ---------- - data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse + data : str, pathlib.Path, numpy array, pandas DataFrame, pyarrow Table, H2O DataTable's Frame or scipy.sparse Data source for prediction. If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). start_iteration : int, optional (default=0) @@ -1161,6 +1162,13 @@ def predict( num_iteration=num_iteration, predict_type=predict_type ) + elif _is_pyarrow_table(data): + preds, nrow = self.__pred_for_pyarrow_table( + table=data, + start_iteration=start_iteration, + num_iteration=num_iteration, + predict_type=predict_type + ) elif isinstance(data, list): try: data = np.array(data) @@ -1614,6 +1622,48 @@ def __pred_for_csc( if n_preds != out_num_preds.value: raise ValueError("Wrong length for predict results") return preds, nrow + + def __pred_for_pyarrow_table( + self, + table: pa_Table, + start_iteration: int, + num_iteration: int, + predict_type: int + ) -> Tuple[np.ndarray, int]: + """Predict for a PyArrow table.""" + if not PYARROW_INSTALLED: + raise LightGBMError("Cannot predict from Arrow without `pyarrow` installed.") + + # Check that the input is valid: we only handle numbers (for now) + if not all(arrow_is_integer(t) or arrow_is_floating(t) for t in table.schema.types): + raise ValueError("Arrow table may only have integer or floating point datatypes") + + # Prepare prediction output array + n_preds = self.__get_num_preds( + start_iteration=start_iteration, + num_iteration=num_iteration, + nrow=table.num_rows, + predict_type=predict_type + ) + preds = np.empty(n_preds, dtype=np.float64) + out_num_preds = ctypes.c_int64(0) + + # Export Arrow table to C and run prediction + c_array = _export_arrow_to_c(table) + _safe_call(_LIB.LGBM_BoosterPredictForArrow( + self._handle, + ctypes.c_int64(c_array.n_chunks), + ctypes.c_void_p(c_array.chunks_ptr), + ctypes.c_void_p(c_array.schema_ptr), + ctypes.c_int(predict_type), + ctypes.c_int(start_iteration), + ctypes.c_int(num_iteration), + _c_str(self.pred_parameter), + ctypes.byref(out_num_preds), + preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) + if n_preds != out_num_preds.value: + raise ValueError("Wrong length for predict results") + return preds, table.num_rows def current_iteration(self) -> int: """Get the index of the current iteration. 
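For illustration, a minimal sketch of what `__pred_for_pyarrow_table` enables for end users; the column names and data here are hypothetical, and it assumes pyarrow (plus pandas for the comparison) is installed alongside a lightgbm build that includes this patch:

```python
import numpy as np
import pyarrow as pa
import lightgbm as lgb

rng = np.random.default_rng(42)

# Hypothetical two-column feature table; only numeric Arrow types are allowed.
table = pa.Table.from_arrays(
    [pa.array(rng.standard_normal(200)), pa.array(rng.standard_normal(200))],
    names=["f0", "f1"],
)
booster = lgb.train(
    {"objective": "regression", "verbose": -1},
    lgb.Dataset(table, label=rng.standard_normal(200)),
    num_boost_round=3,
)

# Predict directly on the Arrow table; no pandas round trip is needed, and
# the result matches booster.predict(table.to_pandas()).
preds = booster.predict(table)

# Non-numeric columns are rejected before anything is exported to C:
bad = table.append_column("s", pa.array(["x"] * 200))
# booster.predict(bad)  # ValueError: Arrow table may only have integer or floating point datatypes
```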
@@ -4350,7 +4400,7 @@ def predict(
 
         Parameters
         ----------
-        data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse
+        data : str, pathlib.Path, numpy array, pandas DataFrame, pyarrow Table, H2O DataTable's Frame or scipy.sparse
             Data source for prediction.
             If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM).
         start_iteration : int, optional (default=0)
diff --git a/src/c_api.cpp b/src/c_api.cpp
index dbe5425bd0aa..67b18003588a 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -2568,6 +2568,57 @@ int LGBM_BoosterPredictForMats(BoosterHandle handle,
   API_END();
 }
 
+int LGBM_BoosterPredictForArrow(BoosterHandle handle,
+                                int64_t n_chunks,
+                                const ArrowArray* chunks,
+                                const ArrowSchema* schema,
+                                int predict_type,
+                                int start_iteration,
+                                int num_iteration,
+                                const char* parameter,
+                                int64_t* out_len,
+                                double* out_result) {
+  API_BEGIN();
+
+  // Apply the configuration
+  auto param = Config::Str2Map(parameter);
+  Config config;
+  config.Set(param);
+  OMP_SET_NUM_THREADS(config.num_threads);
+
+  // Set up chunked array and iterators for all columns
+  ArrowTable table(n_chunks, chunks, schema);
+  std::vector<ArrowChunkedArray::Iterator<double>> its;
+  its.reserve(table.get_num_columns());
+  for (int64_t j = 0; j < table.get_num_columns(); ++j) {
+    its.emplace_back(table.get_column(j).begin<double>());
+  }
+
+  // Build row function
+  auto num_columns = table.get_num_columns();
+  auto row_fn = [num_columns, &its] (int row_idx) {
+    std::vector<std::pair<int, double>> result;
+    result.reserve(num_columns);
+    for (int64_t j = 0; j < num_columns; ++j) {
+      result.emplace_back(static_cast<int>(j), its[j][row_idx]);
+    }
+    return result;
+  };
+
+  // Run prediction
+  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
+  ref_booster->Predict(start_iteration,
+                       num_iteration,
+                       predict_type,
+                       static_cast<int32_t>(table.get_num_rows()),
+                       static_cast<int32_t>(table.get_num_columns()),
+                       row_fn,
+                       config,
+                       out_result,
+                       out_len);
+  API_END();
+}
+
 int LGBM_BoosterSaveModel(BoosterHandle handle,
                           int start_iteration,
                           int num_iteration,
diff --git a/tests/python_package_test/test_arrow.py b/tests/python_package_test/test_arrow.py
index 7542368dcd63..593c03d8c7ef 100644
--- a/tests/python_package_test/test_arrow.py
+++ b/tests/python_package_test/test_arrow.py
@@ -1,6 +1,6 @@
 # coding: utf-8
 import filecmp
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 
 import numpy as np
 import pyarrow as pa
@@ -63,19 +63,40 @@ def generate_dummy_arrow_table() -> pa.Table:
     return pa.Table.from_arrays([col1, col2], names=["a", "b"])
 
 
-def generate_random_arrow_table(num_columns: int, num_datapoints: int, seed: int) -> pa.Table:
-    columns = [generate_random_arrow_array(num_datapoints, seed + i) for i in range(num_columns)]
+def generate_random_arrow_table(
+    num_columns: int,
+    num_datapoints: int,
+    seed: int,
+    generate_nulls: bool = True,
+    values: Optional[np.ndarray] = None,
+) -> pa.Table:
+    columns = [
+        generate_random_arrow_array(
+            num_datapoints, seed + i, generate_nulls=generate_nulls, values=values
+        )
+        for i in range(num_columns)
+    ]
     names = [f"col_{i}" for i in range(num_columns)]
     return pa.Table.from_arrays(columns, names=names)
 
 
-def generate_random_arrow_array(num_datapoints: int, seed: int) -> pa.ChunkedArray:
+def generate_random_arrow_array(
+    num_datapoints: int,
+    seed: int,
+    generate_nulls: bool = True,
+    values: Optional[np.ndarray] = None,
+) -> pa.ChunkedArray:
     generator = np.random.default_rng(seed)
-    data = generator.standard_normal(num_datapoints)
+    data = (
+        generator.standard_normal(num_datapoints)
if values is None + else generator.choice(values, size=num_datapoints, replace=True) + ) # Set random nulls - indices = generator.choice(len(data), size=num_datapoints // 10) - data[indices] = None + if generate_nulls: + indices = generator.choice(len(data), size=num_datapoints // 10) + data[indices] = None # Split data into <=2 random chunks split_points = np.sort(generator.choice(np.arange(1, num_datapoints), 2, replace=False)) @@ -131,8 +152,8 @@ def test_dataset_construct_fuzzy(tmp_path, arrow_table_fn, dataset_params): def test_dataset_construct_fields_fuzzy(): arrow_table = generate_random_arrow_table(3, 1000, 42) - arrow_labels = generate_random_arrow_array(1000, 42) - arrow_weights = generate_random_arrow_array(1000, 42) + arrow_labels = generate_random_arrow_array(1000, 42, generate_nulls=False) + arrow_weights = generate_random_arrow_array(1000, 42, generate_nulls=False) arrow_groups = pa.chunked_array([[300, 400, 50], [250]], type=pa.int32()) arrow_dataset = lgb.Dataset( @@ -264,9 +285,9 @@ def test_dataset_construct_init_scores_table(): data = generate_dummy_arrow_table() init_scores = pa.Table.from_arrays( [ - generate_random_arrow_array(5, seed=1), - generate_random_arrow_array(5, seed=2), - generate_random_arrow_array(5, seed=3), + generate_random_arrow_array(5, seed=1, generate_nulls=False), + generate_random_arrow_array(5, seed=2, generate_nulls=False), + generate_random_arrow_array(5, seed=3, generate_nulls=False), ], names=["a", "b", "c"], ) @@ -276,3 +297,91 @@ def test_dataset_construct_init_scores_table(): actual = dataset.get_init_score() expected = init_scores.to_pandas().to_numpy().astype(np.float64) np_assert_array_equal(expected, actual, strict=True) + + +# ------------------------------------------ PREDICTION ----------------------------------------- # + + +def assert_equal_predict_arrow_pandas(booster: lgb.Booster, data: pa.Table): + p_arrow = booster.predict(data) + p_pandas = booster.predict(data.to_pandas()) + np_assert_array_equal(p_arrow, p_pandas, strict=True) + + p_raw_arrow = booster.predict(data, raw_score=True) + p_raw_pandas = booster.predict(data.to_pandas(), raw_score=True) + np_assert_array_equal(p_raw_arrow, p_raw_pandas, strict=True) + + p_leaf_arrow = booster.predict(data, pred_leaf=True) + p_leaf_pandas = booster.predict(data.to_pandas(), pred_leaf=True) + np_assert_array_equal(p_leaf_arrow, p_leaf_pandas, strict=True) + + p_pred_contrib_arrow = booster.predict(data, pred_contrib=True) + p_pred_contrib_pandas = booster.predict(data.to_pandas(), pred_contrib=True) + np_assert_array_equal(p_pred_contrib_arrow, p_pred_contrib_pandas, strict=True) + + p_first_iter_arrow = booster.predict(data, start_iteration=0, num_iteration=1, raw_score=True) + p_first_iter_pandas = booster.predict( + data.to_pandas(), start_iteration=0, num_iteration=1, raw_score=True + ) + np_assert_array_equal(p_first_iter_arrow, p_first_iter_pandas, strict=True) + + +def test_predict_regression(): + data = generate_random_arrow_table(10, 10000, 42) + dataset = lgb.Dataset( + data, + label=generate_random_arrow_array(10000, 43, generate_nulls=False), + params=dummy_dataset_params(), + ) + booster = lgb.train( + {"objective": "regression", "num_leaves": 7}, + dataset, + num_boost_round=5, + ) + assert_equal_predict_arrow_pandas(booster, data) + + +def test_predict_binary_classification(): + data = generate_random_arrow_table(10, 10000, 42) + dataset = lgb.Dataset( + data, + label=generate_random_arrow_array(10000, 43, generate_nulls=False, values=np.arange(2)), + 
params=dummy_dataset_params(), + ) + booster = lgb.train( + {"objective": "binary", "num_leaves": 7}, + dataset, + num_boost_round=5, + ) + assert_equal_predict_arrow_pandas(booster, data) + + +def test_predict_multiclass_classification(): + data = generate_random_arrow_table(10, 10000, 42) + dataset = lgb.Dataset( + data, + label=generate_random_arrow_array(10000, 43, generate_nulls=False, values=np.arange(5)), + params=dummy_dataset_params(), + ) + booster = lgb.train( + {"objective": "multiclass", "num_leaves": 7, "num_class": 5}, + dataset, + num_boost_round=5, + ) + assert_equal_predict_arrow_pandas(booster, data) + + +def test_predict_ranking(): + data = generate_random_arrow_table(10, 10000, 42) + dataset = lgb.Dataset( + data, + label=generate_random_arrow_array(10000, 43, generate_nulls=False, values=np.arange(4)), + group=np.array([1000, 2000, 3000, 4000]), + params=dummy_dataset_params(), + ) + booster = lgb.train( + {"objective": "lambdarank", "num_leaves": 7}, + dataset, + num_boost_round=5, + ) + assert_equal_predict_arrow_pandas(booster, data)
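Taken together, the series makes Arrow a first-class input format for the Python package. To close, here is a minimal end-to-end sketch combining the pieces above; the data is hypothetical, and it assumes pyarrow and pandas are installed and that lightgbm was built with these patches:

```python
import numpy as np
import pyarrow as pa
import lightgbm as lgb

rng = np.random.default_rng(0)
n = 1000

# Features as an Arrow table; the label as a chunked array with a leading
# empty chunk (zero-length chunks are skipped during construction); weights
# as float32 Arrow data.
features = pa.Table.from_arrays(
    [pa.array(rng.standard_normal(n)) for _ in range(3)],
    names=["f0", "f1", "f2"],
)
label = pa.chunked_array([[], rng.integers(0, 2, n).tolist()], type=pa.int32())
weight = pa.chunked_array([rng.random(n).astype(np.float32)])

dataset = lgb.Dataset(features, label=label, weight=weight)
booster = lgb.train(
    {"objective": "binary", "num_leaves": 7, "verbose": -1},
    dataset,
    num_boost_round=5,
)

# Arrow-native prediction should agree exactly with the pandas path.
np.testing.assert_array_equal(
    booster.predict(features), booster.predict(features.to_pandas())
)
```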