From 7f0de8ffaade070f7c6b8f9e6556f9dd705585b3 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 4 Dec 2023 22:39:45 -0600 Subject: [PATCH 01/11] [R-package] [c++] add tighter multithreading control, avoid global OpenMP side effects (fixes #4705, fixes #5102) --- .ci/lint-cpp.sh | 1 - CMakeLists.txt | 1 + R-package/NAMESPACE | 2 + R-package/R/aliases.R | 1 + R-package/R/lgb.Booster.R | 12 +++++ R-package/R/lgb.Dataset.R | 22 ++++++++ R-package/R/lgb.cv.R | 2 + R-package/R/lgb.importance.R | 2 + R-package/R/lgb.interprete.R | 2 + R-package/R/lgb.model.dt.tree.R | 2 + R-package/R/lgb.plot.importance.R | 2 + R-package/R/lgb.plot.interpretation.R | 2 + R-package/R/lgb.restore_handle.R | 4 ++ R-package/R/lgb.train.R | 2 + R-package/R/multithreading.R | 51 ++++++++++++++++++ R-package/R/readRDS.lgb.Booster.R | 2 + R-package/R/saveRDS.lgb.Booster.R | 2 + R-package/man/dim.Rd | 2 + R-package/man/dimnames.lgb.Dataset.Rd | 2 + R-package/man/getLGBMThreads.Rd | 26 +++++++++ R-package/man/get_field.Rd | 2 + R-package/man/lgb.Dataset.Rd | 2 + R-package/man/lgb.Dataset.construct.Rd | 2 + R-package/man/lgb.Dataset.create.valid.Rd | 2 + R-package/man/lgb.Dataset.save.Rd | 2 + R-package/man/lgb.Dataset.set.categorical.Rd | 2 + R-package/man/lgb.Dataset.set.reference.Rd | 2 + R-package/man/lgb.configure_fast_predict.Rd | 2 + R-package/man/lgb.cv.Rd | 2 + R-package/man/lgb.dump.Rd | 2 + R-package/man/lgb.get.eval.result.Rd | 2 + R-package/man/lgb.importance.Rd | 2 + R-package/man/lgb.interprete.Rd | 2 + R-package/man/lgb.load.Rd | 2 + R-package/man/lgb.model.dt.tree.Rd | 2 + R-package/man/lgb.plot.importance.Rd | 2 + R-package/man/lgb.plot.interpretation.Rd | 2 + R-package/man/lgb.restore_handle.Rd | 4 ++ R-package/man/lgb.save.Rd | 2 + R-package/man/lgb.train.Rd | 2 + R-package/man/predict.lgb.Booster.Rd | 2 + R-package/man/readRDS.lgb.Booster.Rd | 2 + R-package/man/saveRDS.lgb.Booster.Rd | 2 + R-package/man/setLGBMThreads.Rd | 32 +++++++++++ R-package/man/set_field.Rd | 2 + 
R-package/man/slice.Rd | 2 + R-package/src/Makevars.in | 1 + R-package/src/Makevars.win.in | 1 + R-package/src/lightgbm_R.cpp | 19 +++++++ R-package/src/lightgbm_R.h | 19 +++++++ .../tests/testthat/test_multithreading.R | 16 ++++++ R-package/vignettes/basic_walkthrough.Rmd | 6 +++ include/LightGBM/c_api.h | 14 +++++ include/LightGBM/utils/openmp_wrapper.h | 54 +++++++++++++------ src/c_api.cpp | 17 ++++++ src/utils/openmp_wrapper.cpp | 21 ++++++++ tests/c_api_test/test_.py | 33 ++++++++++++ 57 files changed, 411 insertions(+), 16 deletions(-) create mode 100644 R-package/R/multithreading.R create mode 100644 R-package/man/getLGBMThreads.Rd create mode 100644 R-package/man/setLGBMThreads.Rd create mode 100644 R-package/tests/testthat/test_multithreading.R create mode 100644 src/utils/openmp_wrapper.cpp diff --git a/.ci/lint-cpp.sh b/.ci/lint-cpp.sh index 56489ecf3325..e8b0d7f60bae 100755 --- a/.ci/lint-cpp.sh +++ b/.ci/lint-cpp.sh @@ -31,7 +31,6 @@ get_omp_pragmas_without_num_threads() { --include='*.hpp' \ 'pragma omp parallel' \ | grep -v ' num_threads' \ - | grep -v 'openmp_wrapper.h' } PROBLEMATIC_LINES=$( get_omp_pragmas_without_num_threads diff --git a/CMakeLists.txt b/CMakeLists.txt index 50b3cbaaf189..aef95871e4cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -432,6 +432,7 @@ file( src/objective/*.cpp src/network/*.cpp src/treelearner/*.cpp + src/utils/*.cpp if(USE_CUDA) src/treelearner/*.cu src/boosting/cuda/*.cpp diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index e07af84d8824..ab987d0593eb 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -9,6 +9,7 @@ S3method(print,lgb.Booster) S3method(set_field,lgb.Dataset) S3method(slice,lgb.Dataset) S3method(summary,lgb.Booster) +export(getLGBMthreads) export(get_field) export(lgb.Dataset) export(lgb.Dataset.construct) @@ -35,6 +36,7 @@ export(lgb.train) export(lightgbm) export(readRDS.lgb.Booster) export(saveRDS.lgb.Booster) +export(setLGBMthreads) export(set_field) export(slice) 
import(methods) diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R index d381b70c5b8e..e1014143661e 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -24,6 +24,7 @@ , "max_bin" , "max_bin_by_feature" , "min_data_in_bin" + , "num_threads" , "pre_partition" , "precise_float_parser" , "two_round" diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 17da9545ae19..4437c6fa552e 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -917,6 +917,8 @@ NULL #' the factor levels not being present in the output. #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1082,6 +1084,8 @@ predict.lgb.Booster <- function(object, #' \link{predict.lgb.Booster}. #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' library(lightgbm) #' data(mtcars) #' X <- as.matrix(mtcars[, -1L]) @@ -1224,6 +1228,8 @@ summary.lgb.Booster <- function(object, ...) 
{ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1289,6 +1295,8 @@ lgb.load <- function(filename = NULL, model_str = NULL) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -1346,6 +1354,8 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { #' @examples #' \donttest{ #' library(lightgbm) +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1396,6 +1406,8 @@ lgb.dump <- function(booster, num_iteration = NULL) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' # train a regression model #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index ddc338d2cae3..ff9b0b4fa38a 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -780,6 +780,8 @@ Dataset <- R6::R6Class( #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -837,6 +839,8 @@ lgb.Dataset <- function(data, #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -913,6 +917,8 @@ lgb.Dataset.create.valid <- function(dataset, #' #' @examples #' \donttest{ +#' 
\dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -942,6 +948,8 @@ lgb.Dataset.construct <- function(dataset) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -975,6 +983,8 @@ dim.lgb.Dataset <- function(x) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1045,6 +1055,8 @@ dimnames.lgb.Dataset <- function(x) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1089,6 +1101,8 @@ slice.lgb.Dataset <- function(dataset, idxset) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1138,6 +1152,8 @@ get_field.lgb.Dataset <- function(dataset, field_name) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1177,6 +1193,8 @@ set_field.lgb.Dataset <- function(dataset, field_name, data) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' 
dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1207,6 +1225,8 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' # create training Dataset #' data(agaricus.train, package ="lightgbm") #' train <- agaricus.train @@ -1240,6 +1260,8 @@ lgb.Dataset.set.reference <- function(dataset, reference) { #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 11768c5bfa0b..0545fbf71899 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -51,6 +51,8 @@ CVBooster <- R6::R6Class( #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index 27efb17392df..7c76131f4f53 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -14,6 +14,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.interprete.R b/R-package/R/lgb.interprete.R index 976315262792..8f93d45429f1 100644 --- a/R-package/R/lgb.interprete.R +++ b/R-package/R/lgb.interprete.R @@ -17,6 +17,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' Logit <- function(x) log(x / (1.0 - x)) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train diff --git 
a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index 5d994accfa7f..bf4562e41018 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -29,6 +29,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.plot.importance.R b/R-package/R/lgb.plot.importance.R index fc59ebd0efec..b8a90ca158ae 100644 --- a/R-package/R/lgb.plot.importance.R +++ b/R-package/R/lgb.plot.importance.R @@ -19,6 +19,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index 8b95371eb3c2..97650f30a7d3 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ b/R-package/R/lgb.plot.interpretation.R @@ -16,6 +16,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' Logit <- function(x) { #' log(x / (1.0 - x)) #' } diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index 0ed25ef26f3d..8a24cc628ca9 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -16,7 +16,10 @@ #' @return \code{lgb.Booster} (the same `model` object that was passed as input, invisibly). #' @seealso \link{lgb.make_serializable}, \link{lgb.drop_serialized}. 
#' @examples +#' \donttest{ #' library(lightgbm) +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data("agaricus.train") #' model <- lightgbm( #' agaricus.train$data @@ -33,6 +36,7 @@ #' model_new$check_null_handle() #' lgb.restore_handle(model_new) #' model_new$check_null_handle() +#' } #' @export lgb.restore_handle <- function(model) { if (!.is_Booster(x = model)) { diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index 6979558d22cd..8a299fb6b8ac 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -19,6 +19,8 @@ #' #' @examples #' \donttest{ +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/multithreading.R b/R-package/R/multithreading.R new file mode 100644 index 000000000000..a8d6b51a8968 --- /dev/null +++ b/R-package/R/multithreading.R @@ -0,0 +1,51 @@ +#' @name setLGBMThreads +#' @title Set maximum number of threads used by LightGBM +#' @description LightGBM attempts to speed up many operations by using multi-threading. +#' The number of threads used in those operations can be controlled via the +#' \code{num_threads} parameter passed through \code{params} to functions like +#' \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing +#' a model from a text file) are done via code paths that don't explicitly accept thread-control +#' configuration. +#' +#' Use this function to set the maximum number of threads LightGBM will use for such operations. +#' +#' This function affects all LightGBM operations in the same process. +#' +#' So, for example, if you call \code{setLGBMthreads(4)}, no other multi-threaded LightGBM +#' operation in the same process will use more than 4 threads. +#' +#' Call \code{setLGBMthreads(-1)} to remove this limitation. 
+#' @param num_threads maximum number of threads to be used by LightGBM in multi-threaded operations +#' @return NULL +#' @seealso \link{getLGBMthreads} +#' @export +setLGBMthreads <- function(num_threads) { + .Call( + LGBM_SetMaxThreads_R, + num_threads + ) + return(invisible(NULL)) +} + +#' @name getLGBMThreads +#' @title Get default number of threads used by LightGBM +#' @description LightGBM attempts to speed up many operations by using multi-threading. +#' The number of threads used in those operations can be controlled via the +#' \code{num_threads} parameter passed through \code{params} to functions like +#' \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing +#' a model from a text file) are done via code paths that don't explicitly accept thread-control +#' configuration. +#' +#' Use this function to see the default number of threads LightGBM will use for such operations. +#' @return number of threads as an integer. \code{-1} means that in situations where parameter \code{num_threads} is +#' not explicitly supplied, LightGBM will choose a number of threads to use automatically. 
+#' @seealso \link{setLGBMthreads} +#' @export +getLGBMthreads <- function() { + out <- 0L + .Call( + LGBM_GetMaxThreads_R, + out + ) + return(out) +} diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index a8abac642c24..69e954fc75f1 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -12,6 +12,8 @@ #' @examples #' \donttest{ #' library(lightgbm) +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index d75056e69734..d227d75eb90d 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -22,6 +22,8 @@ #' @examples #' \donttest{ #' library(lightgbm) +#' \dontshow{setLGBMthreads(2L)} +#' \dontshow{data.table::setDTthreads(1L)} #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/dim.Rd b/R-package/man/dim.Rd index 94ca192d8291..69332d0ec397 100644 --- a/R-package/man/dim.Rd +++ b/R-package/man/dim.Rd @@ -21,6 +21,8 @@ be directly used with an \code{lgb.Dataset} object. 
} \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/dimnames.lgb.Dataset.Rd b/R-package/man/dimnames.lgb.Dataset.Rd index ec01a04f607b..85f2085f1d77 100644 --- a/R-package/man/dimnames.lgb.Dataset.Rd +++ b/R-package/man/dimnames.lgb.Dataset.Rd @@ -28,6 +28,8 @@ Since row names are irrelevant, it is recommended to use \code{colnames} directl } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/getLGBMThreads.Rd b/R-package/man/getLGBMThreads.Rd new file mode 100644 index 000000000000..21af4f4849d4 --- /dev/null +++ b/R-package/man/getLGBMThreads.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/multithreading.R +\name{getLGBMThreads} +\alias{getLGBMThreads} +\alias{getLGBMthreads} +\title{Get default number of threads used by LightGBM} +\usage{ +getLGBMthreads() +} +\value{ +number of threads as an integer. \code{-1} means that in situations where parameter \code{num_threads} is + not explicitly supplied, LightGBM will choose a number of threads to use automatically. +} +\description{ +LightGBM attempts to speed up many operations by using multi-threading. + The number of threads used in those operations can be controlled via the + \code{num_threads} parameter passed through \code{params} to functions like + \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing + a model from a text file) are done via code paths that don't explicitly accept thread-control + configuration. + + Use this function to see the default number of threads LightGBM will use for such operations. 
+} +\seealso{ +\link{setLGBMthreads} +} diff --git a/R-package/man/get_field.Rd b/R-package/man/get_field.Rd index 1b6692fcf807..e2562cc21364 100644 --- a/R-package/man/get_field.Rd +++ b/R-package/man/get_field.Rd @@ -32,6 +32,8 @@ Get one attribute of a \code{lgb.Dataset} } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.Rd b/R-package/man/lgb.Dataset.Rd index 4895600ff922..2605657b060a 100644 --- a/R-package/man/lgb.Dataset.Rd +++ b/R-package/man/lgb.Dataset.Rd @@ -65,6 +65,8 @@ Construct \code{lgb.Dataset} object from dense matrix, sparse matrix } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.construct.Rd b/R-package/man/lgb.Dataset.construct.Rd index 97c9e7887602..e400e0a5f8d5 100644 --- a/R-package/man/lgb.Dataset.construct.Rd +++ b/R-package/man/lgb.Dataset.construct.Rd @@ -17,6 +17,8 @@ Construct Dataset explicitly } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.create.valid.Rd b/R-package/man/lgb.Dataset.create.valid.Rd index ab8ca753c2b9..fc50dff19986 100644 --- a/R-package/man/lgb.Dataset.create.valid.Rd +++ b/R-package/man/lgb.Dataset.create.valid.Rd @@ -48,6 +48,8 @@ Construct validation data according to training data } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff 
--git a/R-package/man/lgb.Dataset.save.Rd b/R-package/man/lgb.Dataset.save.Rd index 5ea38227ba66..b03c2c5e0ac5 100644 --- a/R-package/man/lgb.Dataset.save.Rd +++ b/R-package/man/lgb.Dataset.save.Rd @@ -20,6 +20,8 @@ Please note that \code{init_score} is not saved in binary file. } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.set.categorical.Rd b/R-package/man/lgb.Dataset.set.categorical.Rd index 26eb10770e47..5dfcc9a771e8 100644 --- a/R-package/man/lgb.Dataset.set.categorical.Rd +++ b/R-package/man/lgb.Dataset.set.categorical.Rd @@ -22,6 +22,8 @@ Set the categorical features of an \code{lgb.Dataset} object. Use this function } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.set.reference.Rd b/R-package/man/lgb.Dataset.set.reference.Rd index 349b0b22913e..a4efbfac5962 100644 --- a/R-package/man/lgb.Dataset.set.reference.Rd +++ b/R-package/man/lgb.Dataset.set.reference.Rd @@ -19,6 +19,8 @@ If you want to use validation data, you should set reference to training data } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} # create training Dataset data(agaricus.train, package ="lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.configure_fast_predict.Rd b/R-package/man/lgb.configure_fast_predict.Rd index 39fe6afa6b18..e02600451df5 100644 --- a/R-package/man/lgb.configure_fast_predict.Rd +++ b/R-package/man/lgb.configure_fast_predict.Rd @@ -114,6 +114,8 @@ Calling this function multiple times with different parameters might not overrid } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} 
+\dontshow{data.table::setDTthreads(1L)} library(lightgbm) data(mtcars) X <- as.matrix(mtcars[, -1L]) diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index 555cb11c7bb3..7ea2928c6166 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -152,6 +152,8 @@ Cross validation logic used by LightGBM \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.dump.Rd b/R-package/man/lgb.dump.Rd index f4e90242fd75..39f0e3018ac7 100644 --- a/R-package/man/lgb.dump.Rd +++ b/R-package/man/lgb.dump.Rd @@ -20,6 +20,8 @@ Dump LightGBM model to json \examples{ \donttest{ library(lightgbm) +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.get.eval.result.Rd b/R-package/man/lgb.get.eval.result.Rd index 9c2293a0f909..0dc7eb0845c3 100644 --- a/R-package/man/lgb.get.eval.result.Rd +++ b/R-package/man/lgb.get.eval.result.Rd @@ -33,6 +33,8 @@ Given a \code{lgb.Booster}, return evaluation results for a } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} # train a regression model data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd index 89a3d4e6b5b7..79cb82f5d8ef 100644 --- a/R-package/man/lgb.importance.Rd +++ b/R-package/man/lgb.importance.Rd @@ -25,6 +25,8 @@ Creates a \code{data.table} of feature importances in a model. 
} \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.interprete.Rd b/R-package/man/lgb.interprete.Rd index c1166b2c1cc9..3acc27955c46 100644 --- a/R-package/man/lgb.interprete.Rd +++ b/R-package/man/lgb.interprete.Rd @@ -30,6 +30,8 @@ Computes feature contribution components of rawscore prediction. } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} Logit <- function(x) log(x / (1.0 - x)) data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.load.Rd b/R-package/man/lgb.load.Rd index c1a00a20974b..f145db5a245e 100644 --- a/R-package/man/lgb.load.Rd +++ b/R-package/man/lgb.load.Rd @@ -20,6 +20,8 @@ Load LightGBM takes in either a file path or model string. } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index 4d02ede9a001..60ef8cdac133 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -40,6 +40,8 @@ Parse a LightGBM model json dump into a \code{data.table} structure. } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.plot.importance.Rd b/R-package/man/lgb.plot.importance.Rd index 302f46460e3f..bdf354da0385 100644 --- a/R-package/man/lgb.plot.importance.Rd +++ b/R-package/man/lgb.plot.importance.Rd @@ -38,6 +38,8 @@ Features are shown ranked in a decreasing importance order. 
} \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.plot.interpretation.Rd b/R-package/man/lgb.plot.interpretation.Rd index a914071e896f..6f168e120a4e 100644 --- a/R-package/man/lgb.plot.interpretation.Rd +++ b/R-package/man/lgb.plot.interpretation.Rd @@ -35,6 +35,8 @@ contribution of a feature. Features are shown ranked in a decreasing contributio } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} Logit <- function(x) { log(x / (1.0 - x)) } diff --git a/R-package/man/lgb.restore_handle.Rd b/R-package/man/lgb.restore_handle.Rd index 95cbdc64485d..37922c077642 100644 --- a/R-package/man/lgb.restore_handle.Rd +++ b/R-package/man/lgb.restore_handle.Rd @@ -27,7 +27,10 @@ function. If you wish to make fast single-row predictions using a \code{lgb.Boos call \link{lgb.configure_fast_predict} on the loaded \code{lgb.Booster} object. } \examples{ +\donttest{ library(lightgbm) +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data("agaricus.train") model <- lightgbm( agaricus.train$data @@ -45,6 +48,7 @@ model_new$check_null_handle() lgb.restore_handle(model_new) model_new$check_null_handle() } +} \seealso{ \link{lgb.make_serializable}, \link{lgb.drop_serialized}. 
} diff --git a/R-package/man/lgb.save.Rd b/R-package/man/lgb.save.Rd index efd110c7d816..62ec0ed462f6 100644 --- a/R-package/man/lgb.save.Rd +++ b/R-package/man/lgb.save.Rd @@ -21,6 +21,8 @@ Save LightGBM model } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 0f2961edc415..557c85b7f9dc 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -130,6 +130,8 @@ Low-level R interface to train a LightGBM model. Unlike \code{\link{lightgbm}}, \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/predict.lgb.Booster.Rd b/R-package/man/predict.lgb.Booster.Rd index 2df13b9bc374..bcb2f3f980fb 100644 --- a/R-package/man/predict.lgb.Booster.Rd +++ b/R-package/man/predict.lgb.Booster.Rd @@ -121,6 +121,8 @@ If the model object has been configured for fast single-row predictions through } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd index 6a8e4c80ca91..0a144434cd36 100644 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -23,6 +23,8 @@ Calls \code{readRDS} in what is expected to be a serialized \code{lgb.Booster} o \examples{ \donttest{ library(lightgbm) +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git 
a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd index a8664243dce2..b9b34e1fd021 100644 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -46,6 +46,8 @@ Calls \code{saveRDS} on an \code{lgb.Booster} object, making it serializable bef \examples{ \donttest{ library(lightgbm) +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/setLGBMThreads.Rd b/R-package/man/setLGBMThreads.Rd new file mode 100644 index 000000000000..53336fc2548e --- /dev/null +++ b/R-package/man/setLGBMThreads.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/multithreading.R +\name{setLGBMThreads} +\alias{setLGBMThreads} +\alias{setLGBMthreads} +\title{Set maximum number of threads used by LightGBM} +\usage{ +setLGBMthreads(num_threads) +} +\arguments{ +\item{num_threads}{maximum number of threads to be used by LightGBM in multi-threaded operations} +} +\description{ +LightGBM attempts to speed up many operations by using multi-threading. + The number of threads used in those operations can be controlled via the + \code{num_threads} parameter passed through \code{params} to functions like + \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing + a model from a text file) are done via code paths that don't explicitly accept thread-control + configuration. + + Use this function to set the maximum number of threads LightGBM will use for such operations. + + This function affects all LightGBM operations in the same process. + + So, for example, if you call \code{setLGBMthreads(4)}, no other multi-threaded LightGBM + operation in the same process will use more than 4 threads. + + Call \code{setLGBMthreads(-1)} to remove this limitation. 
+} +\seealso{ +\link{getLGBMthreads} +} diff --git a/R-package/man/set_field.Rd b/R-package/man/set_field.Rd index f9901e27eefd..2ceebfb87753 100644 --- a/R-package/man/set_field.Rd +++ b/R-package/man/set_field.Rd @@ -34,6 +34,8 @@ Set one attribute of a \code{lgb.Dataset} } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd index 1d7bec08de0f..a65809a239d8 100644 --- a/R-package/man/slice.Rd +++ b/R-package/man/slice.Rd @@ -23,6 +23,8 @@ Get a new \code{lgb.Dataset} containing the specified rows of } \examples{ \donttest{ +\dontshow{setLGBMthreads(2L)} +\dontshow{data.table::setDTthreads(1L)} data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index ba9ef054bfab..c04263f62c1c 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -53,5 +53,6 @@ OBJECTS = \ treelearner/serial_tree_learner.o \ treelearner/tree_learner.o \ treelearner/voting_parallel_tree_learner.o \ + utils/openmp_wrapper.o \ c_api.o \ lightgbm_R.o diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in index 14f5afde002f..86d56fecdf34 100644 --- a/R-package/src/Makevars.win.in +++ b/R-package/src/Makevars.win.in @@ -54,5 +54,6 @@ OBJECTS = \ treelearner/serial_tree_learner.o \ treelearner/tree_learner.o \ treelearner/voting_parallel_tree_learner.o \ + utils/openmp_wrapper.o \ c_api.o \ lightgbm_R.o diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index 3ae7a98d8537..4799f8540497 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -1212,6 +1212,23 @@ SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) { R_API_END(); } +SEXP LGBM_GetMaxThreads_R(SEXP out) { + 
R_API_BEGIN(); + int num_threads; + CHECK_CALL(LGBM_GetMaxThreads(&num_threads)); + INTEGER(out)[0] = num_threads; + return R_NilValue; + R_API_END(); +} + +SEXP LGBM_SetMaxThreads_R(SEXP num_threads) { + R_API_BEGIN(); + int new_num_threads = Rf_asInteger(num_threads); + CHECK_CALL(LGBM_SetMaxThreads(new_num_threads)); + return R_NilValue; + R_API_END(); +} + // .Call() calls static const R_CallMethodDef CallEntries[] = { {"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1}, @@ -1268,6 +1285,8 @@ static const R_CallMethodDef CallEntries[] = { {"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3}, {"LGBM_NullBoosterHandleError_R" , (DL_FUNC) &LGBM_NullBoosterHandleError_R , 0}, {"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0}, + {"LGBM_GetMaxThreads_R" , (DL_FUNC) &LGBM_GetMaxThreads_R , 1}, + {"LGBM_SetMaxThreads_R" , (DL_FUNC) &LGBM_SetMaxThreads_R , 1}, {NULL, NULL, 0} }; diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h index 7141a06a207c..4f0407e8f2ec 100644 --- a/R-package/src/lightgbm_R.h +++ b/R-package/src/lightgbm_R.h @@ -850,4 +850,23 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R( */ LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R(); +/*! +* \brief Get current maximum number of threads used by LightGBM routines in this process. +* \param[out] out current maximum number of threads used by LightGBM. -1 means defaulting to omp_get_max_threads(). +* \return R NULL value +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_GetMaxThreads_R( + SEXP out +); + + +/*! +* \brief Set maximum number of threads used by LightGBM routines in this process. +* \param num_threads maximum number of threads used by LightGBM. -1 means defaulting to omp_get_max_threads().
+* \return R NULL value +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_SetMaxThreads_R( + SEXP num_threads +); + #endif // LIGHTGBM_R_H_ diff --git a/R-package/tests/testthat/test_multithreading.R b/R-package/tests/testthat/test_multithreading.R new file mode 100644 index 000000000000..f4157f52e432 --- /dev/null +++ b/R-package/tests/testthat/test_multithreading.R @@ -0,0 +1,16 @@ +test_that("getLGBMthreads() and setLGBMthreads() work as expected", { + # works with integer input + setLGBMthreads(2L) + expect_null(ret) + expect_equal(getLGBMthreads(), 2L) + + # works with float input + ret <- setLGBMthreads(1.0) + expect_null(ret) + expect_equal(getLGBMthreads(), 1L) + + # setting to any negative number sets max threads to -1 + ret <- setLGBMthreads(-312L) + expect_null(ret) + expect_equall(getLGBMthreads(), -1L) +}) diff --git a/R-package/vignettes/basic_walkthrough.Rmd b/R-package/vignettes/basic_walkthrough.Rmd index d7aaf676f386..82bd6957640c 100644 --- a/R-package/vignettes/basic_walkthrough.Rmd +++ b/R-package/vignettes/basic_walkthrough.Rmd @@ -27,6 +27,12 @@ Welcome to the world of [LightGBM](https://lightgbm.readthedocs.io/en/latest/), library(lightgbm) ``` +```{r, include=FALSE} +# limit number of threads used, to be respectful of CRAN's resources when it checks this vignette +data.table::setDTthreads(1L) +setLGBMthreads(2L) +``` + This vignette will guide you through its basic usage. It will show how to build a simple binary classification model based on a subset of the `bank` dataset (Moro, Cortez, and Rita 2014). You will use the two input features "age" and "balance" to predict whether a client has subscribed a term deposit. ## The dataset diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index ada2e4109638..397005477a5c 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -1561,6 +1561,20 @@ LIGHTGBM_C_EXPORT int LGBM_NetworkInitWithFunctions(int num_machines, void* reduce_scatter_ext_fun, void* allgather_ext_fun); +/*! 
+ * \brief Set maximum number of threads used by LightGBM routines in this process. + * \param num_threads maximum number of threads used by LightGBM. -1 means defaulting to omp_get_max_threads(). + * \return 0 when succeed, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_SetMaxThreads(int num_threads); + +/*! + * \brief Get current maximum number of threads used by LightGBM routines in this process. + * \param[out] out current maximum number of threads used by LightGBM. -1 means defaulting to omp_get_max_threads(). + * \return 0 when succeed, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_GetMaxThreads(int* out); + #if !defined(__cplusplus) && (!defined(__STDC__) || (__STDC_VERSION__ < 199901L)) /*! \brief Inline specifier no-op in C using standards before C99. */ #define INLINE_FUNCTION diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h index a337fc353b75..deeb040d3697 100644 --- a/include/LightGBM/utils/openmp_wrapper.h +++ b/include/LightGBM/utils/openmp_wrapper.h @@ -7,6 +7,7 @@ #ifdef _OPENMP +#include #include #include @@ -17,23 +18,47 @@ #include #include +// this can only be changed by LGBM_SetMaxThreads() +LIGHTGBM_EXTERN_C int LGBM_MAX_NUM_THREADS; + +// this is modified by OMP_SET_NUM_THREADS(), for example +// by passing num_thread through params +LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS; + +/* + Get number of threads to use in OpenMP parallel regions. + + By default, this will return the result of omp_get_max_threads(), + which is OpenMP-implementation dependent but generally can be controlled + by environment variable OMP_NUM_THREADS.
+ + ref: + - https://www.openmp.org/spec-html/5.0/openmpsu112.html + - https://gcc.gnu.org/onlinedocs/libgomp/omp_005fget_005fmax_005fthreads.html +*/ inline int OMP_NUM_THREADS() { - int ret = 1; -#pragma omp parallel -#pragma omp master - { ret = omp_get_num_threads(); } - return ret; -} + int default_num_threads; -inline void OMP_SET_NUM_THREADS(int num_threads) { - static const int default_omp_num_threads = OMP_NUM_THREADS(); - if (num_threads > 0) { - omp_set_num_threads(num_threads); + if (LGBM_DEFAULT_NUM_THREADS > 0) { + // if LightGBM-specific default has been set, ignore OpenMP-global config + default_num_threads = LGBM_DEFAULT_NUM_THREADS; } else { - omp_set_num_threads(default_omp_num_threads); + // otherwise, default to OpenMP-global config + #pragma omp single + { default_num_threads = omp_get_max_threads(); } + } + + // ensure that if LGBM_SetMaxThreads() was ever called, LightGBM doesn't + // use more than that many threads + if (LGBM_MAX_NUM_THREADS > 0 && default_num_threads > LGBM_MAX_NUM_THREADS) { + return LGBM_MAX_NUM_THREADS; } + + return default_num_threads; } +void OMP_SET_NUM_THREADS(int num_threads); + class ThreadExceptionHelper { public: ThreadExceptionHelper() { @@ -102,12 +127,11 @@ class ThreadExceptionHelper { /** Fall here if no OPENMP support, so just simulate a single thread running. 
All #pragma omp should be ignored by the compiler **/ - inline void omp_set_num_threads(int) __GOMP_NOTHROW {} // NOLINT (no cast done here) - inline void OMP_SET_NUM_THREADS(int) __GOMP_NOTHROW {} - inline int omp_get_num_threads() __GOMP_NOTHROW {return 1;} - inline int omp_get_max_threads() __GOMP_NOTHROW {return 1;} + void OMP_SET_NUM_THREADS(int) __GOMP_NOTHROW {} inline int omp_get_thread_num() __GOMP_NOTHROW {return 0;} inline int OMP_NUM_THREADS() __GOMP_NOTHROW { return 1; } + LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS = -1; + LIGHTGBM_EXTERN_C int LGBM_MAX_NUM_THREADS = -1; #ifdef __cplusplus } // extern "C" #endif diff --git a/src/c_api.cpp b/src/c_api.cpp index baf934db42b1..dbe5425bd0aa 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -2699,6 +2699,23 @@ int LGBM_NetworkInitWithFunctions(int num_machines, int rank, API_END(); } +int LGBM_SetMaxThreads(int num_threads) { + API_BEGIN(); + if (num_threads <= 0) { + LGBM_MAX_NUM_THREADS = -1; + } else { + LGBM_MAX_NUM_THREADS = num_threads; + } + API_END(); +} + +int LGBM_GetMaxThreads(int* out) { + API_BEGIN(); + *out = LGBM_MAX_NUM_THREADS; + API_END(); +} + + // ---- start of some help functions diff --git a/src/utils/openmp_wrapper.cpp b/src/utils/openmp_wrapper.cpp new file mode 100644 index 000000000000..be308aeae47f --- /dev/null +++ b/src/utils/openmp_wrapper.cpp @@ -0,0 +1,21 @@ +/*! + * Copyright (c) 2023 Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See LICENSE file in the project root for license information. 
+ */ +#ifdef _OPENMP + +#include + +int LGBM_MAX_NUM_THREADS = -1; + +int LGBM_DEFAULT_NUM_THREADS = -1; + +void OMP_SET_NUM_THREADS(int num_threads) { + if (num_threads <= 0) { + LGBM_DEFAULT_NUM_THREADS = -1; + } else { + LGBM_DEFAULT_NUM_THREADS = num_threads; + } +} + +#endif // _OPENMP diff --git a/tests/c_api_test/test_.py b/tests/c_api_test/test_.py index 4bb76e4aba19..6cfec1c445fc 100644 --- a/tests/c_api_test/test_.py +++ b/tests/c_api_test/test_.py @@ -247,3 +247,36 @@ def test_booster(): c_str(''), c_str('preb.txt')) LIB.LGBM_BoosterFree(booster2) + + +def test_max_thread_control(): + # at initialization, should be -1 + num_threads = ctypes.c_int(0) + ret = LIB.LGBM_GetMaxThreads( + ctypes.byref(num_threads) + ) + assert ret == 0 + assert num_threads.value == -1 + + # updating that value through the C API should work + ret = LIB.LGBM_SetMaxThreads( + ctypes.c_int(6) + ) + assert ret == 0 + + ret = LIB.LGBM_GetMaxThreads( + ctypes.byref(num_threads) + ) + assert ret == 0 + assert num_threads.value == 6 + + # resetting to any negative number should set it to -1 + ret = LIB.LGBM_SetMaxThreads( + ctypes.c_int(-123) + ) + assert ret == 0 + ret = LIB.LGBM_GetMaxThreads( + ctypes.byref(num_threads) + ) + assert ret == 0 + assert num_threads.value == -1 From b62e46d77eabad3d8fa61b3d37cf369d13742cc4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 4 Dec 2023 23:33:14 -0600 Subject: [PATCH 02/11] fix gcc -Wmaybe-uninitialized warning --- .ci/lint-cpp.sh | 2 +- R-package/tests/testthat/test_multithreading.R | 2 +- include/LightGBM/utils/openmp_wrapper.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.ci/lint-cpp.sh b/.ci/lint-cpp.sh index e8b0d7f60bae..2d91f8e85f00 100755 --- a/.ci/lint-cpp.sh +++ b/.ci/lint-cpp.sh @@ -30,7 +30,7 @@ get_omp_pragmas_without_num_threads() { --include='*.h' \ --include='*.hpp' \ 'pragma omp parallel' \ - | grep -v ' num_threads' \ + | grep -v ' num_threads' } PROBLEMATIC_LINES=$( 
get_omp_pragmas_without_num_threads diff --git a/R-package/tests/testthat/test_multithreading.R b/R-package/tests/testthat/test_multithreading.R index f4157f52e432..46a914080785 100644 --- a/R-package/tests/testthat/test_multithreading.R +++ b/R-package/tests/testthat/test_multithreading.R @@ -1,6 +1,6 @@ test_that("getLGBMthreads() and setLGBMthreads() work as expected", { # works with integer input - setLGBMthreads(2L) + ret <- setLGBMthreads(2L) expect_null(ret) expect_equal(getLGBMthreads(), 2L) diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h index deeb040d3697..740f2289ed45 100644 --- a/include/LightGBM/utils/openmp_wrapper.h +++ b/include/LightGBM/utils/openmp_wrapper.h @@ -37,7 +37,7 @@ LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS; - https://gcc.gnu.org/onlinedocs/libgomp/omp_005fget_005fmax_005fthreads.html */ inline int OMP_NUM_THREADS() { - int default_num_threads; + int default_num_threads = 1; if (LGBM_DEFAULT_NUM_THREADS > 0) { // if LightGBM-specific default has been set, ignore OpenMP-global config From 651dbc86cf23e32730ba15813f21e1946685b078 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 5 Dec 2023 08:38:04 -0600 Subject: [PATCH 03/11] Update R-package/tests/testthat/test_multithreading.R --- R-package/tests/testthat/test_multithreading.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/tests/testthat/test_multithreading.R b/R-package/tests/testthat/test_multithreading.R index 46a914080785..e2f3169627a2 100644 --- a/R-package/tests/testthat/test_multithreading.R +++ b/R-package/tests/testthat/test_multithreading.R @@ -12,5 +12,5 @@ test_that("getLGBMthreads() and setLGBMthreads() work as expected", { # setting to any negative number sets max threads to -1 ret <- setLGBMthreads(-312L) expect_null(ret) - expect_equall(getLGBMthreads(), -1L) + expect_equal(getLGBMthreads(), -1L) }) From 600f9ec17723e703fc157018ca4ca9accf0b0e7e Mon Sep 17 00:00:00 2001 From: James Lamb 
Date: Tue, 5 Dec 2023 09:05:04 -0600 Subject: [PATCH 04/11] Update include/LightGBM/utils/openmp_wrapper.h --- include/LightGBM/utils/openmp_wrapper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h index 740f2289ed45..8ca4b37e3707 100644 --- a/include/LightGBM/utils/openmp_wrapper.h +++ b/include/LightGBM/utils/openmp_wrapper.h @@ -130,8 +130,8 @@ class ThreadExceptionHelper { void OMP_SET_NUM_THREADS(int) __GOMP_NOTHROW {} inline int omp_get_thread_num() __GOMP_NOTHROW {return 0;} inline int OMP_NUM_THREADS() __GOMP_NOTHROW { return 1; } - LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS = -1; - LIGHTGBM_EXTERN_C int LGBM_MAX_NUM_THREADS = -1; + int LGBM_DEFAULT_NUM_THREADS = -1; + int LGBM_MAX_NUM_THREADS = -1; #ifdef __cplusplus } // extern "C" #endif From c931d3c3e55aac53724989967e859c2070b2f7a7 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 5 Dec 2023 13:40:51 -0600 Subject: [PATCH 05/11] clean up files left behind from vignette-building --- build-cran-package.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build-cran-package.sh b/build-cran-package.sh index 1c8a5dfbdc48..9fa0c5877085 100755 --- a/build-cran-package.sh +++ b/build-cran-package.sh @@ -227,6 +227,7 @@ if ${BUILD_VIGNETTES} ; then rm -f ./lightgbm/src/network/*.o rm -f ./lightgbm/src/objective/*.o rm -f ./lightgbm/src/treelearner/*.o + rm -f ./lightgbm/src/utils/*.o echo "re-tarring ${TARBALL_NAME}" tar \ From 6bf188b11c6ff7cc333a6509dd0686b1ee918ba3 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 5 Dec 2023 14:29:00 -0600 Subject: [PATCH 06/11] try not inlining --- include/LightGBM/utils/openmp_wrapper.h | 21 +-------------------- src/utils/openmp_wrapper.cpp | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h index 8ca4b37e3707..8e00ecd76fe7 100644 --- 
a/include/LightGBM/utils/openmp_wrapper.h +++ b/include/LightGBM/utils/openmp_wrapper.h @@ -36,26 +36,7 @@ LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS; - https://www.openmp.org/spec-html/5.0/openmpsu112.html - https://gcc.gnu.org/onlinedocs/libgomp/omp_005fget_005fmax_005fthreads.html */ -inline int OMP_NUM_THREADS() { - int default_num_threads = 1; - - if (LGBM_DEFAULT_NUM_THREADS > 0) { - // if LightGBM-specific default has been set, ignore OpenMP-global config - default_num_threads = LGBM_DEFAULT_NUM_THREADS; - } else { - // otherwise, default to OpenMP-global config - #pragma omp single - { default_num_threads = omp_get_max_threads(); } - } - - // ensure that if LGBM_SetMaxThreads() was ever called, LightGBM doesn't - // use more than that many threads - if (LGBM_MAX_NUM_THREADS > 0 && default_num_threads > LGBM_MAX_NUM_THREADS) { - return LGBM_MAX_NUM_THREADS; - } - - return default_num_threads; -} +int OMP_NUM_THREADS(); void OMP_SET_NUM_THREADS(int num_threads); diff --git a/src/utils/openmp_wrapper.cpp b/src/utils/openmp_wrapper.cpp index be308aeae47f..cb331e024646 100644 --- a/src/utils/openmp_wrapper.cpp +++ b/src/utils/openmp_wrapper.cpp @@ -10,6 +10,27 @@ int LGBM_MAX_NUM_THREADS = -1; int LGBM_DEFAULT_NUM_THREADS = -1; +int OMP_NUM_THREADS() { + int default_num_threads = 1; + + if (LGBM_DEFAULT_NUM_THREADS > 0) { + // if LightGBM-specific default has been set, ignore OpenMP-global config + default_num_threads = LGBM_DEFAULT_NUM_THREADS; + } else { + // otherwise, default to OpenMP-global config + #pragma omp single + { default_num_threads = omp_get_max_threads(); } + } + + // ensure that if LGBM_SetMaxThreads() was ever called, LightGBM doesn't + // use more than that many threads + if (LGBM_MAX_NUM_THREADS > 0 && default_num_threads > LGBM_MAX_NUM_THREADS) { + return LGBM_MAX_NUM_THREADS; + } + + return default_num_threads; +} + void OMP_SET_NUM_THREADS(int num_threads) { if (num_threads <= 0) { LGBM_DEFAULT_NUM_THREADS = -1; From 
a8e666b1cde78a2e0b4664426e4446564cc9045d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 5 Dec 2023 15:02:15 -0600 Subject: [PATCH 07/11] more extern-ing --- include/LightGBM/utils/openmp_wrapper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h index 8e00ecd76fe7..58b57ddaf4e5 100644 --- a/include/LightGBM/utils/openmp_wrapper.h +++ b/include/LightGBM/utils/openmp_wrapper.h @@ -36,9 +36,9 @@ LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS; - https://www.openmp.org/spec-html/5.0/openmpsu112.html - https://gcc.gnu.org/onlinedocs/libgomp/omp_005fget_005fmax_005fthreads.html */ -int OMP_NUM_THREADS(); +LIGHTGBM_EXTERN_C int OMP_NUM_THREADS(); -void OMP_SET_NUM_THREADS(int num_threads); +LIGHTGBM_EXTERN_C void OMP_SET_NUM_THREADS(int num_threads); class ThreadExceptionHelper { public: From 921fecb8c2f31cfec46434bfb3d44d9c5735a67e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 5 Dec 2023 20:54:35 -0600 Subject: [PATCH 08/11] inline for the no-OpenMP case --- include/LightGBM/utils/openmp_wrapper.h | 30 +++++++++++++------------ src/utils/openmp_wrapper.cpp | 6 +++-- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h index 58b57ddaf4e5..1c77dffe1b76 100644 --- a/include/LightGBM/utils/openmp_wrapper.h +++ b/include/LightGBM/utils/openmp_wrapper.h @@ -5,26 +5,25 @@ #ifndef LIGHTGBM_OPENMP_WRAPPER_H_ #define LIGHTGBM_OPENMP_WRAPPER_H_ +#include + +// this can only be changed by LGBM_SetMaxThreads() +LIGHTGBM_EXTERN_C int LGBM_MAX_NUM_THREADS; + +// this is modified by OMP_SET_NUM_THREADS(), for example +// by passing num_thread through params +LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS; + #ifdef _OPENMP -#include #include -#include - #include #include #include #include #include -// this can only be changed by LGBM_SetMaxThreads() -LIGHTGBM_EXTERN_C int 
LGBM_MAX_NUM_THREADS; - -// this is modified by OMP_SET_NUM_THREADS(), for example -// by passing num_thread through params -LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS; - /* Get number of threads to use in OpenMP parallel regions. @@ -38,6 +37,11 @@ LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS; */ LIGHTGBM_EXTERN_C int OMP_NUM_THREADS(); +/* + Update the default number of threads that'll be used in OpenMP parallel + regions for LightGBM routines where the number of threads isn't directly + supplied. +*/ LIGHTGBM_EXTERN_C void OMP_SET_NUM_THREADS(int num_threads); class ThreadExceptionHelper { @@ -108,11 +112,9 @@ class ThreadExceptionHelper { /** Fall here if no OPENMP support, so just simulate a single thread running. All #pragma omp should be ignored by the compiler **/ - void OMP_SET_NUM_THREADS(int) __GOMP_NOTHROW {} - inline int omp_get_thread_num() __GOMP_NOTHROW {return 0;} inline int OMP_NUM_THREADS() __GOMP_NOTHROW { return 1; } - int LGBM_DEFAULT_NUM_THREADS = -1; - int LGBM_MAX_NUM_THREADS = -1; + inline void OMP_SET_NUM_THREADS(int) __GOMP_NOTHROW {} + inline int omp_get_thread_num() __GOMP_NOTHROW {return 0;} #ifdef __cplusplus } // extern "C" #endif diff --git a/src/utils/openmp_wrapper.cpp b/src/utils/openmp_wrapper.cpp index cb331e024646..fb6e661eb67c 100644 --- a/src/utils/openmp_wrapper.cpp +++ b/src/utils/openmp_wrapper.cpp @@ -2,14 +2,16 @@ * Copyright (c) 2023 Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See LICENSE file in the project root for license information.
*/ -#ifdef _OPENMP - #include int LGBM_MAX_NUM_THREADS = -1; int LGBM_DEFAULT_NUM_THREADS = -1; +#ifdef _OPENMP + +#include + int OMP_NUM_THREADS() { int default_num_threads = 1; From b068106f0b65061d4592637cd05e0c2e9eae7c98 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 5 Dec 2023 21:04:47 -0600 Subject: [PATCH 09/11] export omp.h again --- R-package/tests/testthat/helper.R | 1 + include/LightGBM/utils/openmp_wrapper.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/R-package/tests/testthat/helper.R b/R-package/tests/testthat/helper.R index 9c928c1f71d1..0777ed287405 100644 --- a/R-package/tests/testthat/helper.R +++ b/R-package/tests/testthat/helper.R @@ -11,6 +11,7 @@ # the check farm is a shared resource and will typically be running many checks simultaneously. # .LGB_MAX_THREADS <- 2L +setLGBMthreads(.LGB_MAX_THREADS) # by default, how much should results in tests be allowed to differ from hard-coded expected numbers? .LGB_NUMERIC_TOLERANCE <- 1e-6 diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h index 1c77dffe1b76..b9a8ea2982fc 100644 --- a/include/LightGBM/utils/openmp_wrapper.h +++ b/include/LightGBM/utils/openmp_wrapper.h @@ -18,6 +18,8 @@ LIGHTGBM_EXTERN_C int LGBM_DEFAULT_NUM_THREADS; #include +#include + #include #include #include @@ -112,9 +114,9 @@ class ThreadExceptionHelper { /** Fall here if no OPENMP support, so just simulate a single thread running. 
All #pragma omp should be ignored by the compiler **/ - inline int OMP_NUM_THREADS() __GOMP_NOTHROW { return 1; } inline void OMP_SET_NUM_THREADS(int) __GOMP_NOTHROW {} inline int omp_get_thread_num() __GOMP_NOTHROW {return 0;} + inline int OMP_NUM_THREADS() __GOMP_NOTHROW { return 1; } #ifdef __cplusplus } // extern "C" #endif From ebf61f1a0e01195be57d81db169a5612fedb6f32 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 5 Dec 2023 22:18:04 -0600 Subject: [PATCH 10/11] limit data.table parallelism too --- R-package/tests/testthat/helper.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R-package/tests/testthat/helper.R b/R-package/tests/testthat/helper.R index 0777ed287405..45edf40efbeb 100644 --- a/R-package/tests/testthat/helper.R +++ b/R-package/tests/testthat/helper.R @@ -13,6 +13,10 @@ .LGB_MAX_THREADS <- 2L setLGBMthreads(.LGB_MAX_THREADS) +# control data.table parallelism +# ref: https://github.com/Rdatatable/data.table/issues/5658 +data.table::setDTthreads(1L) + # by default, how much should results in tests be allowed to differ from hard-coded expected numbers? .LGB_NUMERIC_TOLERANCE <- 1e-6 From 8106f4a6c45acd450a976fd7f3066981de438854 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 5 Dec 2023 22:29:18 -0600 Subject: [PATCH 11/11] revert aliases change --- R-package/R/aliases.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R index e1014143661e..d381b70c5b8e 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -24,7 +24,6 @@ , "max_bin" , "max_bin_by_feature" , "min_data_in_bin" - , "num_threads" , "pre_partition" , "precise_float_parser" , "two_round"