From fa1c6b211e2b13dd46221aac0c02791aa8ba34d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= <765740+giordano@users.noreply.github.com> Date: Sat, 7 Sep 2024 13:04:14 +0100 Subject: [PATCH] [LinearAlgebra] Initialise number of BLAS threads with `jl_effective_threads` (#55574) This is a safer estimate than `Sys.CPU_THREADS` to avoid oversubscribing the machine when running distributed applications, or when the Julia process is constrained by external controls (`taskset`, `cgroups`, etc.). Fix #55572 --- NEWS.md | 2 ++ stdlib/LinearAlgebra/src/LinearAlgebra.jl | 4 ++-- test/threads.jl | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 95a8a51c67ac8..c12cc3c64300c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -128,6 +128,8 @@ Standard library changes between different eigendecomposition algorithms ([#49355]). * Added a generic version of the (unblocked) pivoted Cholesky decomposition (callable via `cholesky[!](A, RowMaximum())`) ([#54619]). +* The default number of BLAS threads now respects process affinity, instead of + using the total number of logical threads available on the system ([#55574]). 
#### Logging diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index be59516f086ab..27d4255fb656b 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -843,9 +843,9 @@ function __init__() # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "GOTO_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS") @static if Sys.isapple() && Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "aarch64" - BLAS.set_num_threads(max(1, Sys.CPU_THREADS)) + BLAS.set_num_threads(max(1, @ccall(jl_effective_threads()::Cint))) else - BLAS.set_num_threads(max(1, Sys.CPU_THREADS ÷ 2)) + BLAS.set_num_threads(max(1, @ccall(jl_effective_threads()::Cint) ÷ 2)) end end end diff --git a/test/threads.jl b/test/threads.jl index 2832f2a0e972c..6265368c2ac79 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -359,3 +359,18 @@ end @test jl_setaffinity(0, mask, cpumasksize) == 0 end end + +# Make sure default number of BLAS threads respects CPU affinity: issue #55572. +@testset "LinearAlgebra number of default threads" begin + if AFFINITY_SUPPORTED + allowed_cpus = findall(uv_thread_getaffinity()) + cmd = addenv(`$(Base.julia_cmd()) --startup-file=no -E 'using LinearAlgebra; BLAS.get_num_threads()'`, + # Remove all variables which could affect the default number of threads + "OPENBLAS_NUM_THREADS"=>nothing, + "GOTO_NUM_THREADS"=>nothing, + "OMP_NUM_THREADS"=>nothing) + for n in 1:min(length(allowed_cpus), 8) # Cap to 8 to avoid too many tests on large systems + @test readchomp(setcpuaffinity(cmd, allowed_cpus[1:n])) == string(max(1, n ÷ 2)) + end + end +end