From ed202beb500f14462a4186d808115e10467ad015 Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Tue, 30 Jan 2024 12:29:03 +0200 Subject: [PATCH 1/3] Remove support for `NUMA_ALLOCATOR` environment variable --- Cargo.lock | 7 ------- crates/subspace-farmer/Cargo.toml | 2 +- .../src/bin/subspace-farmer/commands/farm.rs | 10 ---------- 3 files changed, 1 insertion(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a25c193a48..3a00961ed0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2136,12 +2136,6 @@ dependencies = [ "cipher 0.4.4", ] -[[package]] -name = "cty" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35" - [[package]] name = "curve25519-dalek" version = "3.2.0" @@ -5208,7 +5202,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3979b5c37ece694f1f5e51e7ecc871fdb0f517ed04ee45f88d15d6d553cb9664" dependencies = [ "cc", - "cty", "libc", ] diff --git a/crates/subspace-farmer/Cargo.toml b/crates/subspace-farmer/Cargo.toml index 9968127221..985739ffe9 100644 --- a/crates/subspace-farmer/Cargo.toml +++ b/crates/subspace-farmer/Cargo.toml @@ -32,7 +32,7 @@ hwlocality = { version = "1.0.0-alpha.1", features = ["vendored"], optional = tr jsonrpsee = { version = "0.16.3", features = ["client"] } lru = "0.12.1" mimalloc = "0.1.39" -libmimalloc-sys = { version = "0.1.35", features = ["extended"] } +libmimalloc-sys = "0.1.35" num_cpus = "1.16.0" parity-scale-codec = "3.6.9" parking_lot = "0.12.1" diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs index ab0dd0b22a..0f39aba3e4 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs @@ -525,16 +525,6 @@ where } } - // TODO: Remove code or environment variable once identified whether it helps or not - if std::env::var("NUMA_ALLOCATOR").is_ok() && all_cpu_cores.len() > 1 { - unsafe { - libmimalloc_sys::mi_option_set( - libmimalloc_sys::mi_option_use_numa_nodes, - all_cpu_cores.len() as std::ffi::c_long, - ); - } - } - let mut plotting_delay_senders = Vec::with_capacity(disk_farms.len()); for (disk_farm_index, disk_farm) in disk_farms.into_iter().enumerate() { From faff228a48ca0f2b874c09e43bf6bb731dc4b228 Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Tue, 30 Jan 2024 12:11:06 +0200 Subject: [PATCH 2/3] Limit default number of farming threads --- .../src/bin/subspace-farmer/commands/farm.rs | 3 ++- crates/subspace-farmer/src/utils.rs | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs index 0f39aba3e4..da9be0033d 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs @@ -131,7 +131,8 @@ pub(crate) struct FarmingArgs { farm_during_initial_plotting: bool, /// Size of PER FARM thread pool used for farming (mostly for blocking I/O, but also for some /// compute-intensive operations during proving), defaults to number of logical CPUs - /// available on UMA system and number of logical CPUs in first NUMA node on NUMA system + /// available on UMA system and number of logical CPUs in first NUMA node on NUMA system, but + /// not more than 32 threads #[arg(long)] farming_thread_pool_size: Option, /// Size of one thread pool used for plotting, defaults to number of logical CPUs available diff --git a/crates/subspace-farmer/src/utils.rs b/crates/subspace-farmer/src/utils.rs index 45695f7afd..6b4b16d73f 100644 --- a/crates/subspace-farmer/src/utils.rs +++ b/crates/subspace-farmer/src/utils.rs @@ -24,6 +24,9 @@ use tracing::debug; #[cfg(feature = "numa")] use tracing::warn; +/// It doesn't make a lot of sense to have a huge number of farming threads, 32 is plenty +const MAX_DEFAULT_FARMING_THREADS: usize = 32; + /// Joins async join handle on drop pub struct AsyncJoinOnDrop { handle: Option>, @@ -202,13 +205,14 @@ pub fn recommended_number_of_farming_threads() -> usize { // Get number of CPU cores .map(|cpuset| cpuset.iter_set().count()) .find(|&count| count > 0) - .unwrap_or_else(num_cpus::get); + .unwrap_or_else(num_cpus::get) + .min(MAX_DEFAULT_FARMING_THREADS); } Err(error) => { warn!(%error, "Failed to get NUMA topology"); } } - num_cpus::get() + num_cpus::get().min(MAX_DEFAULT_FARMING_THREADS) } /// Get all cpu cores, grouped into sets according to NUMA nodes or L3 cache groups on large CPUs. From cc0aff447ac6b78b760233ee0f53e13fa33b949f Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Tue, 30 Jan 2024 12:33:14 +0200 Subject: [PATCH 3/3] Regroup and leverage all CPU cores instead of printing a warning --- .../src/bin/subspace-farmer/commands/farm.rs | 39 +++++++++++-------- crates/subspace-farmer/src/utils.rs | 21 ++++++++++ 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs index da9be0033d..2614846273 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs @@ -35,7 +35,7 @@ use subspace_farmer::utils::ss58::parse_ss58_reward_address; use subspace_farmer::utils::{ all_cpu_cores, create_plotting_thread_pool_manager, parse_cpu_cores_sets, recommended_number_of_farming_threads, run_future_in_dedicated_thread, - thread_pool_core_indices, AsyncJoinOnDrop, + thread_pool_core_indices, AsyncJoinOnDrop, CpuCoreSet, }; use subspace_farmer::{Identity, NodeClient, NodeRpcClient}; use subspace_farmer_components::plotting::PlottedSector; @@ -461,8 +461,8 @@ where None => farmer_app_info.protocol_info.max_pieces_in_sector, }; - let plotting_thread_pool_core_indices; - let replotting_thread_pool_core_indices; + let mut plotting_thread_pool_core_indices; + let mut replotting_thread_pool_core_indices; if let Some(plotting_cpu_cores) = plotting_cpu_cores { plotting_thread_pool_core_indices = parse_cpu_cores_sets(&plotting_cpu_cores) .map_err(|error| anyhow::anyhow!("Failed to parse `--plotting-cpu-cores`: {error}"))?; @@ -495,6 +495,25 @@ where } replotting_thread_pool_core_indices }; + + if plotting_thread_pool_core_indices.len() > 1 { + info!( + l3_cache_groups = %plotting_thread_pool_core_indices.len(), + "Multiple L3 cache groups detected" + ); + + if plotting_thread_pool_core_indices.len() > disk_farms.len() { + plotting_thread_pool_core_indices = + CpuCoreSet::regroup(&plotting_thread_pool_core_indices, disk_farms.len()); + replotting_thread_pool_core_indices = + CpuCoreSet::regroup(&replotting_thread_pool_core_indices, disk_farms.len()); + + info!( + farms_count = %disk_farms.len(), + "Regrouped CPU cores to match number of farms, more farms may leverage CPU more efficiently" + ); + } + } } let downloading_semaphore = Arc::new(Semaphore::new( @@ -512,20 +531,6 @@ where .map(|farming_thread_pool_size| farming_thread_pool_size.get()) .unwrap_or_else(recommended_number_of_farming_threads); - let all_cpu_cores = all_cpu_cores(); - if all_cpu_cores.len() > 1 { - info!(l3_cache_groups = %all_cpu_cores.len(), "Multiple L3 cache groups detected"); - - if all_cpu_cores.len() > disk_farms.len() { - warn!( - l3_cache_groups = %all_cpu_cores.len(), - farms_count = %disk_farms.len(), - "Too few disk farms, CPU will not be utilized fully during plotting, same number \ - of farms as L3 cache groups or more is recommended" - ); - } - } - let mut plotting_delay_senders = Vec::with_capacity(disk_farms.len()); for (disk_farm_index, disk_farm) in disk_farms.into_iter().enumerate() { diff --git a/crates/subspace-farmer/src/utils.rs b/crates/subspace-farmer/src/utils.rs index 6b4b16d73f..6c8f535c26 100644 --- a/crates/subspace-farmer/src/utils.rs +++ b/crates/subspace-farmer/src/utils.rs @@ -155,6 +155,27 @@ pub struct CpuCoreSet { } impl CpuCoreSet { + /// Regroup CPU core sets to contain at most `target_sets` sets, useful when there are many L3 + /// cache groups and not as many farms + pub fn regroup(cpu_core_sets: &[Self], target_sets: usize) -> Vec { + cpu_core_sets + // Chunk CPU core sets + .chunks(cpu_core_sets.len().div_ceil(target_sets)) + .map(|sets| Self { + // Combine CPU cores + cores: sets + .iter() + .flat_map(|set| set.cores.iter()) + .copied() + .collect(), + // Preserve topology object + #[cfg(feature = "numa")] + topology: sets[0].topology.clone(), + }) + .collect() + } + + /// Get cpu core numbers in this set pub fn cpu_cores(&self) -> &[usize] { &self.cores }