From aa905e9f17243638721fad5191481d0e9e28bc05 Mon Sep 17 00:00:00 2001
From: Ibraheem Ahmed
Date: Sun, 9 Feb 2025 00:38:37 -0500
Subject: [PATCH] add heuristic to shrink table size

---
 src/map.rs     |  3 +++
 src/raw/mod.rs | 30 +++++++++++++++++++++++++-----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/src/map.rs b/src/map.rs
index 57a1d2a..b09adde 100644
--- a/src/map.rs
+++ b/src/map.rs
@@ -208,6 +208,9 @@ impl HashMap {
     /// However, the capacity is an estimate, and the table may prematurely resize due
     /// to poor hash distribution. If `capacity` is 0, the hash map will not allocate.
     ///
+    /// Note that the `HashMap` may grow and shrink as elements are inserted or removed,
+    /// but it is guaranteed to never shrink below the initial capacity.
+    ///
     /// # Examples
     ///
     /// ```
diff --git a/src/raw/mod.rs b/src/raw/mod.rs
index 0977504..d5cf10a 100644
--- a/src/raw/mod.rs
+++ b/src/raw/mod.rs
@@ -34,6 +34,11 @@ pub struct HashMap {
     /// An atomic counter of the number of keys in the table.
     count: Counter,
 
+    /// The initial capacity provided to `HashMap::new`.
+    ///
+    /// The table is guaranteed to never shrink below this capacity.
+    initial_capacity: usize,
+
     /// Hasher for keys.
     pub hasher: S,
 }
@@ -247,6 +252,7 @@ impl HashMap {
             collector,
             resize,
             hasher,
+            initial_capacity: 1,
             table: AtomicPtr::new(ptr::null_mut()),
             count: Counter::default(),
         };
@@ -260,6 +266,7 @@ impl HashMap {
             hasher,
             resize,
             collector,
+            initial_capacity: capacity,
             table: AtomicPtr::new(table.raw),
             count: Counter::default(),
         }
@@ -1957,22 +1964,35 @@ where
             return next;
         }
 
+        let current_capacity = table.len();
+
+        // Loading the length here is quite expensive; we may want to consider
+        // a probabilistic counter to detect high-deletion workloads.
+        let active_entries = self.len();
+
         let next_capacity = match cfg!(papaya_stress) {
             // Never grow the table to stress the incremental resizing algorithm.
-            true => table.len(),
+            true => current_capacity,
 
             // Double the table capacity if we are at least 50% full.
+            false if active_entries >= (current_capacity >> 1) => current_capacity << 1,
+
+            // Halve the table if we are at most 12.5% full.
             //
-            // Loading the length here is quite expensive, we may want to consider
-            // a probabilistic counter to detect high-deletion workloads.
-            false if self.len() >= (table.len() >> 1) => table.len() << 1,
+            // This heuristic is intentionally pessimistic, as unnecessarily shrinking
+            // is an expensive operation, but it may change in the future. We also respect
+            // the initial capacity to give the user a way to retain a strict minimum table
+            // size.
+            false if active_entries <= (current_capacity >> 3) => {
+                self.initial_capacity.max(current_capacity >> 1)
+            }
 
             // Otherwise keep the capacity the same.
            //
             // This can occur due to poor hash distribution or frequent cycling of
             // insertions and deletions, in which case we want to avoid continuously
             // growing the table.
-            false => table.len(),
+            false => current_capacity,
         };
 
         let next_capacity = capacity.unwrap_or(next_capacity);
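
For review purposes, the growth/shrink decision introduced in the final hunk boils
down to roughly the following standalone sketch. The `next_capacity` helper and its
parameters are illustrative only, not part of papaya's API; the real logic also
handles the `papaya_stress` cfg and an explicit capacity override.

// Illustrative sketch of the resize heuristic; names and signature are hypothetical.
fn next_capacity(current_capacity: usize, active_entries: usize, initial_capacity: usize) -> usize {
    if active_entries >= (current_capacity >> 1) {
        // At least 50% full: double the table.
        current_capacity << 1
    } else if active_entries <= (current_capacity >> 3) {
        // At most 12.5% full: halve the table, but never shrink below the
        // capacity requested at construction time.
        initial_capacity.max(current_capacity >> 1)
    } else {
        // Otherwise keep the capacity unchanged to avoid thrashing under
        // mixed insert/delete workloads or poor hash distribution.
        current_capacity
    }
}

fn main() {
    assert_eq!(next_capacity(1024, 600, 16), 2048);   // >= 50% full: grow
    assert_eq!(next_capacity(1024, 100, 16), 512);    // <= 12.5% full: shrink
    assert_eq!(next_capacity(1024, 100, 1024), 1024); // shrink clamped to initial capacity
    assert_eq!(next_capacity(1024, 300, 16), 1024);   // in between: unchanged
}

In particular, a table created via `with_capacity` never shrinks below that capacity,
matching the new doc comment added to `src/map.rs`.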