Add heuristic to shrink table size #58

Merged · 1 commit · Feb 17, 2025
3 changes: 3 additions & 0 deletions src/map.rs
@@ -208,6 +208,9 @@ impl<K, V> HashMap<K, V> {
     /// However, the capacity is an estimate, and the table may prematurely resize due
     /// to poor hash distribution. If `capacity` is 0, the hash map will not allocate.
     ///
+    /// Note that the `HashMap` may grow and shrink as elements are inserted or removed,
+    /// but it is guaranteed to never shrink below the initial capacity.
+    ///
     /// # Examples
     ///
     /// ```
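For context, a minimal usage sketch of the guarantee documented above, assuming papaya's public `HashMap::with_capacity`/`pin` API; the table capacity itself is internal, so the comments describe the intended resizing behavior rather than asserting it through the API.

```rust
use papaya::HashMap;

fn main() {
    // Reserve room for roughly 1024 entries up front.
    let map: HashMap<u32, u32> = HashMap::with_capacity(1024);

    // Fill the table well past the grow threshold, forcing several resizes...
    for i in 0..100_000 {
        map.pin().insert(i, i);
    }

    // ...then delete almost everything. Later resizes may now halve the
    // table, but it should never shrink below the initial capacity of 1024.
    for i in 0..100_000 {
        map.pin().remove(&i);
    }

    assert_eq!(map.len(), 0);
}
```

Note that shrinking is applied when the next table is allocated during a resize, per the heuristic added in `src/raw/mod.rs` below; removals on their own do not resize the table.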
30 changes: 25 additions & 5 deletions src/raw/mod.rs
@@ -34,6 +34,11 @@ pub struct HashMap<K, V, S> {
     /// An atomic counter of the number of keys in the table.
     count: Counter,
 
+    /// The initial capacity provided to `HashMap::new`.
+    ///
+    /// The table is guaranteed to never shrink below this capacity.
+    initial_capacity: usize,
+
     /// Hasher for keys.
     pub hasher: S,
 }
@@ -247,6 +252,7 @@ impl<K, V, S> HashMap<K, V, S> {
             collector,
             resize,
             hasher,
+            initial_capacity: 1,
             table: AtomicPtr::new(ptr::null_mut()),
             count: Counter::default(),
         };
@@ -260,6 +266,7 @@ impl<K, V, S> HashMap<K, V, S> {
             hasher,
             resize,
             collector,
+            initial_capacity: capacity,
             table: AtomicPtr::new(table.raw),
             count: Counter::default(),
         }
@@ -1957,22 +1964,35 @@
             return next;
         }
 
+        let current_capacity = table.len();
+
+        // Loading the length here is quite expensive, we may want to consider
+        // a probabilistic counter to detect high-deletion workloads.
+        let active_entries = self.len();
+
         let next_capacity = match cfg!(papaya_stress) {
             // Never grow the table to stress the incremental resizing algorithm.
-            true => table.len(),
+            true => current_capacity,
 
             // Double the table capacity if we are at least 50% full.
+            false if active_entries >= (current_capacity >> 1) => current_capacity << 1,
+
+            // Halve the table if we are at most 12.5% full.
             //
-            // Loading the length here is quite expensive, we may want to consider
-            // a probabilistic counter to detect high-deletion workloads.
-            false if self.len() >= (table.len() >> 1) => table.len() << 1,
+            // This heuristic is intentionally pessimistic as unnecessarily shrinking
+            // is an expensive operation, but it may change in the future. We also respect
+            // the initial capacity to give the user a way to retain a strict minimum table
+            // size.
+            false if active_entries <= (current_capacity >> 3) => {
+                self.initial_capacity.max(current_capacity >> 1)
+            }
 
             // Otherwise keep the capacity the same.
             //
             // This can occur due to poor hash distribution or frequent cycling of
             // insertions and deletions, in which case we want to avoid continuously
             // growing the table.
-            false => table.len(),
+            false => current_capacity,
         };
 
         let next_capacity = capacity.unwrap_or(next_capacity);
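To make the thresholds above concrete, here is a small standalone sketch of the same capacity decision as a pure function, with a worked example; the function name and parameters are illustrative and not part of papaya's internals, and the `papaya_stress` arm is omitted.

```rust
/// Mirrors the heuristic above: grow at >= 50% occupancy, shrink at <= 12.5%
/// occupancy (never below the initial capacity), otherwise keep the current size.
fn next_capacity(current: usize, active_entries: usize, initial: usize) -> usize {
    if active_entries >= (current >> 1) {
        // At least half full: double.
        current << 1
    } else if active_entries <= (current >> 3) {
        // At most an eighth full: halve, but respect the configured minimum.
        initial.max(current >> 1)
    } else {
        // Otherwise keep the capacity the same, to avoid churn from
        // insert/delete cycling or poor hash distribution.
        current
    }
}

fn main() {
    // Worked example: a 1024-slot table created with an initial capacity of 256.
    assert_eq!(next_capacity(1024, 600, 256), 2048); // 600 >= 512 (50%): grow
    assert_eq!(next_capacity(1024, 100, 256), 512);  // 100 <= 128 (12.5%): shrink
    assert_eq!(next_capacity(512, 8, 256), 256);     // shrinks again, down to 256
    assert_eq!(next_capacity(256, 8, 256), 256);     // but never below the initial capacity
    assert_eq!(next_capacity(1024, 300, 256), 1024); // in between: unchanged
}
```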