Skip to content

Commit 3fae1b9

Browse files
committed
Auto merge of #111755 - Zoxc:sharded-switch, r=cjgillot
Use only one shard with a single thread. This changes `Sharded` to only access a single shard using a mask set to `0` when a single thread is used, which leads to cache utilization improvements. Performance improvement with 1 thread and `cfg(parallel_compiler)`: <table><tr><td rowspan="2">Benchmark</td><td colspan="1"><b>Before</b></td><td colspan="2"><b>After</b></td></tr><tr><td align="right">Time</td><td align="right">Time</td><td align="right">%</td></tr><tr><td>🟣 <b>clap</b>:check</td><td align="right">1.7402s</td><td align="right">1.7004s</td><td align="right">💚 -2.29%</td></tr><tr><td>🟣 <b>hyper</b>:check</td><td align="right">0.2633s</td><td align="right">0.2550s</td><td align="right">💚 -3.12%</td></tr><tr><td>🟣 <b>regex</b>:check</td><td align="right">0.9716s</td><td align="right">0.9482s</td><td align="right">💚 -2.41%</td></tr><tr><td>🟣 <b>syn</b>:check</td><td align="right">1.5679s</td><td align="right">1.5358s</td><td align="right">💚 -2.05%</td></tr><tr><td>🟣 <b>syntex_syntax</b>:check</td><td align="right">6.0569s</td><td align="right">5.9272s</td><td align="right">💚 -2.14%</td></tr><tr><td>Total</td><td align="right">10.5999s</td><td align="right">10.3666s</td><td align="right">💚 -2.20%</td></tr><tr><td>Summary</td><td align="right">1.0000s</td><td align="right">0.9760s</td><td align="right">💚 -2.40%</td></tr></table> cc `@SparrowLii`
2 parents 39c03fb + 8abafd0 commit 3fae1b9

File tree

1 file changed

+38
-10
lines changed

1 file changed

+38
-10
lines changed

compiler/rustc_data_structures/src/sharded.rs

+38-10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use crate::fx::{FxHashMap, FxHasher};
2+
#[cfg(parallel_compiler)]
3+
use crate::sync::is_dyn_thread_safe;
24
use crate::sync::{CacheAligned, Lock, LockGuard};
35
use std::borrow::Borrow;
46
use std::collections::hash_map::RawEntryMut;
@@ -18,6 +20,11 @@ pub const SHARDS: usize = 1 << SHARD_BITS;
1820

1921
/// An array of cache-line aligned inner locked structures with convenience methods.
2022
pub struct Sharded<T> {
23+
/// This mask is used to ensure that accesses are inbounds of `shards`.
24+
/// When dynamic thread safety is off, this field is set to 0 causing only
25+
/// a single shard to be used for greater cache efficiency.
26+
#[cfg(parallel_compiler)]
27+
mask: usize,
2128
shards: [CacheAligned<Lock<T>>; SHARDS],
2229
}
2330

@@ -31,31 +38,54 @@ impl<T: Default> Default for Sharded<T> {
3138
impl<T> Sharded<T> {
3239
#[inline]
3340
pub fn new(mut value: impl FnMut() -> T) -> Self {
34-
Sharded { shards: [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))) }
41+
Sharded {
42+
#[cfg(parallel_compiler)]
43+
mask: if is_dyn_thread_safe() { SHARDS - 1 } else { 0 },
44+
shards: [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))),
45+
}
46+
}
47+
48+
#[inline(always)]
49+
fn mask(&self) -> usize {
50+
#[cfg(parallel_compiler)]
51+
{
52+
if SHARDS == 1 { 0 } else { self.mask }
53+
}
54+
#[cfg(not(parallel_compiler))]
55+
{
56+
0
57+
}
58+
}
59+
60+
#[inline(always)]
61+
fn count(&self) -> usize {
62+
// `self.mask` is always one below the used shard count
63+
self.mask() + 1
3564
}
3665

3766
/// The shard is selected by hashing `val` with `FxHasher`.
3867
#[inline]
3968
pub fn get_shard_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Lock<T> {
40-
if SHARDS == 1 { &self.shards[0].0 } else { self.get_shard_by_hash(make_hash(val)) }
69+
self.get_shard_by_hash(if SHARDS == 1 { 0 } else { make_hash(val) })
4170
}
4271

4372
#[inline]
4473
pub fn get_shard_by_hash(&self, hash: u64) -> &Lock<T> {
45-
&self.shards[get_shard_index_by_hash(hash)].0
74+
self.get_shard_by_index(get_shard_hash(hash))
4675
}
4776

4877
#[inline]
4978
pub fn get_shard_by_index(&self, i: usize) -> &Lock<T> {
50-
&self.shards[i].0
79+
// SAFETY: The index gets ANDed with the mask, ensuring it is always inbounds.
80+
unsafe { &self.shards.get_unchecked(i & self.mask()).0 }
5181
}
5282

5383
pub fn lock_shards(&self) -> Vec<LockGuard<'_, T>> {
54-
(0..SHARDS).map(|i| self.shards[i].0.lock()).collect()
84+
(0..self.count()).map(|i| self.get_shard_by_index(i).lock()).collect()
5585
}
5686

5787
pub fn try_lock_shards(&self) -> Option<Vec<LockGuard<'_, T>>> {
58-
(0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect()
88+
(0..self.count()).map(|i| self.get_shard_by_index(i).try_lock()).collect()
5989
}
6090
}
6191

@@ -136,11 +166,9 @@ pub fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
136166
/// `hash` can be computed with any hasher, so long as that hasher is used
137167
/// consistently for each `Sharded` instance.
138168
#[inline]
139-
#[allow(clippy::modulo_one)]
140-
pub fn get_shard_index_by_hash(hash: u64) -> usize {
169+
fn get_shard_hash(hash: u64) -> usize {
141170
let hash_len = mem::size_of::<usize>();
142171
// Ignore the top 7 bits as hashbrown uses these and get the next SHARD_BITS highest bits.
143172
// hashbrown also uses the lowest bits, so we can't use those
144-
let bits = (hash >> (hash_len * 8 - 7 - SHARD_BITS)) as usize;
145-
bits % SHARDS
173+
(hash >> (hash_len * 8 - 7 - SHARD_BITS)) as usize
146174
}

0 commit comments

Comments
 (0)