Skip to content

Commit 8235daa

Browse files
re0312alice-i-cecileItsDoot
authored
Opportunistically use dense iteration for archetypal iteration (#14049)
# Objective - Currently, Bevy employs sparse iteration if any of the target components in the query are stored in a sparse set. This may lead to increased cache misses in some cases, potentially impacting performance. - Partially fixes #12381. ## Solution - Use dense iteration when an archetype and its table have the same entity count. - To avoid introducing complicated unsafe noise, this PR only implements this for `for_each`-style iteration. - Added a benchmark to test performance for hybrid iteration. ## Performance ![image](https://github.com/bevyengine/bevy/assets/45868716/5cce13cf-6ff2-4861-9576-e75edc63bd46) Nearly a 2x win in specific scenarios, and no performance degradation in other test cases. --------- Co-authored-by: Alice Cecile <[email protected]> Co-authored-by: Christian Hughes <[email protected]>
1 parent 7c80ae7 commit 8235daa

File tree

3 files changed

+129
-2
lines changed

3 files changed

+129
-2
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
use bevy_ecs::prelude::*;
2+
use rand::{prelude::SliceRandom, SeedableRng};
3+
use rand_chacha::ChaCha8Rng;
4+
5+
/// Benchmark component wrapping a single `f32`.
///
/// No `storage` attribute is given, so it uses the default component storage
/// (presumably table storage, as the name suggests — confirm against `bevy_ecs`).
#[derive(Component, Copy, Clone)]
6+
struct TableData(f32);
7+
8+
/// Benchmark component wrapping a single `f32`, explicitly stored in a sparse set
/// via the `#[component(storage = "SparseSet")]` attribute.
#[derive(Component, Copy, Clone)]
9+
#[component(storage = "SparseSet")]
10+
struct SparseData(f32);
11+
12+
/// Returns a `ChaCha8Rng` seeded with the fixed value `42`, so the random
/// sequence (and therefore the benchmark setup) is the same on every run.
fn deterministic_rand() -> ChaCha8Rng {
13+
ChaCha8Rng::seed_from_u64(42)
14+
}
15+
/// Benchmark fixture: the `World` under test (field `0`) together with a cached
/// `QueryState` over `(&mut TableData, &SparseData)` (field `1`).
pub struct Benchmark<'w>(World, QueryState<(&'w mut TableData, &'w SparseData)>);
16+
17+
impl<'w> Benchmark<'w> {
18+
/// Builds the benchmark world and caches the query state.
///
/// Spawns 10000 entities carrying both `TableData` and `SparseData`, interleaved
/// with 10000 entities that carry only `TableData`. The `TableData`-only entities
/// are then despawned in a shuffled (but deterministic) order before the query
/// over `(&mut TableData, &SparseData)` is created.
pub fn new() -> Self {
19+
let mut world = World::new();
20+
21+
// Ids of the `TableData`-only entities, which are despawned again below.
let mut v = vec![];
22+
for _ in 0..10000 {
23+
// Entity with both components (the shape the query below asks for); its id is not kept.
world.spawn((TableData(0.0), SparseData(0.0))).id();
24+
// Entity with only `TableData`; remembered so it can be despawned later.
v.push(world.spawn(TableData(0.)).id());
25+
}
26+
27+
// Shuffle the despawn order so that the archetype iteration order deviates significantly
// from the table order. This maximizes the loss of cache locality during archetype-based iteration.
// NOTE(review): presumably both kinds of entity share one table, since `SparseData` is not
// table-stored, which is what lets the despawns scramble the table order — confirm.
28+
v.shuffle(&mut deterministic_rand());
29+
for e in v.into_iter() {
30+
world.entity_mut(e).despawn();
31+
}
32+
33+
let query = world.query::<(&mut TableData, &SparseData)>();
34+
Self(world, query)
35+
}
36+
37+
/// Runs one benchmark pass: for every item yielded by the cached query, adds the
/// `SparseData` value to the `TableData` value in place.
// `#[inline(never)]` keeps this pass behind a real function call instead of
// letting it be inlined into the caller.
#[inline(never)]
38+
pub fn run(&mut self) {
39+
self.1
40+
.iter_mut(&mut self.0)
41+
.for_each(|(mut v1, v2)| v1.0 += v2.0)
42+
}
43+
}

benches/benches/bevy_ecs/iteration/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ mod iter_frag_wide;
1111
mod iter_frag_wide_sparse;
1212
mod iter_simple;
1313
mod iter_simple_foreach;
14+
mod iter_simple_foreach_hybrid;
1415
mod iter_simple_foreach_sparse_set;
1516
mod iter_simple_foreach_wide;
1617
mod iter_simple_foreach_wide_sparse_set;
@@ -71,6 +72,10 @@ fn iter_simple(c: &mut Criterion) {
7172
let mut bench = iter_simple_foreach_wide_sparse_set::Benchmark::new();
7273
b.iter(move || bench.run());
7374
});
75+
group.bench_function("foreach_hybrid", |b| {
76+
let mut bench = iter_simple_foreach_hybrid::Benchmark::new();
77+
b.iter(move || bench.run());
78+
});
7479
group.finish();
7580
}
7681

crates/bevy_ecs/src/query/iter.rs

+81-2
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,70 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIter<'w, 's, D, F> {
167167
accum
168168
}
169169

170+
/// Executes the equivalent of [`Iterator::fold`] over a contiguous segment
171+
/// from an archetype which has the same entity count as its table.
172+
///
/// Rows are visited in table order: each index in `rows` is used directly as a
/// table row, and the entity is read from the table's entity list. Items whose
/// filter does not match are skipped.
///
/// # Panics
/// Panics if `rows.end` is greater than `u32::MAX`. In builds with debug assertions,
/// also panics if `archetype.len()` differs from the entity count of its table.
///
173+
/// # Safety
174+
/// - all indices in `rows` must be in `[0, archetype.len())`.
175+
/// - `archetype` must match D and F
176+
/// - `archetype` must have the same length as its table.
177+
/// - Either `D::IS_DENSE` or `F::IS_DENSE` must be false.
178+
#[inline]
179+
pub(super) unsafe fn fold_over_dense_archetype_range<B, Func>(
180+
&mut self,
181+
mut accum: B,
182+
func: &mut Func,
183+
archetype: &'w Archetype,
184+
rows: Range<usize>,
185+
) -> B
186+
where
187+
Func: FnMut(B, D::Item<'w>) -> B,
188+
{
189+
// Every index in `rows` is converted to a `TableRow` below, so bound the range up front.
assert!(
190+
rows.end <= u32::MAX as usize,
191+
"TableRow is only valid up to u32::MAX"
192+
);
193+
// NOTE(review): presumably sound because the table of a matched archetype always exists — confirm.
let table = self.tables.get(archetype.table_id()).debug_checked_unwrap();
194+
195+
// Debug-only check of the caller's "same length" guarantee from the safety contract.
debug_assert!(
196+
archetype.len() == table.entity_count(),
197+
"archetype and it's table must have the same length. "
198+
);
199+
200+
// Point both the fetch and the filter at this archetype/table before reading any rows.
D::set_archetype(
201+
&mut self.cursor.fetch,
202+
&self.query_state.fetch_state,
203+
archetype,
204+
table,
205+
);
206+
F::set_archetype(
207+
&mut self.cursor.filter,
208+
&self.query_state.filter_state,
209+
archetype,
210+
table,
211+
);
212+
let entities = table.entities();
213+
for row in rows {
214+
// SAFETY: Caller assures `row` is in range of the current archetype, and the archetype
// has the same length as its table, so `row` is also in range of the table's entities.
215+
let entity = unsafe { *entities.get_unchecked(row) };
216+
let row = TableRow::from_usize(row);
217+
218+
// SAFETY: set_archetype was called prior.
219+
// Caller assures `row` in range of the current archetype.
220+
let filter_matched = unsafe { F::filter_fetch(&mut self.cursor.filter, entity, row) };
221+
if !filter_matched {
222+
continue;
223+
}
224+
225+
// SAFETY: set_archetype was called prior.
226+
// Caller assures `row` in range of the current archetype.
// NOTE(review): unlike the `filter_fetch` call above, this call is not wrapped in an explicit
// `unsafe` block and relies on the enclosing `unsafe fn` — consider wrapping it for consistency.
227+
let item = D::fetch(&mut self.cursor.fetch, entity, row);
228+
229+
accum = func(accum, item);
230+
}
231+
accum
232+
}
233+
170234
/// Sorts all query items into a new iterator, using the query lens as a key.
171235
///
172236
/// This sort is stable (i.e., does not reorder equal elements).
@@ -914,12 +978,27 @@ impl<'w, 's, D: QueryData, F: QueryFilter> Iterator for QueryIter<'w, 's, D, F>
914978
let archetype =
915979
// SAFETY: Matched archetype IDs are guaranteed to still exist.
916980
unsafe { self.archetypes.get(id.archetype_id).debug_checked_unwrap() };
917-
accum =
981+
// SAFETY: Matched table IDs are guaranteed to still exist.
982+
let table = unsafe { self.tables.get(archetype.table_id()).debug_checked_unwrap() };
983+
984+
// When an archetype and its table have equal entity counts, dense iteration can be safely used.
985+
// this leverages cache locality to optimize performance.
986+
if table.entity_count() == archetype.len() {
987+
accum =
918988
// SAFETY:
919989
// - The fetched archetype matches both D and F
990+
// - The provided archetype and its table have the same length.
920991
// - The provided range is equivalent to [0, archetype.len)
921992
// - The if block ensures that either D::IS_DENSE or F::IS_DENSE is false
922-
unsafe { self.fold_over_archetype_range(accum, &mut func, archetype, 0..archetype.len()) };
993+
unsafe { self.fold_over_dense_archetype_range(accum, &mut func, archetype,0..archetype.len()) };
994+
} else {
995+
accum =
996+
// SAFETY:
997+
// - The fetched archetype matches both D and F
998+
// - The provided range is equivalent to [0, archetype.len)
999+
// - The if block ensures that either D::IS_DENSE or F::IS_DENSE is false
1000+
unsafe { self.fold_over_archetype_range(accum, &mut func, archetype,0..archetype.len()) };
1001+
}
9231002
}
9241003
}
9251004
accum

0 commit comments

Comments
 (0)