diff --git a/benches/benches/bevy_ecs/iteration/iter_simple_foreach_hybrid.rs b/benches/benches/bevy_ecs/iteration/iter_simple_foreach_hybrid.rs
new file mode 100644
index 0000000000000..73eb55cfdbf8e
--- /dev/null
+++ b/benches/benches/bevy_ecs/iteration/iter_simple_foreach_hybrid.rs
@@ -0,0 +1,43 @@
+use bevy_ecs::prelude::*;
+use rand::{prelude::SliceRandom, SeedableRng};
+use rand_chacha::ChaCha8Rng;
+
+#[derive(Component, Copy, Clone)]
+struct TableData(f32);
+
+#[derive(Component, Copy, Clone)]
+#[component(storage = "SparseSet")]
+struct SparseData(f32);
+
+fn deterministic_rand() -> ChaCha8Rng {
+    ChaCha8Rng::seed_from_u64(42)
+}
+pub struct Benchmark<'w>(World, QueryState<(&'w mut TableData, &'w SparseData)>);
+
+impl<'w> Benchmark<'w> {
+    pub fn new() -> Self {
+        let mut world = World::new();
+
+        let mut v = vec![];
+        for _ in 0..10000 {
+            world.spawn((TableData(0.0), SparseData(0.0)));
+            v.push(world.spawn(TableData(0.0)).id());
+        }
+
+        // Shuffle the despawn order to make the archetype iteration order deviate significantly from the table order. This maximizes the loss of cache locality during archetype-based iteration.
+        v.shuffle(&mut deterministic_rand());
+        for e in v {
+            world.entity_mut(e).despawn();
+        }
+
+        let query = world.query::<(&mut TableData, &SparseData)>();
+        Self(world, query)
+    }
+
+    #[inline(never)]
+    pub fn run(&mut self) {
+        self.1
+            .iter_mut(&mut self.0)
+            .for_each(|(mut v1, v2)| v1.0 += v2.0)
+    }
+}
diff --git a/benches/benches/bevy_ecs/iteration/mod.rs b/benches/benches/bevy_ecs/iteration/mod.rs
index 790884335021e..baa1bb385bb87 100644
--- a/benches/benches/bevy_ecs/iteration/mod.rs
+++ b/benches/benches/bevy_ecs/iteration/mod.rs
@@ -11,6 +11,7 @@ mod iter_frag_wide;
 mod iter_frag_wide_sparse;
 mod iter_simple;
 mod iter_simple_foreach;
+mod iter_simple_foreach_hybrid;
 mod iter_simple_foreach_sparse_set;
 mod iter_simple_foreach_wide;
 mod iter_simple_foreach_wide_sparse_set;
@@ -71,6 +72,10 @@ fn iter_simple(c: &mut Criterion) {
         let mut bench = iter_simple_foreach_wide_sparse_set::Benchmark::new();
         b.iter(move || bench.run());
     });
+    group.bench_function("foreach_hybrid", |b| {
+        let mut bench = iter_simple_foreach_hybrid::Benchmark::new();
+        b.iter(move || bench.run());
+    });
     group.finish();
 }
 
diff --git a/crates/bevy_ecs/src/query/iter.rs b/crates/bevy_ecs/src/query/iter.rs
index 05955d5b37854..00639e56ce63e 100644
--- a/crates/bevy_ecs/src/query/iter.rs
+++ b/crates/bevy_ecs/src/query/iter.rs
@@ -167,6 +167,71 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIter<'w, 's, D, F> {
         accum
     }
 
+    /// Executes the equivalent of [`Iterator::fold`] over a contiguous segment
+    /// of an archetype which has the same entity count as its table.
+    ///
+    /// # Safety
+    /// - all `rows` must be in `[0, archetype.len())`.
+    /// - `archetype` must match D and F
+    /// - `archetype` must have the same length as its table.
+    /// - Either `D::IS_DENSE` or `F::IS_DENSE` must be false.
+    #[inline]
+    pub(super) unsafe fn fold_over_dense_archetype_range<B, Func>(
+        &mut self,
+        mut accum: B,
+        func: &mut Func,
+        archetype: &'w Archetype,
+        rows: Range<usize>,
+    ) -> B
+    where
+        Func: FnMut(B, D::Item<'w>) -> B,
+    {
+        assert!(
+            rows.end <= u32::MAX as usize,
+            "TableRow is only valid up to u32::MAX"
+        );
+        // SAFETY: Caller assures `archetype` is a matched archetype, so its table is guaranteed to still exist.
+        let table = unsafe { self.tables.get(archetype.table_id()).debug_checked_unwrap() };
+
+        debug_assert!(
+            archetype.len() == table.entity_count(),
+            "archetype and it's table must have the same length. "
+        );
+
+        D::set_archetype(
+            &mut self.cursor.fetch,
+            &self.query_state.fetch_state,
+            archetype,
+            table,
+        );
+        F::set_archetype(
+            &mut self.cursor.filter,
+            &self.query_state.filter_state,
+            archetype,
+            table,
+        );
+        let entities = table.entities();
+        for row in rows {
+            // SAFETY: Caller assures `row` in range of the current archetype, which has the same length as its table.
+            let entity = unsafe { *entities.get_unchecked(row) };
+            let row = TableRow::from_usize(row);
+
+            // SAFETY: set_archetype was called prior.
+            // Caller assures `row` in range of the current archetype.
+            let filter_matched = unsafe { F::filter_fetch(&mut self.cursor.filter, entity, row) };
+            if !filter_matched {
+                continue;
+            }
+
+            // SAFETY: set_archetype was called prior.
+            // Caller assures `row` in range of the current archetype.
+            let item = unsafe { D::fetch(&mut self.cursor.fetch, entity, row) };
+
+            accum = func(accum, item);
+        }
+        accum
+    }
+
     /// Sorts all query items into a new iterator, using the query lens as a key.
     ///
     /// This sort is stable (i.e., does not reorder equal elements).
@@ -914,12 +979,27 @@ impl<'w, 's, D: QueryData, F: QueryFilter> Iterator for QueryIter<'w, 's, D, F>
                 let archetype =
                     // SAFETY: Matched archetype IDs are guaranteed to still exist.
                     unsafe { self.archetypes.get(id.archetype_id).debug_checked_unwrap() };
-                accum =
+                // SAFETY: Matched table IDs are guaranteed to still exist.
+                let table = unsafe { self.tables.get(archetype.table_id()).debug_checked_unwrap() };
+
+                // When an archetype and its table have equal entity counts, dense iteration can be safely used.
+                // This leverages cache locality to optimize performance.
+                if table.entity_count() == archetype.len() {
+                    accum =
                     // SAFETY:
                     // - The fetched archetype matches both D and F
+                    // - The provided archetype and its table have the same length.
                     // - The provided range is equivalent to [0, archetype.len)
                     // - The if block ensures that ether D::IS_DENSE or F::IS_DENSE are false
-                    unsafe { self.fold_over_archetype_range(accum, &mut func, archetype, 0..archetype.len()) };
+                    unsafe { self.fold_over_dense_archetype_range(accum, &mut func, archetype, 0..archetype.len()) };
+                } else {
+                    accum =
+                        // SAFETY:
+                        // - The fetched archetype matches both D and F
+                        // - The provided range is equivalent to [0, archetype.len)
+                        // - The if block ensures that ether D::IS_DENSE or F::IS_DENSE are false
+                        unsafe { self.fold_over_archetype_range(accum, &mut func, archetype, 0..archetype.len()) };
+                }
             }
         }
         accum