Skip to content

Commit f2bd50d

Browse files
committed
Auto merge of rust-lang#116422 - the8472:chunked-generic-slice-eq, r=<try>
Chunked generic slice eq looks nice in a microbenchmark; let's see if perf agrees:

```
OLD: slice::slice_cmp_generic 54.00ns/iter +/- 1.00ns
NEW: slice::slice_cmp_generic 20.00ns/iter +/- 2.00ns
```
2 parents 2ffeb46 + 82ee190 commit f2bd50d

File tree

2 files changed

+55
-1
lines changed

2 files changed

+55
-1
lines changed

library/core/benches/slice.rs

+14
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,17 @@ fn fold_to_last(b: &mut Bencher) {
171171
let slice: &[i32] = &[0; 1024];
172172
b.iter(|| black_box(slice).iter().fold(None, |_, r| Some(NonNull::from(r))));
173173
}
174+
175+
#[bench]
176+
fn slice_cmp_generic(b: &mut Bencher) {
177+
#[derive(PartialEq, Clone, Copy)]
178+
struct Foo(u32, u32);
179+
180+
let left = [Foo(128, 128); 128];
181+
let right = [Foo(128, 128); 128];
182+
183+
b.iter(|| {
184+
let (left, right) = (black_box(&left), black_box(&right));
185+
left.as_slice() == right.as_slice()
186+
});
187+
}

library/core/src/slice/cmp.rs

+41-1
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,50 @@ where
6060
return false;
6161
}
6262

63-
self.iter().zip(other.iter()).all(|(x, y)| x == y)
63+
// at least 8 items for unrolling to make sense (4 peeled + 4+ unrolled)
64+
if self.len() < 8 {
65+
return eq_small(self, other);
66+
}
67+
68+
eq_unroll(self, other)
6469
}
6570
}
6671

72+
/// Compares `a` and `b` pairwise, front to back, stopping at the first
/// pair that is not equal.
///
/// Only the pairs produced by zipping the two slices are inspected, so when
/// the lengths differ just the common prefix is compared; callers are
/// expected to have checked the lengths already.
#[inline]
fn eq_small<A, B>(a: &[A], b: &[B]) -> bool
where
    A: PartialEq<B>,
{
    for (lhs, rhs) in a.iter().zip(b) {
        // Negate `==` instead of using `!=` so that only `PartialEq::eq` is
        // ever invoked, exactly as a plain `all(|(a, b)| a == b)` would do.
        if !(lhs == rhs) {
            return false;
        }
    }
    true
}
79+
80+
fn eq_unroll<A, B>(a: &[A], b: &[B]) -> bool
81+
where
82+
A: PartialEq<B>,
83+
{
84+
let (mut chunks_a, residual_a) = a.as_chunks::<4>();
85+
let (mut chunks_b, residual_b) = b.as_chunks::<4>();
86+
let peeled_a = chunks_a.take_first().unwrap();
87+
let peeled_b = chunks_b.take_first().unwrap();
88+
89+
// peel the first chunk and do a short-circuiting comparison to bail early on mismatches
90+
// in case comparisons are expensive
91+
let mut result = eq_small(peeled_a, peeled_b);
92+
93+
// then check the residual, another chance to bail early
94+
result = result && eq_small(residual_a, residual_b);
95+
96+
// iter.all short-circuits which means the backend can't unroll the loop due to early exits.
97+
// So we unroll it manually.
98+
result = result
99+
&& chunks_a
100+
.iter()
101+
.zip(chunks_b)
102+
.all(|(a, b)| (a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]));
103+
104+
result
105+
}
106+
67107
// When each element can be compared byte-wise, we can compare all the bytes
68108
// from the whole size in one call to the intrinsics.
69109
impl<A, B> SlicePartialEq<B> for [A]

0 commit comments

Comments
 (0)