Skip to content

Commit ef4600d

Browse files
committed
handcode the loops so LLVM has to chew less IR
1 parent 2f78bce commit ef4600d

File tree

1 file changed

+31
-39
lines changed

1 file changed

+31
-39
lines changed

library/core/src/slice/cmp.rs

+31-39
Original file line numberDiff line numberDiff line change
@@ -61,48 +61,40 @@ where
6161
return false;
6262
}
6363

64-
// at least 8 items for unrolling to make sense (4 peeled + 4+ unrolled)
65-
if self.len() < 8 {
66-
return eq_small(self, other);
64+
// ZSTs have no identity, and slices make no guarantee about which addresses they produce for ZST elements,
65+
// so we only need to compare them once to determine the behavior of the PartialEq impl
66+
if const { mem::size_of::<A>() == 0 && mem::size_of::<B>() == 0 } {
67+
// zero-length slices are always equal
68+
if self.len() == 0 {
69+
return true;
70+
}
71+
// SAFETY: A and B are ZSTs so it's ok to conjure them out of thin air
72+
return unsafe { mem::zeroed::<A>() == mem::zeroed::<B>() };
6773
}
6874

69-
eq_unroll(self, other)
70-
}
71-
}
72-
73-
#[inline]
74-
fn eq_small<A, B>(a: &[A], b: &[B]) -> bool
75-
where
76-
A: PartialEq<B>,
77-
{
78-
a.iter().zip(b).all(|(a, b)| a == b)
79-
}
75+
const UNROLL: usize = 4;
76+
let mut i = 0;
77+
let mut is_eq = true;
78+
while i + UNROLL < self.len() && is_eq {
79+
// SAFETY: slices are of the same length and loop conditions ensure indexes are in bounds
80+
unsafe {
81+
is_eq = is_eq & (self.get_unchecked(i) == other.get_unchecked(i));
82+
is_eq = is_eq & (self.get_unchecked(i + 1) == other.get_unchecked(i + 1));
83+
is_eq = is_eq & (self.get_unchecked(i + 2) == other.get_unchecked(i + 2));
84+
is_eq = is_eq & (self.get_unchecked(i + 3) == other.get_unchecked(i + 3));
85+
i = i.unchecked_add(UNROLL);
86+
}
87+
}
88+
while i < self.len() && is_eq {
89+
// SAFETY: slices are of the same length and loop conditions ensure indexes are in bounds
90+
unsafe {
91+
is_eq = is_eq & (self.get_unchecked(i) == other.get_unchecked(i));
92+
i = i.unchecked_add(1);
93+
}
94+
}
8095

81-
fn eq_unroll<A, B>(a: &[A], b: &[B]) -> bool
82-
where
83-
A: PartialEq<B>,
84-
{
85-
let (mut chunks_a, residual_a) = a.as_chunks::<4>();
86-
let (mut chunks_b, residual_b) = b.as_chunks::<4>();
87-
let peeled_a = chunks_a.take_first().unwrap();
88-
let peeled_b = chunks_b.take_first().unwrap();
89-
90-
// peel the first chunk and do a short-circuiting comparison to bail early on mismatches
91-
// in case comparisons are expensive
92-
let mut result = eq_small(peeled_a, peeled_b);
93-
94-
// then check the residual, another chance to bail early
95-
result = result && eq_small(residual_a, residual_b);
96-
97-
// iter.all short-circuits which means the backend can't unroll the loop due to early exits.
98-
// So we unroll it manually.
99-
result = result
100-
&& chunks_a
101-
.iter()
102-
.zip(chunks_b)
103-
.all(|(a, b)| (a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]));
104-
105-
result
96+
is_eq
97+
}
10698
}
10799

108100
// When each element can be compared byte-wise, we can compare all the bytes

0 commit comments

Comments
 (0)