Hand-code the loops so LLVM has less IR to chew through

the8472 · the8472 · commit ef4600dc88eb · 2023-10-05T14:39:17.000+02:00
diff --git a/library/core/src/slice/cmp.rs b/library/core/src/slice/cmp.rs
@@ -61,48 +61,40 @@ where
             return false;
         }
 
-        // at least 8 items for unrolling to make sense (4 peeled + 4+ unrolled)
-        if self.len() < 8 {
-            return eq_small(self, other);
+        // ZSTs have no identity, and slices don't guarantee which addresses-to-ZSTs they produce,
+        // so a single comparison is enough to determine the behavior of the PartialEq impl.
+        if const { mem::size_of::<A>() == 0 && mem::size_of::<B>() == 0 } {
+            // zero-length slices are always equal
+            if self.len() == 0 {
+                return true;
+            }
+            // SAFETY: A and B are inhabited ZSTs (slices are non-empty), so zeroing is valid
+            return unsafe { mem::zeroed::<A>() == mem::zeroed::<B>() };
         }
 
-        eq_unroll(self, other)
-    }
-}
-
-#[inline]
-fn eq_small<A, B>(a: &[A], b: &[B]) -> bool
-where
-    A: PartialEq<B>,
-{
-    a.iter().zip(b).all(|(a, b)| a == b)
-}
+        const UNROLL: usize = 4;
+        let mut i = 0;
+        let mut is_eq = true;
+        while i + UNROLL < self.len() && is_eq {
+            // SAFETY: slices are of the same length and loop conditions ensure indexes are in bounds
+            unsafe {
+                is_eq = is_eq & (self.get_unchecked(i) == other.get_unchecked(i));
+                is_eq = is_eq & (self.get_unchecked(i + 1) == other.get_unchecked(i + 1));
+                is_eq = is_eq & (self.get_unchecked(i + 2) == other.get_unchecked(i + 2));
+                is_eq = is_eq & (self.get_unchecked(i + 3) == other.get_unchecked(i + 3));
+                i = i.unchecked_add(UNROLL);
+            }
+        }
+        while i < self.len() && is_eq {
+            // SAFETY: slices are of the same length and loop conditions ensure indexes are in bounds
+            unsafe {
+                is_eq = is_eq & (self.get_unchecked(i) == other.get_unchecked(i));
+                i = i.unchecked_add(1);
+            }
+        }
 
-fn eq_unroll<A, B>(a: &[A], b: &[B]) -> bool
-where
-    A: PartialEq<B>,
-{
-    let (mut chunks_a, residual_a) = a.as_chunks::<4>();
-    let (mut chunks_b, residual_b) = b.as_chunks::<4>();
-    let peeled_a = chunks_a.take_first().unwrap();
-    let peeled_b = chunks_b.take_first().unwrap();
-
-    // peel the first chunk and do a short-circuiting comparison to bail early on mismatches
-    // in case comparisons are expensive
-    let mut result = eq_small(peeled_a, peeled_b);
-
-    // then check the residual, another chance to bail early
-    result = result && eq_small(residual_a, residual_b);
-
-    // iter.all short-circuits which means the backend can't unroll the loop due to early exits.
-    // So we unroll it manually.
-    result = result
-        && chunks_a
-            .iter()
-            .zip(chunks_b)
-            .all(|(a, b)| (a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]));
-
-    result
+        is_eq
+    }
 }
 
 // When each element can be compared byte-wise, we can compare all the bytes