Move {widening, carrying}_mul to an intrinsic with fallback MIR

scottmcm · scottmcm · commit 401339262bf4 · 2024-11-29T23:10:26.000-08:00
This includes implementing the intrinsic for `u128`, so that both methods can be defined once in `uint_impl!`.

This way it works for all backends, including CTFE.
diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -94,6 +94,7 @@ pub fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -
         | sym::add_with_overflow
         | sym::sub_with_overflow
         | sym::mul_with_overflow
+        | sym::carrying_mul_add
         | sym::wrapping_add
         | sym::wrapping_sub
         | sym::wrapping_mul
@@ -436,6 +437,10 @@ pub fn check_intrinsic_type(
                 (1, 0, vec![param(0), param(0)], Ty::new_tup(tcx, &[param(0), tcx.types.bool]))
             }
 
+            sym::carrying_mul_add => {
+                (1, 0, vec![param(0); 4], Ty::new_tup(tcx, &[param(0), param(0)]))
+            }
+
             sym::ptr_guaranteed_cmp => (
                 1,
                 0,
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
@@ -530,6 +530,7 @@ symbols! {
         call_ref_future,
         caller_location,
         capture_disjoint_fields,
+        carrying_mul_add,
         catch_unwind,
         cause,
         cdylib,
diff --git a/library/core/src/intrinsics/fallback.rs b/library/core/src/intrinsics/fallback.rs
@@ -0,0 +1,111 @@
+#![unstable(
+    feature = "core_intrinsics_fallbacks",
+    reason = "The fallbacks will never be stable, as they exist only to be called \
+              by the fallback MIR, but they're exported so they can be tested on \
+              platforms where the fallback MIR isn't actually used",
+    issue = "none"
+)]
+#![allow(missing_docs)]
+
+use crate::panicking::panic_nounwind;
+
+/// Hands `val` back as a `T` when `F` and `T` are the very same type, else `None`. Ideally we'd
+/// use ordinary trait impls, but that doesn't work for const (yet™), hence this hacky workaround.
+#[inline]
+const fn try_as<T: 'static, F: Copy + 'static>(val: F) -> Option<T> {
+    if const { super::type_id::<T>() == super::type_id::<F>() } {
+        // SAFETY: just checked it's the same type
+        Some(unsafe { super::transmute_unchecked(val) })
+    } else {
+        None
+    }
+}
+
+macro_rules! if_the_types_work {
+    ($f:ident ( $a:expr )) => {
+        if let Some(arg) = try_as($a) { // `None` unless `$a` already has `$f`'s parameter type
+            if let Some(ret) = try_as($f(arg)) { // same gate, for `$f`'s result vs. the caller's
+                return ret; // leaves the *enclosing* function, not just this expansion
+            }
+        }
+    };
+}
+
+#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
+const fn wide_mul_u128(a: u128, b: u128) -> (u128, u128) {
+    const fn to_low_high(x: u128) -> [u64; 2] {
+        [x as u64, (x >> 64) as u64] // little-endian limbs: index 0 is the low 64 bits
+    }
+    const fn from_low_high(x: [u64; 2]) -> u128 {
+        (x[0] as u128) | ((x[1] as u128) << 64) // inverse of `to_low_high`
+    }
+    #[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
+    const fn scalar_mul(low_high: [u64; 2], k: u64) -> [u64; 3] {
+        let (x, c) = u64::widening_mul(k, low_high[0]); // low limb × k, `c` carries upward
+        let (y, z) = u64::carrying_mul(k, low_high[1], c); // high limb × k, plus that carry
+        [x, y, z] // three little-endian limbs of the two-limb value times `k`
+    }
+    let a = to_low_high(a);
+    let b = to_low_high(b);
+    let low = scalar_mul(a, b[0]); // a × (low limb of b)
+    let high = scalar_mul(a, b[1]); // a × (high limb of b), added one limb higher below
+    let r0 = low[0];
+    let (r1, c) = u64::overflowing_add(low[1], high[0]);
+    let (r2, c) = u64::carrying_add(low[2], high[1], c);
+    let r3 = high[2] + (c as u64); // cannot overflow: a 128×128-bit product fits in 256 bits
+    (from_low_high([r0, r1]), from_low_high([r2, r3])) // (low 128 bits, high 128 bits)
+}
+
+#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
+#[inline]
+pub const fn carrying_mul_add<T: Copy + 'static>(a: T, b: T, c: T, d: T) -> (T, T) {
+    let args = (a, b, c, d); // one tuple, because `if_the_types_work!` forwards a single expr
+    macro_rules! via_wider_type {
+        ($narrow:ty => $wide:ty) => {{
+            #[inline]
+            const fn doit(
+                (a, b, c, d): ($narrow, $narrow, $narrow, $narrow),
+            ) -> ($narrow, $narrow) {
+                let (a, b, c, d) = (a as $wide, b as $wide, c as $wide, d as $wide);
+                let full = a * b + c + d; // fits: `$wide` has twice the bits in every use below
+                (full as $narrow, (full >> <$narrow>::BITS) as $narrow) // (low half, high half)
+            }
+            if_the_types_work!(doit(args));
+        }};
+    }
+    via_wider_type!(u8 => u16);
+    via_wider_type!(u16 => u32);
+    via_wider_type!(u32 => u64);
+    via_wider_type!(u64 => u128);
+
+    #[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
+    #[inline]
+    const fn for_usize((a, b, c, d): (usize, usize, usize, usize)) -> (usize, usize) {
+        #[cfg(target_pointer_width = "16")]
+        type T = u16; // local alias: the fixed-size unsigned type as wide as a pointer
+        #[cfg(target_pointer_width = "32")]
+        type T = u32;
+        #[cfg(target_pointer_width = "64")]
+        type T = u64;
+
+        let (x, y) = carrying_mul_add(a as T, b as T, c as T, d as T); // re-dispatch on that type
+        (x as usize, y as usize)
+    }
+    if_the_types_work!(for_usize(args));
+
+    #[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
+    #[inline]
+    const fn carrying_mul_add_u128((a, b, c1, c2): (u128, u128, u128, u128)) -> (u128, u128) {
+        let (mut r1, mut r2) = wide_mul_u128(a, b); // (low, high) halves of a × b
+        let c;
+        (r1, c) = u128::overflowing_add(r1, c1);
+        r2 += c as u128; // a carry out of the low half moves into the high half
+        let c;
+        (r1, c) = u128::overflowing_add(r1, c2);
+        r2 += c as u128; // same again for the second addend
+        (r1, r2)
+    }
+    if_the_types_work!(carrying_mul_add_u128(args));
+
+    panic_nounwind("Not supported for this generic type") // `T` matched none of the types above
+}
diff --git a/library/core/src/intrinsics/mod.rs b/library/core/src/intrinsics/mod.rs
@@ -68,6 +68,7 @@ use crate::marker::{DiscriminantKind, Tuple};
 use crate::mem::SizedTypeProperties;
 use crate::{ptr, ub_checks};
 
+pub mod fallback;
 pub mod mir;
 pub mod simd;
 
@@ -2939,6 +2940,31 @@ pub const fn mul_with_overflow<T: Copy>(_x: T, _y: T) -> (T, bool) {
     unimplemented!()
 }
 
+/// Performs full-width multiplication and addition with a carry:
+/// `multiplier * multiplicand + addend + carry`, returned as `(low, high)` halves.
+///
+/// The result always fits in two `T`s, because even the largest inputs give:
+///    MAX * MAX + MAX + MAX
+/// => (2ⁿ-1) × (2ⁿ-1) + (2ⁿ-1) + (2ⁿ-1)
+/// => (2²ⁿ - 2ⁿ⁺¹ + 1) + (2ⁿ⁺¹ - 2)
+/// => 2²ⁿ - 1
+///
+/// This currently supports unsigned integers *only*, no signed ones.
+/// The stabilized versions of this intrinsic are available on integers.
+#[unstable(feature = "core_intrinsics", issue = "none")]
+#[rustc_const_unstable(feature = "const_carrying_mul_add", issue = "85532")]
+#[rustc_nounwind]
+#[cfg_attr(not(bootstrap), rustc_intrinsic)]
+#[cfg_attr(not(bootstrap), miri::intrinsic_fallback_is_spec)]
+pub const fn carrying_mul_add<T: Copy + 'static>(
+    multiplier: T,
+    multiplicand: T,
+    addend: T,
+    carry: T,
+) -> (T, T) {
+    fallback::carrying_mul_add(multiplier, multiplicand, addend, carry)
+}
+
 /// Performs an exact division, resulting in undefined behavior where
 /// `x % y != 0` or `y == 0` or `x == T::MIN && y == -1`
 ///
diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
@@ -113,6 +113,7 @@
 #![feature(const_align_of_val_raw)]
 #![feature(const_alloc_layout)]
 #![feature(const_black_box)]
+#![feature(const_carrying_mul_add)]
 #![feature(const_eq_ignore_ascii_case)]
 #![feature(const_eval_select)]
 #![feature(const_heap)]
diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs
@@ -203,134 +203,6 @@ macro_rules! midpoint_impl {
     };
 }
 
-macro_rules! widening_impl {
-    ($SelfT:ty, $WideT:ty, $BITS:literal, unsigned) => {
-        /// Calculates the complete product `self * rhs` without the possibility to overflow.
-        ///
-        /// This returns the low-order (wrapping) bits and the high-order (overflow) bits
-        /// of the result as two separate values, in that order.
-        ///
-        /// If you also need to add a carry to the wide result, then you want
-        /// [`Self::carrying_mul`] instead.
-        ///
-        /// # Examples
-        ///
-        /// Basic usage:
-        ///
-        /// Please note that this example is shared between integer types.
-        /// Which explains why `u32` is used here.
-        ///
-        /// ```
-        /// #![feature(bigint_helper_methods)]
-        /// assert_eq!(5u32.widening_mul(2), (10, 0));
-        /// assert_eq!(1_000_000_000u32.widening_mul(10), (1410065408, 2));
-        /// ```
-        #[unstable(feature = "bigint_helper_methods", issue = "85532")]
-        #[must_use = "this returns the result of the operation, \
-                      without modifying the original"]
-        #[inline]
-        pub const fn widening_mul(self, rhs: Self) -> (Self, Self) {
-            // note: longer-term this should be done via an intrinsic,
-            //   but for now we can deal without an impl for u128/i128
-            // SAFETY: overflow will be contained within the wider types
-            let wide = unsafe { (self as $WideT).unchecked_mul(rhs as $WideT) };
-            (wide as $SelfT, (wide >> $BITS) as $SelfT)
-        }
-
-        /// Calculates the "full multiplication" `self * rhs + carry`
-        /// without the possibility to overflow.
-        ///
-        /// This returns the low-order (wrapping) bits and the high-order (overflow) bits
-        /// of the result as two separate values, in that order.
-        ///
-        /// Performs "long multiplication" which takes in an extra amount to add, and may return an
-        /// additional amount of overflow. This allows for chaining together multiple
-        /// multiplications to create "big integers" which represent larger values.
-        ///
-        /// If you don't need the `carry`, then you can use [`Self::widening_mul`] instead.
-        ///
-        /// # Examples
-        ///
-        /// Basic usage:
-        ///
-        /// Please note that this example is shared between integer types.
-        /// Which explains why `u32` is used here.
-        ///
-        /// ```
-        /// #![feature(bigint_helper_methods)]
-        /// assert_eq!(5u32.carrying_mul(2, 0), (10, 0));
-        /// assert_eq!(5u32.carrying_mul(2, 10), (20, 0));
-        /// assert_eq!(1_000_000_000u32.carrying_mul(10, 0), (1410065408, 2));
-        /// assert_eq!(1_000_000_000u32.carrying_mul(10, 10), (1410065418, 2));
-        #[doc = concat!("assert_eq!(",
-            stringify!($SelfT), "::MAX.carrying_mul(", stringify!($SelfT), "::MAX, ", stringify!($SelfT), "::MAX), ",
-            "(0, ", stringify!($SelfT), "::MAX));"
-        )]
-        /// ```
-        ///
-        /// This is the core operation needed for scalar multiplication when
-        /// implementing it for wider-than-native types.
-        ///
-        /// ```
-        /// #![feature(bigint_helper_methods)]
-        /// fn scalar_mul_eq(little_endian_digits: &mut Vec<u16>, multiplicand: u16) {
-        ///     let mut carry = 0;
-        ///     for d in little_endian_digits.iter_mut() {
-        ///         (*d, carry) = d.carrying_mul(multiplicand, carry);
-        ///     }
-        ///     if carry != 0 {
-        ///         little_endian_digits.push(carry);
-        ///     }
-        /// }
-        ///
-        /// let mut v = vec![10, 20];
-        /// scalar_mul_eq(&mut v, 3);
-        /// assert_eq!(v, [30, 60]);
-        ///
-        /// assert_eq!(0x87654321_u64 * 0xFEED, 0x86D3D159E38D);
-        /// let mut v = vec![0x4321, 0x8765];
-        /// scalar_mul_eq(&mut v, 0xFEED);
-        /// assert_eq!(v, [0xE38D, 0xD159, 0x86D3]);
-        /// ```
-        ///
-        /// If `carry` is zero, this is similar to [`overflowing_mul`](Self::overflowing_mul),
-        /// except that it gives the value of the overflow instead of just whether one happened:
-        ///
-        /// ```
-        /// #![feature(bigint_helper_methods)]
-        /// let r = u8::carrying_mul(7, 13, 0);
-        /// assert_eq!((r.0, r.1 != 0), u8::overflowing_mul(7, 13));
-        /// let r = u8::carrying_mul(13, 42, 0);
-        /// assert_eq!((r.0, r.1 != 0), u8::overflowing_mul(13, 42));
-        /// ```
-        ///
-        /// The value of the first field in the returned tuple matches what you'd get
-        /// by combining the [`wrapping_mul`](Self::wrapping_mul) and
-        /// [`wrapping_add`](Self::wrapping_add) methods:
-        ///
-        /// ```
-        /// #![feature(bigint_helper_methods)]
-        /// assert_eq!(
-        ///     789_u16.carrying_mul(456, 123).0,
-        ///     789_u16.wrapping_mul(456).wrapping_add(123),
-        /// );
-        /// ```
-        #[unstable(feature = "bigint_helper_methods", issue = "85532")]
-        #[must_use = "this returns the result of the operation, \
-                      without modifying the original"]
-        #[inline]
-        pub const fn carrying_mul(self, rhs: Self, carry: Self) -> (Self, Self) {
-            // note: longer-term this should be done via an intrinsic,
-            //   but for now we can deal without an impl for u128/i128
-            // SAFETY: overflow will be contained within the wider types
-            let wide = unsafe {
-                (self as $WideT).unchecked_mul(rhs as $WideT).unchecked_add(carry as $WideT)
-            };
-            (wide as $SelfT, (wide >> $BITS) as $SelfT)
-        }
-    };
-}
-
 impl i8 {
     int_impl! {
         Self = i8,
@@ -551,7 +423,6 @@ impl u8 {
         from_xe_bytes_doc = "",
         bound_condition = "",
     }
-    widening_impl! { u8, u16, 8, unsigned }
     midpoint_impl! { u8, u16, unsigned }
 
     /// Checks if the value is within the ASCII range.
@@ -1167,7 +1038,6 @@ impl u16 {
         from_xe_bytes_doc = "",
         bound_condition = "",
     }
-    widening_impl! { u16, u32, 16, unsigned }
     midpoint_impl! { u16, u32, unsigned }
 
     /// Checks if the value is a Unicode surrogate code point, which are disallowed values for [`char`].
@@ -1215,7 +1085,6 @@ impl u32 {
         from_xe_bytes_doc = "",
         bound_condition = "",
     }
-    widening_impl! { u32, u64, 32, unsigned }
     midpoint_impl! { u32, u64, unsigned }
 }
 
@@ -1239,7 +1108,6 @@ impl u64 {
         from_xe_bytes_doc = "",
         bound_condition = "",
     }
-    widening_impl! { u64, u128, 64, unsigned }
     midpoint_impl! { u64, u128, unsigned }
 }
 
@@ -1289,7 +1157,6 @@ impl usize {
         from_xe_bytes_doc = usize_isize_from_xe_bytes_doc!(),
         bound_condition = " on 16-bit targets",
     }
-    widening_impl! { usize, u32, 16, unsigned }
     midpoint_impl! { usize, u32, unsigned }
 }
 
@@ -1314,7 +1181,6 @@ impl usize {
         from_xe_bytes_doc = usize_isize_from_xe_bytes_doc!(),
         bound_condition = " on 32-bit targets",
     }
-    widening_impl! { usize, u64, 32, unsigned }
     midpoint_impl! { usize, u64, unsigned }
 }
 
@@ -1339,7 +1205,6 @@ impl usize {
         from_xe_bytes_doc = usize_isize_from_xe_bytes_doc!(),
         bound_condition = " on 64-bit targets",
     }
-    widening_impl! { usize, u128, 64, unsigned }
     midpoint_impl! { usize, u128, unsigned }
 }
 
diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs
diff --git a/library/core/tests/intrinsics.rs b/library/core/tests/intrinsics.rs
diff --git a/library/core/tests/lib.rs b/library/core/tests/lib.rs