Skip to content

Commit e18adb3

Browse files
authored
Try #236:
2 parents e77ffac + 83e8d2b commit e18adb3

File tree

5 files changed

+59
-6
lines changed

5 files changed

+59
-6
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ jobs:
1818
1.36.0, # alloc, rand
1919
1.40.0, # arbitrary
2020
1.46.0, # quickcheck
21+
1.59.0, # asm!
2122
stable,
2223
beta,
2324
nightly

bors.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ status = [
44
"Test (1.36.0)",
55
"Test (1.40.0)",
66
"Test (1.46.0)",
7+
"Test (1.59.0)",
78
"Test (stable)",
89
"Test (beta)",
910
"Test (nightly)",

build.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ fn main() {
1010
if u64_digit {
1111
autocfg::emit("u64_digit");
1212
}
13+
1314
let ac = autocfg::new();
1415
let std = if ac.probe_sysroot_crate("std") {
1516
"std"
@@ -28,6 +29,10 @@ fn main() {
2829
if ac.probe_path(&addcarry) {
2930
autocfg::emit("use_addcarry");
3031
}
32+
33+
if ac.probe_path(&format!("{}::arch::asm", std)) {
34+
autocfg::emit("use_x86_div");
35+
}
3136
}
3237
}
3338

src/biguint/convert.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -657,16 +657,23 @@ pub(super) fn to_radix_digits_le(u: &BigUint, radix: u32) -> Vec<u8> {
657657

658658
let mut digits = u.clone();
659659

660-
let (base, power) = get_radix_base(radix, big_digit::HALF_BITS);
660+
// The x86 `div` instruction can quickly divide by a full digit; otherwise we choose a divisor
661+
// that's suitable for `div_half` to avoid slow `DoubleBigDigit` division.
662+
let bits = if cfg!(use_x86_div) {
663+
big_digit::BITS
664+
} else {
665+
big_digit::HALF_BITS
666+
};
667+
let (base, power) = get_radix_base(radix, bits);
661668
let radix = radix as BigDigit;
662669

663670
// For very large numbers, the O(n²) loop of repeated `div_rem_digit` dominates the
664671
// performance. We can mitigate this by dividing into chunks of a larger base first.
665672
// The threshold for this was chosen by anecdotal performance measurements to
666673
// approximate where this starts to make a noticeable difference.
667674
if digits.data.len() >= 64 {
668-
let mut big_base = BigUint::from(base * base);
669-
let mut big_power = 2usize;
675+
let mut big_base = BigUint::from(base);
676+
let mut big_power = 1usize;
670677

671678
// Choose a target base length near √n.
672679
let target_len = digits.data.len().sqrt();

src/biguint/division.rs

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use num_traits::{CheckedDiv, One, ToPrimitive, Zero};
1818
/// This is _not_ true for an arbitrary numerator/denominator.
1919
///
2020
/// (This function also matches what the x86 divide instruction does).
21+
#[cfg(not(use_x86_div))]
2122
#[inline]
2223
fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
2324
debug_assert!(hi < divisor);
@@ -27,6 +28,44 @@ fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigi
2728
((lhs / rhs) as BigDigit, (lhs % rhs) as BigDigit)
2829
}
2930

31+
/// With Rust 1.59+, which has stable `asm!`, x86 and x86_64 can use a real `div` instruction.
32+
#[cfg(use_x86_div)]
33+
#[inline]
34+
fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
35+
// This debug assertion rules out both causes of a #DE (divide error): `divisor == 0`, or a
36+
// quotient too large for one register. Release builds skip it, so a violation there becomes a
37+
// target-specific fault like SIGFPE. That should never occur with our few `div_wide` callers.
38+
debug_assert!(hi < divisor);
39+
40+
// SAFETY: The `div` instruction only affects registers, reading the explicit operand as the
41+
// divisor, and implicitly reading RDX:RAX or EDX:EAX as the dividend. The result is implicitly
42+
// written back to RAX or EAX for the quotient and RDX or EDX for the remainder. No memory is
43+
// accessed, and the flags are clobbered, so the `preserves_flags` option is not specified.
44+
unsafe {
45+
let (div, rem);
46+
47+
#[cfg(u64_digit)]
48+
core::arch::asm!(
49+
"div {:r}",
50+
in(reg) divisor,
51+
inout("rdx") hi => rem,
52+
inout("rax") lo => div,
53+
options(pure, nomem, nostack),
54+
);
55+
56+
#[cfg(not(u64_digit))]
57+
core::arch::asm!(
58+
"div {:e}",
59+
in(reg) divisor,
60+
inout("edx") hi => rem,
61+
inout("eax") lo => div,
62+
options(pure, nomem, nostack),
63+
);
64+
65+
(div, rem)
66+
}
67+
}
68+
3069
/// For small divisors, we can divide without promoting to `DoubleBigDigit` by
3170
/// using half-size pieces of digit, like long-division.
3271
#[inline]
@@ -47,7 +86,7 @@ pub(super) fn div_rem_digit(mut a: BigUint, b: BigDigit) -> (BigUint, BigDigit)
4786

4887
let mut rem = 0;
4988

50-
if b <= big_digit::HALF {
89+
if !cfg!(use_x86_div) && b <= big_digit::HALF {
5190
for d in a.data.iter_mut().rev() {
5291
let (q, r) = div_half(rem, *d, b);
5392
*d = q;
@@ -72,7 +111,7 @@ fn rem_digit(a: &BigUint, b: BigDigit) -> BigDigit {
72111

73112
let mut rem = 0;
74113

75-
if b <= big_digit::HALF {
114+
if !cfg!(use_x86_div) && b <= big_digit::HALF {
76115
for &digit in a.data.iter().rev() {
77116
let (_, r) = div_half(rem, digit, b);
78117
rem = r;
@@ -232,7 +271,7 @@ fn div_rem_core(mut a: BigUint, b: &[BigDigit]) -> (BigUint, BigUint) {
232271
let mut a0 = 0;
233272

234273
// [b1, b0] are the two most significant digits of the divisor. They never change.
235-
let b0 = *b.last().unwrap();
274+
let b0 = b[b.len() - 1];
236275
let b1 = b[b.len() - 2];
237276

238277
let q_len = a.data.len() - b.len() + 1;

0 commit comments

Comments
 (0)