Skip to content

Commit 295dc5f

Browse files
committed
make NO_CARRY a const param now, instantiate fns for each # $limbs
1 parent dfd7c14 commit 295dc5f

File tree

5 files changed

+176
-135
lines changed

5 files changed

+176
-135
lines changed

ff/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ num-traits = { version = "0.2", default-features = false }
2323
rand = { version = "0.7", default-features = false }
2424
rayon = { version = "1", optional = true }
2525
zeroize = { version = "1", default-features = false, features = ["zeroize_derive"] }
26+
paste = "1.0.4"
2627

2728
[build-dependencies]
2829
rustc_version = "0.3"

ff/src/biginteger/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,13 @@ pub struct BigInt<const N: usize>(pub [u64; N]);
2323

2424
impl<const N: usize> Default for BigInt<N> {
2525
fn default() -> Self {
26-
BigInt::<N>([0u64; N])
26+
Self([0u64; N])
2727
}
2828
}
2929

3030
impl<const N: usize> BigInt<N> {
3131
pub const fn new(value: [u64; N]) -> Self {
32-
BigInt::<N>(value)
32+
Self(value)
3333
}
3434
}
3535

ff/src/fields/arithmetic.rs

Lines changed: 64 additions & 109 deletions
Original file line number | Diff line number | Diff line change
@@ -6,52 +6,28 @@
66
/// zero bit in the rest of the modulus.
77
macro_rules! impl_field_mul_assign {
88
($limbs:expr) => {
9-
#[inline]
10-
#[ark_ff_asm::unroll_for_loops]
11-
fn mul_assign(&mut self, other: &Self) {
12-
// Checking the modulus at compile time
13-
let first_bit_set = P::MODULUS.0[$limbs - 1] >> 63 != 0;
14-
// $limbs can be 1, hence we can run into a case with an unused mut.
15-
#[allow(unused_mut)]
16-
let mut all_bits_set = P::MODULUS.0[$limbs - 1] == !0 - (1 << 63);
17-
for i in 1..$limbs {
18-
all_bits_set &= P::MODULUS.0[$limbs - i - 1] == !0u64;
19-
}
20-
let _no_carry: bool = !(first_bit_set || all_bits_set);
21-
22-
// No-carry optimisation applied to CIOS
23-
if _no_carry {
24-
#[cfg(use_asm)]
25-
#[allow(unsafe_code, unused_mut)]
26-
{
27-
// Tentatively avoid using assembly for `$limbs == 1`.
28-
if $limbs <= 6 && $limbs > 1 {
29-
assert!($limbs <= 6);
30-
ark_ff_asm::x86_64_asm_mul!($limbs, (self.0).0, (other.0).0);
31-
self.reduce();
32-
return;
33-
}
34-
}
9+
paste::paste! {
10+
#[inline]
11+
#[ark_ff_asm::unroll_for_loops]
12+
fn [<mul_assign _id $limbs>]<P: FpParams<N>, const N: usize>(
13+
input: &mut [u64; N],
14+
other: [u64; N],
15+
) {
3516
let mut r = [0u64; $limbs];
3617
let mut carry1 = 0u64;
3718
let mut carry2 = 0u64;
3819

3920
for i in 0..$limbs {
40-
r[0] = fa::mac(r[0], (self.0).0[0], (other.0).0[i], &mut carry1);
21+
r[0] = fa::mac(r[0], input[0], other[i], &mut carry1);
4122
let k = r[0].wrapping_mul(P::INV);
4223
fa::mac_discard(r[0], k, P::MODULUS.0[0], &mut carry2);
4324
for j in 1..$limbs {
44-
r[j] = mac_with_carry!(r[j], (self.0).0[j], (other.0).0[i], &mut carry1);
25+
r[j] = mac_with_carry!(r[j], input[j], other[i], &mut carry1);
4526
r[j - 1] = mac_with_carry!(r[j], k, P::MODULUS.0[j], &mut carry2);
4627
}
4728
r[$limbs - 1] = carry1 + carry2;
4829
}
49-
(self.0).0 = r;
50-
self.reduce();
51-
// Alternative implementation
52-
} else {
53-
*self = self.mul_without_reduce(other, P::MODULUS, P::INV);
54-
self.reduce();
30+
input.copy_from_slice(&r[..]);
5531
}
5632
}
5733
};
@@ -84,88 +60,67 @@ macro_rules! impl_field_into_repr {
8460

8561
macro_rules! impl_field_square_in_place {
8662
($limbs: expr) => {
87-
#[inline]
88-
// #[ark_ff_asm::unroll_for_loops]
89-
#[allow(unused_braces, clippy::absurd_extreme_comparisons)]
90-
fn square_in_place(&mut self) -> &mut Self {
91-
if $limbs == 1 {
92-
*self = *self * *self;
93-
return self;
94-
}
95-
#[cfg(use_asm)]
96-
#[allow(unsafe_code, unused_mut)]
97-
{
98-
// Checking the modulus at compile time
99-
let first_bit_set = P::MODULUS.0[$limbs - 1] >> 63 != 0;
100-
let mut all_bits_set = P::MODULUS.0[$limbs - 1] == !0 - (1 << 63);
101-
for i in 1..$limbs {
102-
all_bits_set &= P::MODULUS.0[$limbs - i - 1] == core::u64::MAX;
103-
}
104-
let _no_carry: bool = !(first_bit_set || all_bits_set);
105-
106-
if $limbs <= 6 && _no_carry {
107-
assert!($limbs <= 6);
108-
ark_ff_asm::x86_64_asm_square!($limbs, (self.0).0);
109-
self.reduce();
110-
return self;
111-
}
112-
}
113-
let mut r = [[0u64; $limbs]; 2].concat();
114-
115-
let mut carry = 0;
116-
for i in 0..$limbs {
117-
if i < $limbs - 1 {
118-
for j in 0..$limbs {
119-
if j > i {
120-
r[i + j] =
121-
mac_with_carry!(r[i + j], (self.0).0[i], (self.0).0[j], &mut carry);
63+
paste::paste! {
64+
#[inline(always)]
65+
#[ark_ff_asm::unroll_for_loops]
66+
#[allow(unused_braces, clippy::absurd_extreme_comparisons)]
67+
fn [<square_in_place _id $limbs>]<P: FpParams<N>, const N: usize>(
68+
input: &mut [u64; N],
69+
) {
70+
let mut r = [0u64; $limbs * 2];
71+
let mut carry = 0;
72+
for i in 0..$limbs {
73+
if i < $limbs - 1 {
74+
for j in 0..$limbs {
75+
if j > i {
76+
r[i + j] =
77+
mac_with_carry!(r[i + j], input[i], input[j], &mut carry);
78+
}
12279
}
80+
r[$limbs + i] = carry;
81+
carry = 0;
12382
}
124-
r[$limbs + i] = carry;
125-
carry = 0;
12683
}
127-
}
128-
r[$limbs * 2 - 1] = r[$limbs * 2 - 2] >> 63;
129-
for i in 0..$limbs {
130-
// This computes `r[2 * ($limbs - 1) - (i + 1)]`, but additionally
131-
// handles the case where the index underflows.
132-
// Note that we should never hit this case because it only occurs
133-
// when `$limbs == 1`, but we handle that separately above.
134-
let subtractor = (2 * ($limbs - 1usize))
135-
.checked_sub(i + 1)
136-
.map(|index| r[index])
137-
.unwrap_or(0);
138-
r[2 * ($limbs - 1) - i] = (r[2 * ($limbs - 1) - i] << 1) | (subtractor >> 63);
139-
}
140-
for i in 3..$limbs {
141-
r[$limbs + 1 - i] = (r[$limbs + 1 - i] << 1) | (r[$limbs - i] >> 63);
142-
}
143-
r[1] <<= 1;
84+
r[$limbs * 2 - 1] = r[$limbs * 2 - 2] >> 63;
85+
for i in 0..$limbs {
86+
// This computes `r[2 * ($limbs - 1) - (i + 1)]`, but additionally
87+
// handles the case where the index underflows.
88+
// Note that we should never hit this case because it only occurs
89+
// when `$limbs == 1`, but we handle that separately above.
90+
let subtractor = (2 * ($limbs - 1usize))
91+
.checked_sub(i + 1)
92+
.map(|index| r[index])
93+
.unwrap_or(0);
94+
r[2 * ($limbs - 1) - i] = (r[2 * ($limbs - 1) - i] << 1) | (subtractor >> 63);
95+
}
96+
for i in 3..$limbs {
97+
r[$limbs + 1 - i] = (r[$limbs + 1 - i] << 1) | (r[$limbs - i] >> 63);
98+
}
99+
r[1] <<= 1;
144100

145-
for i in 0..$limbs {
146-
r[2 * i] = mac_with_carry!(r[2 * i], (self.0).0[i], (self.0).0[i], &mut carry);
147-
// need unused assignment because the last iteration of the loop produces an
148-
// assignment to `carry` that is unused.
149-
#[allow(unused_assignments)]
150-
{
151-
r[2 * i + 1] = adc!(r[2 * i + 1], 0, &mut carry);
101+
for i in 0..$limbs {
102+
r[2 * i] = mac_with_carry!(r[2 * i], input[i], input[i], &mut carry);
103+
// need unused assignment because the last iteration of the loop produces an
104+
// assignment to `carry` that is unused.
105+
#[allow(unused_assignments)]
106+
{
107+
r[2 * i + 1] = adc!(r[2 * i + 1], 0, &mut carry);
108+
}
152109
}
153-
}
154-
// Montgomery reduction
155-
let mut _carry2 = 0;
156-
for i in 0..$limbs {
157-
let k = r[i].wrapping_mul(P::INV);
158-
let mut carry = 0;
159-
mac_with_carry!(r[i], k, P::MODULUS.0[0], &mut carry);
160-
for j in 1..$limbs {
161-
r[j + i] = mac_with_carry!(r[j + i], k, P::MODULUS.0[j], &mut carry);
110+
// Montgomery reduction
111+
let mut _carry2 = 0;
112+
for i in 0..$limbs {
113+
let k = r[i].wrapping_mul(P::INV);
114+
let mut carry = 0;
115+
mac_with_carry!(r[i], k, P::MODULUS.0[0], &mut carry);
116+
for j in 1..$limbs {
117+
r[j + i] = mac_with_carry!(r[j + i], k, P::MODULUS.0[j], &mut carry);
118+
}
119+
r[$limbs + i] = adc!(r[$limbs + i], _carry2, &mut carry);
120+
_carry2 = carry;
162121
}
163-
r[$limbs + i] = adc!(r[$limbs + i], _carry2, &mut carry);
164-
_carry2 = carry;
122+
input.copy_from_slice(&r[N..]);
165123
}
166-
(self.0).0.copy_from_slice(&r[$limbs..]);
167-
self.reduce();
168-
self
169124
}
170125
};
171126
}

ff/src/fields/models/fp.rs

Lines changed: 67 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,24 @@ use crate::{
1515
};
1616
use ark_serialize::*;
1717

18-
pub trait FpParams<const N: usize>: FpParameters<BigInt = BigInt<N>> {}
18+
invoke_16!(impl_field_square_in_place);
19+
invoke_16!(impl_field_mul_assign);
20+
21+
pub trait FpParams<const N: usize>: FpParameters<N, BigInt = BigInt<N>> {
22+
// Checking the modulus at compile time
23+
const NO_CARRY: bool = {
24+
let first_bit_set = Self::MODULUS.0[N - 1] >> 63 != 0;
25+
// $limbs can be 1, hence we can run into a case with an unused mut.
26+
#[allow(unused_mut)]
27+
let mut all_bits_set = Self::MODULUS.0[N - 1] == !0 - (1 << 63);
28+
let mut i = 1;
29+
while i < N {
30+
all_bits_set &= Self::MODULUS.0[N - i - 1] == !0u64;
31+
i += 1;
32+
}
33+
!(first_bit_set || all_bits_set)
34+
};
35+
}
1936

2037
#[derive(Derivative)]
2138
#[derivative(
@@ -70,7 +87,7 @@ impl<P, const N: usize> Fp<P, N> {
7087
modulus: BigInt<N>,
7188
inv: u64,
7289
) -> Self {
73-
let mut repr = BigInt::<N>([0; N]);
90+
let mut repr = P::BigInt([0; N]);
7491
let mut i = 0;
7592
while i < limbs.len() {
7693
repr.0[i] = limbs[i];
@@ -319,7 +336,29 @@ impl<P: FpParams<N>, const N: usize> Field for Fp<P, N> {
319336
temp
320337
}
321338

322-
impl_field_square_in_place!(N);
339+
#[inline]
340+
#[ark_ff_asm::unroll_for_loops]
341+
#[allow(unused_braces, clippy::absurd_extreme_comparisons)]
342+
fn square_in_place(&mut self) -> &mut Self {
343+
if N == 1 {
344+
*self = *self * *self;
345+
return self;
346+
}
347+
#[cfg(use_asm)]
348+
#[allow(unsafe_code, unused_mut)]
349+
{
350+
if N <= 6 && P::NO_CARRY {
351+
ark_ff_asm::x86_64_asm_square!($limbs, (self.0).0);
352+
self.reduce();
353+
return self;
354+
}
355+
}
356+
357+
let input = &mut (self.0).0;
358+
match_const!(square_in_place, N, input);
359+
self.reduce();
360+
self
361+
}
323362

324363
#[inline]
325364
fn inverse(&self) -> Option<Self> {
@@ -611,7 +650,7 @@ impl<P: FpParams<N>, const N: usize> ToBytes for Fp<P, N> {
611650
impl<P: FpParams<N>, const N: usize> FromBytes for Fp<P, N> {
612651
#[inline]
613652
fn read<R: Read>(reader: R) -> IoResult<Self> {
614-
BigInt::<N>::read(reader).and_then(|b| match Fp::<P, N>::from_repr(b) {
653+
P::BigInt::read(reader).and_then(|b| match Fp::<P, N>::from_repr(b) {
615654
Some(f) => Ok(f),
616655
None => Err(crate::error("FromBytes::read failed")),
617656
})
@@ -932,7 +971,30 @@ impl<'a, P: FpParams<N>, const N: usize> SubAssign<&'a Self> for Fp<P, N> {
932971
}
933972

934973
impl<'a, P: FpParams<N>, const N: usize> MulAssign<&'a Self> for Fp<P, N> {
935-
impl_field_mul_assign!(N);
974+
#[inline]
975+
#[ark_ff_asm::unroll_for_loops]
976+
fn mul_assign(&mut self, other: &Self) {
977+
// No-carry optimisation applied to CIOS
978+
if P::NO_CARRY {
979+
#[cfg(use_asm)]
980+
#[allow(unsafe_code, unused_mut)]
981+
{
982+
// Tentatively avoid using assembly for `$limbs == 1`.
983+
if N <= 6 && N > 1 {
984+
ark_ff_asm::x86_64_asm_mul!($limbs, input, other);
985+
self.reduce();
986+
return;
987+
}
988+
}
989+
let input = &mut (self.0).0;
990+
let other_ = (other.0).0;
991+
match_const!(mul_assign, N, input, other_);
992+
self.reduce();
993+
} else {
994+
*self = self.mul_without_reduce(other, P::MODULUS, P::INV);
995+
self.reduce();
996+
}
997+
}
936998
}
937999

9381000
impl<'a, P: FpParams<N>, const N: usize> DivAssign<&'a Self> for Fp<P, N> {

0 commit comments

Comments
 (0)