|
6 | 6 | /// zero bit in the rest of the modulus.
|
7 | 7 | macro_rules! impl_field_mul_assign {
|
8 | 8 | ($limbs:expr) => {
|
9 |
| - #[inline] |
10 |
| - #[ark_ff_asm::unroll_for_loops] |
11 |
| - fn mul_assign(&mut self, other: &Self) { |
12 |
| - // Checking the modulus at compile time |
13 |
| - let first_bit_set = P::MODULUS.0[$limbs - 1] >> 63 != 0; |
14 |
| - // $limbs can be 1, hence we can run into a case with an unused mut. |
15 |
| - #[allow(unused_mut)] |
16 |
| - let mut all_bits_set = P::MODULUS.0[$limbs - 1] == !0 - (1 << 63); |
17 |
| - for i in 1..$limbs { |
18 |
| - all_bits_set &= P::MODULUS.0[$limbs - i - 1] == !0u64; |
19 |
| - } |
20 |
| - let _no_carry: bool = !(first_bit_set || all_bits_set); |
21 |
| - |
22 |
| - // No-carry optimisation applied to CIOS |
23 |
| - if _no_carry { |
24 |
| - #[cfg(use_asm)] |
25 |
| - #[allow(unsafe_code, unused_mut)] |
26 |
| - { |
27 |
| - // Tentatively avoid using assembly for `$limbs == 1`. |
28 |
| - if $limbs <= 6 && $limbs > 1 { |
29 |
| - assert!($limbs <= 6); |
30 |
| - ark_ff_asm::x86_64_asm_mul!($limbs, (self.0).0, (other.0).0); |
31 |
| - self.reduce(); |
32 |
| - return; |
33 |
| - } |
34 |
| - } |
| 9 | + paste::paste! { |
| 10 | + #[inline] |
| 11 | + #[ark_ff_asm::unroll_for_loops] |
| 12 | + fn [<mul_assign _id $limbs>]<P: FpParams<N>, const N: usize>( |
| 13 | + input: &mut [u64; N], |
| 14 | + other: [u64; N], |
| 15 | + ) { |
35 | 16 | let mut r = [0u64; $limbs];
|
36 | 17 | let mut carry1 = 0u64;
|
37 | 18 | let mut carry2 = 0u64;
|
38 | 19 |
|
39 | 20 | for i in 0..$limbs {
|
40 |
| - r[0] = fa::mac(r[0], (self.0).0[0], (other.0).0[i], &mut carry1); |
| 21 | + r[0] = fa::mac(r[0], input[0], other[i], &mut carry1); |
41 | 22 | let k = r[0].wrapping_mul(P::INV);
|
42 | 23 | fa::mac_discard(r[0], k, P::MODULUS.0[0], &mut carry2);
|
43 | 24 | for j in 1..$limbs {
|
44 |
| - r[j] = mac_with_carry!(r[j], (self.0).0[j], (other.0).0[i], &mut carry1); |
| 25 | + r[j] = mac_with_carry!(r[j], input[j], other[i], &mut carry1); |
45 | 26 | r[j - 1] = mac_with_carry!(r[j], k, P::MODULUS.0[j], &mut carry2);
|
46 | 27 | }
|
47 | 28 | r[$limbs - 1] = carry1 + carry2;
|
48 | 29 | }
|
49 |
| - (self.0).0 = r; |
50 |
| - self.reduce(); |
51 |
| - // Alternative implementation |
52 |
| - } else { |
53 |
| - *self = self.mul_without_reduce(other, P::MODULUS, P::INV); |
54 |
| - self.reduce(); |
| 30 | + input.copy_from_slice(&r[..]); |
55 | 31 | }
|
56 | 32 | }
|
57 | 33 | };
|
@@ -84,88 +60,67 @@ macro_rules! impl_field_into_repr {
|
84 | 60 |
|
85 | 61 | macro_rules! impl_field_square_in_place {
|
86 | 62 | ($limbs: expr) => {
|
87 |
| - #[inline] |
88 |
| - // #[ark_ff_asm::unroll_for_loops] |
89 |
| - #[allow(unused_braces, clippy::absurd_extreme_comparisons)] |
90 |
| - fn square_in_place(&mut self) -> &mut Self { |
91 |
| - if $limbs == 1 { |
92 |
| - *self = *self * *self; |
93 |
| - return self; |
94 |
| - } |
95 |
| - #[cfg(use_asm)] |
96 |
| - #[allow(unsafe_code, unused_mut)] |
97 |
| - { |
98 |
| - // Checking the modulus at compile time |
99 |
| - let first_bit_set = P::MODULUS.0[$limbs - 1] >> 63 != 0; |
100 |
| - let mut all_bits_set = P::MODULUS.0[$limbs - 1] == !0 - (1 << 63); |
101 |
| - for i in 1..$limbs { |
102 |
| - all_bits_set &= P::MODULUS.0[$limbs - i - 1] == core::u64::MAX; |
103 |
| - } |
104 |
| - let _no_carry: bool = !(first_bit_set || all_bits_set); |
105 |
| - |
106 |
| - if $limbs <= 6 && _no_carry { |
107 |
| - assert!($limbs <= 6); |
108 |
| - ark_ff_asm::x86_64_asm_square!($limbs, (self.0).0); |
109 |
| - self.reduce(); |
110 |
| - return self; |
111 |
| - } |
112 |
| - } |
113 |
| - let mut r = [[0u64; $limbs]; 2].concat(); |
114 |
| - |
115 |
| - let mut carry = 0; |
116 |
| - for i in 0..$limbs { |
117 |
| - if i < $limbs - 1 { |
118 |
| - for j in 0..$limbs { |
119 |
| - if j > i { |
120 |
| - r[i + j] = |
121 |
| - mac_with_carry!(r[i + j], (self.0).0[i], (self.0).0[j], &mut carry); |
| 63 | + paste::paste! { |
| 64 | + #[inline(always)] |
| 65 | + #[ark_ff_asm::unroll_for_loops] |
| 66 | + #[allow(unused_braces, clippy::absurd_extreme_comparisons)] |
| 67 | + fn [<square_in_place _id $limbs>]<P: FpParams<N>, const N: usize>( |
| 68 | + input: &mut [u64; N], |
| 69 | + ) { |
| 70 | + let mut r = [0u64; $limbs * 2]; |
| 71 | + let mut carry = 0; |
| 72 | + for i in 0..$limbs { |
| 73 | + if i < $limbs - 1 { |
| 74 | + for j in 0..$limbs { |
| 75 | + if j > i { |
| 76 | + r[i + j] = |
| 77 | + mac_with_carry!(r[i + j], input[i], input[j], &mut carry); |
| 78 | + } |
122 | 79 | }
|
| 80 | + r[$limbs + i] = carry; |
| 81 | + carry = 0; |
123 | 82 | }
|
124 |
| - r[$limbs + i] = carry; |
125 |
| - carry = 0; |
126 | 83 | }
|
127 |
| - } |
128 |
| - r[$limbs * 2 - 1] = r[$limbs * 2 - 2] >> 63; |
129 |
| - for i in 0..$limbs { |
130 |
| - // This computes `r[2 * ($limbs - 1) - (i + 1)]`, but additionally |
131 |
| - // handles the case where the index underflows. |
132 |
| - // Note that we should never hit this case because it only occurs |
133 |
| - // when `$limbs == 1`, but we handle that separately above. |
134 |
| - let subtractor = (2 * ($limbs - 1usize)) |
135 |
| - .checked_sub(i + 1) |
136 |
| - .map(|index| r[index]) |
137 |
| - .unwrap_or(0); |
138 |
| - r[2 * ($limbs - 1) - i] = (r[2 * ($limbs - 1) - i] << 1) | (subtractor >> 63); |
139 |
| - } |
140 |
| - for i in 3..$limbs { |
141 |
| - r[$limbs + 1 - i] = (r[$limbs + 1 - i] << 1) | (r[$limbs - i] >> 63); |
142 |
| - } |
143 |
| - r[1] <<= 1; |
| 84 | + r[$limbs * 2 - 1] = r[$limbs * 2 - 2] >> 63; |
| 85 | + for i in 0..$limbs { |
| 86 | + // This computes `r[2 * ($limbs - 1) - (i + 1)]`, but additionally |
| 87 | + // handles the case where the index underflows. |
| 88 | + // Note that we should never hit this case because it only occurs |
| 89 | + // when `$limbs == 1`, but we handle that separately above. |
| 90 | + let subtractor = (2 * ($limbs - 1usize)) |
| 91 | + .checked_sub(i + 1) |
| 92 | + .map(|index| r[index]) |
| 93 | + .unwrap_or(0); |
| 94 | + r[2 * ($limbs - 1) - i] = (r[2 * ($limbs - 1) - i] << 1) | (subtractor >> 63); |
| 95 | + } |
| 96 | + for i in 3..$limbs { |
| 97 | + r[$limbs + 1 - i] = (r[$limbs + 1 - i] << 1) | (r[$limbs - i] >> 63); |
| 98 | + } |
| 99 | + r[1] <<= 1; |
144 | 100 |
|
145 |
| - for i in 0..$limbs { |
146 |
| - r[2 * i] = mac_with_carry!(r[2 * i], (self.0).0[i], (self.0).0[i], &mut carry); |
147 |
| - // need unused assignment because the last iteration of the loop produces an |
148 |
| - // assignment to `carry` that is unused. |
149 |
| - #[allow(unused_assignments)] |
150 |
| - { |
151 |
| - r[2 * i + 1] = adc!(r[2 * i + 1], 0, &mut carry); |
| 101 | + for i in 0..$limbs { |
| 102 | + r[2 * i] = mac_with_carry!(r[2 * i], input[i], input[i], &mut carry); |
| 103 | + // need unused assignment because the last iteration of the loop produces an |
| 104 | + // assignment to `carry` that is unused. |
| 105 | + #[allow(unused_assignments)] |
| 106 | + { |
| 107 | + r[2 * i + 1] = adc!(r[2 * i + 1], 0, &mut carry); |
| 108 | + } |
152 | 109 | }
|
153 |
| - } |
154 |
| - // Montgomery reduction |
155 |
| - let mut _carry2 = 0; |
156 |
| - for i in 0..$limbs { |
157 |
| - let k = r[i].wrapping_mul(P::INV); |
158 |
| - let mut carry = 0; |
159 |
| - mac_with_carry!(r[i], k, P::MODULUS.0[0], &mut carry); |
160 |
| - for j in 1..$limbs { |
161 |
| - r[j + i] = mac_with_carry!(r[j + i], k, P::MODULUS.0[j], &mut carry); |
| 110 | + // Montgomery reduction |
| 111 | + let mut _carry2 = 0; |
| 112 | + for i in 0..$limbs { |
| 113 | + let k = r[i].wrapping_mul(P::INV); |
| 114 | + let mut carry = 0; |
| 115 | + mac_with_carry!(r[i], k, P::MODULUS.0[0], &mut carry); |
| 116 | + for j in 1..$limbs { |
| 117 | + r[j + i] = mac_with_carry!(r[j + i], k, P::MODULUS.0[j], &mut carry); |
| 118 | + } |
| 119 | + r[$limbs + i] = adc!(r[$limbs + i], _carry2, &mut carry); |
| 120 | + _carry2 = carry; |
162 | 121 | }
|
163 |
| - r[$limbs + i] = adc!(r[$limbs + i], _carry2, &mut carry); |
164 |
| - _carry2 = carry; |
| 122 | + input.copy_from_slice(&r[N..]); |
165 | 123 | }
|
166 |
| - (self.0).0.copy_from_slice(&r[$limbs..]); |
167 |
| - self.reduce(); |
168 |
| - self |
169 | 124 | }
|
170 | 125 | };
|
171 | 126 | }
|
|
0 commit comments