From 6fd13c1374905e3d30323ca0473223236eed7020 Mon Sep 17 00:00:00 2001 From: kilic Date: Tue, 23 Jan 2024 11:53:36 +0300 Subject: [PATCH 1/6] impl msm with batch addition --- src/msm.rs | 482 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 317 insertions(+), 165 deletions(-) diff --git a/src/msm.rs b/src/msm.rs index ae964cf7..dae94393 100644 --- a/src/msm.rs +++ b/src/msm.rs @@ -1,8 +1,10 @@ use std::ops::Neg; +use crate::CurveAffine; +use ff::Field; use ff::PrimeField; use group::Group; -use pasta_curves::arithmetic::CurveAffine; +use rayon::iter::{IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator}; fn get_booth_index(window_index: usize, window_size: usize, el: &[u8]) -> i32 { // Booth encoding: @@ -48,135 +50,302 @@ fn get_booth_index(window_index: usize, window_size: usize, el: &[u8]) -> i32 { } } -pub fn multiexp_serial(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) { - let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); +fn batch_add( + size: usize, + buckets: &mut [BucketAffine], + points: &[SchedulePoint], + bases: &[Affine], +) { + let mut t = vec![C::Base::ZERO; size]; + let mut z = vec![C::Base::ZERO; size]; + let mut acc = C::Base::ONE; + + for ( + ( + SchedulePoint { + base_idx, + buck_idx, + sign, + }, + t, + ), + z, + ) in points.iter().zip(t.iter_mut()).zip(z.iter_mut()) + { + *z = buckets[*buck_idx].x() - bases[*base_idx].x; + if *sign { + *t = acc * (buckets[*buck_idx].y() - bases[*base_idx].y); + } else { + *t = acc * (buckets[*buck_idx].y() + bases[*base_idx].y); + } + acc *= *z; + } - let c = if bases.len() < 4 { - 1 - } else if bases.len() < 32 { - 3 - } else { - (f64::from(bases.len() as u32)).ln().ceil() as usize - }; + acc = acc.invert().expect(":("); + + for ( + ( + SchedulePoint { + base_idx, + buck_idx, + sign, + }, + t, + ), + z, + ) in points.iter().zip(t.iter()).zip(z.iter()).rev() + { + let lambda = acc * t; + acc *= z; + + let x = lambda.square() - (buckets[*buck_idx].x() + bases[*base_idx].x); + if *sign { + buckets[*buck_idx].set_y(&((lambda * (bases[*base_idx].x - x)) - bases[*base_idx].y)); + } else { + buckets[*buck_idx].set_y(&((lambda * (bases[*base_idx].x - x)) + bases[*base_idx].y)); + } + buckets[*buck_idx].set_x(&x); + } +} - let number_of_windows = C::Scalar::NUM_BITS as usize / c + 1; +#[derive(Debug, Clone, Copy)] +struct Affine { + x: C::Base, + y: C::Base, +} - for current_window in (0..number_of_windows).rev() { - for _ in 0..c { - *acc = acc.double(); +impl Affine { + fn from(point: &C) -> Self { + let coords = point.coordinates().unwrap(); + + Self { + x: *coords.x(), + y: *coords.y(), } + } - #[derive(Clone, Copy)] - enum Bucket { - None, - Affine(C), - Projective(C::Curve), + fn neg(&self) -> Self { + Self { + x: self.x, + y: -self.y, } + } - impl Bucket { - fn add_assign(&mut self, other: &C) { - *self = match *self { - Bucket::None => Bucket::Affine(*other), - Bucket::Affine(a) => Bucket::Projective(a + *other), - Bucket::Projective(mut a) => { - a += *other; - Bucket::Projective(a) - } - } - } + fn eval(&self) -> C { + C::from_xy(self.x, self.y).unwrap() + } +} - fn add(self, mut other: C::Curve) -> C::Curve { - match self { - Bucket::None => other, - Bucket::Affine(a) => { - other += a; - other - } - Bucket::Projective(a) => other + a, +#[derive(Debug, Clone)] +enum BucketAffine { + None, + Point(Affine), +} + +#[derive(Debug, Clone)] +enum Bucket { + None, + Point(C::Curve), +} + +impl Bucket { + fn add_assign(&mut self, point: &C, sign: bool) { + *self = match *self { + Bucket::None => Bucket::Point({ + if sign { + point.to_curve() + } else { + point.to_curve().neg() + } + }), + Bucket::Point(a) => { + if sign { + Self::Point(a + point) + } else { + Self::Point(a - point) } } } + } - let mut buckets: Vec> = vec![Bucket::None; 1 << (c - 1)]; + fn add(&self, other: &BucketAffine) -> C::Curve { + match (self, other) { + (Self::Point(this), BucketAffine::Point(other)) => *this + other.eval(), + (Self::Point(this), BucketAffine::None) => *this, + (Self::None, BucketAffine::Point(other)) => other.eval().to_curve(), + (Self::None, BucketAffine::None) => C::Curve::identity(), + } + } +} - for (coeff, base) in coeffs.iter().zip(bases.iter()) { - let coeff = get_booth_index(current_window, c, coeff.as_ref()); - if coeff.is_positive() { - buckets[coeff as usize - 1].add_assign(base); - } - if coeff.is_negative() { - buckets[coeff.unsigned_abs() as usize - 1].add_assign(&base.neg()); +impl BucketAffine { + fn assign(&mut self, point: &Affine, sign: bool) -> bool { + match *self { + Self::None => { + *self = Self::Point(if sign { *point } else { point.neg() }); + true } + Self::Point(_) => false, } + } - // Summation by parts - // e.g. 3a + 2b + 1c = a + - // (a) + b + - // ((a) + b) + c - let mut running_sum = C::Curve::identity(); - for exp in buckets.into_iter().rev() { - running_sum = exp.add(running_sum); - *acc += &running_sum; + fn x(&self) -> C::Base { + match self { + Self::None => panic!("::x None"), + Self::Point(a) => a.x, + } + } + + fn y(&self) -> C::Base { + match self { + Self::None => panic!("::y None"), + Self::Point(a) => a.y, + } + } + + fn set_x(&mut self, x: &C::Base) { + match self { + Self::None => panic!("::set_x None"), + Self::Point(ref mut a) => a.x = *x, + } + } + + fn set_y(&mut self, y: &C::Base) { + match self { + Self::None => panic!("::set_y None"), + Self::Point(ref mut a) => a.y = *y, } } } -/// Performs a small multi-exponentiation operation. -/// Uses the double-and-add algorithm with doublings shared across points. -pub fn small_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { - let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); - let mut acc = C::Curve::identity(); - - // for byte idx - for byte_idx in (0..32).rev() { - // for bit idx - for bit_idx in (0..8).rev() { - acc = acc.double(); - // for each coeff - for coeff_idx in 0..coeffs.len() { - let byte = coeffs[coeff_idx].as_ref()[byte_idx]; - if ((byte >> bit_idx) & 1) != 0 { - acc += bases[coeff_idx]; - } - } +struct Schedule { + buckets: Vec>, + set: Vec, + ptr: usize, +} + +#[derive(Debug, Clone, Default)] +struct SchedulePoint { + base_idx: usize, + buck_idx: usize, + sign: bool, +} + +impl SchedulePoint { + fn new(base_idx: usize, buck_idx: usize, sign: bool) -> Self { + Self { + base_idx, + buck_idx, + sign, + } + } +} + +impl Schedule { + fn new(batch_size: usize, c: usize) -> Self { + Self { + buckets: vec![BucketAffine::None; 1 << (c - 1)], + set: vec![SchedulePoint::default(); batch_size], + ptr: 0, + } + } + + fn contains(&self, buck_idx: usize) -> bool { + self.set + .iter() + .position(|sch| sch.buck_idx == buck_idx) + .is_some() + } + + fn execute(&mut self, bases: &[Affine]) { + if self.ptr != 0 { + batch_add(self.ptr, &mut self.buckets, &self.set, bases); + self.ptr = 0; + self.set + .iter_mut() + .for_each(|sch| *sch = SchedulePoint::default()); } } - acc + fn add(&mut self, bases: &[Affine], base_idx: usize, buck_idx: usize, sign: bool) { + if !self.buckets[buck_idx].assign(&bases[base_idx], sign) { + self.set[self.ptr] = SchedulePoint::new(base_idx, buck_idx, sign); + self.ptr += 1; + } + + if self.ptr == self.set.len() { + self.execute(bases); + } + } } -/// Performs a multi-exponentiation operation. -/// -/// This function will panic if coeffs and bases have a different length. -/// -/// This will use multithreading if beneficial. pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { - assert_eq!(coeffs.len(), bases.len()); - - let num_threads = rayon::current_num_threads(); - if coeffs.len() > num_threads { - let chunk = coeffs.len() / num_threads; - let num_chunks = coeffs.chunks(chunk).len(); - let mut results = vec![C::Curve::identity(); num_chunks]; - rayon::scope(|scope| { - let chunk = coeffs.len() / num_threads; + // TODO: consider adjusting it with emprical data? + let batch_size = 64; - for ((coeffs, bases), acc) in coeffs - .chunks(chunk) - .zip(bases.chunks(chunk)) - .zip(results.iter_mut()) - { - scope.spawn(move |_| { - multiexp_serial(coeffs, bases, acc); - }); - } - }); - results.iter().fold(C::Curve::identity(), |a, b| a + b) + // TODO: consider adjusting it with emprical data? + let c = if bases.len() < 4 { + 1 + } else if bases.len() < 32 { + 3 } else { - let mut acc = C::Curve::identity(); - multiexp_serial(coeffs, bases, &mut acc); - acc - } + (f64::from(bases.len() as u32)).ln().ceil() as usize + }; + + // coeffs to byte representation + let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); + // copy bases into `Affine` to skip in on curve check for every access + let bases_local: Vec<_> = bases.iter().map(Affine::from).collect(); + + // number of windows + let number_of_windows = (256 / c) + 1; + // accumumator for each window + let mut acc = vec![C::Curve::identity(); number_of_windows]; + acc.par_iter_mut().enumerate().rev().for_each(|(w, acc)| { + // jacobian buckets for already scheduled points + let mut j_bucks = vec![Bucket::::None; 1 << (c - 1)]; + + // schedular for affine addition + let mut sched = Schedule::new(batch_size, c); + + for (base_idx, coeff) in coeffs.iter().enumerate() { + let buck_idx = get_booth_index(w, c, coeff.as_ref()); + + if buck_idx != 0 { + // parse bucket index + let sign = buck_idx.is_positive(); + let buck_idx = buck_idx.unsigned_abs() as usize - 1; + + if sched.contains(buck_idx) { + // greedy accumulation + // we use original bases here + j_bucks[buck_idx].add_assign(&bases[base_idx], sign); + } else { + // also flushes the schedule if full + sched.add(&bases_local, base_idx, buck_idx, sign); + } + } + } + + // flush the schedule + sched.execute(&bases_local); + + // summation by parts + // e.g. 3a + 2b + 1c = a + + // (a) + b + + // ((a) + b) + c + let mut running_sum = C::Curve::identity(); + for (j_buck, a_buck) in j_bucks.iter().zip(sched.buckets.iter()).rev() { + running_sum += j_buck.add(a_buck); + *acc += running_sum; + } + + // shift accumulator to the window position + for _ in 0..c * w { + *acc = acc.double(); + } + }); + acc.into_iter().sum::<_>() } #[cfg(test)] @@ -191,38 +360,8 @@ mod test { use pasta_curves::arithmetic::CurveAffine; use rand_core::OsRng; - // keeping older implementation it here for baseline comparison, debugging & benchmarking - fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { - assert_eq!(coeffs.len(), bases.len()); - - let num_threads = rayon::current_num_threads(); - if coeffs.len() > num_threads { - let chunk = coeffs.len() / num_threads; - let num_chunks = coeffs.chunks(chunk).len(); - let mut results = vec![C::Curve::identity(); num_chunks]; - rayon::scope(|scope| { - let chunk = coeffs.len() / num_threads; - - for ((coeffs, bases), acc) in coeffs - .chunks(chunk) - .zip(bases.chunks(chunk)) - .zip(results.iter_mut()) - { - scope.spawn(move |_| { - multiexp_serial(coeffs, bases, acc); - }); - } - }); - results.iter().fold(C::Curve::identity(), |a, b| a + b) - } else { - let mut acc = C::Curve::identity(); - multiexp_serial(coeffs, bases, &mut acc); - acc - } - } - - // keeping older implementation it here for baseline comparison, debugging & benchmarking - fn multiexp_serial(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) { + // keeping older implementation here for benchmarking and testing + pub fn multiexp_serial(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) { let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); let c = if bases.len() < 4 { @@ -233,29 +372,9 @@ mod test { (f64::from(bases.len() as u32)).ln().ceil() as usize }; - fn get_at(segment: usize, c: usize, bytes: &F::Repr) -> usize { - let skip_bits = segment * c; - let skip_bytes = skip_bits / 8; - - if skip_bytes >= 32 { - return 0; - } - - let mut v = [0; 8]; - for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) { - *v = *o; - } - - let mut tmp = u64::from_le_bytes(v); - tmp >>= skip_bits - (skip_bytes * 8); - tmp %= 1 << c; - - tmp as usize - } - - let segments = (256 / c) + 1; + let number_of_windows = C::Scalar::NUM_BITS as usize / c + 1; - for current_segment in (0..segments).rev() { + for current_window in (0..number_of_windows).rev() { for _ in 0..c { *acc = acc.double(); } @@ -291,12 +410,15 @@ mod test { } } - let mut buckets: Vec> = vec![Bucket::None; (1 << c) - 1]; + let mut buckets: Vec> = vec![Bucket::None; 1 << (c - 1)]; for (coeff, base) in coeffs.iter().zip(bases.iter()) { - let coeff = get_at::(current_segment, c, coeff); - if coeff != 0 { - buckets[coeff - 1].add_assign(base); + let coeff = super::get_booth_index(current_window, c, coeff.as_ref()); + if coeff.is_positive() { + buckets[coeff as usize - 1].add_assign(base); + } + if coeff.is_negative() { + buckets[coeff.unsigned_abs() as usize - 1].add_assign(&base.neg()); } } @@ -312,6 +434,36 @@ mod test { } } + // keeping older implementation here for benchmarking and testing + pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { + assert_eq!(coeffs.len(), bases.len()); + + let num_threads = rayon::current_num_threads(); + if coeffs.len() > num_threads { + let chunk = coeffs.len() / num_threads; + let num_chunks = coeffs.chunks(chunk).len(); + let mut results = vec![C::Curve::identity(); num_chunks]; + rayon::scope(|scope| { + let chunk = coeffs.len() / num_threads; + + for ((coeffs, bases), acc) in coeffs + .chunks(chunk) + .zip(bases.chunks(chunk)) + .zip(results.iter_mut()) + { + scope.spawn(move |_| { + multiexp_serial(coeffs, bases, acc); + }); + } + }); + results.iter().fold(C::Curve::identity(), |a, b| a + b) + } else { + let mut acc = C::Curve::identity(); + multiexp_serial(coeffs, bases, &mut acc); + acc + } + } + #[test] fn test_booth_encoding() { fn mul(scalar: &Fr, point: &G1Affine, window: usize) -> G1Affine { @@ -374,12 +526,12 @@ mod test { let points = &points[..1 << k]; let scalars = &scalars[..1 << k]; - let t0 = start_timer!(|| format!("w/ booth k={}", k)); - let e0 = super::best_multiexp(scalars, points); + let t0 = start_timer!(|| format!("older k={}", k)); + let e0 = best_multiexp(scalars, points); end_timer!(t0); - let t1 = start_timer!(|| format!("w/o booth k={}", k)); - let e1 = best_multiexp(scalars, points); + let t1 = start_timer!(|| format!("cyclone k={}", k)); + let e1 = super::best_multiexp(scalars, points); end_timer!(t1); assert_eq!(e0, e1); @@ -388,7 +540,7 @@ mod test { #[test] fn test_msm_cross() { - run_msm_cross::(10, 18); + run_msm_cross::(16, 22); // run_msm_cross::(19, 23); } } From 8a160e5c0eba2fabb12c57254f663b3bea6061bd Mon Sep 17 00:00:00 2001 From: kilic Date: Tue, 23 Jan 2024 14:15:06 +0300 Subject: [PATCH 2/6] bring back multiexp serial --- src/msm.rs | 158 ++++++++++++++++++++++++++--------------------------- 1 file changed, 77 insertions(+), 81 deletions(-) diff --git a/src/msm.rs b/src/msm.rs index dae94393..1332c317 100644 --- a/src/msm.rs +++ b/src/msm.rs @@ -251,10 +251,7 @@ impl Schedule { } fn contains(&self, buck_idx: usize) -> bool { - self.set - .iter() - .position(|sch| sch.buck_idx == buck_idx) - .is_some() + self.set.iter().any(|sch| sch.buck_idx == buck_idx) } fn execute(&mut self, bases: &[Affine]) { @@ -279,6 +276,79 @@ impl Schedule { } } +pub fn multiexp_serial(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) { + let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); + + let c = if bases.len() < 4 { + 1 + } else if bases.len() < 32 { + 3 + } else { + (f64::from(bases.len() as u32)).ln().ceil() as usize + }; + + let number_of_windows = C::Scalar::NUM_BITS as usize / c + 1; + + for current_window in (0..number_of_windows).rev() { + for _ in 0..c { + *acc = acc.double(); + } + + #[derive(Clone, Copy)] + enum Bucket { + None, + Affine(C), + Projective(C::Curve), + } + + impl Bucket { + fn add_assign(&mut self, other: &C) { + *self = match *self { + Bucket::None => Bucket::Affine(*other), + Bucket::Affine(a) => Bucket::Projective(a + *other), + Bucket::Projective(mut a) => { + a += *other; + Bucket::Projective(a) + } + } + } + + fn add(self, mut other: C::Curve) -> C::Curve { + match self { + Bucket::None => other, + Bucket::Affine(a) => { + other += a; + other + } + Bucket::Projective(a) => other + a, + } + } + } + + let mut buckets: Vec> = vec![Bucket::None; 1 << (c - 1)]; + + for (coeff, base) in coeffs.iter().zip(bases.iter()) { + let coeff = get_booth_index(current_window, c, coeff.as_ref()); + if coeff.is_positive() { + buckets[coeff as usize - 1].add_assign(base); + } + if coeff.is_negative() { + buckets[coeff.unsigned_abs() as usize - 1].add_assign(&base.neg()); + } + } + + // Summation by parts + // e.g. 3a + 2b + 1c = a + + // (a) + b + + // ((a) + b) + c + let mut running_sum = C::Curve::identity(); + for exp in buckets.into_iter().rev() { + running_sum = exp.add(running_sum); + *acc += &running_sum; + } + } +} + pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { // TODO: consider adjusting it with emprical data? let batch_size = 64; @@ -298,7 +368,7 @@ pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu let bases_local: Vec<_> = bases.iter().map(Affine::from).collect(); // number of windows - let number_of_windows = (256 / c) + 1; + let number_of_windows = C::Scalar::NUM_BITS as usize / c + 1; // accumumator for each window let mut acc = vec![C::Curve::identity(); number_of_windows]; acc.par_iter_mut().enumerate().rev().for_each(|(w, acc)| { @@ -360,80 +430,6 @@ mod test { use pasta_curves::arithmetic::CurveAffine; use rand_core::OsRng; - // keeping older implementation here for benchmarking and testing - pub fn multiexp_serial(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) { - let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); - - let c = if bases.len() < 4 { - 1 - } else if bases.len() < 32 { - 3 - } else { - (f64::from(bases.len() as u32)).ln().ceil() as usize - }; - - let number_of_windows = C::Scalar::NUM_BITS as usize / c + 1; - - for current_window in (0..number_of_windows).rev() { - for _ in 0..c { - *acc = acc.double(); - } - - #[derive(Clone, Copy)] - enum Bucket { - None, - Affine(C), - Projective(C::Curve), - } - - impl Bucket { - fn add_assign(&mut self, other: &C) { - *self = match *self { - Bucket::None => Bucket::Affine(*other), - Bucket::Affine(a) => Bucket::Projective(a + *other), - Bucket::Projective(mut a) => { - a += *other; - Bucket::Projective(a) - } - } - } - - fn add(self, mut other: C::Curve) -> C::Curve { - match self { - Bucket::None => other, - Bucket::Affine(a) => { - other += a; - other - } - Bucket::Projective(a) => other + a, - } - } - } - - let mut buckets: Vec> = vec![Bucket::None; 1 << (c - 1)]; - - for (coeff, base) in coeffs.iter().zip(bases.iter()) { - let coeff = super::get_booth_index(current_window, c, coeff.as_ref()); - if coeff.is_positive() { - buckets[coeff as usize - 1].add_assign(base); - } - if coeff.is_negative() { - buckets[coeff.unsigned_abs() as usize - 1].add_assign(&base.neg()); - } - } - - // Summation by parts - // e.g. 3a + 2b + 1c = a + - // (a) + b + - // ((a) + b) + c - let mut running_sum = C::Curve::identity(); - for exp in buckets.into_iter().rev() { - running_sum = exp.add(running_sum); - *acc += &running_sum; - } - } - } - // keeping older implementation here for benchmarking and testing pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { assert_eq!(coeffs.len(), bases.len()); @@ -452,14 +448,14 @@ mod test { .zip(results.iter_mut()) { scope.spawn(move |_| { - multiexp_serial(coeffs, bases, acc); + super::multiexp_serial(coeffs, bases, acc); }); } }); results.iter().fold(C::Curve::identity(), |a, b| a + b) } else { let mut acc = C::Curve::identity(); - multiexp_serial(coeffs, bases, &mut acc); + super::multiexp_serial(coeffs, bases, &mut acc); acc } } From 75a3cda266777dc76caa897413bcba6a4ca71c55 Mon Sep 17 00:00:00 2001 From: kilic Date: Wed, 24 Jan 2024 12:27:17 +0300 Subject: [PATCH 3/6] parallelize coeffs to repr Co-authored-by: Han --- src/msm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/msm.rs b/src/msm.rs index 1332c317..582abc9c 100644 --- a/src/msm.rs +++ b/src/msm.rs @@ -363,7 +363,7 @@ pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu }; // coeffs to byte representation - let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); + let coeffs: Vec<_> = coeffs.par_iter().map(|a| a.to_repr()).collect(); // copy bases into `Affine` to skip in on curve check for every access let bases_local: Vec<_> = bases.iter().map(Affine::from).collect(); From 4011ed2cbf2b3b2548c8c0950514f117ee99d0d8 Mon Sep 17 00:00:00 2001 From: kilic Date: Wed, 24 Jan 2024 12:28:22 +0300 Subject: [PATCH 4/6] parallelize bases to affine Co-authored-by: Han --- src/msm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/msm.rs b/src/msm.rs index 582abc9c..8807c70e 100644 --- a/src/msm.rs +++ b/src/msm.rs @@ -365,7 +365,7 @@ pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu // coeffs to byte representation let coeffs: Vec<_> = coeffs.par_iter().map(|a| a.to_repr()).collect(); // copy bases into `Affine` to skip in on curve check for every access - let bases_local: Vec<_> = bases.iter().map(Affine::from).collect(); + let bases_local: Vec<_> = bases.par_iter().map(Affine::from).collect(); // number of windows let number_of_windows = C::Scalar::NUM_BITS as usize / c + 1; From c6c291a873d1a22125a714417d0a245697e86578 Mon Sep 17 00:00:00 2001 From: kilic Date: Wed, 24 Jan 2024 15:15:57 +0300 Subject: [PATCH 5/6] add missing dependency --- src/msm.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/msm.rs b/src/msm.rs index 8807c70e..3d6c1b81 100644 --- a/src/msm.rs +++ b/src/msm.rs @@ -4,7 +4,9 @@ use crate::CurveAffine; use ff::Field; use ff::PrimeField; use group::Group; -use rayon::iter::{IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator}; +use rayon::iter::{ + IndexedParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator, +}; fn get_booth_index(window_index: usize, window_size: usize, el: &[u8]) -> i32 { // Booth encoding: @@ -81,7 +83,7 @@ fn batch_add( acc *= *z; } - acc = acc.invert().expect(":("); + acc = acc.invert().unwrap(); for ( ( From a359481886232c9e7fdb52ab7c547d7caf7f1148 Mon Sep 17 00:00:00 2001 From: kilic Date: Mon, 19 Feb 2024 11:41:24 +0300 Subject: [PATCH 6/6] bring back old implementation postfix new one as `_independent_points` --- src/msm.rs | 103 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 42 deletions(-) diff --git a/src/msm.rs b/src/msm.rs index 3d6c1b81..25af9711 100644 --- a/src/msm.rs +++ b/src/msm.rs @@ -8,6 +8,8 @@ use rayon::iter::{ IndexedParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator, }; +const BATCH_SIZE: usize = 64; + fn get_booth_index(window_index: usize, window_size: usize, el: &[u8]) -> i32 { // Booth encoding: // * step by `window` size @@ -222,7 +224,7 @@ impl BucketAffine { struct Schedule { buckets: Vec>, - set: Vec, + set: [SchedulePoint; BATCH_SIZE], ptr: usize, } @@ -244,10 +246,16 @@ impl SchedulePoint { } impl Schedule { - fn new(batch_size: usize, c: usize) -> Self { + fn new(c: usize) -> Self { + let set = (0..BATCH_SIZE) + .map(|_| SchedulePoint::default()) + .collect::>() + .try_into() + .unwrap(); + Self { buckets: vec![BucketAffine::None; 1 << (c - 1)], - set: vec![SchedulePoint::default(); batch_size], + set, ptr: 0, } } @@ -351,9 +359,48 @@ pub fn multiexp_serial(coeffs: &[C::Scalar], bases: &[C], acc: & } } +/// Performs a multi-exponentiation operation. +/// +/// This function will panic if coeffs and bases have a different length. +/// +/// This will use multithreading if beneficial. pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { - // TODO: consider adjusting it with emprical data? - let batch_size = 64; + assert_eq!(coeffs.len(), bases.len()); + + let num_threads = rayon::current_num_threads(); + if coeffs.len() > num_threads { + let chunk = coeffs.len() / num_threads; + let num_chunks = coeffs.chunks(chunk).len(); + let mut results = vec![C::Curve::identity(); num_chunks]; + rayon::scope(|scope| { + let chunk = coeffs.len() / num_threads; + + for ((coeffs, bases), acc) in coeffs + .chunks(chunk) + .zip(bases.chunks(chunk)) + .zip(results.iter_mut()) + { + scope.spawn(move |_| { + multiexp_serial(coeffs, bases, acc); + }); + } + }); + results.iter().fold(C::Curve::identity(), |a, b| a + b) + } else { + let mut acc = C::Curve::identity(); + multiexp_serial(coeffs, bases, &mut acc); + acc + } +} +/// +/// This function will panic if coeffs and bases have a different length. +/// +/// This will use multithreading if beneficial. +pub fn best_multiexp_independent_points( + coeffs: &[C::Scalar], + bases: &[C], +) -> C::Curve { + assert_eq!(coeffs.len(), bases.len()); // TODO: consider adjusting it with emprical data? let c = if bases.len() < 4 { @@ -364,6 +411,10 @@ pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu (f64::from(bases.len() as u32)).ln().ceil() as usize }; + if c < 10 { + return best_multiexp(coeffs, bases); + } + // coeffs to byte representation let coeffs: Vec<_> = coeffs.par_iter().map(|a| a.to_repr()).collect(); // copy bases into `Affine` to skip in on curve check for every access @@ -378,7 +429,7 @@ pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu let mut j_bucks = vec![Bucket::::None; 1 << (c - 1)]; // schedular for affine addition - let mut sched = Schedule::new(batch_size, c); + let mut sched = Schedule::new(c); for (base_idx, coeff) in coeffs.iter().enumerate() { let buck_idx = get_booth_index(w, c, coeff.as_ref()); @@ -432,36 +483,6 @@ mod test { use pasta_curves::arithmetic::CurveAffine; use rand_core::OsRng; - // keeping older implementation here for benchmarking and testing - pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { - assert_eq!(coeffs.len(), bases.len()); - - let num_threads = rayon::current_num_threads(); - if coeffs.len() > num_threads { - let chunk = coeffs.len() / num_threads; - let num_chunks = coeffs.chunks(chunk).len(); - let mut results = vec![C::Curve::identity(); num_chunks]; - rayon::scope(|scope| { - let chunk = coeffs.len() / num_threads; - - for ((coeffs, bases), acc) in coeffs - .chunks(chunk) - .zip(bases.chunks(chunk)) - .zip(results.iter_mut()) - { - scope.spawn(move |_| { - super::multiexp_serial(coeffs, bases, acc); - }); - } - }); - results.iter().fold(C::Curve::identity(), |a, b| a + b) - } else { - let mut acc = C::Curve::identity(); - super::multiexp_serial(coeffs, bases, &mut acc); - acc - } - } - #[test] fn test_booth_encoding() { fn mul(scalar: &Fr, point: &G1Affine, window: usize) -> G1Affine { @@ -524,21 +545,19 @@ mod test { let points = &points[..1 << k]; let scalars = &scalars[..1 << k]; - let t0 = start_timer!(|| format!("older k={}", k)); - let e0 = best_multiexp(scalars, points); + let t0 = start_timer!(|| format!("cyclone k={}", k)); + let e0 = super::best_multiexp_independent_points(scalars, points); end_timer!(t0); - let t1 = start_timer!(|| format!("cyclone k={}", k)); + let t1 = start_timer!(|| format!("older k={}", k)); let e1 = super::best_multiexp(scalars, points); end_timer!(t1); - assert_eq!(e0, e1); } } #[test] fn test_msm_cross() { - run_msm_cross::(16, 22); - // run_msm_cross::(19, 23); + run_msm_cross::(14, 22); } }