From af1713f1d3147c71f9b9ffcaf13f715c83cc636f Mon Sep 17 00:00:00 2001 From: ashWhiteHat Date: Wed, 6 Sep 2023 10:46:30 +0900 Subject: [PATCH 1/3] arithmetic::best_multiexp add benchmark --- halo2_proofs/Cargo.toml | 4 ++++ halo2_proofs/benches/msm.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 halo2_proofs/benches/msm.rs diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index 6368bddc99..56385ddfc4 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -31,6 +31,10 @@ harness = false name = "hashtocurve" harness = false +[[bench]] +name = "msm" +harness = false + [[bench]] name = "plonk" harness = false diff --git a/halo2_proofs/benches/msm.rs b/halo2_proofs/benches/msm.rs new file mode 100644 index 0000000000..21c5508a0c --- /dev/null +++ b/halo2_proofs/benches/msm.rs @@ -0,0 +1,27 @@ +#[macro_use] +extern crate criterion; + +use crate::arithmetic::best_multiexp; +use crate::pasta::{EqAffine, Fp}; +use crate::poly::commitment::Params; +use criterion::{BenchmarkId, Criterion}; +use group::ff::Field; +use halo2_proofs::*; +use rand_core::OsRng; + +fn criterion_benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("msm"); + for k in 8..16 { + group + .bench_function(BenchmarkId::new("k", k), |b| { + let coeffs = (0..(1 << k)).map(|_| Fp::random(OsRng)).collect::>(); + let bases = Params::::new(k).get_g(); + + b.iter(|| best_multiexp(&coeffs, &bases)) + }) + .sample_size(30); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); From e00f0d123397ae30da754ded9664ff21f4db9882 Mon Sep 17 00:00:00 2001 From: ashWhiteHat Date: Wed, 6 Sep 2023 12:39:59 +0900 Subject: [PATCH 2/3] arithmetic::best_multiexp parallelize bucket arithmetic --- halo2_proofs/src/arithmetic.rs | 205 +++++++++++++++++---------------- 1 file changed, 107 insertions(+), 98 deletions(-) diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs index 4cb0039d3f..914851f9ac 100644 --- a/halo2_proofs/src/arithmetic.rs +++ b/halo2_proofs/src/arithmetic.rs @@ -7,7 +7,7 @@ use group::{ ff::{BatchInvert, PrimeField}, Group as _, GroupOpsOwned, ScalarMulOwned, }; - +use maybe_rayon::prelude::*; pub use pasta_curves::arithmetic::*; /// This represents an element of a group with basic operations that can be @@ -25,94 +25,55 @@ where { } -fn multiexp_serial(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) { - let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); - - let c = if bases.len() < 4 { - 1 - } else if bases.len() < 32 { - 3 - } else { - (f64::from(bases.len() as u32)).ln().ceil() as usize - }; - - fn get_at(segment: usize, c: usize, bytes: &F::Repr) -> usize { - let skip_bits = segment * c; - let skip_bytes = skip_bits / 8; - - if skip_bytes >= 32 { - return 0; - } +#[derive(Clone, Copy)] +enum Bucket { + None, + Affine(C), + Projective(C::Curve), +} - let mut v = [0; 8]; - for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) { - *v = *o; +impl Bucket { + fn add_assign(&mut self, other: &C) { + *self = match *self { + Bucket::None => Bucket::Affine(*other), + Bucket::Affine(a) => Bucket::Projective(a + *other), + Bucket::Projective(mut a) => { + a += *other; + Bucket::Projective(a) + } } - - let mut tmp = u64::from_le_bytes(v); - tmp >>= skip_bits - (skip_bytes * 8); - tmp %= 1 << c; - - tmp as usize } - let segments = (256 / c) + 1; - - for current_segment in (0..segments).rev() { - for _ in 0..c { - *acc = acc.double(); - } - - #[derive(Clone, Copy)] - enum Bucket { - None, - Affine(C), - Projective(C::Curve), - } - - impl Bucket { - fn add_assign(&mut self, other: &C) { - *self = match *self { - Bucket::None => Bucket::Affine(*other), - Bucket::Affine(a) => Bucket::Projective(a + *other), - Bucket::Projective(mut a) => { - a += *other; - Bucket::Projective(a) - } - } - } - - fn add(self, mut other: C::Curve) -> C::Curve { - match self { - Bucket::None => other, - Bucket::Affine(a) => { - other += a; - other - } - Bucket::Projective(a) => other + &a, - } + fn add(self, mut other: C::Curve) -> C::Curve { + match self { + Bucket::None => other, + Bucket::Affine(a) => { + other += a; + other } + Bucket::Projective(a) => other + &a, } + } +} - let mut buckets: Vec> = vec![Bucket::None; (1 << c) - 1]; +fn get_at(segment: usize, c: usize, bytes: &F::Repr) -> usize { + let skip_bits = segment * c; + let skip_bytes = skip_bits / 8; - for (coeff, base) in coeffs.iter().zip(bases.iter()) { - let coeff = get_at::(current_segment, c, coeff); - if coeff != 0 { - buckets[coeff - 1].add_assign(base); - } - } + if skip_bytes >= 32 { + return 0; + } - // Summation by parts - // e.g. 3a + 2b + 1c = a + - // (a) + b + - // ((a) + b) + c - let mut running_sum = C::Curve::identity(); - for exp in buckets.into_iter().rev() { - running_sum = exp.add(running_sum); - *acc += &running_sum; - } + let mut v = [0; 8]; + for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) { + *v = *o; } + + let mut tmp = u64::from_le_bytes(v); + tmp >>= skip_bits - (skip_bytes * 8); + tmp %= 1 << c; + + tmp as usize } /// Performs a small multi-exponentiation operation. @@ -147,29 +108,77 @@ pub fn small_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::C pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { assert_eq!(coeffs.len(), bases.len()); + let c = if bases.len() < 4 { + 1 + } else if bases.len() < 32 { + 3 + } else { + (f64::from(bases.len() as u32)).ln().ceil() as usize + }; + + let mut multi_buckets: Vec>> = + vec![vec![Bucket::None; (1 << c) - 1]; (256 / c) + 1]; + let num_threads = multicore::current_num_threads(); if coeffs.len() > num_threads { - let chunk = coeffs.len() / num_threads; - let num_chunks = coeffs.chunks(chunk).len(); - let mut results = vec![C::Curve::identity(); num_chunks]; - multicore::scope(|scope| { - let chunk = coeffs.len() / num_threads; - - for ((coeffs, bases), acc) in coeffs - .chunks(chunk) - .zip(bases.chunks(chunk)) - .zip(results.iter_mut()) - { - scope.spawn(move |_| { - multiexp_serial(coeffs, bases, acc); + multi_buckets + .par_iter_mut() + .enumerate() + .rev() + .map(|(i, buckets)| { + // get segmentation and add coeff to buckets content + for (coeff, base) in coeffs.iter().zip(bases.iter()) { + let seg = get_at::(i, c, &coeff.to_repr()); + if seg != 0 { + buckets[seg - 1].add_assign(base); + } + } + + // Summation by parts + // e.g. 3a + 2b + 1c = a + + // (a) + b + + // ((a) + b) + c + let mut acc = C::Curve::identity(); + let mut sum = C::Curve::identity(); + buckets.iter().rev().for_each(|b| { + sum = b.add(sum); + acc += sum; }); - } - }); - results.iter().fold(C::Curve::identity(), |a, b| a + b) + (0..c * i).for_each(|_| acc = acc.double()); + acc + }) + .reduce(|| C::Curve::identity(), |a, b| a + b) } else { - let mut acc = C::Curve::identity(); - multiexp_serial(coeffs, bases, &mut acc); - acc + multi_buckets + .iter_mut() + .enumerate() + .rev() + .map(|(i, buckets)| { + // get segmentation and add coeff to buckets content + for (coeff, base) in coeffs.iter().zip(bases.iter()) { + let seg = get_at::(i, c, &coeff.to_repr()); + if seg != 0 { + buckets[seg - 1].add_assign(base); + } + } + + // Summation by parts + // e.g. 3a + 2b + 1c = a + + // (a) + b + + // ((a) + b) + c + let mut acc = C::Curve::identity(); + let mut sum = C::Curve::identity(); + buckets.iter().rev().for_each(|b| { + sum = b.add(sum); + acc += sum; + }); + acc + }) + .fold(C::Curve::identity(), |mut sum, bucket| { + // restore original evaluation point + (0..c).for_each(|_| sum = sum.double()); + sum + bucket + }) } } From 24e3ec36337bf36ec5359b4114f9f54e22610aa9 Mon Sep 17 00:00:00 2001 From: ashWhiteHat Date: Wed, 6 Sep 2023 12:57:57 +0900 Subject: [PATCH 3/3] arithmetic::best_multiexp refactor buckets --- halo2_proofs/src/arithmetic.rs | 103 ++++++++++++++++----------------- 1 file changed, 49 insertions(+), 54 deletions(-) diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs index 914851f9ac..f10ae0411a 100644 --- a/halo2_proofs/src/arithmetic.rs +++ b/halo2_proofs/src/arithmetic.rs @@ -56,24 +56,58 @@ impl Bucket { } } -fn get_at(segment: usize, c: usize, bytes: &F::Repr) -> usize { - let skip_bits = segment * c; - let skip_bytes = skip_bits / 8; +#[derive(Clone)] +struct Buckets { + c: usize, + coeffs: Vec>, +} - if skip_bytes >= 32 { - return 0; +impl Buckets { + fn new(c: usize) -> Self { + Self { + c, + coeffs: vec![Bucket::None; (1 << c) - 1], + } } - let mut v = [0; 8]; - for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) { - *v = *o; + fn sum(&mut self, coeffs: &[C::Scalar], bases: &[C], i: usize) -> C::Curve { + // get segmentation and add coeff to buckets content + for (coeff, base) in coeffs.iter().zip(bases.iter()) { + let seg = self.get_at::(i, &coeff.to_repr()); + if seg != 0 { + self.coeffs[seg - 1].add_assign(base); + } + } + // Summation by parts + // e.g. 3a + 2b + 1c = a + + // (a) + b + + // ((a) + b) + c + let mut acc = C::Curve::identity(); + let mut sum = C::Curve::identity(); + self.coeffs.iter().rev().for_each(|b| { + sum = b.add(sum); + acc += sum; + }); + acc } - let mut tmp = u64::from_le_bytes(v); - tmp >>= skip_bits - (skip_bytes * 8); - tmp %= 1 << c; + fn get_at(&self, segment: usize, bytes: &F::Repr) -> usize { + let skip_bits = segment * self.c; + let skip_bytes = skip_bits / 8; - tmp as usize + if skip_bytes >= 32 { + 0 + } else { + let mut v = [0; 8]; + for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) { + *v = *o; + } + + let mut tmp = u64::from_le_bytes(v); + tmp >>= skip_bits - (skip_bytes * 8); + (tmp % (1 << self.c)) as usize + } + } } /// Performs a small multi-exponentiation operation. @@ -116,9 +150,7 @@ pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu (f64::from(bases.len() as u32)).ln().ceil() as usize }; - let mut multi_buckets: Vec>> = - vec![vec![Bucket::None; (1 << c) - 1]; (256 / c) + 1]; - + let mut multi_buckets: Vec> = vec![Buckets::new(c); (256 / c) + 1]; let num_threads = multicore::current_num_threads(); if coeffs.len() > num_threads { multi_buckets @@ -126,24 +158,7 @@ pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu .enumerate() .rev() .map(|(i, buckets)| { - // get segmentation and add coeff to buckets content - for (coeff, base) in coeffs.iter().zip(bases.iter()) { - let seg = get_at::(i, c, &coeff.to_repr()); - if seg != 0 { - buckets[seg - 1].add_assign(base); - } - } - - // Summation by parts - // e.g. 3a + 2b + 1c = a + - // (a) + b + - // ((a) + b) + c - let mut acc = C::Curve::identity(); - let mut sum = C::Curve::identity(); - buckets.iter().rev().for_each(|b| { - sum = b.add(sum); - acc += sum; - }); + let mut acc = buckets.sum(coeffs, bases, i); (0..c * i).for_each(|_| acc = acc.double()); acc }) @@ -153,27 +168,7 @@ pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu .iter_mut() .enumerate() .rev() - .map(|(i, buckets)| { - // get segmentation and add coeff to buckets content - for (coeff, base) in coeffs.iter().zip(bases.iter()) { - let seg = get_at::(i, c, &coeff.to_repr()); - if seg != 0 { - buckets[seg - 1].add_assign(base); - } - } - - // Summation by parts - // e.g. 3a + 2b + 1c = a + - // (a) + b + - // ((a) + b) + c - let mut acc = C::Curve::identity(); - let mut sum = C::Curve::identity(); - buckets.iter().rev().for_each(|b| { - sum = b.add(sum); - acc += sum; - }); - acc - }) + .map(|(i, buckets)| buckets.sum(coeffs, bases, i)) .fold(C::Curve::identity(), |mut sum, bucket| { // restore original evaluation point (0..c).for_each(|_| sum = sum.double());