From af1713f1d3147c71f9b9ffcaf13f715c83cc636f Mon Sep 17 00:00:00 2001
From: ashWhiteHat <phantomofrotten@gmail.com>
Date: Wed, 6 Sep 2023 10:46:30 +0900
Subject: [PATCH 1/3] arithmetic::best_multiexp add benchmark

---
 halo2_proofs/Cargo.toml     |  4 ++++
 halo2_proofs/benches/msm.rs | 27 +++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 halo2_proofs/benches/msm.rs
diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml
index 6368bddc99..56385ddfc4 100644
--- a/halo2_proofs/Cargo.toml
+++ b/halo2_proofs/Cargo.toml
@@ -31,6 +31,10 @@ harness = false
 name = "hashtocurve"
 harness = false
 
+[[bench]]
+name = "msm"
+harness = false
+
 [[bench]]
 name = "plonk"
 harness = false
diff --git a/halo2_proofs/benches/msm.rs b/halo2_proofs/benches/msm.rs
new file mode 100644
index 0000000000..21c5508a0c
--- /dev/null
+++ b/halo2_proofs/benches/msm.rs
@@ -0,0 +1,27 @@
+#[macro_use]
+extern crate criterion;
+
+use crate::arithmetic::best_multiexp;
+use crate::pasta::{EqAffine, Fp};
+use crate::poly::commitment::Params;
+use criterion::{BenchmarkId, Criterion};
+use group::ff::Field;
+use halo2_proofs::*;
+use rand_core::OsRng;
+
+/// Benchmarks `best_multiexp` over 2^k random scalars and `Params` bases, k in 8..16.
+fn criterion_benchmark(c: &mut Criterion) {
+    let mut group = c.benchmark_group("msm");
+    group.sample_size(30); // set first: it only affects benchmarks run afterwards
+    for k in 8..16 {
+        // Build inputs outside the closure: criterion re-runs it for every sample.
+        let coeffs = (0..(1 << k)).map(|_| Fp::random(OsRng)).collect::<Vec<_>>();
+        let bases = Params::<EqAffine>::new(k).get_g();
+        group.bench_function(BenchmarkId::new("k", k), |b| {
+            b.iter(|| best_multiexp(&coeffs, &bases))
+        });
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);

From e00f0d123397ae30da754ded9664ff21f4db9882 Mon Sep 17 00:00:00 2001
From: ashWhiteHat <phantomofrotten@gmail.com>
Date: Wed, 6 Sep 2023 12:39:59 +0900
Subject: [PATCH 2/3] arithmetic::best_multiexp parallelize bucket arithmetic

---
 halo2_proofs/src/arithmetic.rs | 205 +++++++++++++++++----------------
 1 file changed, 107 insertions(+), 98 deletions(-)

diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs
index 4cb0039d3f..914851f9ac 100644
--- a/halo2_proofs/src/arithmetic.rs
+++ b/halo2_proofs/src/arithmetic.rs
@@ -7,7 +7,7 @@ use group::{
     ff::{BatchInvert, PrimeField},
     Group as _, GroupOpsOwned, ScalarMulOwned,
 };
-
+use maybe_rayon::prelude::*;
 pub use pasta_curves::arithmetic::*;
 
 /// This represents an element of a group with basic operations that can be
@@ -25,94 +25,55 @@ where
 {
 }
 
-fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) {
-    let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect();
-
-    let c = if bases.len() < 4 {
-        1
-    } else if bases.len() < 32 {
-        3
-    } else {
-        (f64::from(bases.len() as u32)).ln().ceil() as usize
-    };
-
-    fn get_at<F: PrimeField>(segment: usize, c: usize, bytes: &F::Repr) -> usize {
-        let skip_bits = segment * c;
-        let skip_bytes = skip_bits / 8;
-
-        if skip_bytes >= 32 {
-            return 0;
-        }
+#[derive(Clone, Copy)]
+enum Bucket<C: CurveAffine> {
+    None, // nothing has been added to this bucket yet
+    Affine(C), // exactly one base so far, kept in affine form
+    Projective(C::Curve), // sum of two or more bases
+}
 
-        let mut v = [0; 8];
-        for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) {
-            *v = *o;
+impl<C: CurveAffine> Bucket<C> {
+    fn add_assign(&mut self, other: &C) {
+        *self = match *self {
+            Bucket::None => Bucket::Affine(*other),
+            Bucket::Affine(a) => Bucket::Projective(a + *other),
+            Bucket::Projective(mut a) => {
+                a += *other;
+                Bucket::Projective(a)
+            }
         }
-
-        let mut tmp = u64::from_le_bytes(v);
-        tmp >>= skip_bits - (skip_bytes * 8);
-        tmp %= 1 << c;
-
-        tmp as usize
     }
 
-    let segments = (256 / c) + 1;
-
-    for current_segment in (0..segments).rev() {
-        for _ in 0..c {
-            *acc = acc.double();
-        }
-
-        #[derive(Clone, Copy)]
-        enum Bucket<C: CurveAffine> {
-            None,
-            Affine(C),
-            Projective(C::Curve),
-        }
-
-        impl<C: CurveAffine> Bucket<C> {
-            fn add_assign(&mut self, other: &C) {
-                *self = match *self {
-                    Bucket::None => Bucket::Affine(*other),
-                    Bucket::Affine(a) => Bucket::Projective(a + *other),
-                    Bucket::Projective(mut a) => {
-                        a += *other;
-                        Bucket::Projective(a)
-                    }
-                }
-            }
-
-            fn add(self, mut other: C::Curve) -> C::Curve {
-                match self {
-                    Bucket::None => other,
-                    Bucket::Affine(a) => {
-                        other += a;
-                        other
-                    }
-                    Bucket::Projective(a) => other + &a,
-                }
+    fn add(self, mut other: C::Curve) -> C::Curve {
+        match self {
+            Bucket::None => other,
+            Bucket::Affine(a) => {
+                other += a;
+                other
             }
+            Bucket::Projective(a) => other + &a,
         }
+    }
+}
 
-        let mut buckets: Vec<Bucket<C>> = vec![Bucket::None; (1 << c) - 1];
+fn get_at<F: PrimeField>(segment: usize, c: usize, bytes: &F::Repr) -> usize {
+    let skip_bits = segment * c;
+    let skip_bytes = skip_bits / 8;
 
-        for (coeff, base) in coeffs.iter().zip(bases.iter()) {
-            let coeff = get_at::<C::Scalar>(current_segment, c, coeff);
-            if coeff != 0 {
-                buckets[coeff - 1].add_assign(base);
-            }
-        }
+    if skip_bytes >= 32 {
+        return 0;
+    }
 
-        // Summation by parts
-        // e.g. 3a + 2b + 1c = a +
-        //                    (a) + b +
-        //                    ((a) + b) + c
-        let mut running_sum = C::Curve::identity();
-        for exp in buckets.into_iter().rev() {
-            running_sum = exp.add(running_sum);
-            *acc += &running_sum;
-        }
+    let mut v = [0; 8];
+    for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) {
+        *v = *o;
     }
+
+    let mut tmp = u64::from_le_bytes(v);
+    tmp >>= skip_bits - (skip_bytes * 8);
+    tmp %= 1 << c;
+
+    tmp as usize
 }
 
 /// Performs a small multi-exponentiation operation.
@@ -147,29 +108,77 @@ pub fn small_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::C
 pub fn best_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve {
     assert_eq!(coeffs.len(), bases.len());
 
+    let c = if bases.len() < 4 {
+        1
+    } else if bases.len() < 32 {
+        3
+    } else {
+        (f64::from(bases.len() as u32)).ln().ceil() as usize
+    };
+
+    let mut multi_buckets: Vec<Vec<Bucket<C>>> =
+        vec![vec![Bucket::None; (1 << c) - 1]; (256 / c) + 1];
+
     let num_threads = multicore::current_num_threads();
     if coeffs.len() > num_threads {
-        let chunk = coeffs.len() / num_threads;
-        let num_chunks = coeffs.chunks(chunk).len();
-        let mut results = vec![C::Curve::identity(); num_chunks];
-        multicore::scope(|scope| {
-            let chunk = coeffs.len() / num_threads;
-
-            for ((coeffs, bases), acc) in coeffs
-                .chunks(chunk)
-                .zip(bases.chunks(chunk))
-                .zip(results.iter_mut())
-            {
-                scope.spawn(move |_| {
-                    multiexp_serial(coeffs, bases, acc);
+        multi_buckets
+            .par_iter_mut()
+            .enumerate()
+            .rev()
+            .map(|(i, buckets)| {
+                // get segmentation and add coeff to buckets content
+                for (coeff, base) in coeffs.iter().zip(bases.iter()) {
+                    let seg = get_at::<C::Scalar>(i, c, &coeff.to_repr());
+                    if seg != 0 {
+                        buckets[seg - 1].add_assign(base);
+                    }
+                }
+
+                // Summation by parts
+                // e.g. 3a + 2b + 1c = a +
+                //                    (a) + b +
+                //                    ((a) + b) + c
+                let mut acc = C::Curve::identity();
+                let mut sum = C::Curve::identity();
+                buckets.iter().rev().for_each(|b| {
+                    sum = b.add(sum);
+                    acc += sum;
                 });
-            }
-        });
-        results.iter().fold(C::Curve::identity(), |a, b| a + b)
+                (0..c * i).for_each(|_| acc = acc.double());
+                acc
+            })
+            .reduce(|| C::Curve::identity(), |a, b| a + b)
     } else {
-        let mut acc = C::Curve::identity();
-        multiexp_serial(coeffs, bases, &mut acc);
-        acc
+        multi_buckets
+            .iter_mut()
+            .enumerate()
+            .rev()
+            .map(|(i, buckets)| {
+                // get segmentation and add coeff to buckets content
+                for (coeff, base) in coeffs.iter().zip(bases.iter()) {
+                    let seg = get_at::<C::Scalar>(i, c, &coeff.to_repr());
+                    if seg != 0 {
+                        buckets[seg - 1].add_assign(base);
+                    }
+                }
+
+                // Summation by parts
+                // e.g. 3a + 2b + 1c = a +
+                //                    (a) + b +
+                //                    ((a) + b) + c
+                let mut acc = C::Curve::identity();
+                let mut sum = C::Curve::identity();
+                buckets.iter().rev().for_each(|b| {
+                    sum = b.add(sum);
+                    acc += sum;
+                });
+                acc
+            })
+            .fold(C::Curve::identity(), |mut sum, bucket| {
+                // restore original evaluation point
+                (0..c).for_each(|_| sum = sum.double());
+                sum + bucket
+            })
     }
 }
 

From 24e3ec36337bf36ec5359b4114f9f54e22610aa9 Mon Sep 17 00:00:00 2001
From: ashWhiteHat <phantomofrotten@gmail.com>
Date: Wed, 6 Sep 2023 12:57:57 +0900
Subject: [PATCH 3/3] arithmetic::best_multiexp refactor buckets

---
 halo2_proofs/src/arithmetic.rs | 103 ++++++++++++++++-----------------
 1 file changed, 49 insertions(+), 54 deletions(-)

diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs
index 914851f9ac..f10ae0411a 100644
--- a/halo2_proofs/src/arithmetic.rs
+++ b/halo2_proofs/src/arithmetic.rs
@@ -56,24 +56,58 @@ impl<C: CurveAffine> Bucket<C> {
     }
 }
 
-fn get_at<F: PrimeField>(segment: usize, c: usize, bytes: &F::Repr) -> usize {
-    let skip_bits = segment * c;
-    let skip_bytes = skip_bits / 8;
+#[derive(Clone)]
+struct Buckets<C: CurveAffine> {
+    c: usize, // window width in bits
+    coeffs: Vec<Bucket<C>>, // the buckets (not scalars): index j is for window value j + 1
+}
 
-    if skip_bytes >= 32 {
-        return 0;
+impl<C: CurveAffine> Buckets<C> {
+    fn new(c: usize) -> Self {
+        Self {
+            c,
+            coeffs: vec![Bucket::None; (1 << c) - 1], // window value 0 gets no bucket
+        }
     }
 
-    let mut v = [0; 8];
-    for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) {
-        *v = *o;
+    fn sum(&mut self, coeffs: &[C::Scalar], bases: &[C], i: usize) -> C::Curve {
+        // Bucket bases by window `i` of each scalar. NOTE(review): `to_repr()` reruns per window.
+        for (coeff, base) in coeffs.iter().zip(bases.iter()) {
+            let seg = self.get_at::<C::Scalar>(i, &coeff.to_repr());
+            if seg != 0 {
+                self.coeffs[seg - 1].add_assign(base);
+            }
+        }
+        // Summation by parts, walking the buckets from the highest index down:
+        // `sum` is the running total of the buckets visited so far and `acc` adds
+        // that total once per step, so the bucket at index j is counted j + 1 times,
+        // e.g. 3a + 2b + 1c = a + (a + b) + (a + b + c).
+        let mut acc = C::Curve::identity();
+        let mut sum = C::Curve::identity();
+        self.coeffs.iter().rev().for_each(|b| {
+            sum = b.add(sum);
+            acc += sum;
+        });
+        acc
+    }
 
-    let mut tmp = u64::from_le_bytes(v);
-    tmp >>= skip_bits - (skip_bytes * 8);
-    tmp %= 1 << c;
+    fn get_at<F: PrimeField>(&self, segment: usize, bytes: &F::Repr) -> usize {
+        let skip_bits = segment * self.c; // bit offset where window `segment` starts
+        let skip_bytes = skip_bits / 8;
+
+        if skip_bytes >= 32 {
+            0 // NOTE(review): hard-coded limit presumes a 32-byte repr — confirm
+        } else {
+            let mut v = [0; 8];
+            for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) {
+                *v = *o; // copy at most 8 bytes starting at the window's first byte
+            }
+
+            let mut tmp = u64::from_le_bytes(v);
+            tmp >>= skip_bits - (skip_bytes * 8); // drop the bits below the window
+            (tmp % (1 << self.c)) as usize // keep only the low `c` bits
+        }
+    }
 }
 
 /// Performs a small multi-exponentiation operation.
@@ -116,9 +150,7 @@ pub fn best_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu
         (f64::from(bases.len() as u32)).ln().ceil() as usize
     };
 
-    let mut multi_buckets: Vec<Vec<Bucket<C>>> =
-        vec![vec![Bucket::None; (1 << c) - 1]; (256 / c) + 1];
-
+    let mut multi_buckets: Vec<Buckets<C>> = vec![Buckets::new(c); (256 / c) + 1];
     let num_threads = multicore::current_num_threads();
     if coeffs.len() > num_threads {
         multi_buckets
@@ -126,24 +158,7 @@ pub fn best_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu
             .enumerate()
             .rev()
             .map(|(i, buckets)| {
-                // get segmentation and add coeff to buckets content
-                for (coeff, base) in coeffs.iter().zip(bases.iter()) {
-                    let seg = get_at::<C::Scalar>(i, c, &coeff.to_repr());
-                    if seg != 0 {
-                        buckets[seg - 1].add_assign(base);
-                    }
-                }
-
-                // Summation by parts
-                // e.g. 3a + 2b + 1c = a +
-                //                    (a) + b +
-                //                    ((a) + b) + c
-                let mut acc = C::Curve::identity();
-                let mut sum = C::Curve::identity();
-                buckets.iter().rev().for_each(|b| {
-                    sum = b.add(sum);
-                    acc += sum;
-                });
+                let mut acc = buckets.sum(coeffs, bases, i);
                 (0..c * i).for_each(|_| acc = acc.double());
                 acc
             })
@@ -153,27 +168,7 @@ pub fn best_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::Cu
             .iter_mut()
             .enumerate()
             .rev()
-            .map(|(i, buckets)| {
-                // get segmentation and add coeff to buckets content
-                for (coeff, base) in coeffs.iter().zip(bases.iter()) {
-                    let seg = get_at::<C::Scalar>(i, c, &coeff.to_repr());
-                    if seg != 0 {
-                        buckets[seg - 1].add_assign(base);
-                    }
-                }
-
-                // Summation by parts
-                // e.g. 3a + 2b + 1c = a +
-                //                    (a) + b +
-                //                    ((a) + b) + c
-                let mut acc = C::Curve::identity();
-                let mut sum = C::Curve::identity();
-                buckets.iter().rev().for_each(|b| {
-                    sum = b.add(sum);
-                    acc += sum;
-                });
-                acc
-            })
+            .map(|(i, buckets)| buckets.sum(coeffs, bases, i))
             .fold(C::Curve::identity(), |mut sum, bucket| {
                 // restore original evaluation point
                 (0..c).for_each(|_| sum = sum.double());