From 87979154350fddf07cf564d35bf984a0f596478a Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Fri, 8 Sep 2023 12:29:34 +0800
Subject: [PATCH 01/11] Insert MSM and FFT code and their benchmarks.

Resolves taikoxyz/zkevm-circuits#150.
---
 Cargo.toml         |  17 ++++-
 benches/fft.rs     |  24 +++++++
 benches/msm-alt.rs |  56 +++++++++++++++++
 benches/msm.rs     |  34 ++++++++++
 src/fft.rs         | 134 +++++++++++++++++++++++++++++++++++++++
 src/lib.rs         |   3 +
 src/msm.rs         | 153 +++++++++++++++++++++++++++++++++++++++++++++
 src/multicore.rs   |  16 +++++
 8 files changed, 436 insertions(+), 1 deletion(-)
 create mode 100644 benches/fft.rs
 create mode 100644 benches/msm-alt.rs
 create mode 100644 benches/msm.rs
 create mode 100644 src/fft.rs
 create mode 100644 src/msm.rs
 create mode 100644 src/multicore.rs
diff --git a/Cargo.toml b/Cargo.toml
index f29c917e..121552ec 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,7 @@ criterion = { version = "0.3", features = ["html_reports"] }
 rand_xorshift = "0.3"
 ark-std = { version = "0.3" }
 bincode = "1.3.3"
+halo2_proofs = { git = "https://github.com/privacy-scaling-explorations/halo2.git", rev="7a21656" }
 
 [dependencies]
 subtle = "2.4"
@@ -31,9 +32,11 @@ paste = "1.0.11"
 serde = { version = "1.0", default-features = false, optional = true }
 serde_arrays = { version = "0.1.0", optional = true }
 blake2b_simd = "1"
+maybe-rayon = { version = "0.1.0", default-features = false }
 
 [features]
-default = ["reexport", "bits"]
+default = ["reexport", "bits", "multicore"]
+multicore = ["maybe-rayon/threads"]
 asm = []
 bits = ["ff/bits"]
 bn256-table = []
@@ -67,3 +70,15 @@ harness = false
 [[bench]]
 name = "hash_to_curve"
 harness = false
+
+[[bench]]
+name = "fft"
+harness = false
+
+[[bench]]
+name = "msm"
+harness = false
+
+[[bench]]
+name = "msm-alt"
+harness = false
diff --git a/benches/fft.rs b/benches/fft.rs
new file mode 100644
index 00000000..459b9494
--- /dev/null
+++ b/benches/fft.rs
@@ -0,0 +1,24 @@
+#[macro_use]
+extern crate criterion;
+
+use group::ff::Field;
+use halo2curves::{fft::best_fft, pasta::Fp};
+
+use criterion::{BenchmarkId, Criterion};
+use rand_core::OsRng;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let mut group = c.benchmark_group("fft");
+    for k in 3..19 {
+        group.bench_function(BenchmarkId::new("k", k), |b| {
+            let mut a = (0..(1 << k)).map(|_| Fp::random(OsRng)).collect::<Vec<_>>();
+            let omega = Fp::random(OsRng); // would be weird if this mattered
+            b.iter(|| {
+                best_fft(&mut a, omega, k as u32);
+            });
+        });
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/benches/msm-alt.rs b/benches/msm-alt.rs
new file mode 100644
index 00000000..4c452530
--- /dev/null
+++ b/benches/msm-alt.rs
@@ -0,0 +1,56 @@
+//! This benchmark allows testing msm without depending on the `halo2_proofs`
+//! crate.  This code originates in an older version of `halo2_proofs` from
+//! before the `hash_to_curve` method was implemented.  It currently only uses
+//! curve `Secp256k1Affine`
+
+#[macro_use]
+extern crate criterion;
+
+use criterion::{black_box, BenchmarkId, Criterion};
+use ff::Field;
+use halo2_proofs::arithmetic::small_multiexp;
+use halo2curves::secp256k1::Fq as Scalar;
+use halo2curves::secp256k1::Secp256k1Affine;
+use halo2curves::CurveAffine;
+use rand_core::OsRng;
+use rand_core::SeedableRng;
+use rand_xorshift::XorShiftRng;
+use std::iter::zip;
+
+fn random_curve_points<C: CurveAffine>(k: u8) -> Vec<Secp256k1Affine> {
+    debug_assert!(k < 64);
+    let n: u64 = 1 << k;
+
+    let mut rng = XorShiftRng::from_seed([
+        0x59, 0x62, 0xbe, 0x5d, 0x76, 0x3d, 0x31, 0x8d, 0x17, 0xdb, 0x37, 0x32, 0x54, 0x06, 0xbc,
+        0xe5,
+    ]);
+
+    (0..n).map(|_n| Secp256k1Affine::random(&mut rng)).collect()
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let mut group = c.benchmark_group("msm-alt");
+    for k in 8..16 {
+        group
+            .bench_function(BenchmarkId::new("k", k), |b| {
+                let rng = OsRng;
+
+                let mut g = random_curve_points::<Secp256k1Affine>(k);
+                let half_len = g.len() / 2;
+                let (g_lo, g_hi) = g.split_at_mut(half_len);
+                let coeff_1 = Scalar::random(rng);
+                let coeff_2 = Scalar::random(rng);
+
+                b.iter(|| {
+                    for (g_lo, g_hi) in zip(g_lo.iter(), g_hi.iter()) {
+                        small_multiexp(&[black_box(coeff_1), black_box(coeff_2)], &[*g_lo, *g_hi]);
+                    }
+                })
+            })
+            .sample_size(30);
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/benches/msm.rs b/benches/msm.rs
new file mode 100644
index 00000000..419816a3
--- /dev/null
+++ b/benches/msm.rs
@@ -0,0 +1,34 @@
+#[macro_use]
+extern crate criterion;
+use criterion::{black_box, Criterion};
+use ff::Field;
+use halo2_proofs::poly::{commitment::ParamsProver, ipa::commitment::ParamsIPA};
+use halo2curves::msm::small_multiexp;
+use pasta_curves::{EqAffine, Fp};
+use rand_core::OsRng;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let rng = OsRng;
+
+    // small multiexp
+    {
+        let params: ParamsIPA<EqAffine> = ParamsIPA::new(5);
+        let g = &mut params.get_g().to_vec();
+        let len = g.len() / 2;
+        let (g_lo, g_hi) = g.split_at_mut(len);
+
+        let coeff_1 = Fp::random(rng);
+        let coeff_2 = Fp::random(rng);
+
+        c.bench_function("double-and-add", |b| {
+            b.iter(|| {
+                for (g_lo, g_hi) in g_lo.iter().zip(g_hi.iter()) {
+                    small_multiexp(&[black_box(coeff_1), black_box(coeff_2)], &[*g_lo, *g_hi]);
+                }
+            })
+        });
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/src/fft.rs b/src/fft.rs
new file mode 100644
index 00000000..6eb3487e
--- /dev/null
+++ b/src/fft.rs
@@ -0,0 +1,134 @@
+use crate::multicore;
+pub use crate::{CurveAffine, CurveExt};
+use ff::Field;
+use group::{GroupOpsOwned, ScalarMulOwned};
+
+/// This represents an element of a group with basic operations that can be
+/// performed. This allows an FFT implementation (for example) to operate
+/// generically over either a field or elliptic curve group.
+pub trait FftGroup<Scalar: Field>:
+    Copy + Send + Sync + 'static + GroupOpsOwned + ScalarMulOwned<Scalar>
+{
+}
+
+impl<T, Scalar> FftGroup<Scalar> for T
+where
+    Scalar: Field,
+    T: Copy + Send + Sync + 'static + GroupOpsOwned + ScalarMulOwned<Scalar>,
+{
+}
+
+/// Performs a radix-$2$ Fast-Fourier Transformation (FFT) on a vector of size
+/// $n = 2^k$, when provided `log_n` = $k$ and an element of multiplicative
+/// order $n$ called `omega` ($\omega$). The result is that the vector `a`, when
+/// interpreted as the coefficients of a polynomial of degree $n - 1$, is
+/// transformed into the evaluations of this polynomial at each of the $n$
+/// distinct powers of $\omega$. This transformation is invertible by providing
+/// $\omega^{-1}$ in place of $\omega$ and dividing each resulting field element
+/// by $n$.
+///
+/// This will use multithreading if beneficial.
+pub fn best_fft<Scalar: Field, G: FftGroup<Scalar>>(a: &mut [G], omega: Scalar, log_n: u32) {
+    fn bitreverse(mut n: usize, l: usize) -> usize {
+        let mut r = 0;
+        for _ in 0..l {
+            r = (r << 1) | (n & 1);
+            n >>= 1;
+        }
+        r
+    }
+
+    let threads = multicore::current_num_threads();
+    let log_threads = threads.ilog2();
+    let n = a.len();
+    assert_eq!(n, 1 << log_n);
+
+    for k in 0..n {
+        let rk = bitreverse(k, log_n as usize);
+        if k < rk {
+            a.swap(rk, k);
+        }
+    }
+
+    // precompute twiddle factors
+    let twiddles: Vec<_> = (0..(n / 2))
+        .scan(Scalar::ONE, |w, _| {
+            let tw = *w;
+            *w *= &omega;
+            Some(tw)
+        })
+        .collect();
+
+    if log_n <= log_threads {
+        let mut chunk = 2_usize;
+        let mut twiddle_chunk = n / 2;
+        for _ in 0..log_n {
+            a.chunks_mut(chunk).for_each(|coeffs| {
+                let (left, right) = coeffs.split_at_mut(chunk / 2);
+
+                // case when twiddle factor is one
+                let (a, left) = left.split_at_mut(1);
+                let (b, right) = right.split_at_mut(1);
+                let t = b[0];
+                b[0] = a[0];
+                a[0] += &t;
+                b[0] -= &t;
+
+                left.iter_mut()
+                    .zip(right.iter_mut())
+                    .enumerate()
+                    .for_each(|(i, (a, b))| {
+                        let mut t = *b;
+                        t *= &twiddles[(i + 1) * twiddle_chunk];
+                        *b = *a;
+                        *a += &t;
+                        *b -= &t;
+                    });
+            });
+            chunk *= 2;
+            twiddle_chunk /= 2;
+        }
+    } else {
+        recursive_butterfly_arithmetic(a, n, 1, &twiddles)
+    }
+}
+
+/// This perform recursive butterfly arithmetic
+pub fn recursive_butterfly_arithmetic<Scalar: Field, G: FftGroup<Scalar>>(
+    a: &mut [G],
+    n: usize,
+    twiddle_chunk: usize,
+    twiddles: &[Scalar],
+) {
+    if n == 2 {
+        let t = a[1];
+        a[1] = a[0];
+        a[0] += &t;
+        a[1] -= &t;
+    } else {
+        let (left, right) = a.split_at_mut(n / 2);
+        multicore::join(
+            || recursive_butterfly_arithmetic(left, n / 2, twiddle_chunk * 2, twiddles),
+            || recursive_butterfly_arithmetic(right, n / 2, twiddle_chunk * 2, twiddles),
+        );
+
+        // case when twiddle factor is one
+        let (a, left) = left.split_at_mut(1);
+        let (b, right) = right.split_at_mut(1);
+        let t = b[0];
+        b[0] = a[0];
+        a[0] += &t;
+        b[0] -= &t;
+
+        left.iter_mut()
+            .zip(right.iter_mut())
+            .enumerate()
+            .for_each(|(i, (a, b))| {
+                let mut t = *b;
+                t *= &twiddles[(i + 1) * twiddle_chunk];
+                *b = *a;
+                *a += &t;
+                *b -= &t;
+            });
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 3fa8e98f..670a6448 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,8 @@
 mod arithmetic;
+pub mod fft;
 pub mod hash_to_curve;
+pub mod msm;
+pub mod multicore;
 #[macro_use]
 pub mod legendre;
 pub mod serde;
diff --git a/src/msm.rs b/src/msm.rs
new file mode 100644
index 00000000..de30be55
--- /dev/null
+++ b/src/msm.rs
@@ -0,0 +1,153 @@
+use ff::PrimeField;
+use group::Group;
+use pasta_curves::arithmetic::CurveAffine;
+
+use crate::multicore;
+
+pub fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) {
+    let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect();
+
+    let c = if bases.len() < 4 {
+        1
+    } else if bases.len() < 32 {
+        3
+    } else {
+        (f64::from(bases.len() as u32)).ln().ceil() as usize
+    };
+
+    fn get_at<F: PrimeField>(segment: usize, c: usize, bytes: &F::Repr) -> usize {
+        let skip_bits = segment * c;
+        let skip_bytes = skip_bits / 8;
+
+        if skip_bytes >= 32 {
+            return 0;
+        }
+
+        let mut v = [0; 8];
+        for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) {
+            *v = *o;
+        }
+
+        let mut tmp = u64::from_le_bytes(v);
+        tmp >>= skip_bits - (skip_bytes * 8);
+        tmp %= 1 << c;
+
+        tmp as usize
+    }
+
+    let segments = (256 / c) + 1;
+
+    for current_segment in (0..segments).rev() {
+        for _ in 0..c {
+            *acc = acc.double();
+        }
+
+        #[derive(Clone, Copy)]
+        enum Bucket<C: CurveAffine> {
+            None,
+            Affine(C),
+            Projective(C::Curve),
+        }
+
+        impl<C: CurveAffine> Bucket<C> {
+            fn add_assign(&mut self, other: &C) {
+                *self = match *self {
+                    Bucket::None => Bucket::Affine(*other),
+                    Bucket::Affine(a) => Bucket::Projective(a + *other),
+                    Bucket::Projective(mut a) => {
+                        a += *other;
+                        Bucket::Projective(a)
+                    }
+                }
+            }
+
+            fn add(self, mut other: C::Curve) -> C::Curve {
+                match self {
+                    Bucket::None => other,
+                    Bucket::Affine(a) => {
+                        other += a;
+                        other
+                    }
+                    Bucket::Projective(a) => other + a,
+                }
+            }
+        }
+
+        let mut buckets: Vec<Bucket<C>> = vec![Bucket::None; (1 << c) - 1];
+
+        for (coeff, base) in coeffs.iter().zip(bases.iter()) {
+            let coeff = get_at::<C::Scalar>(current_segment, c, coeff);
+            if coeff != 0 {
+                buckets[coeff - 1].add_assign(base);
+            }
+        }
+
+        // Summation by parts
+        // e.g. 3a + 2b + 1c = a +
+        //                    (a) + b +
+        //                    ((a) + b) + c
+        let mut running_sum = C::Curve::identity();
+        for exp in buckets.into_iter().rev() {
+            running_sum = exp.add(running_sum);
+            *acc += &running_sum;
+        }
+    }
+}
+
+/// Performs a small multi-exponentiation operation.
+/// Uses the double-and-add algorithm with doublings shared across points.
+pub fn small_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve {
+    let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect();
+    let mut acc = C::Curve::identity();
+
+    // for byte idx
+    for byte_idx in (0..32).rev() {
+        // for bit idx
+        for bit_idx in (0..8).rev() {
+            acc = acc.double();
+            // for each coeff
+            for coeff_idx in 0..coeffs.len() {
+                let byte = coeffs[coeff_idx].as_ref()[byte_idx];
+                if ((byte >> bit_idx) & 1) != 0 {
+                    acc += bases[coeff_idx];
+                }
+            }
+        }
+    }
+
+    acc
+}
+
+/// Performs a multi-exponentiation operation.
+///
+/// This function will panic if coeffs and bases have a different length.
+///
+/// This will use multithreading if beneficial.
+pub fn best_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve {
+    assert_eq!(coeffs.len(), bases.len());
+
+    let num_threads = multicore::current_num_threads();
+    if coeffs.len() > num_threads {
+        let chunk = coeffs.len() / num_threads;
+        let num_chunks = coeffs.chunks(chunk).len();
+        let mut results = vec![C::Curve::identity(); num_chunks];
+        multicore::scope(|scope| {
+            let chunk = coeffs.len() / num_threads;
+
+            for ((coeffs, bases), acc) in coeffs
+                .chunks(chunk)
+                .zip(bases.chunks(chunk))
+                .zip(results.iter_mut())
+            {
+                scope.spawn(move |_| {
+                    multiexp_serial(coeffs, bases, acc);
+                });
+            }
+        });
+        results.iter().fold(C::Curve::identity(), |a, b| a + b)
+    } else {
+        let mut acc = C::Curve::identity();
+        multiexp_serial(coeffs, bases, &mut acc);
+        acc
+    }
+}
diff --git a/src/multicore.rs b/src/multicore.rs
new file mode 100644
index 00000000..d8323553
--- /dev/null
+++ b/src/multicore.rs
@@ -0,0 +1,16 @@
+pub use maybe_rayon::{
+    iter::{IntoParallelIterator, IntoParallelRefMutIterator, ParallelIterator},
+    join, scope, Scope,
+};
+
+#[cfg(feature = "multicore")]
+pub use maybe_rayon::{
+    current_num_threads,
+    iter::{IndexedParallelIterator, IntoParallelRefIterator},
+    slice::ParallelSliceMut,
+};
+
+#[cfg(not(feature = "multicore"))]
+pub fn current_num_threads() -> usize {
+    1
+}

From 77b98f25ed67af56e3d091241230477cb79a7ca7 Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Fri, 8 Sep 2023 20:15:33 +0800
Subject: [PATCH 02/11] feedback

---
 Cargo.toml         |  5 ----
 benches/msm-alt.rs | 56 -------------------------------------
 benches/msm.rs     | 69 ++++++++++++++++++++++++++++++++--------------
 3 files changed, 49 insertions(+), 81 deletions(-)
 delete mode 100644 benches/msm-alt.rs

diff --git a/Cargo.toml b/Cargo.toml
index 121552ec..b722272d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,6 @@ criterion = { version = "0.3", features = ["html_reports"] }
 rand_xorshift = "0.3"
 ark-std = { version = "0.3" }
 bincode = "1.3.3"
-halo2_proofs = { git = "https://github.com/privacy-scaling-explorations/halo2.git", rev="7a21656" }
 
 [dependencies]
 subtle = "2.4"
@@ -78,7 +77,3 @@ harness = false
 [[bench]]
 name = "msm"
 harness = false
-
-[[bench]]
-name = "msm-alt"
-harness = false
diff --git a/benches/msm-alt.rs b/benches/msm-alt.rs
deleted file mode 100644
index 4c452530..00000000
--- a/benches/msm-alt.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-//! This benchmark allows testing msm without depending on the `halo2_proofs`
-//! crate.  This code originates in an older version of `halo2_proofs` from
-//! before the `hash_to_curve` method was implemented.  It currently only uses
-//! curve `Secp256k1Affine`
-
-#[macro_use]
-extern crate criterion;
-
-use criterion::{black_box, BenchmarkId, Criterion};
-use ff::Field;
-use halo2_proofs::arithmetic::small_multiexp;
-use halo2curves::secp256k1::Fq as Scalar;
-use halo2curves::secp256k1::Secp256k1Affine;
-use halo2curves::CurveAffine;
-use rand_core::OsRng;
-use rand_core::SeedableRng;
-use rand_xorshift::XorShiftRng;
-use std::iter::zip;
-
-fn random_curve_points<C: CurveAffine>(k: u8) -> Vec<Secp256k1Affine> {
-    debug_assert!(k < 64);
-    let n: u64 = 1 << k;
-
-    let mut rng = XorShiftRng::from_seed([
-        0x59, 0x62, 0xbe, 0x5d, 0x76, 0x3d, 0x31, 0x8d, 0x17, 0xdb, 0x37, 0x32, 0x54, 0x06, 0xbc,
-        0xe5,
-    ]);
-
-    (0..n).map(|_n| Secp256k1Affine::random(&mut rng)).collect()
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    let mut group = c.benchmark_group("msm-alt");
-    for k in 8..16 {
-        group
-            .bench_function(BenchmarkId::new("k", k), |b| {
-                let rng = OsRng;
-
-                let mut g = random_curve_points::<Secp256k1Affine>(k);
-                let half_len = g.len() / 2;
-                let (g_lo, g_hi) = g.split_at_mut(half_len);
-                let coeff_1 = Scalar::random(rng);
-                let coeff_2 = Scalar::random(rng);
-
-                b.iter(|| {
-                    for (g_lo, g_hi) in zip(g_lo.iter(), g_hi.iter()) {
-                        small_multiexp(&[black_box(coeff_1), black_box(coeff_2)], &[*g_lo, *g_hi]);
-                    }
-                })
-            })
-            .sample_size(30);
-    }
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/benches/msm.rs b/benches/msm.rs
index 419816a3..6dc36245 100644
--- a/benches/msm.rs
+++ b/benches/msm.rs
@@ -1,32 +1,61 @@
+//! This benchmark allows testing msm without depending on the `halo2_proofs`
+//! crate.  This code originates in an older version of `halo2_proofs` from
+//! before the `hash_to_curve` method was implemented.  It currently only uses
+//! curve `Secp256k1Affine`
+
 #[macro_use]
 extern crate criterion;
-use criterion::{black_box, Criterion};
+
+use criterion::{black_box, BenchmarkId, Criterion};
 use ff::Field;
-use halo2_proofs::poly::{commitment::ParamsProver, ipa::commitment::ParamsIPA};
-use halo2curves::msm::small_multiexp;
-use pasta_curves::{EqAffine, Fp};
+use halo2curves::bn256::Fr as Scalar;
+use halo2curves::bn256::G1Affine;
+use halo2curves::msm::best_multiexp;
+use halo2curves::CurveAffine;
 use rand_core::OsRng;
+use rand_core::SeedableRng;
+use rand_xorshift::XorShiftRng;
+use std::iter::zip;
+
+fn random_curve_points<C: CurveAffine>(k: u8) -> Vec<G1Affine> {
+    debug_assert!(k < 64);
+    let n: u64 = 1 << k;
+
+    let mut rng = XorShiftRng::from_seed([
+        0x59, 0x62, 0xbe, 0x5d, 0x76, 0x3d, 0x31, 0x8d, 0x17, 0xdb, 0x37, 0x32, 0x54, 0x06, 0xbc,
+        0xe5,
+    ]);
+
+    (0..n).map(|_n| G1Affine::random(&mut rng)).collect()
+}
+
+#[cfg(not(feature = "multicore"))]
+const RANGE: [u8; 6] = [3, 8, 10, 12 /*(Ethereum KZG / EIP 4844)*/, 14, 16];
+#[cfg(feature = "multicore")]
+const RANGE: [u8; 9] = [
+    3, 8, 10, 12, /*(Ethereum KZG / EIP 4844)*/
+    14, 16, 18, 20, 21,
+];
 
 fn criterion_benchmark(c: &mut Criterion) {
+    let mut group = c.benchmark_group("msm");
     let rng = OsRng;
+    for k in RANGE {
+        group
+            .bench_function(BenchmarkId::new("k", k), |b| {
+                let mut g = random_curve_points::<G1Affine>(k);
+                let half_len = g.len() / 2;
+                let (g_lo, g_hi) = g.split_at_mut(half_len);
+                let coeff_1 = Scalar::random(rng);
+                let coeff_2 = Scalar::random(rng);
 
-    // small multiexp
-    {
-        let params: ParamsIPA<EqAffine> = ParamsIPA::new(5);
-        let g = &mut params.get_g().to_vec();
-        let len = g.len() / 2;
-        let (g_lo, g_hi) = g.split_at_mut(len);
-
-        let coeff_1 = Fp::random(rng);
-        let coeff_2 = Fp::random(rng);
-
-        c.bench_function("double-and-add", |b| {
-            b.iter(|| {
-                for (g_lo, g_hi) in g_lo.iter().zip(g_hi.iter()) {
-                    small_multiexp(&[black_box(coeff_1), black_box(coeff_2)], &[*g_lo, *g_hi]);
-                }
+                b.iter(|| {
+                    for (g_lo, g_hi) in zip(g_lo.iter(), g_hi.iter()) {
+                        best_multiexp(&[black_box(coeff_1), black_box(coeff_2)], &[*g_lo, *g_hi]);
+                    }
+                })
             })
-        });
+            .sample_size(10);
     }
 }
 

From 2b269848f2640fdd62205f9aa5dd81db03516afc Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Fri, 8 Sep 2023 20:31:46 +0800
Subject: [PATCH 03/11] Add instructions

---
 benches/msm.rs | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/benches/msm.rs b/benches/msm.rs
index 6dc36245..213456eb 100644
--- a/benches/msm.rs
+++ b/benches/msm.rs
@@ -1,7 +1,13 @@
-//! This benchmark allows testing msm without depending on the `halo2_proofs`
-//! crate.  This code originates in an older version of `halo2_proofs` from
-//! before the `hash_to_curve` method was implemented.  It currently only uses
-//! curve `Secp256k1Affine`
+//! This benchmarks Multi Scalar Multiplication (MSM).
+//! It measures `G1` from the BN256 curve.
+//!
+//! Benchmark with default feature `multicore` enabled:
+//!
+//!     cargo bench -- msm
+//!
+//! To run with as singlecore:
+//!
+//!     cargo bench --no-default-features -- msm
 
 #[macro_use]
 extern crate criterion;

From 1977dc029357a49eed675cefdff5233700991002 Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Fri, 8 Sep 2023 21:15:59 +0800
Subject: [PATCH 04/11] feedback

---
 benches/msm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benches/msm.rs b/benches/msm.rs
index 213456eb..a3c2e759 100644
--- a/benches/msm.rs
+++ b/benches/msm.rs
@@ -40,7 +40,7 @@ const RANGE: [u8; 6] = [3, 8, 10, 12 /*(Ethereum KZG / EIP 4844)*/, 14, 16];
 #[cfg(feature = "multicore")]
 const RANGE: [u8; 9] = [
     3, 8, 10, 12, /*(Ethereum KZG / EIP 4844)*/
-    14, 16, 18, 20, 21,
+    14, 16, 18, 20, 22,
 ];
 
 fn criterion_benchmark(c: &mut Criterion) {

From 68f41d3321c3abbf1f8b536462e2f2003a1e496a Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Fri, 15 Sep 2023 20:16:08 +0800
Subject: [PATCH 05/11] Implement feedback:  Actually supply the correct
 arguments to `best_multiexp`.

Split into `singlecore` and `multicore` benchmarks so Criterion's result
caching and comparison over multiple runs make sense.

Rewrite point and scalar generation.
---
 benches/msm.rs | 78 ++++++++++++++++++++++++++------------------------
 1 file changed, 40 insertions(+), 38 deletions(-)

diff --git a/benches/msm.rs b/benches/msm.rs
index a3c2e759..9c9c8b3a 100644
--- a/benches/msm.rs
+++ b/benches/msm.rs
@@ -1,69 +1,71 @@
 //! This benchmarks Multi Scalar Multiplication (MSM).
 //! It measures `G1` from the BN256 curve.
 //!
-//! Benchmark with default feature `multicore` enabled:
+//! To run this benchmark:
 //!
 //!     cargo bench -- msm
 //!
-//! To run with as singlecore:
+//! Caveat:  `multicore` should be read as _allowing_ for multicore computation --
+//! not enforcing it.
 //!
-//!     cargo bench --no-default-features -- msm
 
 #[macro_use]
 extern crate criterion;
 
 use criterion::{black_box, BenchmarkId, Criterion};
 use ff::Field;
-use halo2curves::bn256::Fr as Scalar;
-use halo2curves::bn256::G1Affine;
-use halo2curves::msm::best_multiexp;
-use halo2curves::CurveAffine;
-use rand_core::OsRng;
+use group::prime::PrimeCurveAffine;
+use halo2curves::bn256::{Fr as Scalar, G1Affine as Point};
+use halo2curves::msm::{best_multiexp, multiexp_serial};
 use rand_core::SeedableRng;
 use rand_xorshift::XorShiftRng;
-use std::iter::zip;
 
-fn random_curve_points<C: CurveAffine>(k: u8) -> Vec<G1Affine> {
-    debug_assert!(k < 64);
-    let n: u64 = 1 << k;
+const SEED: [u8; 16] = [
+    0x59, 0x62, 0xbe, 0x5d, 0x76, 0x3d, 0x31, 0x8d, 0x17, 0xdb, 0x37, 0x32, 0x54, 0x06, 0xbc, 0xe5,
+];
 
-    let mut rng = XorShiftRng::from_seed([
-        0x59, 0x62, 0xbe, 0x5d, 0x76, 0x3d, 0x31, 0x8d, 0x17, 0xdb, 0x37, 0x32, 0x54, 0x06, 0xbc,
-        0xe5,
-    ]);
+const SINGLECORE_RANGE: [u8; 6] = [3, 8, 10, 12, 14, 16];
 
-    (0..n).map(|_n| G1Affine::random(&mut rng)).collect()
-}
+const MULTICORE_RANGE: [u8; 9] = [3, 8, 10, 12, 14, 16, 18, 20, 22];
 
-#[cfg(not(feature = "multicore"))]
-const RANGE: [u8; 6] = [3, 8, 10, 12 /*(Ethereum KZG / EIP 4844)*/, 14, 16];
-#[cfg(feature = "multicore")]
-const RANGE: [u8; 9] = [
-    3, 8, 10, 12, /*(Ethereum KZG / EIP 4844)*/
-    14, 16, 18, 20, 22,
-];
+fn singlecore(c: &mut Criterion) {
+    let mut group = c.benchmark_group("msm/singlecore");
+    let mut rng = XorShiftRng::from_seed(SEED);
+    for k in SINGLECORE_RANGE {
+        group
+            .bench_function(BenchmarkId::new("k", k), |b| {
+                assert!(k < 64);
+                let n: u64 = 1 << k;
+
+                let bases: Vec<_> = (0..n).map(|_| Point::random(&mut rng)).collect();
+                let coeffs: Vec<_> = (0..n).map(|_| Scalar::random(&mut rng)).collect();
+                let mut acc = Point::identity().into();
 
-fn criterion_benchmark(c: &mut Criterion) {
-    let mut group = c.benchmark_group("msm");
-    let rng = OsRng;
-    for k in RANGE {
+                b.iter(|| multiexp_serial(&coeffs, &bases, &mut black_box(acc)));
+            })
+            .sample_size(10);
+    }
+}
+
+fn multicore(c: &mut Criterion) {
+    let mut group = c.benchmark_group("msm/multicore");
+    let mut rng = XorShiftRng::from_seed(SEED);
+    for k in MULTICORE_RANGE {
         group
             .bench_function(BenchmarkId::new("k", k), |b| {
-                let mut g = random_curve_points::<G1Affine>(k);
-                let half_len = g.len() / 2;
-                let (g_lo, g_hi) = g.split_at_mut(half_len);
-                let coeff_1 = Scalar::random(rng);
-                let coeff_2 = Scalar::random(rng);
+                assert!(k < 64);
+                let n: u64 = 1 << k;
+
+                let bases: Vec<_> = (0..n).map(|_| Point::random(&mut rng)).collect();
+                let coeffs: Vec<_> = (0..n).map(|_| Scalar::random(&mut rng)).collect();
 
                 b.iter(|| {
-                    for (g_lo, g_hi) in zip(g_lo.iter(), g_hi.iter()) {
-                        best_multiexp(&[black_box(coeff_1), black_box(coeff_2)], &[*g_lo, *g_hi]);
-                    }
+                    best_multiexp(&coeffs, &bases);
                 })
             })
             .sample_size(10);
     }
 }
 
-criterion_group!(benches, criterion_benchmark);
+criterion_group!(benches, singlecore, multicore);
 criterion_main!(benches);

From 2bc3c1750ecf45c39591da082135d09d3dd32732 Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Tue, 19 Sep 2023 16:38:42 +0800
Subject: [PATCH 06/11] Use slicing and parallelism to decrease running
 time.

Laptop measurements:
k=22: 109 sec
k=16:   1 sec
---
 benches/msm.rs | 73 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 62 insertions(+), 11 deletions(-)

diff --git a/benches/msm.rs b/benches/msm.rs
index 9c9c8b3a..d075a0b5 100644
--- a/benches/msm.rs
+++ b/benches/msm.rs
@@ -17,8 +17,11 @@ use ff::Field;
 use group::prime::PrimeCurveAffine;
 use halo2curves::bn256::{Fr as Scalar, G1Affine as Point};
 use halo2curves::msm::{best_multiexp, multiexp_serial};
+use maybe_rayon::current_thread_index;
+use maybe_rayon::prelude::{IntoParallelIterator, ParallelIterator};
 use rand_core::SeedableRng;
 use rand_xorshift::XorShiftRng;
+use std::time::{Duration, SystemTime};
 
 const SEED: [u8; 16] = [
     0x59, 0x62, 0xbe, 0x5d, 0x76, 0x3d, 0x31, 0x8d, 0x17, 0xdb, 0x37, 0x32, 0x54, 0x06, 0xbc, 0xe5,
@@ -28,20 +31,71 @@ const SINGLECORE_RANGE: [u8; 6] = [3, 8, 10, 12, 14, 16];
 
 const MULTICORE_RANGE: [u8; 9] = [3, 8, 10, 12, 14, 16, 18, 20, 22];
 
+/// This do get called twice, but the total running time entirely dominated by the larger instance.
+fn get_data(k: u8) -> (Vec<Scalar>, Vec<Point>) {
+    let n: u64 = {
+        assert!(k < 64);
+        1 << k
+    };
+
+    println!(
+        "\n\nCoefficient and curve point generation starting.  {} coefficient-points pairs needed",
+        n
+    );
+    let timer = SystemTime::now();
+    let coeffs = (0..n)
+        .into_par_iter()
+        .map_init(
+            || {
+                let mut thread_seed = SEED.clone();
+                let uniq = current_thread_index().unwrap().to_ne_bytes();
+                assert!(std::mem::size_of::<usize>() == 8);
+                for i in 0..uniq.len() {
+                    thread_seed[i] += uniq[i];
+                    thread_seed[i + 8] += uniq[i];
+                }
+                XorShiftRng::from_seed(thread_seed)
+            },
+            |mut rng, _| Scalar::random(&mut rng),
+        )
+        .collect();
+    let bases = (0..n)
+        .into_par_iter()
+        .map_init(
+            || {
+                let mut thread_seed = SEED.clone();
+                let uniq = current_thread_index().unwrap().to_ne_bytes();
+                assert!(std::mem::size_of::<usize>() == 8);
+                for i in 0..uniq.len() {
+                    thread_seed[i] += uniq[i];
+                    thread_seed[i + 8] += uniq[i];
+                }
+                XorShiftRng::from_seed(thread_seed)
+            },
+            |mut rng, _| Point::random(&mut rng),
+        )
+        .collect();
+    let end = timer.elapsed().unwrap();
+    println!(
+        "Coefficient and curve point generation took: {} sec.\n\n",
+        end.as_secs()
+    );
+
+    return (coeffs, bases);
+}
+
 fn singlecore(c: &mut Criterion) {
     let mut group = c.benchmark_group("msm/singlecore");
-    let mut rng = XorShiftRng::from_seed(SEED);
+    let (coeffs, bases) = get_data(*SINGLECORE_RANGE.iter().max().unwrap());
     for k in SINGLECORE_RANGE {
         group
             .bench_function(BenchmarkId::new("k", k), |b| {
                 assert!(k < 64);
-                let n: u64 = 1 << k;
+                let n: usize = 1 << k;
 
-                let bases: Vec<_> = (0..n).map(|_| Point::random(&mut rng)).collect();
-                let coeffs: Vec<_> = (0..n).map(|_| Scalar::random(&mut rng)).collect();
                 let mut acc = Point::identity().into();
 
-                b.iter(|| multiexp_serial(&coeffs, &bases, &mut black_box(acc)));
+                b.iter(|| multiexp_serial(&coeffs[..n], &bases[..n], &mut black_box(acc)));
             })
             .sample_size(10);
     }
@@ -49,18 +103,15 @@ fn singlecore(c: &mut Criterion) {
 
 fn multicore(c: &mut Criterion) {
     let mut group = c.benchmark_group("msm/multicore");
-    let mut rng = XorShiftRng::from_seed(SEED);
+    let (coeffs, bases) = get_data(*MULTICORE_RANGE.iter().max().unwrap());
     for k in MULTICORE_RANGE {
         group
             .bench_function(BenchmarkId::new("k", k), |b| {
                 assert!(k < 64);
-                let n: u64 = 1 << k;
-
-                let bases: Vec<_> = (0..n).map(|_| Point::random(&mut rng)).collect();
-                let coeffs: Vec<_> = (0..n).map(|_| Scalar::random(&mut rng)).collect();
+                let n: usize = 1 << k;
 
                 b.iter(|| {
-                    best_multiexp(&coeffs, &bases);
+                    best_multiexp(&coeffs[..n], &bases[..n]);
                 })
             })
             .sample_size(10);

From 2621efe0b281735779711fc9de59e0c5e7cd1f0f Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Wed, 20 Sep 2023 14:06:14 +0800
Subject: [PATCH 07/11] Refactor msm

---
 benches/msm.rs | 63 ++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 35 deletions(-)

diff --git a/benches/msm.rs b/benches/msm.rs
index d075a0b5..d40994ab 100644
--- a/benches/msm.rs
+++ b/benches/msm.rs
@@ -12,7 +12,7 @@
 #[macro_use]
 extern crate criterion;
 
-use criterion::{black_box, BenchmarkId, Criterion};
+use criterion::{BenchmarkId, Criterion};
 use ff::Field;
 use group::prime::PrimeCurveAffine;
 use halo2curves::bn256::{Fr as Scalar, G1Affine as Point};
@@ -21,33 +21,28 @@ use maybe_rayon::current_thread_index;
 use maybe_rayon::prelude::{IntoParallelIterator, ParallelIterator};
 use rand_core::SeedableRng;
 use rand_xorshift::XorShiftRng;
-use std::time::{Duration, SystemTime};
+use std::time::SystemTime;
 
+const SAMPLE_SIZE: usize = 10;
+const SINGLECORE_RANGE: [u8; 6] = [3, 8, 10, 12, 14, 16];
+const MULTICORE_RANGE: [u8; 9] = [3, 8, 10, 12, 14, 16, 18, 20, 22];
 const SEED: [u8; 16] = [
     0x59, 0x62, 0xbe, 0x5d, 0x76, 0x3d, 0x31, 0x8d, 0x17, 0xdb, 0x37, 0x32, 0x54, 0x06, 0xbc, 0xe5,
 ];
 
-const SINGLECORE_RANGE: [u8; 6] = [3, 8, 10, 12, 14, 16];
-
-const MULTICORE_RANGE: [u8; 9] = [3, 8, 10, 12, 14, 16, 18, 20, 22];
-
-/// This do get called twice, but the total running time entirely dominated by the larger instance.
-fn get_data(k: u8) -> (Vec<Scalar>, Vec<Point>) {
+fn generate_coefficients_and_curvepoints(k: u8) -> (Vec<Scalar>, Vec<Point>) {
     let n: u64 = {
         assert!(k < 64);
         1 << k
     };
 
-    println!(
-        "\n\nCoefficient and curve point generation starting.  {} coefficient-points pairs needed",
-        n
-    );
+    println!("\n\nGenerating 2^{k} = {n} coefficients and curve points..",);
     let timer = SystemTime::now();
     let coeffs = (0..n)
         .into_par_iter()
         .map_init(
             || {
-                let mut thread_seed = SEED.clone();
+                let mut thread_seed = SEED;
                 let uniq = current_thread_index().unwrap().to_ne_bytes();
                 assert!(std::mem::size_of::<usize>() == 8);
                 for i in 0..uniq.len() {
@@ -56,14 +51,14 @@ fn get_data(k: u8) -> (Vec<Scalar>, Vec<Point>) {
                 }
                 XorShiftRng::from_seed(thread_seed)
             },
-            |mut rng, _| Scalar::random(&mut rng),
+            |rng, _| Scalar::random(rng),
         )
         .collect();
     let bases = (0..n)
         .into_par_iter()
         .map_init(
             || {
-                let mut thread_seed = SEED.clone();
+                let mut thread_seed = SEED;
                 let uniq = current_thread_index().unwrap().to_ne_bytes();
                 assert!(std::mem::size_of::<usize>() == 8);
                 for i in 0..uniq.len() {
@@ -72,51 +67,49 @@ fn get_data(k: u8) -> (Vec<Scalar>, Vec<Point>) {
                 }
                 XorShiftRng::from_seed(thread_seed)
             },
-            |mut rng, _| Point::random(&mut rng),
+            |rng, _| Point::random(rng),
         )
         .collect();
     let end = timer.elapsed().unwrap();
-    println!(
-        "Coefficient and curve point generation took: {} sec.\n\n",
+    println!("Generating 2^{k} = {n} coefficients and curve points took: {} sec.\n\n",
         end.as_secs()
     );
 
-    return (coeffs, bases);
+    (coeffs, bases)
 }
 
-fn singlecore(c: &mut Criterion) {
-    let mut group = c.benchmark_group("msm/singlecore");
-    let (coeffs, bases) = get_data(*SINGLECORE_RANGE.iter().max().unwrap());
+fn msm(c: &mut Criterion) {
+    let mut group = c.benchmark_group("msm");
+    let max_k = *SINGLECORE_RANGE
+        .iter()
+        .chain(MULTICORE_RANGE.iter())
+        .max()
+        .unwrap_or(&16);
+    let (coeffs, bases) = generate_coefficients_and_curvepoints(max_k);
+
     for k in SINGLECORE_RANGE {
         group
-            .bench_function(BenchmarkId::new("k", k), |b| {
+            .bench_function(BenchmarkId::new("singlecore", k), |b| {
                 assert!(k < 64);
                 let n: usize = 1 << k;
-
                 let mut acc = Point::identity().into();
-
-                b.iter(|| multiexp_serial(&coeffs[..n], &bases[..n], &mut black_box(acc)));
+                b.iter(|| multiexp_serial(&coeffs[..n], &bases[..n], &mut acc));
             })
             .sample_size(10);
     }
-}
-
-fn multicore(c: &mut Criterion) {
-    let mut group = c.benchmark_group("msm/multicore");
-    let (coeffs, bases) = get_data(*MULTICORE_RANGE.iter().max().unwrap());
     for k in MULTICORE_RANGE {
         group
-            .bench_function(BenchmarkId::new("k", k), |b| {
+            .bench_function(BenchmarkId::new("multicore", k), |b| {
                 assert!(k < 64);
                 let n: usize = 1 << k;
-
                 b.iter(|| {
                     best_multiexp(&coeffs[..n], &bases[..n]);
                 })
             })
-            .sample_size(10);
+            .sample_size(SAMPLE_SIZE);
     }
+    group.finish();
 }
 
-criterion_group!(benches, singlecore, multicore);
+criterion_group!(benches, msm);
 criterion_main!(benches);

From 16ae1468cdcebd32a6f00375efd40c7a002b0b2a Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Wed, 20 Sep 2023 14:06:44 +0800
Subject: [PATCH 08/11] Refactor fft

---
 benches/fft.rs | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/benches/fft.rs b/benches/fft.rs
index 459b9494..7dff3f0f 100644
--- a/benches/fft.rs
+++ b/benches/fft.rs
@@ -1,24 +1,45 @@
 #[macro_use]
 extern crate criterion;
 
-use group::ff::Field;
-use halo2curves::{fft::best_fft, pasta::Fp};
-
 use criterion::{BenchmarkId, Criterion};
+use group::ff::Field;
+use halo2curves::bn256::Fr as Scalar;
+use halo2curves::fft::best_fft;
 use rand_core::OsRng;
+use std::ops::Range;
+use std::time::SystemTime;
+
+const RANGE: Range<u32> = 3..19;
+
+fn generate_data(k: u32) -> Vec<Scalar> {
+    let n = 1 << k;
+    let timer = SystemTime::now();
+    println!("\n\nGenerating 2^{k} = {n} values..",);
+    let data: Vec<Scalar> = (0..n).map(|_| Scalar::random(OsRng)).collect();
+    let end = timer.elapsed().unwrap();
+    println!(
+        "Generating 2^{k} = {n} values took: {} sec.\n\n",
+        end.as_secs()
+    );
+    data
+}
 
-fn criterion_benchmark(c: &mut Criterion) {
+fn fft(c: &mut Criterion) {
+    let max_k = RANGE.max().unwrap_or(16);
+    let mut data = generate_data(max_k);
+    let omega = Scalar::random(OsRng);
     let mut group = c.benchmark_group("fft");
-    for k in 3..19 {
+    for k in RANGE {
         group.bench_function(BenchmarkId::new("k", k), |b| {
-            let mut a = (0..(1 << k)).map(|_| Fp::random(OsRng)).collect::<Vec<_>>();
-            let omega = Fp::random(OsRng); // would be weird if this mattered
+            let n = 1 << k;
+            assert!(n <= data.len());
             b.iter(|| {
-                best_fft(&mut a, omega, k as u32);
+                best_fft(&mut data[..n], omega, k);
             });
         });
     }
+    group.finish();
 }
 
-criterion_group!(benches, criterion_benchmark);
+criterion_group!(benches, fft);
 criterion_main!(benches);

From a5eab13a7ff76a172cafd38b034bc60b9fa61339 Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Wed, 20 Sep 2023 14:31:41 +0800
Subject: [PATCH 09/11] Update module comments

---
 benches/fft.rs | 12 ++++++++++++
 benches/msm.rs |  6 +++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/benches/fft.rs b/benches/fft.rs
index 7dff3f0f..a250308d 100644
--- a/benches/fft.rs
+++ b/benches/fft.rs
@@ -1,3 +1,15 @@
+//! This benchmarks the Fast Fourier Transform (FFT).
+//! Since it is over a finite field, it is actually the Number Theoretic
+//! Transform (NTT).  It uses the `Fr` scalar field from the BN256 curve.
+//!
+//! To run this benchmark:
+//!
+//!     cargo bench -- fft
+//!
+//! Caveat:  The multicore benchmark assumes:
+//!     1. a multi-core system
+//!     2. that the `multicore` feature is enabled.  It is by default.
+
 #[macro_use]
 extern crate criterion;
 
diff --git a/benches/msm.rs b/benches/msm.rs
index d40994ab..3d7f7581 100644
--- a/benches/msm.rs
+++ b/benches/msm.rs
@@ -5,9 +5,9 @@
 //!
 //!     cargo bench -- msm
 //!
-//! Caveat:  `multicore` should be read as _allowing_ for multicore computation --
-//! not enforcing it.
-//!
+//! Caveat:  The multicore benchmark assumes:
+//!     1. a multi-core system
+//!     2. that the `multicore` feature is enabled.  It is by default.
 
 #[macro_use]
 extern crate criterion;

From 714e164da60303eb006e2087fad732023b2e1223 Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Wed, 20 Sep 2023 18:09:07 +0800
Subject: [PATCH 10/11] Fix formatting

---
 benches/msm.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/benches/msm.rs b/benches/msm.rs
index 3d7f7581..c78952b7 100644
--- a/benches/msm.rs
+++ b/benches/msm.rs
@@ -71,7 +71,8 @@ fn generate_coefficients_and_curvepoints(k: u8) -> (Vec<Scalar>, Vec<Point>) {
         )
         .collect();
     let end = timer.elapsed().unwrap();
-    println!("Generating 2^{k} = {n} coefficients and curve points took: {} sec.\n\n",
+    println!(
+        "Generating 2^{k} = {n} coefficients and curve points took: {} sec.\n\n",
         end.as_secs()
     );
 

From 70924514ac84cc48dcff6e93111b7ff527ccb061 Mon Sep 17 00:00:00 2001
From: Einar Rasmussen <einar@taiko.xyz>
Date: Thu, 21 Sep 2023 17:58:53 +0800
Subject: [PATCH 11/11] Implement suggestion for fixing CI

---
 Cargo.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Cargo.toml b/Cargo.toml
index b722272d..06edc850 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -77,3 +77,4 @@ harness = false
 [[bench]]
 name = "msm"
 harness = false
+required-features = ["multicore"]