diff --git a/Cargo.lock b/Cargo.lock index cba1364190..933cd94596 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -1765,7 +1765,6 @@ dependencies = [ name = "poly-commitment" version = "0.1.0" dependencies = [ - "ark-bn254", "ark-ec", "ark-ff", "ark-poly", @@ -1787,6 +1786,7 @@ dependencies = [ "rmp-serde", "serde", "serde_with", + "smallvec", "thiserror", ] @@ -2262,6 +2262,11 @@ dependencies = [ "autocfg", ] +[[package]] +name = "smallvec" +version = "2.0.0-alpha.9" +source = "git+https://github.com/servo/rust-smallvec.git#a176a870987f61b04e001a7c4d0863fdeb427083" + [[package]] name = "smawk" version = "0.3.1" diff --git a/curves/Cargo.toml b/curves/Cargo.toml index 4bbf45f798..8e6fca5a81 100644 --- a/curves/Cargo.toml +++ b/curves/Cargo.toml @@ -17,3 +17,6 @@ ark-ff = { version = "0.3.0", features = ["parallel", "asm"] } rand = { version = "0.8.0", default-features = false } ark-algebra-test-templates = "0.3.0" ark-std = "0.3.0" + +[features] +32x9 = [] diff --git a/curves/src/pasta/fields/fp.rs b/curves/src/pasta/fields/fp.rs index 8560087ade..5365cf232b 100644 --- a/curves/src/pasta/fields/fp.rs +++ b/curves/src/pasta/fields/fp.rs @@ -2,71 +2,110 @@ use ark_ff::{biginteger::BigInteger256 as BigInteger, FftParameters, Fp256, Fp25 pub type Fp = Fp256; +#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord, Hash)] pub struct FpParameters; impl Fp256Parameters for FpParameters {} +#[rustfmt::skip] impl FftParameters for FpParameters { type BigInt = BigInteger; const TWO_ADICITY: u32 = 32; - #[rustfmt::skip] - const TWO_ADIC_ROOT_OF_UNITY: BigInteger = BigInteger([ - 0xa28db849bad6dbf0, 0x9083cd03d3b539df, 0xfba6b9ca9dc8448e, 0x3ec928747b89c6da - ]); + const TWO_ADIC_ROOT_OF_UNITY: BigInteger = { + const TWO_ADIC_ROOT_OF_UNITY: Fp = ark_ff::field_new!(Fp, "19814229590243028906643993866117402072516588566294623396325693409366934201135"); + TWO_ADIC_ROOT_OF_UNITY.0 + }; } -impl ark_ff::FpParameters for FpParameters { - // 28948022309329048855892746252171976963363056481941560715954676764349967630337 - const MODULUS: BigInteger = BigInteger([ - 0x992d30ed00000001, - 0x224698fc094cf91b, - 0x0, - 0x4000000000000000, - ]); - - const R: BigInteger = BigInteger([ - 0x34786d38fffffffd, - 0x992c350be41914ad, - 0xffffffffffffffff, - 0x3fffffffffffffff, - ]); - - const R2: BigInteger = BigInteger([ - 0x8c78ecb30000000f, - 0xd7d30dbd8b0de0e7, - 0x7797a99bc3c95d18, - 0x96d41af7b9cb714, - ]); - - const MODULUS_MINUS_ONE_DIV_TWO: BigInteger = BigInteger([ - 0xcc96987680000000, - 0x11234c7e04a67c8d, - 0x0, - 0x2000000000000000, - ]); - - // T and T_MINUS_ONE_DIV_TWO, where MODULUS - 1 = 2^S * T - const T: BigInteger = BigInteger([0x94cf91b992d30ed, 0x224698fc, 0x0, 0x40000000]); - - const T_MINUS_ONE_DIV_TWO: BigInteger = - BigInteger([0x4a67c8dcc969876, 0x11234c7e, 0x0, 0x20000000]); - - // GENERATOR = 5 - const GENERATOR: BigInteger = BigInteger([ - 0xa1a55e68ffffffed, - 0x74c2a54b4f4982f3, - 0xfffffffffffffffd, - 0x3fffffffffffffff, - ]); - - const MODULUS_BITS: u32 = 255; - - const CAPACITY: u32 = Self::MODULUS_BITS - 1; +#[cfg(not(any(target_family = "wasm", feature = "32x9")))] +pub mod native { + use super::*; + + impl ark_ff::FpParameters for FpParameters { + // 28948022309329048855892746252171976963363056481941560715954676764349967630337 + const MODULUS: BigInteger = BigInteger::new([ + 0x992d30ed00000001, + 0x224698fc094cf91b, + 0x0, + 0x4000000000000000, + ]); + const R: BigInteger = BigInteger::new([ + 0x34786d38fffffffd, + 0x992c350be41914ad, + 0xffffffffffffffff, + 0x3fffffffffffffff, + ]); + const R2: BigInteger = BigInteger::new([ + 0x8c78ecb30000000f, + 0xd7d30dbd8b0de0e7, + 0x7797a99bc3c95d18, + 0x96d41af7b9cb714, + ]); + const MODULUS_MINUS_ONE_DIV_TWO: BigInteger = BigInteger::new([ + 0xcc96987680000000, + 0x11234c7e04a67c8d, + 0x0, + 0x2000000000000000, + ]); + // T and T_MINUS_ONE_DIV_TWO, where MODULUS - 1 = 2^S * T + const T: BigInteger = BigInteger::new([0x94cf91b992d30ed, 0x224698fc, 0x0, 0x40000000]); + const T_MINUS_ONE_DIV_TWO: BigInteger = + BigInteger::new([0x4a67c8dcc969876, 0x11234c7e, 0x0, 0x20000000]); + // GENERATOR = 5 + const GENERATOR: BigInteger = BigInteger::new([ + 0xa1a55e68ffffffed, + 0x74c2a54b4f4982f3, + 0xfffffffffffffffd, + 0x3fffffffffffffff, + ]); + const MODULUS_BITS: u32 = 255; + const CAPACITY: u32 = Self::MODULUS_BITS - 1; + const REPR_SHAVE_BITS: u32 = 1; + // -(MODULUS^{-1} mod 2^64) mod 2^64 + const INV: u64 = 11037532056220336127; + } +} - const REPR_SHAVE_BITS: u32 = 1; +#[cfg(any(target_family = "wasm", feature = "32x9"))] +pub mod x32x9 { + use super::*; - // -(MODULUS^{-1} mod 2^64) mod 2^64 - const INV: u64 = 11037532056220336127; + #[rustfmt::skip] + impl ark_ff::FpParameters for FpParameters { + // 28948022309329048855892746252171976963363056481941560715954676764349967630337 + const MODULUS: BigInteger = BigInteger::new([ + 0x1, 0x9698768, 0x133e46e6, 0xd31f812, 0x224, 0x0, 0x0, 0x0, 0x400000, + ]); + const R: BigInteger = BigInteger::new([ + 0x1fffff81, 0x14a5d367, 0x141ad3c0, 0x1435eec5, 0x1ffeefef, 0x1fffffff, 0x1fffffff, + 0x1fffffff, 0x3fffff, + ]); + const R2: BigInteger = BigInteger::new([ + 0x3b6a, 0x19c10910, 0x1a6a0188, 0x12a4fd88, 0x634b36d, 0x178792ba, 0x7797a99, 0x1dce5b8a, + 0x3506bd, + ]); + // TODO + const MODULUS_MINUS_ONE_DIV_TWO: BigInteger = BigInteger::new([ + 0x0, 0x4b4c3b4, 0x99f2373, 0x698fc09, 0x112, 0x0, 0x0, 0x0, 0x200000, + ]); + // T and T_MINUS_ONE_DIV_TWO, where MODULUS - 1 = 2^S * T + const T: BigInteger = BigInteger::new([ + 0x192d30ed, 0xa67c8dc, 0x11a63f02, 0x44, 0x0, 0x0, 0x0, 0x80000, 0x0, + ]); + const T_MINUS_ONE_DIV_TWO: BigInteger = BigInteger::new([ + 0xc969876, 0x533e46e, 0x8d31f81, 0x22, 0x0, 0x0, 0x0, 0x40000, 0x0, + ]); + // GENERATOR = 5 + const GENERATOR: BigInteger = { + const FIVE: Fp = ark_ff::field_new!(Fp, "5"); + FIVE.0 + }; + const MODULUS_BITS: u32 = 255; + const CAPACITY: u32 = Self::MODULUS_BITS - 1; + const REPR_SHAVE_BITS: u32 = 1; + // -(MODULUS^{-1} mod 2^64) mod 2^64 + const INV: u64 = 0x1fffffff; + } } diff --git a/curves/src/pasta/fields/fq.rs b/curves/src/pasta/fields/fq.rs index 59a0ced05b..80d027a9b7 100644 --- a/curves/src/pasta/fields/fq.rs +++ b/curves/src/pasta/fields/fq.rs @@ -1,73 +1,110 @@ -use ark_ff::{ - biginteger::BigInteger256 as BigInteger, FftParameters, Fp256, Fp256Parameters, FpParameters, -}; - -pub struct FqParameters; +use ark_ff::{biginteger::BigInteger256 as BigInteger, FftParameters, Fp256, Fp256Parameters}; pub type Fq = Fp256; +#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord, Hash)] +pub struct FqParameters; + impl Fp256Parameters for FqParameters {} + +#[rustfmt::skip] impl FftParameters for FqParameters { type BigInt = BigInteger; const TWO_ADICITY: u32 = 32; - #[rustfmt::skip] - const TWO_ADIC_ROOT_OF_UNITY: BigInteger = BigInteger([ - 0x218077428c9942de, 0xcc49578921b60494, 0xac2e5d27b2efbee2, 0xb79fa897f2db056 - ]); + const TWO_ADIC_ROOT_OF_UNITY: BigInteger = { + const TWO_ADIC_ROOT_OF_UNITY: Fq = ark_ff::field_new!(Fq, "20761624379169977859705911634190121761503565370703356079647768903521299517535"); + TWO_ADIC_ROOT_OF_UNITY.0 + }; } -impl FpParameters for FqParameters { - // 28948022309329048855892746252171976963363056481941647379679742748393362948097 - const MODULUS: BigInteger = BigInteger([ - 0x8c46eb2100000001, - 0x224698fc0994a8dd, - 0x0, - 0x4000000000000000, - ]); - - const R: BigInteger = BigInteger([ - 0x5b2b3e9cfffffffd, - 0x992c350be3420567, - 0xffffffffffffffff, - 0x3fffffffffffffff, - ]); - - const R2: BigInteger = BigInteger([ - 0xfc9678ff0000000f, - 0x67bb433d891a16e3, - 0x7fae231004ccf590, - 0x96d41af7ccfdaa9, - ]); - const MODULUS_MINUS_ONE_DIV_TWO: BigInteger = BigInteger([ - 0xc623759080000000, - 0x11234c7e04ca546e, - 0x0, - 0x2000000000000000, - ]); - - // T and T_MINUS_ONE_DIV_TWO, where MODULUS - 1 = 2^S * T - - const T: BigInteger = BigInteger([0x994a8dd8c46eb21, 0x224698fc, 0x0, 0x40000000]); - - const T_MINUS_ONE_DIV_TWO: BigInteger = - BigInteger([0x4ca546ec6237590, 0x11234c7e, 0x0, 0x20000000]); - - // GENERATOR = 5 - const GENERATOR: BigInteger = BigInteger([ - 0x96bc8c8cffffffed, - 0x74c2a54b49f7778e, - 0xfffffffffffffffd, - 0x3fffffffffffffff, - ]); - - const MODULUS_BITS: u32 = 255; - - const CAPACITY: u32 = Self::MODULUS_BITS - 1; +#[cfg(not(any(target_family = "wasm", feature = "32x9")))] +pub mod native { + use super::*; + + impl ark_ff::FpParameters for FqParameters { + // 28948022309329048855892746252171976963363056481941647379679742748393362948097 + const MODULUS: BigInteger = BigInteger::new([ + 0x8c46eb2100000001, + 0x224698fc0994a8dd, + 0x0, + 0x4000000000000000, + ]); + const R: BigInteger = BigInteger::new([ + 0x5b2b3e9cfffffffd, + 0x992c350be3420567, + 0xffffffffffffffff, + 0x3fffffffffffffff, + ]); + const R2: BigInteger = BigInteger::new([ + 0xfc9678ff0000000f, + 0x67bb433d891a16e3, + 0x7fae231004ccf590, + 0x96d41af7ccfdaa9, + ]); + const MODULUS_MINUS_ONE_DIV_TWO: BigInteger = BigInteger::new([ + 0xc623759080000000, + 0x11234c7e04ca546e, + 0x0, + 0x2000000000000000, + ]); + // T and T_MINUS_ONE_DIV_TWO, where MODULUS - 1 = 2^S * T + const T: BigInteger = BigInteger::new([0x994a8dd8c46eb21, 0x224698fc, 0x0, 0x40000000]); + const T_MINUS_ONE_DIV_TWO: BigInteger = + BigInteger::new([0x4ca546ec6237590, 0x11234c7e, 0x0, 0x20000000]); + // GENERATOR = 5 + const GENERATOR: BigInteger = BigInteger::new([ + 0x96bc8c8cffffffed, + 0x74c2a54b49f7778e, + 0xfffffffffffffffd, + 0x3fffffffffffffff, + ]); + const MODULUS_BITS: u32 = 255; + const CAPACITY: u32 = Self::MODULUS_BITS - 1; + const REPR_SHAVE_BITS: u32 = 1; + // -(MODULUS^{-1} mod 2^64) mod 2^64 + const INV: u64 = 10108024940646105087; + } +} - const REPR_SHAVE_BITS: u32 = 1; +#[cfg(any(target_family = "wasm", feature = "32x9"))] +pub mod x32x9 { + use super::*; - // -(MODULUS^{-1} mod 2^64) mod 2^64 - const INV: u64 = 10108024940646105087; + #[rustfmt::skip] + impl ark_ff::FpParameters for FqParameters { + // 28948022309329048855892746252171976963363056481941560715954676764349967630337 + const MODULUS: BigInteger = BigInteger::new([ + 0x1, 0x2375908, 0x52a3763, 0xd31f813, 0x224, 0x0, 0x0, 0x0, 0x400000, + ]); + const R: BigInteger = BigInteger::new([ + 0x1fffff81, 0x68ad507, 0x100e85da, 0x1435ee7e, 0x1ffeefef, 0x1fffffff, 0x1fffffff, + 0x1fffffff, 0x3fffff, + ]); + const R2: BigInteger = BigInteger::new([ + 0x3b6a, 0x2b1b550, 0x1027888a, 0x1ea4ed96, 0x418ad7a, 0x999eb, 0x17fae231, + 0x1e67ed54, 0x3506bd, + ]); + const MODULUS_MINUS_ONE_DIV_TWO: BigInteger = BigInteger::new([ + 0x0, 0x111bac84, 0x12951bb1, 0x698fc09, 0x112, 0x0, 0x0, 0x0, 0x200000, + ]); + // T and T_MINUS_ONE_DIV_TWO, where MODULUS - 1 = 2^S * T + const T: BigInteger = BigInteger::new([ + 0xc46eb21, 0xca546ec, 0x11a63f02, 0x44, 0x0, 0x0, 0x0, 0x80000, 0x0, + ]); + const T_MINUS_ONE_DIV_TWO: BigInteger = BigInteger::new([ + 0x6237590, 0x652a376, 0x8d31f81, 0x22, 0x0, 0x0, 0x0, 0x40000, 0x0, + ]); + // GENERATOR = 5 + const GENERATOR: BigInteger = { + const FIVE: Fq = ark_ff::field_new!(Fq, "5"); + FIVE.0 + }; + const MODULUS_BITS: u32 = 255; + const CAPACITY: u32 = Self::MODULUS_BITS - 1; + const REPR_SHAVE_BITS: u32 = 1; + // -(MODULUS^{-1} mod 2^64) mod 2^64 + const INV: u64 = 0x1fffffff; + } } diff --git a/groupmap/src/lib.rs b/groupmap/src/lib.rs index cc310d9ab8..da3e35f67b 100644 --- a/groupmap/src/lib.rs +++ b/groupmap/src/lib.rs @@ -26,6 +26,8 @@ pub trait GroupMap { fn setup() -> Self; fn to_group(&self, u: F) -> (F, F); fn batch_to_group_x(&self, ts: Vec) -> Vec<[F; 3]>; + /// For debug only + fn composition(&self) -> Vec; } #[derive(Clone, Copy)] @@ -127,6 +129,25 @@ fn get_xy( } impl GroupMap for BWParameters { + /// For debug only + fn composition(&self) -> Vec { + let Self { + u, + fu, + sqrt_neg_three_u_squared_minus_u_over_2, + sqrt_neg_three_u_squared, + inv_three_u_squared, + } = self; + + vec![ + *u, + *fu, + *sqrt_neg_three_u_squared_minus_u_over_2, + *sqrt_neg_three_u_squared, + *inv_three_u_squared, + ] + } + fn setup() -> Self { assert!(G::COEFF_A.is_zero()); diff --git a/kimchi/src/circuits/expr.rs b/kimchi/src/circuits/expr.rs index f331d96b1b..cbe7fc28fc 100644 --- a/kimchi/src/circuits/expr.rs +++ b/kimchi/src/circuits/expr.rs @@ -1021,24 +1021,24 @@ fn unnormalized_lagrange_evals( impl<'a, F: FftField> EvalResult<'a, F> { fn init_ F>( - res_domain: (Domain, D), + res_domain: (Domain, &D), g: G, ) -> Evaluations> { let n = res_domain.1.size(); Evaluations::>::from_vec_and_domain( (0..n).into_par_iter().map(g).collect(), - res_domain.1, + res_domain.1.clone(), ) } - fn init F>(res_domain: (Domain, D), g: G) -> Self { + fn init F>(res_domain: (Domain, &D), g: G) -> Self { Self::Evals { domain: res_domain.0, evals: Self::init_(res_domain, g), } } - fn add<'c>(self, other: EvalResult<'_, F>, res_domain: (Domain, D)) -> EvalResult<'c, F> { + fn add<'c>(self, other: EvalResult<'_, F>, res_domain: (Domain, &D)) -> EvalResult<'c, F> { use EvalResult::*; match (self, other) { (Constant(x), Constant(y)) => Constant(x + y), @@ -1074,7 +1074,7 @@ impl<'a, F: FftField> EvalResult<'a, F> { .collect(); Evals { domain: res_domain.0, - evals: Evaluations::>::from_vec_and_domain(v, res_domain.1), + evals: Evaluations::>::from_vec_and_domain(v, res_domain.1.clone()), } } ( @@ -1151,13 +1151,13 @@ impl<'a, F: FftField> EvalResult<'a, F> { Evals { domain: res_domain.0, - evals: Evaluations::>::from_vec_and_domain(v, res_domain.1), + evals: Evaluations::>::from_vec_and_domain(v, res_domain.1.clone()), } } } } - fn sub<'c>(self, other: EvalResult<'_, F>, res_domain: (Domain, D)) -> EvalResult<'c, F> { + fn sub<'c>(self, other: EvalResult<'_, F>, res_domain: (Domain, &D)) -> EvalResult<'c, F> { use EvalResult::*; match (self, other) { (Constant(x), Constant(y)) => Constant(x - y), @@ -1275,7 +1275,7 @@ impl<'a, F: FftField> EvalResult<'a, F> { } } - fn pow<'b>(self, d: u64, res_domain: (Domain, D)) -> EvalResult<'b, F> { + fn pow<'b>(self, d: u64, res_domain: (Domain, &D)) -> EvalResult<'b, F> { let mut acc = EvalResult::Constant(F::one()); for i in (0..u64::BITS).rev() { acc = acc.square(res_domain); @@ -1288,7 +1288,7 @@ impl<'a, F: FftField> EvalResult<'a, F> { acc } - fn square<'b>(self, res_domain: (Domain, D)) -> EvalResult<'b, F> { + fn square<'b>(self, res_domain: (Domain, &D)) -> EvalResult<'b, F> { use EvalResult::*; match self { Constant(x) => Constant(x.square()), @@ -1312,7 +1312,7 @@ impl<'a, F: FftField> EvalResult<'a, F> { } } - fn mul<'c>(self, other: EvalResult<'_, F>, res_domain: (Domain, D)) -> EvalResult<'c, F> { + fn mul<'c>(self, other: EvalResult<'_, F>, res_domain: (Domain, &D)) -> EvalResult<'c, F> { use EvalResult::*; match (self, other) { (Constant(x), Constant(y)) => Constant(x * y), @@ -1424,6 +1424,15 @@ fn get_domain(d: Domain, env: &Environment) -> D { } } +fn get_domain_ref<'a, F: FftField>(d: Domain, env: &'a Environment) -> &'a D { + match d { + Domain::D1 => &env.domain.d1, + Domain::D2 => &env.domain.d2, + Domain::D4 => &env.domain.d4, + Domain::D8 => &env.domain.d8, + } +} + impl Expr> { /// Convenience function for constructing expressions from literal /// field elements. @@ -1713,13 +1722,13 @@ impl Expr { assert_eq!(domain, d); evals } - EvalResult::Constant(x) => EvalResult::init_((d, get_domain(d, env)), |_| x), + EvalResult::Constant(x) => EvalResult::init_((d, get_domain_ref(d, env)), |_| x), EvalResult::SubEvals { evals, domain: d_sub, shift: s, } => { - let res_domain = get_domain(d, env); + let res_domain = get_domain_ref(d, env); let scale = (d_sub as usize) / (d as usize); assert!(scale != 0); EvalResult::init_((d, res_domain), |i| { @@ -1738,7 +1747,7 @@ impl Expr { where 'a: 'b, { - let dom = (d, get_domain(d, env)); + let dom = (d, get_domain_ref(d, env)); let res: EvalResult<'a, F> = match self { Expr::Square(x) => match x.evaluations_helper(cache, d, env) { @@ -1800,10 +1809,11 @@ impl Expr { Expr::Pow(x, p) => { let x = x.evaluations_helper(cache, d, env); match x { - Either::Left(x) => x.pow(*p, (d, get_domain(d, env))), - Either::Right(id) => { - id.get_from(cache).unwrap().pow(*p, (d, get_domain(d, env))) - } + Either::Left(x) => x.pow(*p, (d, get_domain_ref(d, env))), + Either::Right(id) => id + .get_from(cache) + .unwrap() + .pow(*p, (d, get_domain_ref(d, env))), } } Expr::VanishesOnZeroKnowledgeAndPreviousRows => EvalResult::SubEvals { @@ -1837,7 +1847,7 @@ impl Expr { } } Expr::BinOp(op, e1, e2) => { - let dom = (d, get_domain(d, env)); + let dom = (d, get_domain_ref(d, env)); let f = |x: EvalResult, y: EvalResult| match op { Op2::Mul => x.mul(y, dom), Op2::Add => x.add(y, dom), diff --git a/kimchi/src/circuits/polynomials/endomul_scalar.rs b/kimchi/src/circuits/polynomials/endomul_scalar.rs index 701ce892bf..2f63a2e6d9 100644 --- a/kimchi/src/circuits/polynomials/endomul_scalar.rs +++ b/kimchi/src/circuits/polynomials/endomul_scalar.rs @@ -11,7 +11,7 @@ use crate::{ }, curve::KimchiCurve, }; -use ark_ff::{BitIteratorLE, Field, PrimeField}; +use ark_ff::{BigInteger, BitIteratorLE, Field, PrimeField}; use std::array; use std::marker::PhantomData; @@ -228,7 +228,7 @@ pub fn gen_witness( let bits_per_row = 2 * crumbs_per_row; assert_eq!(num_bits % bits_per_row, 0); - let bits_lsb: Vec<_> = BitIteratorLE::new(scalar.into_repr()) + let bits_lsb: Vec<_> = BitIteratorLE::new(scalar.into_repr().to_64x4()) .take(num_bits) .collect(); let bits_msb: Vec<_> = bits_lsb.iter().rev().collect(); diff --git a/poly-commitment/Cargo.toml b/poly-commitment/Cargo.toml index 890555082e..4553007c7f 100644 --- a/poly-commitment/Cargo.toml +++ b/poly-commitment/Cargo.toml @@ -34,10 +34,13 @@ mina-poseidon = { path = "../poseidon", version = "0.1.0" } ocaml = { version = "0.22.2", optional = true } ocaml-gen = { version = "0.1.5", optional = true } +smallvec = { git = "https://github.com/servo/rust-smallvec.git", features = ["std"] } +# crossbeam-channel = "0.5" + [dev-dependencies] colored = "2.0.0" rand_chacha = { version = "0.3.0" } -ark-bn254 = { version = "0.3.0" } +# ark-bn254 = { version = "0.3.0" } [features] ocaml_types = [ "ocaml", "ocaml-gen" ] diff --git a/poly-commitment/src/combine.rs b/poly-commitment/src/combine.rs index 52f7e19f95..d772d28939 100644 --- a/poly-commitment/src/combine.rs +++ b/poly-commitment/src/combine.rs @@ -19,7 +19,7 @@ use ark_ec::{ models::short_weierstrass_jacobian::GroupAffine as SWJAffine, AffineCurve, ProjectiveCurve, SWModelParameters, }; -use ark_ff::{BitIteratorBE, Field, One, PrimeField, Zero}; +use ark_ff::{BigInteger, BitIteratorBE, Field, One, PrimeField, Zero}; use itertools::Itertools; use mina_poseidon::sponge::ScalarChallenge; use rayon::prelude::*; @@ -190,8 +190,8 @@ fn affine_window_combine_base( }; assert!(g1g2.len() == g1.len()); - let windows1 = BitIteratorBE::new(x1.into_repr()).tuples(); - let windows2 = BitIteratorBE::new(x2.into_repr()).tuples(); + let windows1 = BitIteratorBE::new(x1.into_repr().to_64x4()).tuples(); + let windows2 = BitIteratorBE::new(x2.into_repr().to_64x4()).tuples(); let mut points = vec![SWJAffine::

::zero(); g1.len()]; @@ -295,7 +295,7 @@ fn affine_window_combine_one_endo_base( ) -> Vec> { fn assign(dst: &mut [A], src: &[A]) { let n = dst.len(); - dst[..n].clone_from_slice(&src[..n]); + dst[..n].copy_from_slice(&src[..n]); } fn get_bit(limbs_lsb: &[u64], i: u64) -> u64 { @@ -304,8 +304,11 @@ fn affine_window_combine_one_endo_base( (limbs_lsb[limb as usize] >> j) & 1 } + use ark_ff::BigInteger; + let rep = chal.0.into_repr(); - let r = rep.as_ref(); + let r = rep.to_64x4(); + let r = r.as_ref(); let mut denominators = vec![P::BaseField::zero(); g1.len()]; // acc = 2 (phi(g2) + g2) @@ -371,7 +374,7 @@ fn affine_window_combine_one_base( g2: &[SWJAffine

], x2: P::ScalarField, ) -> Vec> { - let windows2 = BitIteratorBE::new(x2.into_repr()).tuples(); + let windows2 = BitIteratorBE::new(x2.into_repr().to_64x4()).tuples(); let mut points = vec![SWJAffine::

::zero(); g1.len()]; @@ -594,8 +597,8 @@ fn window_shamir( let [_g00_00, g01_00, g10_00, g11_00, g00_01, g01_01, g10_01, g11_01, g00_10, g01_10, g10_10, g11_10, g00_11, g01_11, g10_11, g11_11] = shamir_window_table(g1, g2); - let windows1 = BitIteratorBE::new(x1.into_repr()).tuples(); - let windows2 = BitIteratorBE::new(x2.into_repr()).tuples(); + let windows1 = BitIteratorBE::new(x1.into_repr().to_64x4()).tuples(); + let windows2 = BitIteratorBE::new(x2.into_repr().to_64x4()).tuples(); let mut res = G::Projective::zero(); diff --git a/poly-commitment/src/commitment.rs b/poly-commitment/src/commitment.rs index bb2469b49f..970ce41008 100644 --- a/poly-commitment/src/commitment.rs +++ b/poly-commitment/src/commitment.rs @@ -6,12 +6,13 @@ //! producing the batched opening proof //! 3. Verify batch of batched opening proofs +use crate::msm::call_msm; use crate::srs::endos; use crate::SRS as SRSTrait; use crate::{error::CommitmentError, srs::SRS}; use ark_ec::{ - models::short_weierstrass_jacobian::GroupAffine as SWJAffine, msm::VariableBaseMSM, - AffineCurve, ProjectiveCurve, SWModelParameters, + models::short_weierstrass_jacobian::GroupAffine as SWJAffine, AffineCurve, ProjectiveCurve, + SWModelParameters, }; use ark_ff::{ BigInteger, Field, FpParameters, One, PrimeField, SquareRootField, UniformRand, Zero, @@ -189,7 +190,7 @@ impl<'a, 'b, C: AffineCurve> Sub<&'a PolyComm> for &'b PolyComm { } } -impl PolyComm { +impl PolyComm { pub fn scale(&self, c: C::ScalarField) -> PolyComm { PolyComm { elems: self.elems.iter().map(|g| g.mul(c).into_affine()).collect(), @@ -222,7 +223,7 @@ impl PolyComm { .filter_map(|(com, scalar)| com.elems.get(chunk).map(|c| (c, scalar))) .unzip(); - let chunk_msm = VariableBaseMSM::multi_scalar_mul::(&points, &scalars); + let chunk_msm = call_msm::(&points, &scalars); elems.push(chunk_msm.into_affine()); } @@ -589,7 +590,7 @@ impl SRSTrait for SRS { elems.push(G::zero()); } else { coeffs.chunks(self.g.len()).for_each(|coeffs_chunk| { - let chunk = VariableBaseMSM::multi_scalar_mul(&self.g, coeffs_chunk); + let chunk = call_msm::(&self.g, coeffs_chunk); elems.push(chunk.into_affine()); }); } @@ -738,6 +739,13 @@ impl SRS { let s = b_poly_coefficients(&chal); + debug_assert!(s.len() <= scalars.len()); + + // TODO: implement a better solution at type/wire level, for now we just bail out... + if s.len() > scalars.len() { + return false; + } + let neg_rand_base_i = -rand_base_i; // TERM @@ -808,7 +816,7 @@ impl SRS { // verify the equation let scalars: Vec<_> = scalars.iter().map(|x| x.into_repr()).collect(); - VariableBaseMSM::multi_scalar_mul(&points, &scalars) == G::Projective::zero() + call_msm(&points, &scalars) == G::Projective::zero() } } diff --git a/poly-commitment/src/evaluation_proof.rs b/poly-commitment/src/evaluation_proof.rs index 6b2e9dcfc3..4e7306844e 100644 --- a/poly-commitment/src/evaluation_proof.rs +++ b/poly-commitment/src/evaluation_proof.rs @@ -1,6 +1,7 @@ +use crate::msm::call_msm; use crate::{commitment::*, srs::endos}; use crate::{srs::SRS, PolynomialsToCombine, SRS as _}; -use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve}; +use ark_ec::{AffineCurve, ProjectiveCurve}; use ark_ff::{FftField, Field, One, PrimeField, UniformRand, Zero}; use ark_poly::{univariate::DensePolynomial, UVPolynomial}; use ark_poly::{EvaluationDomain, Evaluations}; @@ -224,25 +225,31 @@ impl SRS { let rand_l = ::rand(rng); let rand_r = ::rand(rng); - let l = VariableBaseMSM::multi_scalar_mul( - &[&g[0..n], &[self.h, u]].concat(), - &[&a[n..], &[rand_l, inner_prod(a_hi, b_lo)]] - .concat() - .iter() - .map(|x| x.into_repr()) - .collect::>(), - ) - .into_affine(); - - let r = VariableBaseMSM::multi_scalar_mul( - &[&g[n..], &[self.h, u]].concat(), - &[&a[0..n], &[rand_r, inner_prod(a_lo, b_hi)]] - .concat() - .iter() - .map(|x| x.into_repr()) - .collect::>(), - ) - .into_affine(); + let call_l = || { + call_msm( + &[&g[0..n], &[self.h, u]].concat(), + &[&a[n..], &[rand_l, inner_prod(a_hi, b_lo)]] + .concat() + .iter() + .map(|x| x.into_repr()) + .collect::>(), + ) + .into_affine() + }; + + let call_r = || { + call_msm( + &[&g[n..], &[self.h, u]].concat(), + &[&a[0..n], &[rand_r, inner_prod(a_lo, b_hi)]] + .concat() + .iter() + .map(|x| x.into_repr()) + .collect::>(), + ) + .into_affine() + }; + + let (l, r) = rayon::join(call_l, call_r); lr.push((l, r)); blinders.push((rand_l, rand_r)); @@ -257,29 +264,33 @@ impl SRS { chals.push(u); chal_invs.push(u_inv); - a = a_hi - .par_iter() - .zip(a_lo) - .map(|(&hi, &lo)| { - // lo + u_inv * hi - let mut res = hi; - res *= u_inv; - res += &lo; - res - }) - .collect(); - - b = b_lo - .par_iter() - .zip(b_hi) - .map(|(&lo, &hi)| { - // lo + u * hi - let mut res = hi; - res *= u; - res += &lo; - res - }) - .collect(); + let call_a = || { + a_hi.par_iter() + .zip(a_lo) + .map(|(&hi, &lo)| { + // lo + u_inv * hi + let mut res = hi; + res *= u_inv; + res += &lo; + res + }) + .collect() + }; + + let call_b = || { + b_lo.par_iter() + .zip(b_hi) + .map(|(&lo, &hi)| { + // lo + u * hi + let mut res = hi; + res *= u; + res += &lo; + res + }) + .collect() + }; + + (a, b) = rayon::join(call_a, call_b); g = G::combine_one_endo(endo_r, endo_q, &g_lo, &g_hi, u_pre); } diff --git a/poly-commitment/src/lib.rs b/poly-commitment/src/lib.rs index fb7f7491ca..47f55a6f5b 100644 --- a/poly-commitment/src/lib.rs +++ b/poly-commitment/src/lib.rs @@ -3,11 +3,12 @@ mod combine; pub mod commitment; pub mod error; pub mod evaluation_proof; +pub mod msm; pub mod pairing_proof; pub mod srs; -#[cfg(test)] -mod tests; +// #[cfg(test)] +// mod tests; pub use commitment::PolyComm; @@ -118,3 +119,1683 @@ pub trait OpenProof: Sized { EFqSponge: FqSponge, RNG: RngCore + CryptoRng; } + +// #[cfg(test)] +// mod tests { +// use std::sync::{atomic::AtomicUsize, Mutex, RwLock}; + +// use ark_ec::{short_weierstrass_jacobian::{GroupAffine, GroupProjective}, AffineCurve, ProjectiveCurve}; +// use ark_ff::{BigInteger256, Field, PrimeField, UniformRand}; +// use mina_curves::pasta::{Fp, Pallas, PallasParameters}; +// use o1_utils::foreign_field::FieldArrayBigUintHelpers; +// use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator}; + +// fn get_rng() -> rand::rngs::StdRng { +// ::seed_from_u64(0) +// } + +// #[allow(clippy::type_complexity)] +// pub fn generate_msm_inputs( +// size: usize, +// ) -> ( +// Vec<::Affine>, +// Vec<::BigInt>, +// ) +// where +// A: AffineCurve, +// { +// let mut rng = get_rng(); +// let scalar_vec = (0..size) +// .map(|_| A::ScalarField::rand(&mut rng).into_repr()) +// .collect(); +// let point_vec = (0..size) +// .map(|_| A::Projective::rand(&mut rng)) +// .collect::>(); +// ( +// ::batch_normalization_into_affine(&point_vec), +// scalar_vec, +// ) +// } + +// #[test] +// fn test_inverses() { +// let mut rng = get_rng(); +// let fp = (0..1_000_000) +// .map(|_| Fp::rand(&mut rng)) +// .collect::>(); +// let now = std::time::Instant::now(); +// for f in fp { +// f.inverse().unwrap(); +// } +// dbg!(now.elapsed()); +// } + +// #[test] +// fn test_alloc() { +// use ark_ff::Zero; + +// let c = 13; +// let zero: GroupProjective = GroupProjective::zero(); + +// { +// let now = std::time::Instant::now(); +// let mut buckets_per_window = vec![vec![zero; (1 << c) - 1]; 20]; +// // let mut buckets_per_window = vec![vec![None::; (1 << c) - 1]; window_starts.len()]; +// // let buckets_per_window2 = buckets_per_window.clone(); +// // let buckets_per_window3 = buckets_per_window.clone(); +// // let buckets_per_window4 = buckets_per_window.clone(); +// eprintln!("ICI time to alloc buckets: {:?}", now.elapsed()); +// } +// } + +// #[test] +// fn test_name() { +// rayon::ThreadPoolBuilder::new().num_threads(32).build_global().unwrap(); + +// // let (mut points, scalars) = generate_msm_inputs::(100_000); +// let (mut points, scalars) = generate_msm_inputs::(65536); +// // dbg!(inputs.len()); + +// let now = std::time::Instant::now(); +// let result = ark_ec::msm::VariableBaseMSM::multi_scalar_mul( +// &points, +// &scalars, +// ).into_affine(); +// let elapsed = now.elapsed(); +// let good = result; +// // assert_result(&result); +// dbg!(result, elapsed); + +// let now = std::time::Instant::now(); +// let result = ark_msm::msm::VariableBaseMSM::multi_scalar_mul::( +// &points, +// &scalars, +// ).into_affine(); +// let elapsed = now.elapsed(); +// dbg!(result, elapsed); +// assert_eq!(good, result); +// // assert_result(&result); + +// let now = std::time::Instant::now(); +// // let result = my_multi_scalar_batch( +// // let result = my_multi_scalar_batch_max_threads( +// let result = call_msm( +// &points, +// &scalars, +// ).into_affine(); +// let elapsed = now.elapsed(); +// // assert_result(&result); +// dbg!(result, elapsed); +// assert_eq!(good, result); + +// // for (index, v) in (0i32..100).enumerate().rev() { +// // println!("index={:?} v={:?}", index, v); +// // } + +// // self.pendings.iter().copied().enumerate().rev() + +// // let now = std::time::Instant::now(); +// // let result = my_multi_scalar_mul2( +// // &points, +// // &scalars, +// // ).into_affine(); +// // let elapsed = now.elapsed(); +// // // assert_result(&result); +// // dbg!(result, elapsed); +// // assert_eq!(good, result); +// } + +// use ark_ff::{One, Zero}; + +// use crate::msm::call_msm; + +// struct Batch<'a> { +// buckets: Vec>, +// /// (index in `buckets`, is_negative, group) +// in_batch: Vec<(usize, bool, &'a GroupAffine)>, +// in_batch_busy_buckets: Vec, +// // inverse_state: Fp, +// // inverses: Vec, + +// inverses: Option, + +// /// (index in `buckets`, is_negative, group) +// pendings: Vec<(usize, bool, &'a GroupAffine)>, +// } + +// struct BatchInverses { +// inverse_state: Fp, +// inverses: Vec, +// } + +// const N_BATCH: usize = 4096; +// const N_COLLISION: usize = 512; + +// impl<'a> Batch<'a> { +// pub fn with_capacity(capacity: usize) -> Self { +// let zero = GroupAffine::zero(); +// Self { +// buckets: vec![zero; capacity], +// in_batch: Vec::with_capacity(N_BATCH), +// in_batch_busy_buckets: vec![false; capacity], +// inverses: Some(BatchInverses { +// inverse_state: Fp::one(), +// inverses: vec![Fp::one(); N_BATCH], +// }), +// pendings: Vec::with_capacity(N_BATCH), +// } +// } + +// fn with_buckets(buckets: Vec>) -> Self { +// let capacity = buckets.capacity(); +// Self { +// buckets, +// in_batch: Vec::with_capacity(N_BATCH), +// in_batch_busy_buckets: vec![false; capacity], +// inverses: Some(BatchInverses { +// inverse_state: Fp::one(), +// inverses: vec![Fp::one(); N_BATCH], +// }), +// pendings: Vec::with_capacity(N_BATCH), +// } +// } + +// fn add_batch(&mut self, batch: Self) { +// let mut buckets = std::mem::take(&mut self.buckets); +// self.add(&mut buckets, batch.buckets.iter()); +// self.buckets = buckets; +// } + +// fn add_in_bucket( +// &mut self, +// bucket: usize, +// is_negative: bool, +// g: &'a GroupAffine +// ) { +// if self.in_batch_busy_buckets[bucket] { +// self.pendings.push((bucket, is_negative, g)); +// } else { +// self.in_batch_busy_buckets[bucket] = true; +// self.in_batch.push((bucket, is_negative, g)); +// } +// } + +// fn batch1( +// // &mut self, +// res: &mut GroupAffine, +// src: &GroupAffine, +// index: usize, +// inverses: &mut BatchInverses, +// ) { +// if res.is_zero() | src.is_zero() { +// return; +// } +// let mut delta_x = src.x - res.x; +// if delta_x.is_zero() { +// let delta_y = src.y - res.y; +// if !delta_y.is_zero() { +// return; +// } +// delta_x = src.y + src.y; +// } +// if inverses.inverse_state.is_zero() { +// inverses.inverses[index].set_one(); +// inverses.inverse_state = delta_x; +// } else { +// inverses.inverses[index] = inverses.inverse_state; +// inverses.inverse_state *= delta_x +// } +// } + +// fn batch2( +// res: &mut GroupAffine, +// src: &GroupAffine, +// index: usize, +// inverses: &mut BatchInverses, +// ) { +// if res.is_zero() | src.is_zero() { +// if !src.is_zero() { +// *res = *src; +// } +// return; +// } +// let mut inverse = inverses.inverses[index]; +// inverse *= inverses.inverse_state; +// let mut delta_x = src.x - res.x; +// let mut delta_y = src.y - res.y; +// if delta_x.is_zero() { +// if !delta_y.is_zero() { +// res.set_zero(); +// return; +// } +// delta_y = src.x.square(); +// delta_y = delta_y + delta_y + delta_y; +// delta_x = src.y.double(); +// } +// inverses.inverse_state *= delta_x; +// let s = delta_y * inverse; +// let ss = s * s; +// res.x = ss - src.x - res.x; +// delta_x = src.x - res.x; +// res.y = s * delta_x; +// res.y -= src.y; +// } + +// fn accumulate(&mut self) { +// use std::ops::Neg; + +// let mut inverses = self.inverses.take().unwrap(); +// inverses.inverse_state = Fp::one(); + +// for (pending_index, (bucket_index, is_neg, group)) in self.in_batch.iter().copied().enumerate() { +// let bucket = &mut self.buckets[bucket_index]; +// let mut group = *group; +// if is_neg { +// group = group.neg(); +// } +// Self::batch1(bucket, &group, pending_index, &mut inverses); +// } + +// inverses.inverse_state = inverses.inverse_state.inverse().unwrap(); + +// for (pending_index, (bucket_index, is_neg, group)) in self.in_batch.iter().copied().enumerate().rev() { +// let bucket = &mut self.buckets[bucket_index]; +// let mut group = *group; +// if is_neg { +// group = group.neg(); +// } +// Self::batch2(bucket, &group, pending_index, &mut inverses); +// } + +// self.in_batch.clear(); +// self.in_batch_busy_buckets.iter_mut().for_each(|b| *b = false); + +// self.pendings.retain(|(bucket, is_neg, g)| { +// if self.in_batch_busy_buckets[*bucket] { +// return true; +// } +// self.in_batch_busy_buckets[*bucket] = true; +// self.in_batch.push((*bucket, *is_neg, g)); +// false +// }); + +// self.inverses = Some(inverses); +// } + +// fn add<'b, S>( +// &mut self, +// res: &mut [GroupAffine], +// src: S, +// ) +// where +// S: Iterator> + Clone + DoubleEndedIterator + ExactSizeIterator, +// { +// let mut inverses = self.inverses.take().unwrap(); +// inverses.inverse_state = Fp::one(); + +// let src2 = src.clone().into_iter(); +// for (index, (res, point)) in res.iter_mut().zip(src2).enumerate() { +// Self::batch1(res, point, index, &mut inverses); +// } + +// inverses.inverse_state = inverses.inverse_state.inverse().unwrap(); + +// for (index, (res, point)) in res.iter_mut().zip(src).enumerate().rev() { +// Self::batch2(res, point, index, &mut inverses); +// } + +// self.inverses = Some(inverses); +// } +// } + +// pub fn my_multi_scalar_batch( +// bases: &[GroupAffine], +// scalars: &[BigInteger256], +// ) -> GroupProjective { +// use ark_ff::BigInteger; +// use ark_ff::{One, Zero, FpParameters}; +// // panic!(); + +// let size = std::cmp::min(bases.len(), scalars.len()); +// let scalars = &scalars[..size]; +// let bases = &bases[..size]; +// let scalars_and_bases_iter = scalars.iter().zip(bases).filter(|(s, _)| !s.is_zero()); + +// let c = if size < 32 { +// 3 +// } else { +// ln_without_floats(size) + 2 +// }; +// dbg!(c); + +// let num_bits = < as AffineCurve>::ScalarField as PrimeField>::Params::MODULUS_BITS as usize; +// // let fr_one: BigInteger256 = < as AffineCurve>::ScalarField>::one().into_repr(); + +// let zero = GroupProjective::zero(); +// let window_starts: Vec<_> = (0..num_bits).step_by(c).collect(); + +// dbg!(&window_starts, window_starts.len(), num_bits); + +// let total = 1 << c; +// let half = total >> 1; + +// #[derive(Copy, Clone)] +// struct Digits { +// n: u32, +// } + +// let now = std::time::Instant::now(); +// let digits = scalars.par_iter().map(|scalar| { +// let mut scalar = *scalar; +// let mut carry = 0; +// window_starts.iter().map(|_win_start| { +// let mut digits = scalar.to_64x4()[0] % (1 << c); +// digits += carry; +// if digits > half { +// digits = total - digits; +// carry = 1; +// } else { +// carry = 0; +// } +// let res = Digits { +// n: digits as u32 | ((carry as u32) << 31), +// }; +// scalar.divn(c as u32); +// res +// }).collect::>() +// }).collect::>(); +// eprintln!("digits pre-compute time: {:?}", now.elapsed()); + +// let window_sums: Vec<_> = window_starts +// .par_iter() +// .copied() +// .enumerate() +// .map(|(w_index, w_start)| { + +// let now = std::time::Instant::now(); +// let mut batch = Batch::with_capacity(1 << (c - 1)); +// let elapsed_alloc = now.elapsed(); +// let now = std::time::Instant::now(); + +// let mut nzeros = 0; +// let mut nis_neg = 0; + +// digits.iter().zip(bases).for_each(|(scalar, base)| { +// let Digits { n: digits } = scalar[w_index]; + +// let is_neg = (digits >> 31) != 0; +// let digits = ((digits as u32) & ((-1i32 as u32) >> 1)) as usize; + +// let Some(digits) = digits.checked_sub(1) else { +// nzeros += 1; +// return; +// }; + +// if is_neg { +// nis_neg += 1; +// } + +// batch.add_in_bucket(digits, is_neg, base); + +// if batch.in_batch.len() >= N_BATCH || batch.pendings.len() >= N_COLLISION { +// batch.accumulate(); +// } +// }); + +// while !batch.in_batch.is_empty() || !batch.pendings.is_empty() { +// batch.accumulate(); +// } + +// eprintln!( +// "total alloc: {:?} accum: {:?} nzeros: {:?} nis_neg:{:?} in_batch_cap: {:?} pendings_cap: {:?}", +// elapsed_alloc, now.elapsed(), nzeros, nis_neg, batch.in_batch.capacity(), batch.pendings.capacity(), +// ); + +// let mut res = zero; +// let mut running_sum = GroupProjective::zero(); +// batch.buckets.iter().rev().for_each(|b| { +// running_sum.add_assign_mixed(b); +// res += &running_sum; +// }); +// res +// }) +// .collect(); + +// // We store the sum for the lowest window. +// let lowest = *window_sums.first().unwrap(); + +// // We're traversing windows from high to low. +// lowest +// + &window_sums[1..] +// .iter() +// .rev() +// .fold(zero, |mut total, sum_i| { +// total += sum_i; +// for _ in 0..c { +// total.double_in_place(); +// } +// total +// }) +// } + +// pub fn my_multi_scalar_batch_max_threads( +// bases: &[GroupAffine], +// scalars: &[BigInteger256], +// ) -> GroupProjective { + +// use ark_ff::BigInteger; +// use ark_ff::{One, Zero, FpParameters}; + +// struct BatchPerThread<'a> { +// buckets: Vec>>, +// /// (index in `buckets`, is_negative, group) +// in_batch: Vec<(usize, usize, bool, &'a GroupAffine)>, +// in_batch_busy_buckets: Vec>, +// // inverse_state: Fp, +// // inverses: Vec, + +// inverses: Option, + +// /// (index in `buckets`, is_negative, group) +// pendings: Vec<(usize, usize, bool, &'a GroupAffine)>, +// } + +// struct BatchInverses { +// inverse_state: Fp, +// inverses: Vec, +// } + +// const N_BATCH: usize = 4096; +// const N_COLLISION: usize = 512; + +// const N_WINDOWS: usize = 20; + +// impl<'a> BatchPerThread<'a> { +// pub fn with_capacity(capacity: usize) -> Self { +// let zero = GroupAffine::zero(); +// Self { +// buckets: vec![vec![zero; capacity]; N_WINDOWS], +// in_batch: Vec::with_capacity(N_BATCH), +// in_batch_busy_buckets: vec![vec![false; capacity]; N_WINDOWS], +// inverses: Some(BatchInverses { +// inverse_state: Fp::one(), +// inverses: vec![Fp::one(); N_BATCH], +// }), +// pendings: Vec::with_capacity(N_BATCH), +// } +// } + +// fn add_in_bucket( +// &mut self, +// window: usize, +// bucket: usize, +// is_negative: bool, +// g: &'a GroupAffine +// ) { +// if self.in_batch_busy_buckets[window][bucket] { +// self.pendings.push((window, bucket, is_negative, g)); +// } else { +// self.in_batch_busy_buckets[window][bucket] = true; +// self.in_batch.push((window, bucket, is_negative, g)); +// } +// } + +// fn batch1( +// // &mut self, +// res: &mut GroupAffine, +// src: &GroupAffine, +// index: usize, +// inverses: &mut BatchInverses, +// ) { +// if res.is_zero() | src.is_zero() { +// return; +// } +// let mut delta_x = src.x - res.x; +// if delta_x.is_zero() { +// let delta_y = src.y - res.y; +// if !delta_y.is_zero() { +// return; +// } +// delta_x = src.y + src.y; +// } +// if inverses.inverse_state.is_zero() { +// inverses.inverses[index].set_one(); +// inverses.inverse_state = delta_x; +// } else { +// inverses.inverses[index] = inverses.inverse_state; +// inverses.inverse_state *= delta_x +// } +// } + +// fn batch2( +// res: &mut GroupAffine, +// src: &GroupAffine, +// index: usize, +// inverses: &mut BatchInverses, +// ) { +// if res.is_zero() | src.is_zero() { +// if !src.is_zero() { +// *res = *src; +// } +// return; +// } +// let mut inverse = inverses.inverses[index]; +// inverse *= inverses.inverse_state; +// let mut delta_x = src.x - res.x; +// let mut delta_y = src.y - res.y; +// if delta_x.is_zero() { +// if !delta_y.is_zero() { +// res.set_zero(); +// return; +// } +// delta_y = src.x.square(); +// delta_y = delta_y + delta_y + delta_y; +// delta_x = src.y.double(); +// } +// inverses.inverse_state *= delta_x; +// let s = delta_y * inverse; +// let ss = s * s; +// res.x = ss - src.x - res.x; +// delta_x = src.x - res.x; +// res.y = s * delta_x; +// res.y -= src.y; +// } + +// fn accumulate(&mut self) { +// use std::ops::Neg; + +// let mut inverses = self.inverses.take().unwrap(); +// inverses.inverse_state = Fp::one(); + +// for (pending_index, (window_index, bucket_index, is_neg, group)) in self.in_batch.iter().copied().enumerate() { +// let bucket = &mut self.buckets[window_index][bucket_index]; +// let mut group = *group; +// if is_neg { +// group = group.neg(); +// } +// Self::batch1(bucket, &group, pending_index, &mut inverses); +// } + +// inverses.inverse_state = inverses.inverse_state.inverse().unwrap(); + +// for (pending_index, (window_index, bucket_index, is_neg, group)) in self.in_batch.iter().copied().enumerate().rev() { +// let bucket = &mut self.buckets[window_index][bucket_index]; +// let mut group = *group; +// if is_neg { +// group = group.neg(); +// } +// Self::batch2(bucket, &group, pending_index, &mut inverses); +// } + +// self.in_batch.clear(); +// self.in_batch_busy_buckets.iter_mut().for_each(|vec| { +// vec.iter_mut().for_each(|b| { *b = false }); +// }); + +// self.pendings.retain(|(window, bucket, is_neg, g)| { +// if self.in_batch_busy_buckets[*window][*bucket] { +// return true; +// } +// self.in_batch_busy_buckets[*window][*bucket] = true; +// self.in_batch.push((*window, *bucket, *is_neg, g)); +// false +// }); + +// self.inverses = Some(inverses); +// } + +// fn add<'b, S>( +// &mut self, +// res: &mut [GroupAffine], +// src: S, +// ) +// where +// S: Iterator> + Clone + DoubleEndedIterator + ExactSizeIterator, +// { +// let mut inverses = self.inverses.take().unwrap(); +// inverses.inverse_state = Fp::one(); + +// let src2 = src.clone().into_iter(); +// for (index, (res, point)) in res.iter_mut().zip(src2).enumerate() { +// Self::batch1(res, point, index, &mut inverses); +// } + +// inverses.inverse_state = inverses.inverse_state.inverse().unwrap(); + +// for (index, (res, point)) in res.iter_mut().zip(src).enumerate().rev() { +// Self::batch2(res, point, index, &mut inverses); +// } + +// self.inverses = Some(inverses); +// } +// } + +// let size = std::cmp::min(bases.len(), scalars.len()); +// let scalars = &scalars[..size]; +// let bases = &bases[..size]; +// let scalars_and_bases_iter = scalars.iter().zip(bases).filter(|(s, _)| !s.is_zero()); + +// let c = if size < 32 { +// 3 +// } else { +// ln_without_floats(size) + 2 +// }; + +// let total = 1 << c; +// let half = total >> 1; + +// #[derive(Copy, Clone)] +// struct Digits { +// n: u32, +// } + +// let num_bits = < as AffineCurve>::ScalarField as PrimeField>::Params::MODULUS_BITS as usize; +// // let num_bits = ::Params::MODULUS_BITS as usize; +// // let fr_one = G::ScalarField::one().into_repr(); + +// let window_starts: Vec<_> = (0..num_bits).step_by(c).collect(); + +// let now = std::time::Instant::now(); +// let digits = scalars.par_iter().map(|scalar| { +// let mut scalar = *scalar; +// let mut carry = 0; +// window_starts.iter().map(|_win_start| { +// let mut digits = scalar.to_64x4()[0] % (1 << c); +// digits += carry; +// if digits > half { +// digits = total - digits; +// carry = 1; +// } else { +// carry = 0; +// } +// let res = Digits { +// n: digits as u32 | ((carry as u32) << 31), +// }; +// scalar.divn(c as u32); +// res +// }).collect::>() +// }).collect::>(); +// eprintln!("digits pre-compute time: {:?}", now.elapsed()); + +// let zero = GroupProjective::zero(); + +// let num_threads = rayon::current_num_threads(); +// let n_per_thread = (size / num_threads) + 1; + +// let now = std::time::Instant::now(); + +// let mut buckets_per_thread = (0..rayon::current_num_threads()).into_par_iter().map(|thread_index| { +// let now = std::time::Instant::now(); +// let mut batch = BatchPerThread::with_capacity(1 << (c - 1)); +// // let mut buckets_per_window = (0..window_starts.len()).map(|_| { +// // ListOfBuckets::with_capacity(1 << (c - 1)) +// // }).collect::>(); +// // let mut is_initialized = vec![vec![false; 1 << (c - 1)]; window_starts.len()]; + +// // let now = std::time::Instant::now(); +// eprintln!("[{:?}] time to alloc buckets: {:?}", thread_index, now.elapsed()); +// let now = std::time::Instant::now(); + +// let thread_start = thread_index * n_per_thread; +// let thread_end = (thread_index + 1) * n_per_thread; + +// // let scalars = &scalars[thread_start..]; +// let bases = &bases[thread_start..]; +// let scalars = &digits[thread_start..]; + +// for (scalar, base) in scalars.iter().zip(bases).take(n_per_thread) { +// for (index, win_start) in window_starts.iter().copied().enumerate() { +// let Digits { n: digits } = scalar[index]; + +// let is_neg = (digits >> 31) != 0; +// let digits = ((digits as u32) & ((-1i32 as u32) >> 1)) as usize; + +// let Some(digits) = digits.checked_sub(1) else { +// continue; +// }; + +// batch.add_in_bucket(index, digits, is_neg, base); + +// if batch.in_batch.len() >= N_BATCH || batch.pendings.len() >= N_COLLISION { +// batch.accumulate(); +// } +// } +// } + +// while !batch.in_batch.is_empty() || !batch.pendings.is_empty() { +// batch.accumulate(); +// } + +// eprintln!("[{:?}] time to add_assign_mixed: {:?}", thread_index, now.elapsed()); + +// batch +// }).collect::>(); +// eprintln!("time to add_assign_mixed: {:?}", now.elapsed()); + +// let mut buckets_per_window = buckets_per_thread.pop().unwrap(); + +// dbg!(buckets_per_thread.len()); +// // dbg!(buckets_per_window.len()); + +// let now = std::time::Instant::now(); + +// let pendings = buckets_per_window.buckets.into_iter().map(|per_window| { +// Mutex::new(Some(Batch::with_buckets(per_window))) +// }).collect::>(); + +// use crossbeam_channel::bounded; + +// let (s, r) = bounded(1000); + +// for (_thread_index, buckets_per_thread) in buckets_per_thread.into_iter().enumerate() { +// for (window_index, buckets_per_win) in buckets_per_thread.buckets.into_iter().enumerate() { +// s.send((window_index, Batch::with_buckets(buckets_per_win))).unwrap(); +// } +// } + +// let now = std::time::Instant::now(); +// let big_n = AtomicUsize::new(0); +// let _ = (0..rayon::current_num_threads()).into_par_iter().for_each(|_thread_index| { + +// let mut n = 0; +// loop { +// let Ok((index, mut next)) = r.try_recv() else { +// // eprintln!("STOP {:?} {:?}", n, now.elapsed()); +// break; +// }; +// let next2 = { +// let mut locked = pendings[index].lock().unwrap(); +// match locked.take() { +// Some(pending) => pending, +// None => { +// *locked = Some(next); +// continue; +// } +// } +// }; +// // let big_n = big_n.fetch_add(1, std::sync::atomic::Ordering::AcqRel); +// // let now = std::time::Instant::now(); +// next.add_batch(next2); +// // next.add_list_of_buckets(&next2); +// // eprintln!("ADDING {:?} {:?}", big_n, now.elapsed()); +// n += 1; + +// // next.iter_mut().zip(next2).for_each(|(accum, for_thread)| { +// // *accum += for_thread; +// // }); +// s.send((index, next)).unwrap(); +// } +// }); +// eprintln!("time ICI: {:?}", now.elapsed()); + +// assert!(s.is_empty()); + +// let buckets_per_window = pendings.into_iter().map(|v| v.into_inner().unwrap().unwrap()).collect::>(); + +// let now = std::time::Instant::now(); +// let buckets = buckets_per_window.par_iter().map(|buckets| { +// let mut res = zero; +// let mut running_sum = GroupProjective::zero(); +// buckets.buckets.iter().rev().for_each(|b| { +// running_sum.add_assign_mixed(b); +// res += &running_sum; +// }); +// res +// }).collect::>(); +// eprintln!("time to sum of sums: {:?}", now.elapsed()); + +// // let mut res = zero; +// // let mut running_sum = G::Projective::zero(); +// // buckets.into_iter().rev().for_each(|b| { +// // running_sum += &b; +// // res += &running_sum; +// // }); +// // res + +// // We store the sum for the lowest window. +// let lowest = *buckets.first().unwrap(); + +// let now = std::time::Instant::now(); +// // We're traversing windows from high to low. +// let res = lowest +// + &buckets[1..] +// .iter() +// .rev() +// .fold(zero, |mut total, sum_i| { +// total += sum_i; +// for _ in 0..c { +// total.double_in_place(); +// } +// total +// }); +// eprintln!("time to fold: {:?}", now.elapsed()); + +// res +// } + +// pub fn my_multi_scalar_orig_with_signed_digits( +// bases: &[G], +// scalars: &[::BigInt], +// ) -> G::Projective { +// use ark_ff::BigInteger; +// use ark_ff::{One, Zero, FpParameters}; +// // panic!(); + +// let size = std::cmp::min(bases.len(), scalars.len()); +// let scalars = &scalars[..size]; +// let bases = &bases[..size]; +// let scalars_and_bases_iter = scalars.iter().zip(bases).filter(|(s, _)| !s.is_zero()); + +// let c = if size < 32 { +// 3 +// } else { +// ln_without_floats(size) + 2 +// }; +// dbg!(c); + +// let num_bits = ::Params::MODULUS_BITS as usize; +// let fr_one = G::ScalarField::one().into_repr(); + +// let zero = G::Projective::zero(); +// let window_starts: Vec<_> = (0..num_bits).step_by(c).collect(); + +// dbg!(&window_starts, window_starts.len(), num_bits); + +// let total = 1 << c; +// let half = total >> 1; + +// #[derive(Copy, Clone)] +// struct Digits { +// n: u32, +// } + +// let now = std::time::Instant::now(); +// let digits = scalars.par_iter().map(|scalar| { +// let mut scalar = *scalar; +// let mut carry = 0; +// window_starts.iter().map(|_win_start| { +// let mut digits = scalar.to_64x4()[0] % (1 << c); +// digits += carry; +// if digits > half { +// digits = total - digits; +// carry = 1; +// } else { +// carry = 0; +// } +// let res = Digits { +// n: digits as u32 | ((carry as u32) << 31), +// }; +// scalar.divn(c as u32); +// res +// }).collect::>() +// }).collect::>(); +// eprintln!("digits pre-compute time: {:?}", now.elapsed()); + +// // Each window is of size `c`. +// // We divide up the bits 0..num_bits into windows of size `c`, and +// // in parallel process each such window. +// let window_sums: Vec<_> = window_starts +// .par_iter() +// .copied() +// .enumerate() +// .map(|(w_index, w_start)| { + +// let mut res = zero; +// // We don't need the "zero" bucket, so we only have 2^c - 1 buckets. + +// // let now = std::time::Instant::now(); +// let mut buckets = vec![zero; (1 << (c - 1)) - 0]; +// // eprintln!("allocation time: {:?} n={:?}", now.elapsed(), buckets.len()); + +// digits.iter().zip(bases).for_each(|(scalar, base)| { +// let Digits { n: digits } = scalar[w_index]; + +// let is_neg = (digits >> 31) != 0; +// let digits = (digits as u32) & ((-1i32 as u32) >> 1); + +// let Some(digits) = digits.checked_sub(1) else { +// return; +// }; + +// if is_neg { +// buckets[digits as usize].add_assign_mixed(&base.neg()); +// } else { +// buckets[digits as usize].add_assign_mixed(base); +// } +// }); + +// // Compute sum_{i in 0..num_buckets} (sum_{j in i..num_buckets} bucket[j]) +// // This is computed below for b buckets, using 2b curve additions. +// // +// // We could first normalize `buckets` and then use mixed-addition +// // here, but that's slower for the kinds of groups we care about +// // (Short Weierstrass curves and Twisted Edwards curves). +// // In the case of Short Weierstrass curves, +// // mixed addition saves ~4 field multiplications per addition. +// // However normalization (with the inversion batched) takes ~6 +// // field multiplications per element, +// // hence batch normalization is a slowdown. + +// // `running_sum` = sum_{j in i..num_buckets} bucket[j], +// // where we iterate backward from i = num_buckets to 0. +// let mut running_sum = G::Projective::zero(); +// buckets.into_iter().rev().for_each(|b| { +// running_sum += &b; +// res += &running_sum; +// }); +// res +// }) +// .collect(); + +// // We store the sum for the lowest window. +// let lowest = *window_sums.first().unwrap(); + +// // We're traversing windows from high to low. +// lowest +// + &window_sums[1..] +// .iter() +// .rev() +// .fold(zero, |mut total, sum_i| { +// total += sum_i; +// for _ in 0..c { +// total.double_in_place(); +// } +// total +// }) +// } + +// struct ListOfBuckets { +// buckets: Vec, +// is_initialized: Vec, +// } + +// impl ListOfBuckets { +// fn with_capacity(capacity: usize) -> Self { +// Self { +// buckets: { +// let mut vec = Vec::::with_capacity(capacity); +// unsafe { vec.set_len(capacity); } +// vec +// }, +// is_initialized: vec![false; capacity], +// } +// } + +// fn add_assign_mixed(&mut self, index: usize, g: &G) { +// if !self.is_initialized[index] { +// self.buckets[index] = (*g).into(); +// self.is_initialized[index] = true; +// } else { +// self.buckets[index].add_assign_mixed(g); +// } +// } + +// fn iter_mut(&mut self) -> impl Iterator { +// self.buckets.iter_mut().zip(self.is_initialized.iter_mut()) +// } + +// fn iter(&self) -> impl Iterator { +// self.buckets.iter().zip(self.is_initialized.iter().copied()) +// } + +// fn iter_rev(&self) -> impl Iterator { +// self.buckets.iter().rev().zip(self.is_initialized.iter().rev().copied()) +// } + +// fn add_list_of_buckets(&mut self, other: &Self) { +// self.iter_mut().zip(other.iter()).for_each(|((group, is_init), (other_group, other_is_init))| { +// match (*is_init, other_is_init) { +// (true, true) => { +// *group += other_group; +// }, +// (true, false) => {}, +// (false, true) => { +// *group = *other_group; +// *is_init = true; +// }, +// (false, false) => {}, +// } +// }); +// } + +// fn counts(&self) -> (usize, usize) { +// let total = self.is_initialized.len(); +// let n_init = self.is_initialized.iter().filter(|b| **b).count(); +// (n_init, total) +// } +// } + +// pub fn my_multi_scalar_mul2( +// bases: &[G], +// scalars: &[::BigInt], +// ) -> G::Projective { +// use ark_ff::BigInteger; +// use ark_ff::{One, Zero, FpParameters}; +// // panic!(); + +// let size = std::cmp::min(bases.len(), scalars.len()); +// let scalars = &scalars[..size]; +// let bases = &bases[..size]; +// let scalars_and_bases_iter = scalars.iter().zip(bases).filter(|(s, _)| !s.is_zero()); + +// let c = 13; +// // let c = if size < 32 { +// // 3 +// // } else { +// // ln_without_floats(size) + 2 +// // }; + +// let num_bits = ::Params::MODULUS_BITS as usize; +// let fr_one = G::ScalarField::one().into_repr(); + +// let zero = G::Projective::zero(); +// let window_starts: Vec<_> = (0..num_bits).step_by(c).collect(); + +// // dbg!(c, num_bits); +// // dbg!(&window_starts, window_starts.len(), num_bits); + +// // let mut buckets_per_window = vec![vec![zero; (1 << c) - 1]; window_starts.len()]; +// // let mut buckets = vec![zero; (1 << c) - 1]; + +// // dbg!(rayon::current_num_threads()); + +// let num_threads = rayon::current_num_threads(); +// let n_per_thread = (size / num_threads) + 1; + +// let now = std::time::Instant::now(); + +// dbg!((1 << c) - 1); + +// let mut buckets_per_thread = (0..rayon::current_num_threads()).into_par_iter().map(|thread_index| { +// // let mut buckets_per_window = vec![vec![zero; 1 << (c - 1)]; window_starts.len()]; +// let mut buckets_per_window = (0..window_starts.len()).map(|_| { +// // let mut vec = Vec::::with_capacity(1 << (c - 1)); +// // unsafe { vec.set_len(1 << (c - 1)); } +// // vec +// ListOfBuckets::with_capacity(1 << (c - 1)) +// }).collect::>(); +// // let mut is_initialized = vec![vec![false; 1 << (c - 1)]; window_starts.len()]; + +// let now = std::time::Instant::now(); +// // eprintln!("[{:?}] time to alloc buckets: {:?}", thread_index, now.elapsed()); +// // let now = std::time::Instant::now(); + +// let thread_start = thread_index * n_per_thread; +// let thread_end = (thread_index + 1) * n_per_thread; + +// let scalars = &scalars[thread_start..]; +// let bases = &bases[thread_start..]; + +// for (scalar, base) in scalars.iter().zip(bases).take(n_per_thread) { +// if scalar == &fr_one { +// panic!(); +// } + +// let mut carry = 0; + +// let total = 1 << c; +// let half = total >> 1; + +// for (index, win_start) in window_starts.iter().copied().enumerate() { +// let mut scalar = *scalar; +// scalar.divn(win_start as u32); + +// let mut digits = scalar.to_64x4()[0] % (1 << c); +// digits += carry; + +// let buckets = &mut buckets_per_window[index]; +// // let is_initialized = &mut is_initialized[index]; + +// if digits > half { +// digits = total - digits; +// carry = 1; + +// if digits > 0 { +// let index = (digits - 1) as usize; +// buckets.add_assign_mixed(index, &base.neg()); +// // if !is_initialized[index] { +// // buckets[index] = base.neg().into(); +// // is_initialized[index] = true; +// // } else { +// // buckets[index].add_assign_mixed(&base.neg()); +// // } +// } +// } else { +// carry = 0; +// if digits > 0 { +// let index = (digits - 1) as usize; +// buckets.add_assign_mixed(index, base); +// // if !is_initialized[index] { +// // buckets[index] = (*base).into(); +// // is_initialized[index] = true; +// // } else { +// // buckets[index].add_assign_mixed(base); +// // } +// } +// } +// } +// } + +// eprintln!("[{:?}] time to add_assign_mixed: {:?}", thread_index, now.elapsed()); +// // let now = std::time::Instant::now(); +// // let mut n_not_init = 0; +// // let mut n_total = 0; + +// // for (buckets, is_init) in buckets_per_window.iter_mut().zip(&is_initialized) { +// // for (group, is_init) in buckets.iter_mut().zip(is_init) { +// // if !*is_init { +// // n_not_init += 1; +// // *group = zero; +// // } +// // n_total += 1; +// // } +// // } +// // eprintln!("[{:?}] time to set {:?}/{:?} to zero: {:?}", thread_index, n_not_init, n_total, now.elapsed()); + +// // for (index, g) in buckets_per_window.iter().enumerate() { +// // for (index, g) in g.iter().enumerate() { +// // if g.is_zero() { +// // eprintln!("ZERO at {:?}", index); +// // } +// // } +// // } + +// buckets_per_window +// }).collect::>(); +// eprintln!("time to add_assign_mixed: {:?}", now.elapsed()); + +// // panic!(); + +// // let now = std::time::Instant::now(); +// // let mut buckets_per_window = vec![vec![zero; 1 << (c - 1)]; window_starts.len()]; +// let mut buckets_per_window = buckets_per_thread.pop().unwrap(); +// // let mut buckets_per_window = vec![vec![None::; 1 << (c - 1)]; window_starts.len()]; + +// dbg!(buckets_per_thread.len()); +// dbg!(buckets_per_window.len()); + +// let now = std::time::Instant::now(); +// // buckets_per_window.par_iter_mut().for_each(|buckets_per_window| { +// // for buckets_per_thread in &buckets_per_thread { +// // // dbg!(buckets_per_thread.len()); // 20 +// // for (i, buckets_per_win) in buckets_per_thread.iter().enumerate() { +// // // let buckets_per_window = &mut buckets_per_window[i]; +// // // dbg!(buckets_per_window.len()); // 8191 +// // buckets_per_window.iter_mut().zip(buckets_per_win).for_each(|(accum, for_thread)| { +// // *accum += for_thread; +// // }); +// // } +// // } +// // }); + +// let pendings = buckets_per_window.into_iter().map(|per_window| { +// Mutex::new(Some(per_window)) +// }).collect::>(); + +// use crossbeam_channel::bounded; + +// let (s, r) = bounded(1000); + +// for (_thread_index, buckets_per_thread) in buckets_per_thread.into_iter().enumerate() { +// for (window_index, buckets_per_win) in buckets_per_thread.into_iter().enumerate() { +// s.send((window_index, buckets_per_win)).unwrap(); +// } +// } + +// let now = std::time::Instant::now(); +// let _ = (0..rayon::current_num_threads()).into_par_iter().for_each(|_thread_index| { + +// let mut n = 0; +// loop { +// let Ok((index, mut next)) = r.try_recv() else { +// // eprintln!("STOP {:?} {:?}", n, now.elapsed()); +// break; +// }; +// let next2 = { +// let mut locked = pendings[index].lock().unwrap(); +// match locked.take() { +// Some(pending) => pending, +// None => { +// *locked = Some(next); +// continue; +// } +// } +// }; +// next.add_list_of_buckets(&next2); +// // eprintln!("ADDING {:?} {:?}", n, now.elapsed()); +// n += 1; + +// // next.iter_mut().zip(next2).for_each(|(accum, for_thread)| { +// // *accum += for_thread; +// // }); +// s.send((index, next)).unwrap(); +// } +// }); +// eprintln!("time ICI: {:?}", now.elapsed()); + +// assert!(s.is_empty()); + +// // let a = n_ran.load(std::sync::atomic::Ordering::Relaxed); +// // assert_eq!(a, 620); + +// // todo!(); + +// // let _ = (0..rayon::current_num_threads()).into_par_iter().map(|thread_index| { +// // }).collect::>(); + +// // dbg!(buckets_per_thread.len()); + +// // for (thread_index, buckets_per_thread) in buckets_per_thread.iter().enumerate() { +// // dbg!(buckets_per_thread.len()); // 20 +// // for (i, buckets_per_win) in buckets_per_thread.iter().enumerate() { +// // let buckets_per_window = &mut buckets_per_window[i]; +// // // dbg!(buckets_per_window.len()); // 8191 or 4096 +// // buckets_per_window.iter_mut().zip(buckets_per_win).for_each(|(accum, for_thread)| { +// // *accum += for_thread; +// // }); +// // } +// // } + +// // for buckets_per_thread in buckets_per_thread { +// // // dbg!(buckets_per_thread.len()); // 20 +// // for (i, buckets_per_win) in buckets_per_thread.iter().enumerate() { +// // let buckets_per_window = &mut buckets_per_window[i]; +// // // dbg!(buckets_per_window.len()); // 8191 or 4096 +// // buckets_per_window.iter_mut().zip(buckets_per_win).for_each(|(accum, for_thread)| { +// // *accum += for_thread; +// // }); +// // } +// // } +// // eprintln!("time to accumulate: {:?}", now.elapsed()); + +// // let now = std::time::Instant::now(); +// // let mut buckets_per_window = vec![vec![zero; (1 << c) - 1]; window_starts.len()]; +// // for buckets_per_thread in buckets_per_thread { +// // dbg!(buckets_per_thread.len()); // 20 +// // for (i, buckets_per_win) in buckets_per_thread.iter().enumerate() { +// // let buckets_per_window = &mut buckets_per_window[i]; +// // // dbg!(buckets_per_window.len()); // 8191 +// // buckets_per_window.iter_mut().zip(buckets_per_win).for_each(|(accum, for_thread)| { +// // *accum += for_thread; +// // }); +// // } +// // } +// // eprintln!("time to accumulate: {:?}", now.elapsed()); + +// // let buckets_per_window = buckets_per_thread.iter().map(|buckets_per_window| { + +// // }).collect::>(); + +// // for (scalar, base) in scalars_and_bases_iter.clone() { +// // if scalar == &fr_one { +// // panic!(); +// // } +// // for (index, win_start) in window_starts.iter().copied().enumerate() { +// // let mut scalar = *scalar; +// // scalar.divn(win_start as u32); +// // let scalar = scalar.to_64x4()[0] % (1 << c); +// // if scalar != 0 { +// // let buckets = &mut buckets_per_window[index]; +// // buckets[(scalar - 1) as usize].add_assign_mixed(base); +// // } +// // } +// // } +// // eprintln!("time to add_assign_mixed: {:?}", now.elapsed()); + +// // dbg!(buckets_per_window.len()); + +// let buckets_per_window = pendings.into_iter().map(|v| v.into_inner().unwrap().unwrap()).collect::>(); + +// let now = std::time::Instant::now(); +// let buckets = buckets_per_window.par_iter().map(|buckets| { +// let mut res = zero; +// let mut running_sum = G::Projective::zero(); +// buckets.iter_rev().for_each(|(b, is_init)| { +// if is_init { +// running_sum += b; +// } +// res += &running_sum; +// }); +// res +// }).collect::>(); +// eprintln!("time to sum of sums: {:?}", now.elapsed()); + +// // let mut res = zero; +// // let mut running_sum = G::Projective::zero(); +// // buckets.into_iter().rev().for_each(|b| { +// // running_sum += &b; +// // res += &running_sum; +// // }); +// // res + +// // We store the sum for the lowest window. +// let lowest = *buckets.first().unwrap(); + +// let now = std::time::Instant::now(); +// // We're traversing windows from high to low. +// let res = lowest +// + &buckets[1..] +// .iter() +// .rev() +// .fold(zero, |mut total, sum_i| { +// total += sum_i; +// for _ in 0..c { +// total.double_in_place(); +// } +// total +// }); +// eprintln!("time to fold: {:?}", now.elapsed()); + +// res + +// // todo!() + +// // // Each window is of size `c`. +// // // We divide up the bits 0..num_bits into windows of size `c`, and +// // // in parallel process each such window. +// // let window_sums: Vec<_> = window_starts +// // .into_par_iter() +// // .map(|w_start| { + +// // let mut res = zero; +// // // We don't need the "zero" bucket, so we only have 2^c - 1 buckets. +// // let mut buckets = vec![zero; (1 << c) - 1]; +// // // This clone is cheap, because the iterator contains just a +// // // pointer and an index into the original vectors. +// // scalars_and_bases_iter.clone().for_each(|(&scalar, base)| { +// // if scalar == fr_one { +// // // We only process unit scalars once in the first window. +// // if w_start == 0 { +// // res.add_assign_mixed(base); +// // } +// // } else { +// // let mut scalar = scalar; + +// // // We right-shift by w_start, thus getting rid of the +// // // lower bits. +// // scalar.divn(w_start as u32); + +// // // We mod the remaining bits by 2^{window size}, thus taking `c` bits. +// // let scalar = scalar.to_64x4()[0] % (1 << c); + +// // // If the scalar is non-zero, we update the corresponding +// // // bucket. +// // // (Recall that `buckets` doesn't have a zero bucket.) +// // if scalar != 0 { +// // buckets[(scalar - 1) as usize].add_assign_mixed(base); +// // } +// // } +// // }); + +// // // Compute sum_{i in 0..num_buckets} (sum_{j in i..num_buckets} bucket[j]) +// // // This is computed below for b buckets, using 2b curve additions. +// // // +// // // We could first normalize `buckets` and then use mixed-addition +// // // here, but that's slower for the kinds of groups we care about +// // // (Short Weierstrass curves and Twisted Edwards curves). +// // // In the case of Short Weierstrass curves, +// // // mixed addition saves ~4 field multiplications per addition. +// // // However normalization (with the inversion batched) takes ~6 +// // // field multiplications per element, +// // // hence batch normalization is a slowdown. + +// // // `running_sum` = sum_{j in i..num_buckets} bucket[j], +// // // where we iterate backward from i = num_buckets to 0. +// // let mut running_sum = G::Projective::zero(); +// // buckets.into_iter().rev().for_each(|b| { +// // running_sum += &b; +// // res += &running_sum; +// // }); +// // res +// // }) +// // .collect(); + +// // // We store the sum for the lowest window. +// // let lowest = *window_sums.first().unwrap(); + +// // // We're traversing windows from high to low. +// // lowest +// // + &window_sums[1..] +// // .iter() +// // .rev() +// // .fold(zero, |mut total, sum_i| { +// // total += sum_i; +// // for _ in 0..c { +// // total.double_in_place(); +// // } +// // total +// // }) +// } + +// pub fn my_multi_scalar_mul( +// bases: &[G], +// scalars: &[::BigInt], +// ) -> G::Projective { +// use ark_ff::BigInteger; +// use ark_ff::{One, Zero, FpParameters}; +// // panic!(); + +// let size = std::cmp::min(bases.len(), scalars.len()); +// let scalars = &scalars[..size]; +// let bases = &bases[..size]; +// let scalars_and_bases_iter = scalars.iter().zip(bases).filter(|(s, _)| !s.is_zero()); + +// let c = if size < 32 { +// 3 +// } else { +// ln_without_floats(size) + 2 +// }; +// dbg!(c); + +// let num_bits = ::Params::MODULUS_BITS as usize; +// let fr_one = G::ScalarField::one().into_repr(); + +// let zero = G::Projective::zero(); +// let window_starts: Vec<_> = (0..num_bits).step_by(c).collect(); + +// // dbg!(&window_starts, window_starts.len(), num_bits); + +// // Each window is of size `c`. +// // We divide up the bits 0..num_bits into windows of size `c`, and +// // in parallel process each such window. +// let window_sums: Vec<_> = window_starts +// .into_par_iter() +// .map(|w_start| { + +// let mut res = zero; +// // We don't need the "zero" bucket, so we only have 2^c - 1 buckets. + +// let mut buckets = ListOfBuckets::with_capacity((1 << c) - 1); +// // let mut buckets = vec![zero; (1 << c) - 1]; +// // This clone is cheap, because the iterator contains just a +// // pointer and an index into the original vectors. +// scalars_and_bases_iter.clone().for_each(|(&scalar, base)| { +// if scalar == fr_one { +// // We only process unit scalars once in the first window. +// if w_start == 0 { +// res.add_assign_mixed(base); +// } +// } else { +// let mut scalar = scalar; + +// // We right-shift by w_start, thus getting rid of the +// // lower bits. +// scalar.divn(w_start as u32); + +// // We mod the remaining bits by 2^{window size}, thus taking `c` bits. +// let scalar = scalar.to_64x4()[0] % (1 << c); + +// // If the scalar is non-zero, we update the corresponding +// // bucket. +// // (Recall that `buckets` doesn't have a zero bucket.) +// if scalar != 0 { +// buckets.add_assign_mixed((scalar - 1) as usize, base); +// // buckets[(scalar - 1) as usize].add_assign_mixed(base); +// } +// } +// }); + +// // Compute sum_{i in 0..num_buckets} (sum_{j in i..num_buckets} bucket[j]) +// // This is computed below for b buckets, using 2b curve additions. +// // +// // We could first normalize `buckets` and then use mixed-addition +// // here, but that's slower for the kinds of groups we care about +// // (Short Weierstrass curves and Twisted Edwards curves). +// // In the case of Short Weierstrass curves, +// // mixed addition saves ~4 field multiplications per addition. +// // However normalization (with the inversion batched) takes ~6 +// // field multiplications per element, +// // hence batch normalization is a slowdown. + +// // `running_sum` = sum_{j in i..num_buckets} bucket[j], +// // where we iterate backward from i = num_buckets to 0. +// let mut running_sum = G::Projective::zero(); +// buckets.iter_rev().for_each(|(b, is_init)| { +// if is_init { +// running_sum += b; +// } +// res += &running_sum; +// }); +// res +// }) +// .collect(); + +// // We store the sum for the lowest window. +// let lowest = *window_sums.first().unwrap(); + +// // We're traversing windows from high to low. +// lowest +// + &window_sums[1..] +// .iter() +// .rev() +// .fold(zero, |mut total, sum_i| { +// total += sum_i; +// for _ in 0..c { +// total.double_in_place(); +// } +// total +// }) +// } + +// pub fn my_multi_scalar_mul_orig( +// bases: &[G], +// scalars: &[::BigInt], +// ) -> G::Projective { +// use ark_ff::BigInteger; +// use ark_ff::{One, Zero, FpParameters}; +// // panic!(); + +// let size = std::cmp::min(bases.len(), scalars.len()); +// let scalars = &scalars[..size]; +// let bases = &bases[..size]; +// let scalars_and_bases_iter = scalars.iter().zip(bases).filter(|(s, _)| !s.is_zero()); + +// let c = if size < 32 { +// 3 +// } else { +// ln_without_floats(size) + 2 +// }; +// dbg!(c); + +// let num_bits = ::Params::MODULUS_BITS as usize; +// let fr_one = G::ScalarField::one().into_repr(); + +// let zero = G::Projective::zero(); +// let window_starts: Vec<_> = (0..num_bits).step_by(c).collect(); + +// dbg!(&window_starts, window_starts.len(), num_bits); + +// // Each window is of size `c`. +// // We divide up the bits 0..num_bits into windows of size `c`, and +// // in parallel process each such window. +// let window_sums: Vec<_> = window_starts +// .into_par_iter() +// .map(|w_start| { + +// let mut res = zero; +// // We don't need the "zero" bucket, so we only have 2^c - 1 buckets. +// let mut buckets = vec![zero; (1 << c) - 1]; +// // This clone is cheap, because the iterator contains just a +// // pointer and an index into the original vectors. +// scalars_and_bases_iter.clone().for_each(|(&scalar, base)| { +// if scalar == fr_one { +// // We only process unit scalars once in the first window. +// if w_start == 0 { +// res.add_assign_mixed(base); +// } +// } else { +// let mut scalar = scalar; + +// // We right-shift by w_start, thus getting rid of the +// // lower bits. +// scalar.divn(w_start as u32); + +// // We mod the remaining bits by 2^{window size}, thus taking `c` bits. +// let scalar = scalar.to_64x4()[0] % (1 << c); + +// // If the scalar is non-zero, we update the corresponding +// // bucket. +// // (Recall that `buckets` doesn't have a zero bucket.) +// if scalar != 0 { +// buckets[(scalar - 1) as usize].add_assign_mixed(base); +// } +// } +// }); + +// // Compute sum_{i in 0..num_buckets} (sum_{j in i..num_buckets} bucket[j]) +// // This is computed below for b buckets, using 2b curve additions. +// // +// // We could first normalize `buckets` and then use mixed-addition +// // here, but that's slower for the kinds of groups we care about +// // (Short Weierstrass curves and Twisted Edwards curves). +// // In the case of Short Weierstrass curves, +// // mixed addition saves ~4 field multiplications per addition. +// // However normalization (with the inversion batched) takes ~6 +// // field multiplications per element, +// // hence batch normalization is a slowdown. + +// // `running_sum` = sum_{j in i..num_buckets} bucket[j], +// // where we iterate backward from i = num_buckets to 0. +// let mut running_sum = G::Projective::zero(); +// buckets.into_iter().rev().for_each(|b| { +// running_sum += &b; +// res += &running_sum; +// }); +// res +// }) +// .collect(); + +// // We store the sum for the lowest window. +// let lowest = *window_sums.first().unwrap(); + +// // We're traversing windows from high to low. +// lowest +// + &window_sums[1..] +// .iter() +// .rev() +// .fold(zero, |mut total, sum_i| { +// total += sum_i; +// for _ in 0..c { +// total.double_in_place(); +// } +// total +// }) +// } + +// fn ln_without_floats(a: usize) -> usize { +// // log2(a) * ln(2) + +// (log2(a) * 69 / 100) as usize +// } + +// fn log2(x: usize) -> u32 { +// if x == 0 { +// 0 +// } else if x.is_power_of_two() { +// 1usize.leading_zeros() - x.leading_zeros() +// } else { +// 0usize.leading_zeros() - x.leading_zeros() +// } +// } + +// } diff --git a/poly-commitment/src/msm.rs b/poly-commitment/src/msm.rs new file mode 100644 index 0000000000..8901fe4552 --- /dev/null +++ b/poly-commitment/src/msm.rs @@ -0,0 +1,373 @@ +use std::sync::atomic::AtomicUsize; + +use ark_ec::{ + short_weierstrass_jacobian::{GroupAffine, GroupProjective}, + AffineCurve, ProjectiveCurve, SWModelParameters as Parameter, +}; +use ark_ff::{BigInteger, FpParameters}; +use ark_ff::{BigInteger256, Field, One, PrimeField, Zero}; +use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; + +use crate::commitment::CommitmentCurve; + +pub static MSM_DURATION: AtomicUsize = AtomicUsize::new(0); +pub static MSM_INDEX: AtomicUsize = AtomicUsize::new(0); + +pub fn call_msm( + points: &[G], + scalars: &[<::ScalarField as PrimeField>::BigInt], +) -> G::Projective { + // let now = std::time::Instant::now(); + + let res = if scalars.iter().any(|s| s.is_zero()) { + // Unfortunatly, in many cases `call_msm` is called with many zeros in `scalars` + // When that occur, we can't use the batched additions, because digits are not + // evenly distributed in each bucket. That would be slower than + // non-batched msm + ark_ec::msm::VariableBaseMSM::multi_scalar_mul(points, scalars) + } else { + // In the few cases when there is no zero in `scalars`, our MSM is about 30% faster + // than `ark_ec::msm::VariableBaseMSM::multi_scalar_mul` + call_msm_impl(points, scalars) + }; + + // let elapsed = now.elapsed(); + // MSM_DURATION.fetch_add(elapsed.as_millis().try_into().unwrap(), std::sync::atomic::Ordering::Relaxed); + + res +} + +// /// Use to compare window sizes +// pub fn call_msm2( +// points: &[G], +// scalars: &[<::ScalarField as PrimeField>::BigInt], +// ) -> G::Projective { +// let mut map = HashMap::new(); + +// let size = std::cmp::min(points.len(), scalars.len()); + +// // let c = if size <= 8194 { 8 } else { 13 }; + +// for c in 5..15 { +// // dbg!(c); +// let now = std::time::Instant::now(); +// let _res = call_msm_impl(&points[..size], &scalars[..size], c); +// let elapsed = now.elapsed(); +// map.insert(c, elapsed); +// } + +// let now = std::time::Instant::now(); +// let res = ark_ec::msm::VariableBaseMSM::multi_scalar_mul(&points[..size], &scalars[..size]); +// let ark_elapsed = now.elapsed(); + +// let mut best_vec = map.iter().collect::>(); +// best_vec.sort_by_key(|(_c, dur)| *dur); + +// // dbg!(&best_vec); +// let best = best_vec.first().unwrap(); +// // assert!(best.1 < best_vec.last().unwrap().1); + +// use ark_ff::BigInteger; +// let n_zeros = scalars.iter().filter(|s| s.is_zero()).count(); + +// // let best = if + +// // MSM_DURATION.fetch_add(best.1.as_millis().try_into().unwrap(), std::sync::atomic::Ordering::Relaxed); +// // MSM_DURATION.fetch_add(elapsed.as_millis().try_into().unwrap(), std::sync::atomic::Ordering::Relaxed); +// let index = MSM_INDEX.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + +// // if ark_elapsed < best.1 { + +// let mut s = ""; +// if best.1 < &ark_elapsed { +// s = "XXX"; +// } + +// eprintln!( +// "[{:?}] npoints:{:?} nzeros:{:?} ark_elapsed:{:?} best:{:?} {}", +// index, +// points.len(), +// n_zeros, +// ark_elapsed, +// &best_vec[..2], +// s +// ); +// // } else { + +// // } +// // eprintln!("[{:?}] npoints:{:?} nzeros:{:?} elapsed:{:?}", index, points.len(), n_zeros, elapsed); + +// // if points.len() == 16384 { +// // std::process::exit(0); +// // } + +// res +// } + +pub fn call_msm_impl( + points: &[G], + scalars: &[<::ScalarField as PrimeField>::BigInt], +) -> G::Projective { + use std::any::TypeId; + + assert_eq!(TypeId::of::(), TypeId::of::>()); + assert_eq!( + TypeId::of::(), + TypeId::of::>() + ); + assert_eq!( + TypeId::of::<<::ScalarField as PrimeField>::BigInt>(), + TypeId::of::() + ); + + // Safety: We're reinterpreting generic types to their concret types + // proof-systems contains too much useless generic types + // It's safe because we just asserted they are the same types + let result = my_msm::(unsafe { std::mem::transmute(points) }, unsafe { + std::mem::transmute(scalars) + }); + unsafe { *(&result as *const _ as *const G::Projective) } +} + +struct Batch<'a, P: Parameter> { + buckets: Vec>, + /// (index in `buckets`, is_negative, group) + in_batch: Vec<(usize, bool, &'a GroupAffine

)>, + in_batch_busy_buckets: Vec, + inverse_state: P::BaseField, + inverses: Vec, + /// (index in `buckets`, is_negative, group) + pendings: Vec<(usize, bool, &'a GroupAffine

)>, +} + +const N_BATCH: usize = 4096; +const N_COLLISION: usize = 512; + +impl<'a, P: Parameter> Batch<'a, P> { + pub fn with_capacity(capacity: usize) -> Self { + let zero = GroupAffine::zero(); + Self { + buckets: vec![zero; capacity], + in_batch: Vec::with_capacity(N_BATCH), + in_batch_busy_buckets: vec![false; capacity], + inverse_state: P::BaseField::one(), + inverses: vec![P::BaseField::one(); N_BATCH], + pendings: Vec::with_capacity(N_BATCH), + } + } + + fn add_in_bucket(&mut self, bucket: usize, is_negative: bool, g: &'a GroupAffine

) { + if self.in_batch_busy_buckets[bucket] { + self.pendings.push((bucket, is_negative, g)); + } else { + self.in_batch_busy_buckets[bucket] = true; + self.in_batch.push((bucket, is_negative, g)); + } + } + + // Thanks to + // https://github.com/snarkify/arkmsm/blob/f60cffa905762911a77800a77d524cf7279b63d5/src/batch_adder.rs#L125-L201 + fn accumulate(&mut self) { + use std::ops::Neg; + + self.inverse_state = P::BaseField::one(); + + for (in_batch_index, (bucket_index, is_neg, point)) in + self.in_batch.iter().copied().enumerate() + { + let bucket = &mut self.buckets[bucket_index]; + let mut point = *point; + if is_neg { + point = point.neg(); + } + if bucket.is_zero() | point.is_zero() { + continue; + } + let mut diff_x = point.x - bucket.x; + if diff_x.is_zero() { + let diff_y = point.y - bucket.y; + if !diff_y.is_zero() { + continue; + } + diff_x = point.y + point.y; + } + if self.inverse_state.is_zero() { + self.inverses[in_batch_index].set_one(); + self.inverse_state = diff_x; + } else { + self.inverses[in_batch_index] = self.inverse_state; + self.inverse_state *= diff_x + } + } + + self.inverse_state = self.inverse_state.inverse().unwrap(); + + for (in_batch_index, (bucket_index, is_neg, point)) in + self.in_batch.iter().copied().enumerate().rev() + { + let bucket = &mut self.buckets[bucket_index]; + let mut point = *point; + if is_neg { + point = point.neg(); + } + if bucket.is_zero() | point.is_zero() { + if !point.is_zero() { + *bucket = point; + } + continue; + } + let mut inverse = self.inverses[in_batch_index]; + inverse *= self.inverse_state; + let mut diff_x = point.x - bucket.x; + let mut diff_y = point.y - bucket.y; + if diff_x.is_zero() { + if !diff_y.is_zero() { + bucket.set_zero(); + continue; + } + diff_y = point.x.square(); + diff_y = diff_y + diff_y + diff_y; + diff_x = point.y.double(); + } + self.inverse_state *= diff_x; + let s = diff_y * inverse; + let ss = s * s; + bucket.x = ss - point.x - bucket.x; + diff_x = point.x - bucket.x; + bucket.y = s * diff_x; + bucket.y -= point.y; + } + + self.in_batch.clear(); + self.in_batch_busy_buckets + .iter_mut() + .for_each(|b| *b = false); + + self.pendings.retain(|(bucket, is_neg, g)| { + if self.in_batch_busy_buckets[*bucket] { + return true; + } + self.in_batch_busy_buckets[*bucket] = true; + self.in_batch.push((*bucket, *is_neg, g)); + false + }); + } +} + +#[derive(Copy, Clone)] +pub struct Digits { + digits: u32, +} + +pub fn my_msm( + bases: &[GroupAffine

], + scalars: &[BigInteger256], +) -> GroupProjective

{ + let size = std::cmp::min(bases.len(), scalars.len()); + let scalars = &scalars[..size]; + let bases = &bases[..size]; + + let c = match size { + ..=18 => 6, + ..=8184 => 8, + _ => 13, + }; + + let zero = GroupProjective::zero(); + let num_bits = + < as AffineCurve>::ScalarField as PrimeField>::Params::MODULUS_BITS as usize; + let window_starts: Vec<_> = (0..num_bits).step_by(c).collect(); + + let max = 1 << c; + let max_half = max >> 1; + + let digits = scalars + .par_iter() + .map(|scalar| { + if scalar.is_zero() { + return None; + } + let mut scalar = *scalar; + let mut carry = 0; + Some( + window_starts + .iter() + .map(|_win_start| { + let mut digits = scalar.to_64x4()[0] % (1 << c); + digits += carry; + if digits > max_half { + digits = max - digits; + carry = 1; + } else { + carry = 0; + } + let digits = Digits { + digits: digits as u32 | ((carry as u32) << 31), + }; + scalar.divn(c as u32); + digits + }) + .collect::>(), + ) + }) + .collect::>(); + + let sum_per_window: Vec<_> = window_starts + .par_iter() + .copied() + .enumerate() + .map(|(window_index, _)| { + let mut batch = Batch::with_capacity(1 << (c - 1)); + + digits.iter().zip(bases).for_each(|(scalar, base)| { + let Some(scalar) = scalar else { + return; + }; + let Digits { digits } = scalar[window_index]; + let is_neg = (digits >> 31) != 0; + let digits = ((digits as u32) & ((-1i32 as u32) >> 1)) as usize; + let Some(digits) = digits.checked_sub(1) else { + return; + }; + batch.add_in_bucket(digits, is_neg, base); + if batch.in_batch.len() >= N_BATCH || batch.pendings.len() >= N_COLLISION { + batch.accumulate(); + } + }); + + while !batch.in_batch.is_empty() || !batch.pendings.is_empty() { + batch.accumulate(); + } + + // eprintln!( + // "total alloc: {:?} accum: {:?} nzeros: {:?} nis_neg:{:?} in_batch_cap: {:?} pendings_cap: {:?}", + // elapsed_alloc, now.elapsed(), nzeros, nis_neg, batch.in_batch.capacity(), batch.pendings.capacity(), + // ); + + let mut running_sum = zero; + batch + .buckets + .iter() + .rev() + .map(|b| { + running_sum.add_assign_mixed(b); + running_sum + }) + .sum() + }) + .collect(); + + let lowest = *sum_per_window.first().unwrap(); + + lowest + + &sum_per_window[1..] + .iter() + .rev() + .fold(zero, |mut total, sum_i| { + total += sum_i; + for _ in 0..c { + total.double_in_place(); + } + total + }) +} diff --git a/poly-commitment/src/pairing_proof.rs b/poly-commitment/src/pairing_proof.rs index 1a581e538b..3004636b6e 100644 --- a/poly-commitment/src/pairing_proof.rs +++ b/poly-commitment/src/pairing_proof.rs @@ -1,8 +1,9 @@ use crate::commitment::*; use crate::evaluation_proof::combine_polys; +use crate::msm::call_msm; use crate::srs::SRS; use crate::{CommitmentError, PolynomialsToCombine, SRS as SRSTrait}; -use ark_ec::{msm::VariableBaseMSM, AffineCurve, PairingEngine}; +use ark_ec::{AffineCurve, PairingEngine}; use ark_ff::{PrimeField, Zero}; use ark_poly::{ univariate::{DenseOrSparsePolynomial, DensePolynomial}, @@ -302,7 +303,7 @@ impl< ); let scalars: Vec<_> = scalars.iter().map(|x| x.into_repr()).collect(); - VariableBaseMSM::multi_scalar_mul(&points, &scalars) + call_msm::(&points, &scalars) }; let evals = combine_evaluations(evaluations, polyscale); let blinding_commitment = srs.full_srs.h.mul(self.blinding); @@ -326,94 +327,94 @@ impl< } } -#[cfg(test)] -mod tests { - use super::{PairingProof, PairingSRS}; - use crate::commitment::Evaluation; - use crate::evaluation_proof::DensePolynomialOrEvaluations; - use crate::srs::SRS; - use crate::SRS as _; - use ark_bn254::Fr as ScalarField; - use ark_bn254::{G1Affine as G1, G2Affine as G2, Parameters}; - use ark_ec::bn::Bn; - use ark_ff::UniformRand; - use ark_poly::{ - univariate::DensePolynomial, EvaluationDomain, Polynomial, Radix2EvaluationDomain as D, - UVPolynomial, - }; - - use rand::{rngs::StdRng, SeedableRng}; - - #[test] - fn test_pairing_proof() { - let n = 64; - let domain = D::::new(n).unwrap(); - - let rng = &mut StdRng::from_seed([0u8; 32]); - - let x = ScalarField::rand(rng); - - let mut srs = SRS::::create_trusted_setup(x, n); - let verifier_srs = SRS::::create_trusted_setup(x, 3); - srs.add_lagrange_basis(domain); - - let srs = PairingSRS { - full_srs: srs, - verifier_srs, - }; - - let polynomials: Vec<_> = (0..4) - .map(|_| { - let coeffs = (0..63).map(|_| ScalarField::rand(rng)).collect(); - DensePolynomial::from_coefficients_vec(coeffs) - }) - .collect(); - - let comms: Vec<_> = polynomials - .iter() - .map(|p| srs.full_srs.commit(p, 1, rng)) - .collect(); - - let polynomials_and_blinders: Vec<(DensePolynomialOrEvaluations<_, D<_>>, _)> = polynomials - .iter() - .zip(comms.iter()) - .map(|(p, comm)| { - let p = DensePolynomialOrEvaluations::DensePolynomial(p); - (p, comm.blinders.clone()) - }) - .collect(); - - let evaluation_points = vec![ScalarField::rand(rng), ScalarField::rand(rng)]; - - let evaluations: Vec<_> = polynomials - .iter() - .zip(comms) - .map(|(p, commitment)| { - let evaluations = evaluation_points - .iter() - .map(|x| { - // Inputs are chosen to use only 1 chunk - vec![p.evaluate(x)] - }) - .collect(); - Evaluation { - commitment: commitment.commitment, - evaluations, - } - }) - .collect(); - - let polyscale = ScalarField::rand(rng); - - let pairing_proof = PairingProof::>::create( - &srs, - polynomials_and_blinders.as_slice(), - &evaluation_points, - polyscale, - ) - .unwrap(); - - let res = pairing_proof.verify(&srs, &evaluations, polyscale, &evaluation_points); - assert!(res); - } -} +// #[cfg(test)] +// mod tests { +// use super::{PairingProof, PairingSRS}; +// use crate::commitment::Evaluation; +// use crate::evaluation_proof::DensePolynomialOrEvaluations; +// use crate::srs::SRS; +// use crate::SRS as _; +// use ark_bn254::Fr as ScalarField; +// use ark_bn254::{G1Affine as G1, G2Affine as G2, Parameters}; +// use ark_ec::bn::Bn; +// use ark_ff::UniformRand; +// use ark_poly::{ +// univariate::DensePolynomial, EvaluationDomain, Polynomial, Radix2EvaluationDomain as D, +// UVPolynomial, +// }; + +// use rand::{rngs::StdRng, SeedableRng}; + +// #[test] +// fn test_pairing_proof() { +// let n = 64; +// let domain = D::::new(n).unwrap(); + +// let rng = &mut StdRng::from_seed([0u8; 32]); + +// let x = ScalarField::rand(rng); + +// let mut srs = SRS::::create_trusted_setup(x, n); +// let verifier_srs = SRS::::create_trusted_setup(x, 3); +// srs.add_lagrange_basis(domain); + +// let srs = PairingSRS { +// full_srs: srs, +// verifier_srs, +// }; + +// let polynomials: Vec<_> = (0..4) +// .map(|_| { +// let coeffs = (0..63).map(|_| ScalarField::rand(rng)).collect(); +// DensePolynomial::from_coefficients_vec(coeffs) +// }) +// .collect(); + +// let comms: Vec<_> = polynomials +// .iter() +// .map(|p| srs.full_srs.commit(p, 1, rng)) +// .collect(); + +// let polynomials_and_blinders: Vec<(DensePolynomialOrEvaluations<_, D<_>>, _)> = polynomials +// .iter() +// .zip(comms.iter()) +// .map(|(p, comm)| { +// let p = DensePolynomialOrEvaluations::DensePolynomial(p); +// (p, comm.blinders.clone()) +// }) +// .collect(); + +// let evaluation_points = vec![ScalarField::rand(rng), ScalarField::rand(rng)]; + +// let evaluations: Vec<_> = polynomials +// .iter() +// .zip(comms) +// .map(|(p, commitment)| { +// let evaluations = evaluation_points +// .iter() +// .map(|x| { +// // Inputs are chosen to use only 1 chunk +// vec![p.evaluate(x)] +// }) +// .collect(); +// Evaluation { +// commitment: commitment.commitment, +// evaluations, +// } +// }) +// .collect(); + +// let polyscale = ScalarField::rand(rng); + +// let pairing_proof = PairingProof::>::create( +// &srs, +// polynomials_and_blinders.as_slice(), +// &evaluation_points, +// polyscale, +// ) +// .unwrap(); + +// let res = pairing_proof.verify(&srs, &evaluations, polyscale, &evaluation_points); +// assert!(res); +// } +// } diff --git a/poseidon/src/sponge.rs b/poseidon/src/sponge.rs index ff7f00a412..9c599ace5d 100644 --- a/poseidon/src/sponge.rs +++ b/poseidon/src/sponge.rs @@ -19,7 +19,8 @@ pub fn endo_coefficient() -> F { let t = F::multiplicative_generator(); - t.pow(p_minus_1_over_3.into_repr().as_ref()) + let p_minus_1_over_3 = p_minus_1_over_3.into_repr().to_64x4(); + t.pow(&p_minus_1_over_3) } fn get_bit(limbs_lsb: &[u64], i: u64) -> u64 { @@ -30,7 +31,7 @@ fn get_bit(limbs_lsb: &[u64], i: u64) -> u64 { impl ScalarChallenge { pub fn to_field_with_length(&self, length_in_bits: usize, endo_coeff: &F) -> F { - let rep = self.0.into_repr(); + let rep = self.0.into_repr().to_64x4(); let r = rep.as_ref(); let mut a: F = 2_u64.into(); @@ -92,6 +93,7 @@ impl DefaultFrSponge { .expect("internal representation was not a valid field element") } else { let x = self.sponge.squeeze().into_repr(); + let x = x.to_64x4(); self.last_squeezed .extend(&x.as_ref()[0..HIGH_ENTROPY_LIMBS]); self.squeeze(num_limbs) @@ -112,6 +114,7 @@ where limbs.to_vec() } else { let x = self.sponge.squeeze().into_repr(); + let x = x.to_64x4(); self.last_squeezed .extend(&x.as_ref()[0..HIGH_ENTROPY_LIMBS]); self.squeeze_limbs(num_limbs) diff --git a/rust-toolchain b/rust-toolchain index cc31fcd4f5..74c280fb83 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -1.72 +1.83 diff --git a/signer/src/keypair.rs b/signer/src/keypair.rs index fc81dce32e..648c2caf3d 100644 --- a/signer/src/keypair.rs +++ b/signer/src/keypair.rs @@ -98,7 +98,9 @@ impl Keypair { pub fn secret_multiply_with_curve_point(&self, multiplicand: CurvePoint) -> CurvePoint { use ark_ec::AffineCurve; use ark_ec::ProjectiveCurve; - multiplicand.mul(self.secret.clone().into_scalar()).into_affine() + multiplicand + .mul(self.secret.clone().into_scalar()) + .into_affine() } }