diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index 6477a14ca4..02fad45510 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -63,7 +63,7 @@ lazy_static = { version = "1", optional = true } env_logger = "0.10.0" # GPU Icicle integration -icicle = { git = "https://github.com/ingonyama-zk/icicle.git", branch = "rust/large-bucket-factor-msm", optional = true } +icicle = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v0.1.0", optional = true } rustacuda = { version = "0.1", optional = true } # Developer tooling dependencies @@ -100,6 +100,7 @@ batch = ["rand_core/getrandom"] circuit-params = [] counter = ["lazy_static"] icicle_gpu = ["icicle", "rustacuda"] +profile=[] [lib] bench = false diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs index 8e090248a8..c29874971f 100644 --- a/halo2_proofs/src/arithmetic.rs +++ b/halo2_proofs/src/arithmetic.rs @@ -146,9 +146,31 @@ pub fn small_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::C #[cfg(feature = "icicle_gpu")] /// Performs a multi-exponentiation operation on GPU using Icicle library pub fn best_multiexp_gpu(coeffs: &[C::Scalar], is_lagrange: bool) -> C::Curve { - let scalars_ptr: DeviceBuffer<::icicle::curves::bn254::ScalarField_BN254> = icicle::copy_scalars_to_device::(coeffs); + let scalars_ptr: DeviceBuffer<::icicle::curves::bn254::ScalarField_BN254> = + icicle::copy_scalars_to_device::(coeffs); - return icicle::multiexp_on_device::(scalars_ptr, is_lagrange); + icicle::multiexp_on_device::(scalars_ptr, is_lagrange) +} + +#[cfg(feature = "icicle_gpu")] +/// Performs a batch multi-exponentiation operation on GPU using Icicle library +pub fn best_batch_multiexp_gpu( + coeffs: &[C::Scalar], + bases: &[C], + batch_size: usize, +) -> Vec { + let scalars_ptr: DeviceBuffer<::icicle::curves::bn254::ScalarField_BN254> = + icicle::copy_scalars_to_device::(coeffs); + let all_bases = vec![bases; batch_size] + .iter() + .flat_map(|bases| bases.iter()) + .copied() + .collect::>(); + assert!(scalars_ptr.len() == all_bases.len()); + let bases_ptr: DeviceBuffer<::icicle::curves::bn254::PointAffineNoInfinity_BN254> = + icicle::copy_points_to_device::(all_bases.as_slice()); + + icicle::batch_multiexp_on_device::(scalars_ptr, bases_ptr, batch_size) } /// Performs a multi-exponentiation operation. diff --git a/halo2_proofs/src/icicle.rs b/halo2_proofs/src/icicle.rs index 191dfad012..222d95ef57 100644 --- a/halo2_proofs/src/icicle.rs +++ b/halo2_proofs/src/icicle.rs @@ -1,5 +1,8 @@ use group::ff::PrimeField; -use icicle::{curves::bn254::{Point_BN254, ScalarField_BN254}, test_bn254::commit_bn254}; +use icicle::{ + curves::bn254::{Point_BN254, ScalarField_BN254}, + test_bn254::{commit_batch_bn254, commit_bn254}, +}; use std::sync::{Arc, Once}; pub use icicle::curves::bn254::PointAffineNoInfinity_BN254; @@ -7,15 +10,21 @@ use rustacuda::memory::CopyDestination; use rustacuda::prelude::*; pub use halo2curves::CurveAffine; -use std::{mem, env}; +use log::info; +use std::{env, mem}; static mut GPU_CONTEXT: Option = None; static mut GPU_G: Option> = None; static mut GPU_G_LAGRANGE: Option> = None; static GPU_INIT: Once = Once::new(); -pub fn should_use_cpu_msm(size: usize) -> bool { - size <= (1 << u8::from_str_radix(&env::var("ICICLE_SMALL_K").unwrap_or("8".to_string()), 10).unwrap()) +pub fn is_small_circuit(size: usize) -> bool { + size <= (1 + << u8::from_str_radix( + &env::var("ICICLE_SMALL_CIRCUIT").unwrap_or("8".to_string()), + 10, + ) + .unwrap()) } pub fn init_gpu(g: &[C], g_lagrange: &[C]) { @@ -24,20 +33,27 @@ pub fn init_gpu(g: &[C], g_lagrange: &[C]) { GPU_CONTEXT = Some(rustacuda::quick_init().unwrap()); GPU_G = Some(copy_points_to_device(g)); GPU_G_LAGRANGE = Some(copy_points_to_device(g_lagrange)); + info!("GPU initialized"); }); } } -fn u32_from_u8(u8_arr: &[u8;32]) -> [u32;8]{ - let mut t = [0u32;8]; - for i in 0..8{ - t[i] = u32::from_le_bytes([u8_arr[4*i],u8_arr[4*i+1],u8_arr[4*i+2],u8_arr[4*i+3]]); +fn u32_from_u8(u8_arr: &[u8; 32]) -> [u32; 8] { + let mut t = [0u32; 8]; + for i in 0..8 { + t[i] = u32::from_le_bytes([ + u8_arr[4 * i], + u8_arr[4 * i + 1], + u8_arr[4 * i + 2], + u8_arr[4 * i + 3], + ]); } - return t; + return t; } -fn repr_from_u32(u32_arr: &[u32;8]) -> ::Base { - let t : &[<::Base as PrimeField>::Repr] = unsafe { mem::transmute(&u32_arr[..]) }; +fn repr_from_u32(u32_arr: &[u32; 8]) -> ::Base { + let t: &[<::Base as PrimeField>::Repr] = + unsafe { mem::transmute(&u32_arr[..]) }; return PrimeField::from_repr(t[0]).unwrap(); } @@ -50,14 +66,17 @@ fn icicle_scalars_from_c(coeffs: &[C::Scalar]) -> Vec>(), )]; - - let _coeffs: &Arc> = unsafe { mem::transmute(&_coeffs) }; - _coeffs.iter().map(|x| { - ScalarField_BN254::from_limbs(x) - }).collect::>() + + let _coeffs: &Arc> = unsafe { mem::transmute(&_coeffs) }; + _coeffs + .iter() + .map(|x| ScalarField_BN254::from_limbs(x)) + .collect::>() } -pub fn copy_scalars_to_device(coeffs: &[C::Scalar]) -> DeviceBuffer { +pub fn copy_scalars_to_device( + coeffs: &[C::Scalar], +) -> DeviceBuffer { let scalars = icicle_scalars_from_c::(coeffs); DeviceBuffer::from_slice(scalars.as_slice()).unwrap() @@ -65,39 +84,56 @@ pub fn copy_scalars_to_device(coeffs: &[C::Scalar]) -> DeviceBuf fn icicle_points_from_c(bases: &[C]) -> Vec { let _bases = [Arc::new( - bases.iter().map(|p| { - let coordinates = p.coordinates().unwrap(); - [coordinates.x().to_repr(),coordinates.y().to_repr()] - }).collect::>(), + bases + .iter() + .map(|p| { + let coordinates = p.coordinates().unwrap(); + [coordinates.x().to_repr(), coordinates.y().to_repr()] + }) + .collect::>(), )]; - - let _bases: &Arc> = unsafe { mem::transmute(&_bases) }; - _bases.iter().map(|x| { - let tx = u32_from_u8(&x[0]); - let ty = u32_from_u8(&x[1]); - PointAffineNoInfinity_BN254::from_limbs(&tx,&ty) - }).collect::>() + + let _bases: &Arc> = unsafe { mem::transmute(&_bases) }; + _bases + .iter() + .map(|x| { + let tx = u32_from_u8(&x[0]); + let ty = u32_from_u8(&x[1]); + PointAffineNoInfinity_BN254::from_limbs(&tx, &ty) + }) + .collect::>() } -pub fn copy_points_to_device(bases: &[C]) -> DeviceBuffer { +pub fn copy_points_to_device( + bases: &[C], +) -> DeviceBuffer { let points = icicle_points_from_c(bases); - + DeviceBuffer::from_slice(points.as_slice()).unwrap() } -fn c_from_icicle_point(commit_res: Point_BN254) -> C::Curve { - let (x , y) = if is_infinity_point(commit_res){ - (repr_from_u32::(&[0u32;8]), repr_from_u32::(&[0u32;8])) - } else{ +fn c_from_icicle_point(commit_res: Point_BN254) -> C::Curve { + let (x, y) = if is_infinity_point(commit_res) { + ( + repr_from_u32::(&[0u32; 8]), + repr_from_u32::(&[0u32; 8]), + ) + } else { let affine_res_from_cuda = commit_res.to_affine(); - (repr_from_u32::(&affine_res_from_cuda.x.s), repr_from_u32::(&affine_res_from_cuda.y.s)) + ( + repr_from_u32::(&affine_res_from_cuda.x.s), + repr_from_u32::(&affine_res_from_cuda.y.s), + ) }; - let affine = C::from_xy(x,y).unwrap(); + let affine = C::from_xy(x, y).unwrap(); return affine.to_curve(); } -pub fn multiexp_on_device(mut coeffs: DeviceBuffer, is_lagrange: bool) -> C::Curve { +pub fn multiexp_on_device( + mut coeffs: DeviceBuffer, + is_lagrange: bool, +) -> C::Curve { let base_ptr: &mut DeviceBuffer; unsafe { if is_lagrange { @@ -110,10 +146,23 @@ pub fn multiexp_on_device(mut coeffs: DeviceBuffer(h_commit_result) } +pub fn batch_multiexp_on_device( + mut coeffs: DeviceBuffer, + mut bases: DeviceBuffer, + batch_size: usize, +) -> Vec { + let d_commit_result = commit_batch_bn254(&mut bases, &mut coeffs, batch_size); + let mut h_commit_result: Vec = + (0..batch_size).map(|_| Point_BN254::zero()).collect(); + d_commit_result.copy_to(&mut h_commit_result[..]).unwrap(); + + h_commit_result + .iter() + .map(|commit_result| c_from_icicle_point::(*commit_result)) + .collect() +} diff --git a/halo2_proofs/src/plonk/permutation/prover.rs b/halo2_proofs/src/plonk/permutation/prover.rs index d6b108554d..9e3d97d628 100644 --- a/halo2_proofs/src/plonk/permutation/prover.rs +++ b/halo2_proofs/src/plonk/permutation/prover.rs @@ -18,6 +18,9 @@ use crate::{ transcript::{EncodedChallenge, TranscriptWrite}, }; +#[cfg(feature = "icicle_gpu")] +use crate::icicle; + pub(crate) struct CommittedSet { pub(crate) permutation_product_poly: Polynomial, pub(crate) permutation_product_coset: Polynomial, @@ -80,6 +83,8 @@ impl Argument { let mut last_z = C::Scalar::ONE; let mut sets = vec![]; + let mut z_set = vec![]; + let mut blind_set = vec![]; for (columns, permutations) in self .columns @@ -165,21 +170,13 @@ impl Argument { } // Set new last_z last_z = z[params.n() as usize - (blinding_factors + 1)]; - let blind = Blind(C::Scalar::random(&mut rng)); - - let permutation_product_commitment_projective = params.commit_lagrange(&z, blind); - let permutation_product_blind = blind; + z_set.push(z.clone()); + blind_set.push(blind); let z = domain.lagrange_to_coeff(z); let permutation_product_poly = z.clone(); - let permutation_product_coset = domain.coeff_to_extended(z.clone()); - - let permutation_product_commitment = - permutation_product_commitment_projective.to_affine(); - - // Hash the permutation product commitment - transcript.write_point(permutation_product_commitment)?; + let permutation_product_blind = blind; sets.push(CommittedSet { permutation_product_poly, @@ -188,6 +185,37 @@ impl Argument { }); } + #[cfg(feature = "icicle_gpu")] + if std::env::var("ENABLE_ICICLE_GPU").is_ok() && icicle::is_small_circuit(z_set[0].len()) { + let permutation_product_commitment_projectives = + params.commit_lagrange_batch(&z_set, &blind_set); + permutation_product_commitment_projectives + .iter() + .for_each(|commitment_projective| { + let permutation_product_commitment = commitment_projective.to_affine(); + + // Hash the permutation product commitment + transcript + .write_point(permutation_product_commitment) + .unwrap(); + }); + + return Ok(Committed { sets }); + } + + //NOTE: Since commit_lagrange checks for icicle_gpu feature internally, we can delegate the decision to fall back + // to CPU to it instead of duplicating code here for when icicle_gpu is not enabled + z_set.iter().zip(blind_set.iter()).for_each(|(z, blind)| { + let permutation_product_commitment_projective = params.commit_lagrange(&z, *blind); + let permutation_product_commitment = + permutation_product_commitment_projective.to_affine(); + + // Hash the permutation product commitment + transcript + .write_point(permutation_product_commitment) + .unwrap(); + }); + Ok(Committed { sets }) } } diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index abe3b6e40e..a9a5f4ec19 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -30,6 +30,11 @@ use crate::{ }; use group::prime::PrimeCurveAffine; +#[cfg(feature = "icicle_gpu")] +use crate::icicle; +use log::{debug, info}; +use std::time::Instant; + /// This creates a proof for the provided `circuit` when given the public /// parameters `params` and the proving key [`ProvingKey`] that was /// generated previously for the same circuit. The provided `instances` @@ -298,6 +303,8 @@ where } } + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let (advice, challenges) = { let mut advice = vec![ AdviceSingle:: { @@ -387,11 +394,42 @@ where } }) .collect(); + + let now = std::time::Instant::now(); + #[cfg(feature = "icicle_gpu")] + let mut advice_commitments_projective: Vec<_>; + #[cfg(feature = "icicle_gpu")] + if std::env::var("ENABLE_ICICLE_GPU").is_ok() + && icicle::is_small_circuit(advice_values[0].len()) + { + advice_commitments_projective = + params.commit_lagrange_batch(&advice_values, &blinds); + debug!( + "GPU: advice_commitments_projective of length {} took: {}", + advice_commitments_projective.len(), + now.elapsed().as_millis() + ); + } else { + advice_commitments_projective = advice_values + .iter() + .zip(blinds.iter()) + .map(|(poly, blind)| params.commit_lagrange(poly, *blind)) + .collect(); + } + + #[cfg(not(feature = "icicle_gpu"))] let advice_commitments_projective: Vec<_> = advice_values .iter() .zip(blinds.iter()) .map(|(poly, blind)| params.commit_lagrange(poly, *blind)) .collect(); + #[cfg(not(feature = "icicle_gpu"))] + debug!( + "CPU: advice_commitments_projective of length {} took: {}", + advice_commitments_projective.len(), + now.elapsed().as_millis() + ); + let mut advice_commitments = vec![Scheme::Curve::identity(); advice_commitments_projective.len()]; ::CurveExt::batch_normalize( @@ -428,10 +466,21 @@ where (advice, challenges) }; + #[cfg(feature = "profile")] + info!( + "Advice and Challenge generation: {} ms", + start.elapsed().as_millis() + ); // Sample theta challenge for keeping lookup columns linearly independent + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let theta: ChallengeTheta<_> = transcript.squeeze_challenge_scalar(); + #[cfg(feature = "profile")] + info!("theta generation: {} ms", start.elapsed().as_millis()); + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let lookups: Vec>> = instance .iter() .zip(advice.iter()) @@ -458,14 +507,25 @@ where .collect() }) .collect::, _>>()?; - + #[cfg(feature = "profile")] + info!("Lookups prepare: {} ms", start.elapsed().as_millis()); // Sample beta challenge + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let beta: ChallengeBeta<_> = transcript.squeeze_challenge_scalar(); + #[cfg(feature = "profile")] + info!("beta generation: {} ms", start.elapsed().as_millis()); // Sample gamma challenge + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let gamma: ChallengeGamma<_> = transcript.squeeze_challenge_scalar(); + #[cfg(feature = "profile")] + info!("gamma generation: {} ms", start.elapsed().as_millis()); // Commit to permutations. + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let permutations: Vec> = instance .iter() .zip(advice.iter()) @@ -484,7 +544,11 @@ where ) }) .collect::, _>>()?; + #[cfg(feature = "profile")] + info!("permutation commit: {} ms", start.elapsed().as_millis()); + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let lookups: Vec>> = lookups .into_iter() .map(|lookups| -> Result, _> { @@ -495,7 +559,14 @@ where .collect::, _>>() }) .collect::, _>>()?; + #[cfg(feature = "profile")] + info!( + "lookups commit_grand_sum: {} ms", + start.elapsed().as_millis() + ); + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let shuffles: Vec>> = instance .iter() .zip(advice.iter()) @@ -524,13 +595,26 @@ where }) .collect::, _>>()?; + #[cfg(feature = "profile")] + info!("shuffle commit_product: {} ms", start.elapsed().as_millis()); + // Commit to the vanishing argument's random polynomial for blinding h(x_3) + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let vanishing = vanishing::Argument::commit(params, domain, &mut rng, transcript)?; + #[cfg(feature = "profile")] + info!("vanishing commit: {} ms", start.elapsed().as_millis()); // Obtain challenge for keeping all separate gates linearly independent + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let y: ChallengeY<_> = transcript.squeeze_challenge_scalar(); + #[cfg(feature = "profile")] + info!("y generation: {} ms", start.elapsed().as_millis()); // Calculate the advice polys + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let advice: Vec> = advice .into_iter() .map( @@ -548,8 +632,14 @@ where }, ) .collect(); - + #[cfg(feature = "profile")] + info!( + "advice langrange_to_coeff: {} ms", + start.elapsed().as_millis() + ); // Evaluate the h(X) polynomial + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let h_poly = pk.ev.evaluate_h( pk, &advice @@ -569,9 +659,15 @@ where &shuffles, &permutations, ); + #[cfg(feature = "profile")] + info!("h_poly: {} ms", start.elapsed().as_millis()); // Construct the vanishing argument's h(X) commitments + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let vanishing = vanishing.construct(params, domain, h_poly, &mut rng, transcript)?; + #[cfg(feature = "profile")] + info!("vanishing construction: {} ms", start.elapsed().as_millis()); let x: ChallengeX<_> = transcript.squeeze_challenge_scalar(); let xn = x.pow([params.n()]); @@ -598,6 +694,8 @@ where } } + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); // Compute and hash advice evals for each circuit instance for advice in advice.iter() { // Evaluate polynomials at omega^i x @@ -715,6 +813,9 @@ where // We query the h(X) polynomial at x .chain(vanishing.open(x)); + #[cfg(feature = "profile")] + info!("evaluations: {} ms", start.elapsed().as_millis()); + #[cfg(feature = "counter")] { use crate::{FFT_COUNTER, MSM_COUNTER}; @@ -727,10 +828,15 @@ where *FFT_COUNTER.lock().unwrap() = BTreeMap::new(); } + #[cfg(feature = "profile")] + let start = std::time::Instant::now(); let prover = P::new(params); - prover + let proof = prover .create_proof(rng, transcript, instances) - .map_err(|_| Error::ConstraintSystemFailure) + .map_err(|_| Error::ConstraintSystemFailure); + #[cfg(feature = "profile")] + info!("prover.create_proof : {} ms", start.elapsed().as_millis()); + proof } #[test] diff --git a/halo2_proofs/src/plonk/vanishing/prover.rs b/halo2_proofs/src/plonk/vanishing/prover.rs index 7943086826..fa6a2d9e8e 100644 --- a/halo2_proofs/src/plonk/vanishing/prover.rs +++ b/halo2_proofs/src/plonk/vanishing/prover.rs @@ -17,6 +17,9 @@ use crate::{ transcript::{EncodedChallenge, TranscriptWrite}, }; +#[cfg(feature = "icicle_gpu")] +use crate::icicle; + pub(in crate::plonk) struct Committed { random_poly: Polynomial, random_blind: Blind, @@ -126,11 +129,28 @@ impl Committed { .collect(); // Compute commitments to each h(X) piece + #[cfg(feature = "icicle_gpu")] + let mut h_commitments_projective: Vec<_>; + #[cfg(feature = "icicle_gpu")] + if std::env::var("ENABLE_ICICLE_GPU").is_ok() + && icicle::is_small_circuit(params.n() as usize) + { + h_commitments_projective = params.commit_batch(&h_pieces, &h_blinds); + } else { + h_commitments_projective = h_pieces + .iter() + .zip(h_blinds.iter()) + .map(|(h_piece, blind)| params.commit(h_piece, *blind)) + .collect(); + } + + #[cfg(not(feature = "icicle_gpu"))] let h_commitments_projective: Vec<_> = h_pieces .iter() .zip(h_blinds.iter()) .map(|(h_piece, blind)| params.commit(h_piece, *blind)) .collect(); + let mut h_commitments = vec![C::identity(); h_commitments_projective.len()]; C::Curve::batch_normalize(&h_commitments_projective, &mut h_commitments); let h_commitments = h_commitments; diff --git a/halo2_proofs/src/poly/commitment.rs b/halo2_proofs/src/poly/commitment.rs index 590767e68e..9e5d1891e7 100644 --- a/halo2_proofs/src/poly/commitment.rs +++ b/halo2_proofs/src/poly/commitment.rs @@ -66,6 +66,16 @@ pub trait Params<'params, C: CurveAffine>: Sized + Clone { r: Blind, ) -> C::CurveExt; + #[cfg(feature = "icicle_gpu")] + /// This commits to a batch of polynomials using their evaluations over the $2^k$ size + /// evaluation domain. The commitments will be blinded by the blinding factors + /// `rs`. + fn commit_lagrange_batch( + &self, + polys: &Vec>, + rs: &Vec>, + ) -> Vec; + /// Writes params to a buffer. fn write(&self, writer: &mut W) -> io::Result<()>; @@ -87,6 +97,16 @@ pub trait ParamsProver<'params, C: CurveAffine>: Params<'params, C> { fn commit(&self, poly: &Polynomial, r: Blind) -> C::CurveExt; + #[cfg(feature = "icicle_gpu")] + /// This computes a commitment to a polynomial described by the provided + /// slice of coefficients. The commitment may be blinded by the blinding + /// factor `r`. + fn commit_batch( + &self, + polys: &Vec>, + rs: &Vec>, + ) -> Vec; + /// Getter for g generators fn get_g(&self) -> &[C]; diff --git a/halo2_proofs/src/poly/ipa/commitment.rs b/halo2_proofs/src/poly/ipa/commitment.rs index 96c98d5fbc..095d780313 100644 --- a/halo2_proofs/src/poly/ipa/commitment.rs +++ b/halo2_proofs/src/poly/ipa/commitment.rs @@ -102,6 +102,31 @@ impl<'params, C: CurveAffine> Params<'params, C> for ParamsIPA { best_multiexp_cpu::(&tmp_scalars, &tmp_bases) } + #[cfg(feature = "icicle_gpu")] + /// Falls back to single CPU MSM + fn commit_lagrange_batch( + &self, + polys: &Vec>, + rs: &Vec>, + ) -> Vec { + polys + .iter() + .zip(rs.iter()) + .map(|(poly, r)| { + let mut tmp_scalars = Vec::with_capacity(poly.len() + 1); + let mut tmp_bases = Vec::with_capacity(poly.len() + 1); + + tmp_scalars.extend(poly.iter()); + tmp_scalars.push(r.0); + + tmp_bases.extend(self.g_lagrange.iter()); + tmp_bases.push(self.w); + + best_multiexp_cpu::(&tmp_scalars, &tmp_bases) + }) + .collect::>() + } + /// Writes params to a buffer. fn write(&self, writer: &mut W) -> io::Result<()> { writer.write_all(&self.k.to_le_bytes())?; @@ -222,6 +247,31 @@ impl<'params, C: CurveAffine> ParamsProver<'params, C> for ParamsIPA { best_multiexp_cpu::(&tmp_scalars, &tmp_bases) } + #[cfg(feature = "icicle_gpu")] + /// Falls back to single CPU MSM + fn commit_batch( + &self, + polys: &Vec>, + rs: &Vec>, + ) -> Vec { + polys + .iter() + .zip(rs.iter()) + .map(|(poly, r)| { + let mut tmp_scalars = Vec::with_capacity(poly.len() + 1); + let mut tmp_bases = Vec::with_capacity(poly.len() + 1); + + tmp_scalars.extend(poly.iter()); + tmp_scalars.push(r.0); + + tmp_bases.extend(self.g.iter()); + tmp_bases.push(self.w); + + best_multiexp_cpu::(&tmp_scalars, &tmp_bases) + }) + .collect::>() + } + fn get_g(&self) -> &[C] { &self.g } diff --git a/halo2_proofs/src/poly/ipa/commitment/prover.rs b/halo2_proofs/src/poly/ipa/commitment/prover.rs index 24394f1e56..ab3b895fb8 100644 --- a/halo2_proofs/src/poly/ipa/commitment/prover.rs +++ b/halo2_proofs/src/poly/ipa/commitment/prover.rs @@ -112,8 +112,10 @@ pub fn create_proof< let value_r_j = compute_inner_product(&p_prime[0..half], &b[half..]); let l_j_randomness = C::Scalar::random(&mut rng); let r_j_randomness = C::Scalar::random(&mut rng); - let l_j = l_j + &best_multiexp_cpu(&[value_l_j * &z, l_j_randomness], &[params.u, params.w]); - let r_j = r_j + &best_multiexp_cpu(&[value_r_j * &z, r_j_randomness], &[params.u, params.w]); + let l_j = + l_j + &best_multiexp_cpu(&[value_l_j * &z, l_j_randomness], &[params.u, params.w]); + let r_j = + r_j + &best_multiexp_cpu(&[value_r_j * &z, r_j_randomness], &[params.u, params.w]); let l_j = l_j.to_affine(); let r_j = r_j.to_affine(); diff --git a/halo2_proofs/src/poly/kzg/commitment.rs b/halo2_proofs/src/poly/kzg/commitment.rs index 1ce330dff1..00ab582159 100644 --- a/halo2_proofs/src/poly/kzg/commitment.rs +++ b/halo2_proofs/src/poly/kzg/commitment.rs @@ -1,7 +1,7 @@ use crate::arithmetic::{best_multiexp_cpu, g_to_lagrange, parallelize}; #[cfg(feature = "icicle_gpu")] -use crate::arithmetic::best_multiexp_gpu; +use crate::arithmetic::{best_batch_multiexp_gpu, best_multiexp_gpu}; #[cfg(feature = "icicle_gpu")] use crate::icicle; @@ -17,7 +17,7 @@ use rand_core::{OsRng, RngCore}; use std::fmt::Debug; use std::marker::PhantomData; -use std::{io, env}; +use std::{env, io}; use super::msm::MSMKZG; @@ -149,7 +149,6 @@ where g2: E::G2Affine, s_g2: E::G2Affine, ) -> Self { - // let g_lagrange = if let Some(g_l) = g_lagrange { // g_l // } else { @@ -344,7 +343,7 @@ where assert!(bases.len() >= size); #[cfg(feature = "icicle_gpu")] - if env::var("ENABLE_ICICLE_GPU").is_ok() && !icicle::should_use_cpu_msm(size) { + if env::var("ENABLE_ICICLE_GPU").is_ok() && !icicle::is_small_circuit(size) { best_multiexp_gpu::(&scalars, true) } else { best_multiexp_cpu(&scalars, &bases[0..size]) @@ -354,6 +353,29 @@ where best_multiexp_cpu(&scalars, &bases[0..size]) } + #[cfg(feature = "icicle_gpu")] + fn commit_lagrange_batch( + &self, + polys: &Vec>, + _: &Vec>, + ) -> Vec { + use log::info; + + let batch_size = polys.len(); + let size = polys[0].len(); + let mut scalars = Vec::with_capacity(size * batch_size); + for poly in polys { + scalars.extend(poly.iter()); + } + + info!( + "Running batch icicle with size {} and batch_size {}", + size, batch_size + ); + + best_batch_multiexp_gpu::(&scalars, &self.g_lagrange, batch_size) + } + /// Writes params to a buffer. fn write(&self, writer: &mut W) -> io::Result<()> { self.write_custom(writer, SerdeFormat::RawBytes) @@ -397,7 +419,7 @@ where assert!(bases.len() >= size); #[cfg(feature = "icicle_gpu")] - if env::var("ENABLE_ICICLE_GPU").is_ok() && !icicle::should_use_cpu_msm(size) { + if env::var("ENABLE_ICICLE_GPU").is_ok() && !icicle::is_small_circuit(size) { best_multiexp_gpu::(&scalars, false) } else { best_multiexp_cpu(&scalars, &bases[0..size]) @@ -407,6 +429,29 @@ where best_multiexp_cpu(&scalars, &bases[0..size]) } + #[cfg(feature = "icicle_gpu")] + fn commit_batch( + &self, + polys: &Vec>, + rs: &Vec>, + ) -> Vec { + use log::info; + + let batch_size = polys.len(); + let size = polys[0].len(); + let mut scalars = Vec::with_capacity(size * batch_size); + for poly in polys { + scalars.extend(poly.iter()); + } + + info!( + "Running batch icicle with size {} and batch_size {}", + size, batch_size + ); + + best_batch_multiexp_gpu::(&scalars, &self.g, batch_size) + } + fn get_g(&self) -> &[E::G1Affine] { &self.g }