From 33d0f7c2e012ab0381eff5e224300cc58a2d9798 Mon Sep 17 00:00:00 2001 From: Pedro Alves Date: Mon, 19 Aug 2024 12:44:29 -0300 Subject: [PATCH] fix(gpu): fix compression benchmarking --- tfhe/benches/integer/bench.rs | 131 +----------------- .../integer/glwe_packing_compression.rs | 91 ++++++++++++ 2 files changed, 95 insertions(+), 127 deletions(-) diff --git a/tfhe/benches/integer/bench.rs b/tfhe/benches/integer/bench.rs index c238ad7126..6b8b7c1799 100644 --- a/tfhe/benches/integer/bench.rs +++ b/tfhe/benches/integer/bench.rs @@ -1126,15 +1126,11 @@ define_server_key_bench_default_fn!( #[cfg(feature = "gpu")] mod cuda { use super::*; - use criterion::{black_box, criterion_group}; + use criterion::criterion_group; use tfhe::core_crypto::gpu::CudaStreams; use tfhe::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock; - use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder; - use tfhe::integer::gpu::ciphertext::{CudaRadixCiphertext, CudaUnsignedRadixCiphertext}; - use tfhe::integer::gpu::gen_keys_radix_gpu; + use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext; use tfhe::integer::gpu::server_key::CudaServerKey; - use tfhe::shortint::parameters::list_compression::COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; - use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; fn bench_cuda_server_key_unary_function_clean_inputs( c: &mut Criterion, @@ -1404,121 +1400,6 @@ mod cuda { bench_group.finish() } - fn cuda_compress(c: &mut Criterion) { - let bench_name = "integer::cuda::compression"; - let mut bench_group = c.benchmark_group(bench_name); - bench_group - .sample_size(15) - .measurement_time(std::time::Duration::from_secs(30)); - - let stream = CudaStreams::new_multi_gpu(); - - let param = PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; - let comp_param = COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; - - let log_message_modulus = param.message_modulus.0.ilog2() as usize; - - for num_bits in [ - 8, - 16, - 32, - 64, - 128, - 256, - comp_param.lwe_per_glwe.0 * log_message_modulus, - ] { - assert_eq!(num_bits % log_message_modulus, 0); - let num_blocks = num_bits / log_message_modulus; - - // Generate private compression key - let (cks, _) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); - let private_compression_key = cks.new_compression_private_key(comp_param); - - // Generate and convert compression keys - let (radix_cks, _) = gen_keys_radix_gpu(param, num_blocks, &stream); - let (compressed_compression_key, _) = - radix_cks.new_compressed_compression_decompression_keys(&private_compression_key); - let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&stream); - - // Encrypt - let ct = cks.encrypt_radix(0_u32, num_blocks); - let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream); - - // Benchmark - let mut builder = CudaCompressedCiphertextListBuilder::new(); - - builder.push(d_ct, &stream); - - bench_group.bench_function(format!("compress_u{num_bits}"), |b| { - b.iter(|| { - let compressed = builder.build(&cuda_compression_key, &stream); - - _ = black_box(compressed); - }) - }); - } - } - - fn cuda_decompress(c: &mut Criterion) { - let bench_name = "integer::cuda::compression"; - let mut bench_group = c.benchmark_group(bench_name); - bench_group - .sample_size(15) - .measurement_time(std::time::Duration::from_secs(30)); - - let stream = CudaStreams::new_multi_gpu(); - - let param = PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; - let comp_param = COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; - - let log_message_modulus = param.message_modulus.0.ilog2() as usize; - - for num_bits in [ - 8, - 16, - 32, - 64, - 128, - 256, - comp_param.lwe_per_glwe.0 * log_message_modulus, - ] { - assert_eq!(num_bits % log_message_modulus, 0); - let num_blocks = num_bits / log_message_modulus; - - // Generate private compression key - let (cks, _) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); - let private_compression_key = cks.new_compression_private_key(comp_param); - - // Generate and convert compression keys - let (radix_cks, _) = gen_keys_radix_gpu(param, num_blocks, &stream); - let (compressed_compression_key, compressed_decompression_key) = - radix_cks.new_compressed_compression_decompression_keys(&private_compression_key); - let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&stream); - let cuda_decompression_key = - compressed_decompression_key.decompress_to_cuda(radix_cks.parameters(), &stream); - - // Encrypt - let ct = cks.encrypt_radix(0_u32, num_blocks); - let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream); - - // Benchmark - let mut builder = CudaCompressedCiphertextListBuilder::new(); - - builder.push(d_ct, &stream); - - let compressed = builder.build(&cuda_compression_key, &stream); - - bench_group.bench_function(format!("decompress_u{num_bits}"), |b| { - b.iter(|| { - let unpacked: CudaRadixCiphertext = - compressed.get(0, &cuda_decompression_key, &stream); - - _ = black_box(unpacked); - }) - }); - } - } - macro_rules! define_cuda_server_key_bench_clean_input_unary_fn ( (method_name: $server_key_method:ident, display_name:$name:ident) => { ::paste::paste!{ @@ -2171,8 +2052,6 @@ mod cuda { cuda_unsigned_overflowing_scalar_add, ); - criterion_group!(cuda_compress_ops, cuda_compress, cuda_decompress); - fn cuda_bench_server_key_cast_function( c: &mut Criterion, bench_name: &str, @@ -2263,8 +2142,8 @@ mod cuda { #[cfg(feature = "gpu")] use cuda::{ - cuda_cast_ops, cuda_compress_ops, default_cuda_dedup_ops, default_cuda_ops, - default_scalar_cuda_ops, unchecked_cuda_ops, unchecked_scalar_cuda_ops, + cuda_cast_ops, default_cuda_dedup_ops, default_cuda_ops, default_scalar_cuda_ops, + unchecked_cuda_ops, unchecked_scalar_cuda_ops, }; criterion_group!( @@ -2616,13 +2495,11 @@ criterion_group!(oprf, oprf::unsigned_oprf); fn go_through_gpu_bench_groups(val: &str) { match val.to_lowercase().as_str() { "default" => { - cuda_compress_ops(); default_cuda_ops(); default_scalar_cuda_ops(); cuda_cast_ops(); } "fast_default" => { - cuda_compress_ops(); default_cuda_dedup_ops(); } "unchecked" => { diff --git a/tfhe/benches/integer/glwe_packing_compression.rs b/tfhe/benches/integer/glwe_packing_compression.rs index 16673ef1c7..992fdb5652 100644 --- a/tfhe/benches/integer/glwe_packing_compression.rs +++ b/tfhe/benches/integer/glwe_packing_compression.rs @@ -4,6 +4,18 @@ use tfhe::integer::{ClientKey, RadixCiphertext}; use tfhe::shortint::parameters::list_compression::COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; +#[cfg(feature = "gpu")] +use tfhe::core_crypto::gpu::CudaStreams; + +#[cfg(feature = "gpu")] +use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder; + +#[cfg(feature = "gpu")] +use tfhe::integer::gpu::ciphertext::{CudaRadixCiphertext, CudaUnsignedRadixCiphertext}; + +#[cfg(feature = "gpu")] +use tfhe::integer::gpu::gen_keys_radix_gpu; + fn cpu_glwe_packing(c: &mut Criterion) { let param = PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; @@ -12,6 +24,9 @@ fn cpu_glwe_packing(c: &mut Criterion) { let bench_name = "integer_packing_compression"; let mut bench_group = c.benchmark_group(bench_name); + bench_group + .sample_size(15) + .measurement_time(std::time::Duration::from_secs(30)); let cks = ClientKey::new(param); @@ -72,9 +87,85 @@ fn cpu_glwe_packing(c: &mut Criterion) { } } +#[cfg(feature = "gpu")] +fn gpu_glwe_packing(c: &mut Criterion) { + let bench_name = "integer_cuda_packing_compression"; + let mut bench_group = c.benchmark_group(bench_name); + bench_group + .sample_size(15) + .measurement_time(std::time::Duration::from_secs(30)); + + let stream = CudaStreams::new_multi_gpu(); + + let param = PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + let comp_param = COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + + let log_message_modulus = param.message_modulus.0.ilog2() as usize; + + for bit_size in [ + 8, + 16, + 32, + 64, + 128, + 256, + comp_param.lwe_per_glwe.0 * log_message_modulus, + ] { + assert_eq!(bit_size % log_message_modulus, 0); + let num_blocks = bit_size / log_message_modulus; + + // Generate private compression key + let cks = ClientKey::new(param); + let private_compression_key = cks.new_compression_private_key(comp_param); + + // Generate and convert compression keys + let (radix_cks, _) = gen_keys_radix_gpu(param, num_blocks, &stream); + let (compressed_compression_key, compressed_decompression_key) = + radix_cks.new_compressed_compression_decompression_keys(&private_compression_key); + let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&stream); + let cuda_decompression_key = + compressed_decompression_key.decompress_to_cuda(radix_cks.parameters(), &stream); + + // Encrypt + let ct = cks.encrypt_radix(0_u32, num_blocks); + let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream); + + // Benchmark + let mut builder = CudaCompressedCiphertextListBuilder::new(); + + builder.push(d_ct, &stream); + + let bench_id = format!("pack_u{bit_size}"); + bench_group.bench_function(&bench_id, |b| { + b.iter(|| { + let compressed = builder.build(&cuda_compression_key, &stream); + + _ = black_box(compressed); + }) + }); + + let compressed = builder.build(&cuda_compression_key, &stream); + + let bench_id = format!("unpack_u{bit_size}"); + bench_group.bench_function(&bench_id, |b| { + b.iter(|| { + let unpacked: CudaRadixCiphertext = + compressed.get(0, &cuda_decompression_key, &stream); + + _ = black_box(unpacked); + }) + }); + } +} + +#[cfg(feature = "gpu")] +criterion_group!(gpu_glwe_packing2, gpu_glwe_packing); criterion_group!(cpu_glwe_packing2, cpu_glwe_packing); fn main() { + #[cfg(feature = "gpu")] + gpu_glwe_packing2(); + #[cfg(not(feature = "gpu"))] cpu_glwe_packing2(); Criterion::default().configure_from_args().final_summary();