fix: fix compression code for GPU which assumed a CPU data layout

- The CPU data layout is truncated to store only the relevant bodies (i.e. empty bodies are assumed to be 0), but the GPU CUDA code only manages full GLWEs. To fix that, we manage the data layout during conversions so that behavior is consistent when copying the list to/from CPU/GPU. The compression code has been fixed on the CPU side so that the output has the length expected by the CUDA code.
zama-ai · Feb 13, 2025 · 8756869 · 8756869
1 parent 9e4b585
commit 8756869
Show file tree

Hide file tree

Showing 2 changed files with 23 additions and 9 deletions.
diff --git a/tfhe/src/integer/gpu/ciphertext/compressed_ciphertext_list.rs b/tfhe/src/integer/gpu/ciphertext/compressed_ciphertext_list.rs
@@ -2,7 +2,7 @@ use crate::core_crypto::entities::packed_integers::PackedIntegers;
 use crate::core_crypto::gpu::vec::{CudaVec, GpuIndex};
 use crate::core_crypto::gpu::CudaStreams;
 use crate::core_crypto::prelude::compressed_modulus_switched_glwe_ciphertext::CompressedModulusSwitchedGlweCiphertext;
-use crate::core_crypto::prelude::{CiphertextCount, LweCiphertextCount};
+use crate::core_crypto::prelude::{glwe_ciphertext_size, CiphertextCount, LweCiphertextCount};
 use crate::integer::ciphertext::{CompressedCiphertextList, DataKind};
 use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
 use crate::integer::gpu::ciphertext::{
@@ -333,11 +333,25 @@ impl CompressedCiphertextList {
         let message_modulus = self.packed_list.message_modulus;
         let carry_modulus = self.packed_list.carry_modulus;
 
-        let flat_cpu_data = modulus_switched_glwe_ciphertext_list
+        let mut flat_cpu_data = modulus_switched_glwe_ciphertext_list
             .iter()
             .flat_map(|ct| ct.packed_integers.packed_coeffs.clone())
             .collect_vec();
 
+        let glwe_ciphertext_count = self.packed_list.modulus_switched_glwe_ciphertext_list.len();
+        let glwe_size = self.packed_list.modulus_switched_glwe_ciphertext_list[0]
+            .glwe_dimension()
+            .to_glwe_size();
+        let polynomial_size =
+            self.packed_list.modulus_switched_glwe_ciphertext_list[0].polynomial_size();
+
+        // FIXME: have a more precise memory handling, this is too long and should be "just" the
+        // original flat_cpu_data.len()
+        let unpacked_glwe_ciphertext_flat_len =
+            glwe_ciphertext_count * glwe_ciphertext_size(glwe_size, polynomial_size);
+
+        flat_cpu_data.resize(unpacked_glwe_ciphertext_flat_len, 0u64);
+
         let flat_gpu_data = unsafe {
             let v = CudaVec::from_cpu_async(flat_cpu_data.as_slice(), streams, 0);
             streams.synchronize();

diff --git a/tfhe/src/integer/gpu/list_compression/server_keys.rs b/tfhe/src/integer/gpu/list_compression/server_keys.rs
@@ -3,8 +3,8 @@ use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
 use crate::core_crypto::gpu::vec::CudaVec;
 use crate::core_crypto::gpu::CudaStreams;
 use crate::core_crypto::prelude::{
-    glwe_ciphertext_size, glwe_mask_size, CiphertextModulus, CiphertextModulusLog,
-    GlweCiphertextCount, LweCiphertextCount, PolynomialSize,
+    glwe_ciphertext_size, CiphertextModulus, CiphertextModulusLog, GlweCiphertextCount,
+    LweCiphertextCount, PolynomialSize,
 };
 use crate::integer::ciphertext::DataKind;
 use crate::integer::compression_keys::CompressionKey;
@@ -173,12 +173,12 @@ impl CudaCompressionKey {
             .sum();
 
         let num_glwes = num_lwes.div_ceil(self.lwe_per_glwe.0);
-        let glwe_mask_size = glwe_mask_size(
-            compressed_glwe_size.to_glwe_dimension(),
-            compressed_polynomial_size,
-        );
+        let glwe_ciphertext_size =
+            glwe_ciphertext_size(compressed_glwe_size, compressed_polynomial_size);
         // The number of u64 (both mask and bodies)
-        let uncompressed_len = num_glwes * glwe_mask_size + num_lwes;
+        // FIXME: have a more precise memory handling, this is too long and should be
+        // num_glwes * glwe_mask_size + num_lwes
+        let uncompressed_len = num_glwes * glwe_ciphertext_size;
         let number_bits_to_pack = uncompressed_len * self.storage_log_modulus.0;
         let compressed_len = number_bits_to_pack.div_ceil(u64::BITS as usize);
         let mut packed_glwe_list = CudaVec::new(compressed_len, streams, 0);