chore(gpu): panic when polynomial size is not supported

zama-ai · Feb 20, 2024 · 6213579 · 6213579
1 parent 41c38d1
commit 6213579
Show file tree

Hide file tree

Showing 10 changed files with 77 additions and 116 deletions.
diff --git a/backends/tfhe-cuda-backend/cuda/src/device.cu b/backends/tfhe-cuda-backend/cuda/src/device.cu
@@ -56,7 +56,7 @@ void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
   if (size > free_mem) {
     PANIC("Cuda error: not enough memory on device. "
           "Available: %zu vs Requested: %lu",
-          free_mem, size);
+          free_mem, size)
   }
 }
 
@@ -79,7 +79,7 @@ void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
   cudaPointerAttributes attr;
   check_cuda_error(cudaPointerGetAttributes(&attr, dest));
   if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid device pointer in async copy to GPU.");
+    PANIC("Cuda error: invalid device pointer in async copy to GPU.")
   }
 
   check_cuda_error(cudaSetDevice(stream->gpu_index));
@@ -94,7 +94,7 @@ void cuda_memcpy_to_gpu(void *dest, void *src, uint64_t size) {
   cudaPointerAttributes attr;
   check_cuda_error(cudaPointerGetAttributes(&attr, dest));
   if (attr.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid device pointer in copy to GPU.");
+    PANIC("Cuda error: invalid device pointer in copy to GPU.")
   }
   check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyHostToDevice));
 }
@@ -106,7 +106,7 @@ void cuda_memcpy_to_cpu(void *dest, void *src, uint64_t size) {
   cudaPointerAttributes attr;
   check_cuda_error(cudaPointerGetAttributes(&attr, src));
   if (attr.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid device pointer in copy to CPU.");
+    PANIC("Cuda error: invalid device pointer in copy to CPU.")
   }
   check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToHost));
 }
@@ -120,16 +120,16 @@ void cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size,
   check_cuda_error(cudaPointerGetAttributes(&attr_dest, dest));
   if (attr_dest.device != stream->gpu_index &&
       attr_dest.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid dest device pointer in copy from GPU to GPU.");
+    PANIC("Cuda error: invalid dest device pointer in copy from GPU to GPU.")
   }
   cudaPointerAttributes attr_src;
   check_cuda_error(cudaPointerGetAttributes(&attr_src, src));
   if (attr_src.device != stream->gpu_index &&
       attr_src.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.");
+    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
   }
   if (attr_src.device != attr_dest.device) {
-    PANIC("Cuda error: different devices specified in copy from GPU to GPU.");
+    PANIC("Cuda error: different devices specified in copy from GPU to GPU.")
   }
 
   check_cuda_error(cudaSetDevice(stream->gpu_index));
@@ -150,7 +150,7 @@ void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
   cudaPointerAttributes attr;
   check_cuda_error(cudaPointerGetAttributes(&attr, dest));
   if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid dest device pointer in cuda memset.");
+    PANIC("Cuda error: invalid dest device pointer in cuda memset.")
   }
   check_cuda_error(cudaSetDevice(stream->gpu_index));
   check_cuda_error(cudaMemsetAsync(dest, val, size, stream->stream));
@@ -169,7 +169,7 @@ void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value,
   cudaPointerAttributes attr;
   check_cuda_error(cudaPointerGetAttributes(&attr, d_array));
   if (attr.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid dest device pointer in cuda set value.");
+    PANIC("Cuda error: invalid dest device pointer in cuda set value.")
   }
   int block_size = 256;
   int num_blocks = (n + block_size - 1) / block_size;
@@ -194,7 +194,7 @@ void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
   cudaPointerAttributes attr;
   check_cuda_error(cudaPointerGetAttributes(&attr, src));
   if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid src device pointer in copy to CPU async.");
+    PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
   }
 
   check_cuda_error(cudaSetDevice(stream->gpu_index));

diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu
@@ -70,7 +70,7 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
         static_cast<uint64_t *>(ksk), lwe_ciphertext_count);
     break;
   default:
-    PANIC("Cuda error: integer operation not supported");
+    PANIC("Cuda error: integer operation not supported")
   }
 }
 

diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cu b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cu
@@ -59,7 +59,9 @@ void cuda_full_propagation_64_inplace(
         ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks);
     break;
   default:
-    break;
+    PANIC("Cuda error (full propagation inplace): unsupported polynomial size. "
+          "Supported N's are powers of two"
+          " in the interval [256..16384].")
   }
 }
 

diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cu b/backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cu
@@ -24,7 +24,8 @@ void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
         allocate_gpu_memory);
     break;
   default:
-    break;
+    PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
+          "Only N = 2048 is supported")
   }
 }
 
@@ -75,7 +76,8 @@ void cuda_integer_mult_radix_ciphertext_kb_64(
         num_blocks);
     break;
   default:
-    break;
+    PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
+          "Only N = 2048 is supported")
   }
 }
 

diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_comparison.cu b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_comparison.cu
@@ -36,6 +36,6 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
         static_cast<uint64_t *>(ksk), lwe_ciphertext_count, num_scalar_blocks);
     break;
   default:
-    PANIC("Cuda error: integer operation not supported");
+    PANIC("Cuda error: integer operation not supported")
   }
 }
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_comparison.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_comparison.cuh
@@ -363,7 +363,7 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
           msb_lut = mem_ptr->eq_buffer->is_non_zero_lut;
           break;
         default:
-          PANIC("Cuda error: integer operation not supported");
+          PANIC("Cuda error: integer operation not supported")
         }
 
         host_compare_with_zero_equality(msb_stream, lwe_array_msb_out, msb,
@@ -388,7 +388,7 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
         num_halved_scalar_blocks + (num_msb_radix_blocks > 0));
     break;
   default:
-    PANIC("Cuda error: integer operation not supported");
+    PANIC("Cuda error: integer operation not supported")
   }
 
   // The result will be in the two first block. Everything else is

diff --git a/backends/tfhe-cuda-backend/cuda/src/pbs/bootstrap.cuh b/backends/tfhe-cuda-backend/cuda/src/pbs/bootstrap.cuh
@@ -20,7 +20,7 @@ void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out,
     // 32 bits
     switch (pbs_type) {
     case MULTI_BIT:
-      PANIC("Error: 32-bit multibit PBS is not supported.\n");
+      PANIC("Error: 32-bit multibit PBS is not supported.\n")
     case LOW_LAT:
       cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
           stream, lwe_array_out, lwe_output_indexes, lut_vector,
@@ -69,12 +69,12 @@ void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out,
           num_luts, lwe_idx, max_shared_memory);
       break;
     default:
-      PANIC("Error: unsupported cuda PBS type.");
+      PANIC("Error: unsupported cuda PBS type.")
     }
     break;
   default:
     PANIC("Cuda error: unsupported modulus size: only 32 and 64 bit integer "
-          "moduli are supported.");
+          "moduli are supported.")
   }
 }
 
@@ -91,7 +91,7 @@ void execute_scratch_pbs(cuda_stream_t *stream, int8_t **pbs_buffer,
     // 32 bits
     switch (pbs_type) {
     case MULTI_BIT:
-      PANIC("Error: 32-bit multibit PBS is not supported.\n");
+      PANIC("Error: 32-bit multibit PBS is not supported.\n")
     case LOW_LAT:
       scratch_cuda_bootstrap_low_latency_32(
           stream, pbs_buffer, glwe_dimension, polynomial_size, level_count,
@@ -103,7 +103,7 @@ void execute_scratch_pbs(cuda_stream_t *stream, int8_t **pbs_buffer,
           input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
       break;
     default:
-      PANIC("Error: unsupported cuda PBS type.");
+      PANIC("Error: unsupported cuda PBS type.")
     }
     break;
   case sizeof(uint64_t):
@@ -126,11 +126,11 @@ void execute_scratch_pbs(cuda_stream_t *stream, int8_t **pbs_buffer,
           input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
       break;
     default:
-      PANIC("Error: unsupported cuda PBS type.");
+      PANIC("Error: unsupported cuda PBS type.")
     }
     break;
   default:
     PANIC("Cuda error: unsupported modulus size: only 32 and 64 bit integer "
-          "moduli are supported.");
+          "moduli are supported.")
   }
 }
diff --git a/backends/tfhe-cuda-backend/cuda/src/pbs/bootstrap_amortized.cu b/backends/tfhe-cuda-backend/cuda/src/pbs/bootstrap_amortized.cu
@@ -11,28 +11,6 @@ uint64_t get_buffer_size_bootstrap_amortized_64(
       max_shared_memory);
 }
 
-/*
- * Runs standard checks to validate the inputs
- */
-void checks_fast_bootstrap_amortized(int polynomial_size) {
-  assert(
-      ("Error (GPU amortized PBS): polynomial size should be one of 256, 512, "
-       "1024, 2048, 4096, 8192, 16384",
-       polynomial_size == 256 || polynomial_size == 512 ||
-           polynomial_size == 1024 || polynomial_size == 2048 ||
-           polynomial_size == 4096 || polynomial_size == 8192 ||
-           polynomial_size == 16384));
-}
-
-/*
- * Runs standard checks to validate the inputs
- */
-void checks_bootstrap_amortized(int nbits, int base_log, int polynomial_size) {
-  assert(("Error (GPU amortized PBS): base log should be <= nbits",
-          base_log <= nbits));
-  checks_fast_bootstrap_amortized(polynomial_size);
-}
-
 /*
  * This scratch function allocates the necessary amount of data on the GPU for
  * the amortized PBS on 32 bits inputs, into `pbs_buffer`. It also
@@ -43,7 +21,6 @@ void scratch_cuda_bootstrap_amortized_32(
     cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
     uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
     uint32_t max_shared_memory, bool allocate_gpu_memory) {
-  checks_fast_bootstrap_amortized(polynomial_size);
 
   switch (polynomial_size) {
   case 256:
@@ -82,7 +59,9 @@ void scratch_cuda_bootstrap_amortized_32(
         input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
     break;
   default:
-    break;
+    PANIC("Cuda error (amortized PBS): unsupported polynomial size. Supported "
+          "N's are powers of two"
+          " in the interval [256..16384].")
   }
 }
 
@@ -96,7 +75,6 @@ void scratch_cuda_bootstrap_amortized_64(
     cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
     uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
     uint32_t max_shared_memory, bool allocate_gpu_memory) {
-  checks_fast_bootstrap_amortized(polynomial_size);
 
   switch (polynomial_size) {
   case 256:
@@ -135,7 +113,9 @@ void scratch_cuda_bootstrap_amortized_64(
         input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
     break;
   default:
-    break;
+    PANIC("Cuda error (amortized PBS): unsupported polynomial size. Supported "
+          "N's are powers of two"
+          " in the interval [256..16384].")
   }
 }
 
@@ -150,7 +130,9 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
     uint32_t base_log, uint32_t level_count, uint32_t num_samples,
     uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory) {
 
-  checks_bootstrap_amortized(32, base_log, polynomial_size);
+  if (base_log > 32)
+    PANIC("Cuda error (amortized PBS): base log should be > number of bits in "
+          "the ciphertext representation (32)");
 
   switch (polynomial_size) {
   case 256:
@@ -217,7 +199,9 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
         max_shared_memory);
     break;
   default:
-    break;
+    PANIC("Cuda error (amortized PBS): unsupported polynomial size. Supported "
+          "N's are powers of two"
+          " in the interval [256..16384].")
   }
 }
 
@@ -294,7 +278,9 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
     uint32_t base_log, uint32_t level_count, uint32_t num_samples,
     uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory) {
 
-  checks_bootstrap_amortized(64, base_log, polynomial_size);
+  if (base_log > 64)
+    PANIC("Cuda error (amortized PBS): base log should be > number of bits in "
+          "the ciphertext representation (64)");
 
   switch (polynomial_size) {
   case 256:
@@ -361,7 +347,9 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
         max_shared_memory);
     break;
   default:
-    break;
+    PANIC("Cuda error (amortized PBS): unsupported polynomial size. Supported "
+          "N's are powers of two"
+          " in the interval [256..16384].")
   }
 }