Skip to content

Commit

Permalink
chore(gpu): panic when polynomial size is not supported
Browse files Browse the repository at this point in the history
  • Loading branch information
agnesLeroy committed Feb 20, 2024
1 parent 41c38d1 commit 6213579
Show file tree
Hide file tree
Showing 10 changed files with 77 additions and 116 deletions.
20 changes: 10 additions & 10 deletions backends/tfhe-cuda-backend/cuda/src/device.cu
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
if (size > free_mem) {
PANIC("Cuda error: not enough memory on device. "
"Available: %zu vs Requested: %lu",
free_mem, size);
free_mem, size)
}
}

Expand All @@ -79,7 +79,7 @@ void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
cudaPointerAttributes attr;
check_cuda_error(cudaPointerGetAttributes(&attr, dest));
if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid device pointer in async copy to GPU.");
PANIC("Cuda error: invalid device pointer in async copy to GPU.")
}

check_cuda_error(cudaSetDevice(stream->gpu_index));
Expand All @@ -94,7 +94,7 @@ void cuda_memcpy_to_gpu(void *dest, void *src, uint64_t size) {
cudaPointerAttributes attr;
check_cuda_error(cudaPointerGetAttributes(&attr, dest));
if (attr.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid device pointer in copy to GPU.");
PANIC("Cuda error: invalid device pointer in copy to GPU.")
}
check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyHostToDevice));
}
Expand All @@ -106,7 +106,7 @@ void cuda_memcpy_to_cpu(void *dest, void *src, uint64_t size) {
cudaPointerAttributes attr;
check_cuda_error(cudaPointerGetAttributes(&attr, src));
if (attr.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid device pointer in copy to CPU.");
PANIC("Cuda error: invalid device pointer in copy to CPU.")
}
check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToHost));
}
Expand All @@ -120,16 +120,16 @@ void cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size,
check_cuda_error(cudaPointerGetAttributes(&attr_dest, dest));
if (attr_dest.device != stream->gpu_index &&
attr_dest.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid dest device pointer in copy from GPU to GPU.");
PANIC("Cuda error: invalid dest device pointer in copy from GPU to GPU.")
}
cudaPointerAttributes attr_src;
check_cuda_error(cudaPointerGetAttributes(&attr_src, src));
if (attr_src.device != stream->gpu_index &&
attr_src.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.");
PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
}
if (attr_src.device != attr_dest.device) {
PANIC("Cuda error: different devices specified in copy from GPU to GPU.");
PANIC("Cuda error: different devices specified in copy from GPU to GPU.")
}

check_cuda_error(cudaSetDevice(stream->gpu_index));
Expand All @@ -150,7 +150,7 @@ void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
cudaPointerAttributes attr;
check_cuda_error(cudaPointerGetAttributes(&attr, dest));
if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid dest device pointer in cuda memset.");
PANIC("Cuda error: invalid dest device pointer in cuda memset.")
}
check_cuda_error(cudaSetDevice(stream->gpu_index));
check_cuda_error(cudaMemsetAsync(dest, val, size, stream->stream));
Expand All @@ -169,7 +169,7 @@ void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value,
cudaPointerAttributes attr;
check_cuda_error(cudaPointerGetAttributes(&attr, d_array));
if (attr.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid dest device pointer in cuda set value.");
PANIC("Cuda error: invalid dest device pointer in cuda set value.")
}
int block_size = 256;
int num_blocks = (n + block_size - 1) / block_size;
Expand All @@ -194,7 +194,7 @@ void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
cudaPointerAttributes attr;
check_cuda_error(cudaPointerGetAttributes(&attr, src));
if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid src device pointer in copy to CPU async.");
PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
}

check_cuda_error(cudaSetDevice(stream->gpu_index));
Expand Down
2 changes: 1 addition & 1 deletion backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
static_cast<uint64_t *>(ksk), lwe_ciphertext_count);
break;
default:
PANIC("Cuda error: integer operation not supported");
PANIC("Cuda error: integer operation not supported")
}
}

Expand Down
4 changes: 3 additions & 1 deletion backends/tfhe-cuda-backend/cuda/src/integer/integer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ void cuda_full_propagation_64_inplace(
ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks);
break;
default:
break;
PANIC("Cuda error (full propagation inplace): unsupported polynomial size. "
"Supported N's are powers of two"
" in the interval [256..16384].")
}
}

Expand Down
6 changes: 4 additions & 2 deletions backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
allocate_gpu_memory);
break;
default:
break;
PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
"Only N = 2048 is supported")
}
}

Expand Down Expand Up @@ -75,7 +76,8 @@ void cuda_integer_mult_radix_ciphertext_kb_64(
num_blocks);
break;
default:
break;
PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
"Only N = 2048 is supported")
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
static_cast<uint64_t *>(ksk), lwe_ciphertext_count, num_scalar_blocks);
break;
default:
PANIC("Cuda error: integer operation not supported");
PANIC("Cuda error: integer operation not supported")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
msb_lut = mem_ptr->eq_buffer->is_non_zero_lut;
break;
default:
PANIC("Cuda error: integer operation not supported");
PANIC("Cuda error: integer operation not supported")
}

host_compare_with_zero_equality(msb_stream, lwe_array_msb_out, msb,
Expand All @@ -388,7 +388,7 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
num_halved_scalar_blocks + (num_msb_radix_blocks > 0));
break;
default:
PANIC("Cuda error: integer operation not supported");
PANIC("Cuda error: integer operation not supported")
}

// The result will be in the two first block. Everything else is
Expand Down
14 changes: 7 additions & 7 deletions backends/tfhe-cuda-backend/cuda/src/pbs/bootstrap.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out,
// 32 bits
switch (pbs_type) {
case MULTI_BIT:
PANIC("Error: 32-bit multibit PBS is not supported.\n");
PANIC("Error: 32-bit multibit PBS is not supported.\n")
case LOW_LAT:
cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
stream, lwe_array_out, lwe_output_indexes, lut_vector,
Expand Down Expand Up @@ -69,12 +69,12 @@ void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out,
num_luts, lwe_idx, max_shared_memory);
break;
default:
PANIC("Error: unsupported cuda PBS type.");
PANIC("Error: unsupported cuda PBS type.")
}
break;
default:
PANIC("Cuda error: unsupported modulus size: only 32 and 64 bit integer "
"moduli are supported.");
"moduli are supported.")
}
}

Expand All @@ -91,7 +91,7 @@ void execute_scratch_pbs(cuda_stream_t *stream, int8_t **pbs_buffer,
// 32 bits
switch (pbs_type) {
case MULTI_BIT:
PANIC("Error: 32-bit multibit PBS is not supported.\n");
PANIC("Error: 32-bit multibit PBS is not supported.\n")
case LOW_LAT:
scratch_cuda_bootstrap_low_latency_32(
stream, pbs_buffer, glwe_dimension, polynomial_size, level_count,
Expand All @@ -103,7 +103,7 @@ void execute_scratch_pbs(cuda_stream_t *stream, int8_t **pbs_buffer,
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
break;
default:
PANIC("Error: unsupported cuda PBS type.");
PANIC("Error: unsupported cuda PBS type.")
}
break;
case sizeof(uint64_t):
Expand All @@ -126,11 +126,11 @@ void execute_scratch_pbs(cuda_stream_t *stream, int8_t **pbs_buffer,
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
break;
default:
PANIC("Error: unsupported cuda PBS type.");
PANIC("Error: unsupported cuda PBS type.")
}
break;
default:
PANIC("Cuda error: unsupported modulus size: only 32 and 64 bit integer "
"moduli are supported.");
"moduli are supported.")
}
}
48 changes: 18 additions & 30 deletions backends/tfhe-cuda-backend/cuda/src/pbs/bootstrap_amortized.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,6 @@ uint64_t get_buffer_size_bootstrap_amortized_64(
max_shared_memory);
}

/*
* Runs standard checks to validate the inputs
*/
void checks_fast_bootstrap_amortized(int polynomial_size) {
assert(
("Error (GPU amortized PBS): polynomial size should be one of 256, 512, "
"1024, 2048, 4096, 8192, 16384",
polynomial_size == 256 || polynomial_size == 512 ||
polynomial_size == 1024 || polynomial_size == 2048 ||
polynomial_size == 4096 || polynomial_size == 8192 ||
polynomial_size == 16384));
}

/*
* Runs standard checks to validate the inputs
*/
void checks_bootstrap_amortized(int nbits, int base_log, int polynomial_size) {
assert(("Error (GPU amortized PBS): base log should be <= nbits",
base_log <= nbits));
checks_fast_bootstrap_amortized(polynomial_size);
}

/*
* This scratch function allocates the necessary amount of data on the GPU for
* the amortized PBS on 32 bits inputs, into `pbs_buffer`. It also
Expand All @@ -43,7 +21,6 @@ void scratch_cuda_bootstrap_amortized_32(
cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
uint32_t max_shared_memory, bool allocate_gpu_memory) {
checks_fast_bootstrap_amortized(polynomial_size);

switch (polynomial_size) {
case 256:
Expand Down Expand Up @@ -82,7 +59,9 @@ void scratch_cuda_bootstrap_amortized_32(
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
break;
default:
break;
PANIC("Cuda error (amortized PBS): unsupported polynomial size. Supported "
"N's are powers of two"
" in the interval [256..16384].")
}
}

Expand All @@ -96,7 +75,6 @@ void scratch_cuda_bootstrap_amortized_64(
cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
uint32_t max_shared_memory, bool allocate_gpu_memory) {
checks_fast_bootstrap_amortized(polynomial_size);

switch (polynomial_size) {
case 256:
Expand Down Expand Up @@ -135,7 +113,9 @@ void scratch_cuda_bootstrap_amortized_64(
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
break;
default:
break;
PANIC("Cuda error (amortized PBS): unsupported polynomial size. Supported "
"N's are powers of two"
" in the interval [256..16384].")
}
}

Expand All @@ -150,7 +130,9 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory) {

checks_bootstrap_amortized(32, base_log, polynomial_size);
if (base_log > 32)
PANIC("Cuda error (amortized PBS): base log should be > number of bits in "
"the ciphertext representation (32)");

switch (polynomial_size) {
case 256:
Expand Down Expand Up @@ -217,7 +199,9 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
max_shared_memory);
break;
default:
break;
PANIC("Cuda error (amortized PBS): unsupported polynomial size. Supported "
"N's are powers of two"
" in the interval [256..16384].")
}
}

Expand Down Expand Up @@ -294,7 +278,9 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory) {

checks_bootstrap_amortized(64, base_log, polynomial_size);
if (base_log > 64)
PANIC("Cuda error (amortized PBS): base log should be > number of bits in "
"the ciphertext representation (64)");

switch (polynomial_size) {
case 256:
Expand Down Expand Up @@ -361,7 +347,9 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
max_shared_memory);
break;
default:
break;
PANIC("Cuda error (amortized PBS): unsupported polynomial size. Supported "
"N's are powers of two"
" in the interval [256..16384].")
}
}

Expand Down
Loading

0 comments on commit 6213579

Please sign in to comment.