Skip to content

Commit

Permalink
fix(gpu): fix bug with noise level/degree tracking
Browse files Browse the repository at this point in the history
  • Loading branch information
agnesLeroy committed Jan 31, 2025
1 parent 99664d6 commit 1c769e0
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3098,13 +3098,13 @@ template <typename Torus> struct int_cmux_buffer {
this->params = params;

if (allocate_gpu_memory) {
create_trivial_radix_ciphertext_async<Torus>(
streams[0], gpu_indexes[0], buffer_in, 2 * num_radix_blocks,
params.big_lwe_dimension);
create_trivial_radix_ciphertext_async<Torus>(
create_zero_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0],
buffer_in, 2 * num_radix_blocks,
params.big_lwe_dimension);
create_zero_radix_ciphertext_async<Torus>(
streams[0], gpu_indexes[0], buffer_out, 2 * num_radix_blocks,
params.big_lwe_dimension);
create_trivial_radix_ciphertext_async<Torus>(
create_zero_radix_ciphertext_async<Torus>(
streams[0], gpu_indexes[0], condition_array, 2 * num_radix_blocks,
params.big_lwe_dimension);

Expand Down Expand Up @@ -4531,9 +4531,9 @@ template <typename Torus> struct int_abs_buffer {
streams, gpu_indexes, gpu_count, BITOP_TYPE::BITXOR, params,
num_radix_blocks, allocate_gpu_memory);

create_trivial_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0],
mask, num_radix_blocks,
params.big_lwe_dimension);
create_zero_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0],
mask, num_radix_blocks,
params.big_lwe_dimension);
}
}

Expand Down
2 changes: 1 addition & 1 deletion backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@ uint64_t generate_lookup_table_with_encoding(
int index = i * box_size;
for (int j = index; j < index + box_size; j++) {
auto f_eval = f(i);
degree = max(degree, f_eval);
degree = std::max(degree, f_eval);
body[j] = f_eval * output_delta;
}
}
Expand Down
45 changes: 23 additions & 22 deletions backends/tfhe-cuda-backend/cuda/src/integer/radix_ciphertext.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,25 @@
#include "integer/integer.h"

template <typename Torus>
void create_trivial_radix_ciphertext_async(cudaStream_t const stream,
uint32_t const gpu_index,
CudaRadixCiphertextFFI *output_radix,
const uint32_t num_radix_blocks,
const uint32_t lwe_dimension) {
output_radix->lwe_dimension = lwe_dimension;
output_radix->num_radix_blocks = num_radix_blocks;
uint32_t lwe_size_bytes = (lwe_dimension + 1) * sizeof(Torus);
output_radix->ptr = (void *)cuda_malloc_async(
num_radix_blocks * lwe_size_bytes, stream, gpu_index);
output_radix->degrees = (Torus *)(malloc(num_radix_blocks * sizeof(Torus)));
output_radix->noise_levels =
(Torus *)(malloc(num_radix_blocks * sizeof(Torus)));
for (uint i = 0; i < output_radix->num_radix_blocks; i++) {
output_radix->degrees[i] = 0;
output_radix->noise_levels[i] = 0;
// Allocates and zero-initializes a radix ciphertext made of
// `num_radix_blocks` LWE blocks of `lwe_dimension + 1` Torus elements each.
//
// - radix->ptr receives a buffer obtained from cuda_malloc_async and cleared
//   with cuda_memset_async, both issued with `stream` / `gpu_index`
//   (NOTE(review): presumably the buffer is only guaranteed to be zeroed once
//   `stream` has been synchronized -- confirm against the device helpers).
// - radix->degrees and radix->noise_levels receive host arrays of
//   `num_radix_blocks` uint64_t entries, all set to 0.
//
// Calls PANIC if either host array cannot be allocated. The caller owns the
// three allocations stored in `radix`.
void create_zero_radix_ciphertext_async(cudaStream_t const stream,
                                        uint32_t const gpu_index,
                                        CudaRadixCiphertextFFI *radix,
                                        const uint32_t num_radix_blocks,
                                        const uint32_t lwe_dimension) {
  radix->lwe_dimension = lwe_dimension;
  radix->num_radix_blocks = num_radix_blocks;

  // Widen to 64 bits before multiplying: with 32-bit operands the product
  // (lwe_dimension + 1) * num_radix_blocks, and the final byte count, could
  // wrap around and silently under-allocate the device buffer.
  const uint64_t size = (static_cast<uint64_t>(lwe_dimension) + 1) *
                        static_cast<uint64_t>(num_radix_blocks) *
                        sizeof(Torus);
  radix->ptr = (void *)cuda_malloc_async(size, stream, gpu_index);
  cuda_memset_async(radix->ptr, 0, size, stream, gpu_index);

  // calloc hands back zero-filled storage, so every degree and noise level
  // starts at 0 without a separate initialization loop.
  radix->degrees = (uint64_t *)(calloc(num_radix_blocks, sizeof(uint64_t)));
  radix->noise_levels =
      (uint64_t *)(calloc(num_radix_blocks, sizeof(uint64_t)));
  if (radix->degrees == NULL || radix->noise_levels == NULL) {
    PANIC("Cuda error: degrees / noise levels not allocated correctly")
  }
}

Expand All @@ -40,10 +43,9 @@ void as_radix_ciphertext_slice(CudaRadixCiphertextFFI *output_radix,
output_radix->num_radix_blocks = end_lwe_index - start_lwe_index + 1;
output_radix->lwe_dimension = input_radix->lwe_dimension;
Torus *in_ptr = (Torus *)input_radix->ptr;
output_radix->ptr = (void *)(&in_ptr[start_lwe_index * lwe_size]);
output_radix->degrees = &input_radix->degrees[start_lwe_index * lwe_size];
output_radix->noise_levels =
&input_radix->noise_levels[start_lwe_index * lwe_size];
output_radix->ptr = (void *)(in_ptr + start_lwe_index * lwe_size);
output_radix->degrees = input_radix->degrees + start_lwe_index;
output_radix->noise_levels = input_radix->noise_levels + start_lwe_index;
}

template <typename Torus>
Expand All @@ -68,8 +70,7 @@ void copy_radix_ciphertext_to_larger_output_slice_async(
out_ptr = &out_ptr[output_start_lwe_index * lwe_size];

cuda_memcpy_async_gpu_to_gpu(out_ptr, input_radix->ptr,
input_radix->num_radix_blocks *
(input_radix->lwe_dimension + 1) *
input_radix->num_radix_blocks * lwe_size *
sizeof(Torus),
stream, gpu_index);
for (uint i = 0; i < input_radix->num_radix_blocks; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ __host__ void host_addition(cudaStream_t stream, uint32_t gpu_index,
CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *input_1,
CudaRadixCiphertextFFI const *input_2) {
if (output->num_radix_blocks != input_1->num_radix_blocks ||
output->num_radix_blocks != input_2->num_radix_blocks)
PANIC("Cuda error: input and output num radix blocks must be the same")
if (output->lwe_dimension != input_1->lwe_dimension ||
output->lwe_dimension != input_2->lwe_dimension)
PANIC("Cuda error: input and output num radix blocks must be the same")

cuda_set_device(gpu_index);
// lwe_size includes the presence of the body
Expand Down

0 comments on commit 1c769e0

Please sign in to comment.