From 09eaa0baf74d6f52cecfb44a78dfbff13ad3a2bc Mon Sep 17 00:00:00 2001 From: Agnes Leroy Date: Wed, 5 Feb 2025 18:16:00 +0100 Subject: [PATCH] chore(gpu): encapsulate after_bitop functions for better readability --- .../cuda/include/integer/integer_utilities.h | 24 ++++++++++ .../cuda/src/integer/bitwise_ops.cu | 47 +++++++++++++++++++ .../cuda/src/integer/bitwise_ops.cuh | 37 ++++----------- .../cuda/src/integer/scalar_bitops.cu | 44 +++++++++++++++++ .../cuda/src/integer/scalar_bitops.cuh | 34 +++----------- 5 files changed, 130 insertions(+), 56 deletions(-) diff --git a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h index 58c8c6c783..a9d9cce95d 100644 --- a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h +++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h @@ -4737,4 +4737,28 @@ template struct int_div_rem_memory { } }; +void update_degrees_after_bitand(uint64_t *output_degrees, + uint64_t *lwe_array_1_degrees, + uint64_t *lwe_array_2_degrees, + uint32_t num_radix_blocks); +void update_degrees_after_bitor(uint64_t *output_degrees, + uint64_t *lwe_array_1_degrees, + uint64_t *lwe_array_2_degrees, + uint32_t num_radix_blocks); +void update_degrees_after_bitxor(uint64_t *output_degrees, + uint64_t *lwe_array_1_degrees, + uint64_t *lwe_array_2_degrees, + uint32_t num_radix_blocks); +void update_degrees_after_scalar_bitand(uint64_t *output_degrees, + uint64_t *clear_degrees, + uint64_t *input_degrees, + uint32_t num_clear_blocks); +void update_degrees_after_scalar_bitor(uint64_t *output_degrees, + uint64_t *clear_degrees, + uint64_t *input_degrees, + uint32_t num_clear_blocks); +void update_degrees_after_scalar_bitxor(uint64_t *output_degrees, + uint64_t *clear_degrees, + uint64_t *input_degrees, + uint32_t num_clear_blocks); #endif // CUDA_INTEGER_UTILITIES_H diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu 
b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
index 509c4f7cc3..2b190e2fc6 100644
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
@@ -41,3 +41,50 @@ void cleanup_cuda_integer_bitop(void *const *streams,
       (int_bitop_buffer *)(*mem_ptr_void);
   mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
 }
+
+// output_degrees[i] = smaller of the two input degrees, for every block i.
+void update_degrees_after_bitand(uint64_t *output_degrees,
+                                 uint64_t *lwe_array_1_degrees,
+                                 uint64_t *lwe_array_2_degrees,
+                                 uint32_t num_radix_blocks) {
+  for (uint32_t i = 0; i < num_radix_blocks; i++) {
+    output_degrees[i] =
+        std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
+  }
+}
+
+// output_degrees[i] = largest (max | j) over every j in [0, min], where
+// max / min are the larger / smaller of the two input degrees of block i.
+void update_degrees_after_bitor(uint64_t *output_degrees,
+                                uint64_t *lwe_array_1_degrees,
+                                uint64_t *lwe_array_2_degrees,
+                                uint32_t num_radix_blocks) {
+  for (uint32_t i = 0; i < num_radix_blocks; i++) {
+    auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
+    auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
+    auto result = max;
+
+    for (uint64_t j = 0; j < min + 1; j++) {
+      result = std::max(result, max | j);
+    }
+    output_degrees[i] = result;
+  }
+}
+
+// output_degrees[i] = largest (max ^ j) over every j in [0, min], where
+// max / min are the larger / smaller of the two input degrees of block i.
+void update_degrees_after_bitxor(uint64_t *output_degrees,
+                                 uint64_t *lwe_array_1_degrees,
+                                 uint64_t *lwe_array_2_degrees,
+                                 uint32_t num_radix_blocks) {
+  for (uint32_t i = 0; i < num_radix_blocks; i++) {
+    auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
+    auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
+    auto result = max;
+
+    for (uint64_t j = 0; j < min + 1; j++) {
+      result = std::max(result, max ^ j);
+    }
+    output_degrees[i] = result;
+  }
+}
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
index e39e33a8c8..8bfb6e0514 100644
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
+++ 
b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh @@ -22,36 +22,17 @@ __host__ void host_integer_radix_bitop_kb( auto lut = mem_ptr->lut; uint64_t degrees[lwe_array_1->num_radix_blocks]; if (mem_ptr->op == BITOP_TYPE::BITAND) { - for (uint i = 0; i < lwe_array_out->num_radix_blocks; i++) { - degrees[i] = std::min(lwe_array_1->degrees[i], lwe_array_2->degrees[i]); - } + update_degrees_after_bitand(degrees, lwe_array_1->degrees, + lwe_array_2->degrees, + lwe_array_1->num_radix_blocks); } else if (mem_ptr->op == BITOP_TYPE::BITOR) { - for (uint i = 0; i < lwe_array_out->num_radix_blocks; i++) { - auto max = std::max(lwe_array_1->degrees[i], lwe_array_2->degrees[i]); - auto min = std::min(lwe_array_1->degrees[i], lwe_array_2->degrees[i]); - auto result = max; - - for (uint j = 0; j < min + 1; j++) { - if (max | j > result) { - result = max | j; - } - } - degrees[i] = result; - } + update_degrees_after_bitor(degrees, lwe_array_1->degrees, + lwe_array_2->degrees, + lwe_array_1->num_radix_blocks); } else if (mem_ptr->op == BITXOR) { - for (uint i = 0; i < lwe_array_out->num_radix_blocks; i++) { - auto max = std::max(lwe_array_1->degrees[i], lwe_array_2->degrees[i]); - auto min = std::min(lwe_array_1->degrees[i], lwe_array_2->degrees[i]); - auto result = max; - - // Try every possibility to find the worst case - for (uint j = 0; j < min + 1; j++) { - if (max ^ j > result) { - result = max ^ j; - } - } - degrees[i] = result; - } + update_degrees_after_bitxor(degrees, lwe_array_1->degrees, + lwe_array_2->degrees, + lwe_array_1->num_radix_blocks); } integer_radix_apply_bivariate_lookup_table_kb( diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cu b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cu index e64d98f8b9..70d23582d6 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cu +++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cu @@ -13,3 +13,47 @@ void 
cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
       num_clear_blocks, (int_bitop_buffer *)mem_ptr, bsks,
       (uint64_t **)(ksks));
 }
+
+// output_degrees[i] = smaller of clear_degrees[i] and input_degrees[i].
+void update_degrees_after_scalar_bitand(uint64_t *output_degrees,
+                                        uint64_t *clear_degrees,
+                                        uint64_t *input_degrees,
+                                        uint32_t num_clear_blocks) {
+  for (uint32_t i = 0; i < num_clear_blocks; i++) {
+    output_degrees[i] = std::min(clear_degrees[i], input_degrees[i]);
+  }
+}
+// output_degrees[i] = largest (max | j) over every j in [0, min], where
+// max / min are the larger / smaller of clear_degrees[i], input_degrees[i].
+void update_degrees_after_scalar_bitor(uint64_t *output_degrees,
+                                       uint64_t *clear_degrees,
+                                       uint64_t *input_degrees,
+                                       uint32_t num_clear_blocks) {
+  for (uint32_t i = 0; i < num_clear_blocks; i++) {
+    auto max = std::max(clear_degrees[i], input_degrees[i]);
+    auto min = std::min(clear_degrees[i], input_degrees[i]);
+    auto result = max;
+
+    for (uint64_t j = 0; j < min + 1; j++) {
+      result = std::max(result, max | j);
+    }
+    output_degrees[i] = result;
+  }
+}
+// output_degrees[i] = largest (max ^ j) over every j in [0, min], where
+// max / min are the larger / smaller of clear_degrees[i], input_degrees[i].
+void update_degrees_after_scalar_bitxor(uint64_t *output_degrees,
+                                        uint64_t *clear_degrees,
+                                        uint64_t *input_degrees,
+                                        uint32_t num_clear_blocks) {
+  for (uint32_t i = 0; i < num_clear_blocks; i++) {
+    auto max = std::max(clear_degrees[i], input_degrees[i]);
+    auto min = std::min(clear_degrees[i], input_degrees[i]);
+    auto result = max;
+
+    for (uint64_t j = 0; j < min + 1; j++) {
+      result = std::max(result, max ^ j);
+    }
+    output_degrees[i] = result;
+  }
+}
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cuh
index ec25dc24de..2121ff3c0c 100644
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cuh
@@ -38,36 +38,14 @@ __host__ void host_integer_radix_scalar_bitop_kb(
       num_clear_blocks * sizeof(Torus), streams[0], gpu_indexes[0]);
 
   if (mem_ptr->op == BITOP_TYPE::SCALAR_BITAND) {
-    for (uint i = 0; i < num_clear_blocks; i++) {
-      degrees[i] = std::min(clear_degrees[i], input->degrees[i]);
-    
} + update_degrees_after_scalar_bitand(degrees, clear_degrees, input->degrees, + num_clear_blocks); } else if (mem_ptr->op == BITOP_TYPE::SCALAR_BITOR) { - for (uint i = 0; i < num_clear_blocks; i++) { - auto max = std::max(clear_degrees[i], input->degrees[i]); - auto min = std::min(clear_degrees[i], input->degrees[i]); - auto result = max; - - for (uint j = 0; j < min + 1; j++) { - if (max | j > result) { - result = max | j; - } - } - degrees[i] = result; - } + update_degrees_after_scalar_bitor(degrees, clear_degrees, input->degrees, + num_clear_blocks); } else if (mem_ptr->op == SCALAR_BITXOR) { - for (uint i = 0; i < num_clear_blocks; i++) { - auto max = std::max(clear_degrees[i], input->degrees[i]); - auto min = std::min(clear_degrees[i], input->degrees[i]); - auto result = max; - - // Try every possibility to find the worst case - for (uint j = 0; j < min + 1; j++) { - if (max ^ j > result) { - result = max ^ j; - } - } - degrees[i] = result; - } + update_degrees_after_scalar_bitxor(degrees, clear_degrees, input->degrees, + num_clear_blocks); } cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(0, 0), clear_blocks, num_clear_blocks * sizeof(Torus), streams[0],