Skip to content

Commit

Permalink
refactor(gpu): remove lwe chunk size argument
Browse files Browse the repository at this point in the history
  • Loading branch information
agnesLeroy committed Aug 1, 2024
1 parent 5547d92 commit 2fe63c9
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 155 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,15 @@ void scratch_cuda_multi_bit_programmable_bootstrap_64(
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t grouping_factor,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
uint32_t chunk_size = 0);
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);

void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void *lwe_output_indexes, void *lut_vector, void *lut_vector_indexes,
void *lwe_array_in, void *lwe_input_indexes, void *bootstrapping_key,
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
uint32_t level_count, uint32_t num_samples, uint32_t lwe_chunk_size = 0);
uint32_t level_count, uint32_t num_samples);

void cleanup_cuda_multi_bit_programmable_bootstrap(void *stream,
uint32_t gpu_index,
Expand All @@ -51,8 +50,7 @@ void scratch_cuda_tbc_multi_bit_programmable_bootstrap(
void *stream, uint32_t gpu_index, pbs_buffer<Torus, MULTI_BIT> **buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t grouping_factor,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
uint32_t lwe_chunk_size);
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);

template <typename Torus>
void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
Expand All @@ -61,24 +59,21 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *bootstrapping_key,
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
uint32_t lwe_chunk_size);
uint32_t base_log, uint32_t level_count, uint32_t num_samples);
#endif

template <typename Torus>
void scratch_cuda_cg_multi_bit_programmable_bootstrap(
void *stream, uint32_t gpu_index, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t grouping_factor,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
uint32_t lwe_chunk_size = 0);
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);

template <typename Torus>
void scratch_cuda_cg_multi_bit_programmable_bootstrap(
void *stream, uint32_t gpu_index, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
uint32_t lwe_chunk_size = 0);
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);

template <typename Torus>
void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
Expand All @@ -87,16 +82,14 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *bootstrapping_key,
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
uint32_t lwe_chunk_size = 0);
uint32_t base_log, uint32_t level_count, uint32_t num_samples);

template <typename Torus>
void scratch_cuda_multi_bit_programmable_bootstrap(
void *stream, uint32_t gpu_index, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t grouping_factor,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
uint32_t lwe_chunk_size = 0);
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);

template <typename Torus>
void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
Expand All @@ -105,8 +98,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *bootstrapping_key,
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
uint32_t lwe_chunk_size = 0);
uint32_t base_log, uint32_t level_count, uint32_t num_samples);

template <typename Torus>
__host__ __device__ uint64_t
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,7 @@ __host__ void scratch_cg_multi_bit_programmable_bootstrap(
cudaStream_t stream, uint32_t gpu_index,
pbs_buffer<Torus, MULTI_BIT> **buffer, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
uint32_t lwe_chunk_size = 0) {
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory) {

uint64_t full_sm_keybundle =
get_buffer_size_full_sm_multibit_programmable_bootstrap_keybundle<Torus>(
Expand Down Expand Up @@ -242,9 +241,8 @@ __host__ void scratch_cg_multi_bit_programmable_bootstrap(
check_cuda_error(cudaGetLastError());
}

if (!lwe_chunk_size)
lwe_chunk_size = get_lwe_chunk_size<Torus, params>(
gpu_index, input_lwe_ciphertext_count, polynomial_size);
auto lwe_chunk_size = get_lwe_chunk_size<Torus, params>(
gpu_index, input_lwe_ciphertext_count, polynomial_size);
*buffer = new pbs_buffer<Torus, MULTI_BIT>(
stream, gpu_index, glwe_dimension, polynomial_size, level_count,
input_lwe_ciphertext_count, lwe_chunk_size, PBS_VARIANT::CG,
Expand Down Expand Up @@ -336,12 +334,10 @@ __host__ void host_cg_multi_bit_programmable_bootstrap(
Torus *lwe_array_in, Torus *lwe_input_indexes, uint64_t *bootstrapping_key,
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t glwe_dimension,
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
uint32_t lwe_chunk_size = 0) {
uint32_t base_log, uint32_t level_count, uint32_t num_samples) {

if (!lwe_chunk_size)
lwe_chunk_size = get_lwe_chunk_size<Torus, params>(gpu_index, num_samples,
polynomial_size);
auto lwe_chunk_size = get_lwe_chunk_size<Torus, params>(
gpu_index, num_samples, polynomial_size);

for (uint32_t lwe_offset = 0; lwe_offset < (lwe_dimension / grouping_factor);
lwe_offset += lwe_chunk_size) {
Expand Down
Loading

0 comments on commit 2fe63c9

Please sign in to comment.