diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh index 13642d9ee0..1ad84b1b33 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh @@ -672,7 +672,6 @@ void host_full_propagate_inplace(cudaStream_t *streams, uint32_t *gpu_indexes, for (int i = 0; i < num_blocks; i++) { auto cur_input_block = &input_blocks[i * big_lwe_size]; - cudaSetDevice(gpu_indexes[0]); /// Since the keyswitch is done on one input only, use only 1 GPU execute_keyswitch_async( streams, gpu_indexes, 1, mem_ptr->tmp_small_lwe_vector, @@ -692,7 +691,7 @@ void host_full_propagate_inplace(cudaStream_t *streams, uint32_t *gpu_indexes, mem_ptr->lut->lwe_trivial_indexes, bsks, mem_ptr->lut->buffer, params.glwe_dimension, params.small_lwe_dimension, params.polynomial_size, params.pbs_base_log, params.pbs_level, - params.grouping_factor, 2, params.pbs_type); + params.grouping_factor, 1, params.pbs_type); cuda_memcpy_async_gpu_to_gpu(cur_input_block, mem_ptr->tmp_big_lwe_vector, big_lwe_size * sizeof(Torus), streams[0],