Skip to content

Commit

Permalink
feat(gpu): cast between unsigned cuda radix ciphertexts
Browse files Browse the repository at this point in the history
  • Loading branch information
agnesLeroy committed Feb 19, 2024
1 parent c30395d commit 799829e
Show file tree
Hide file tree
Showing 5 changed files with 693 additions and 197 deletions.
61 changes: 56 additions & 5 deletions tfhe/src/core_crypto/gpu/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ impl CudaStream {
///
/// # Safety
///
/// - `dest` __must__ be a valid pointer to the GPU global memory
/// - [CudaStream::synchronize] __must__ be called after the copy
/// as soon as synchronization is required
pub unsafe fn memset_async<T>(&self, dest: &mut CudaVec<T>, value: T)
Expand All @@ -105,7 +104,6 @@ impl CudaStream {
///
/// # Safety
///
/// - `dest` __must__ be a valid pointer to the GPU global memory
/// - [CudaStream::synchronize] __must__ be called after the copy
/// as soon as synchronization is required
pub unsafe fn copy_to_gpu_async<T>(&self, dest: &mut CudaVec<T>, src: &[T])
Expand All @@ -131,8 +129,6 @@ impl CudaStream {
///
/// # Safety
///
/// - `src` __must__ be a valid pointer to the GPU global memory
/// - `dest` __must__ be a valid pointer to the GPU global memory
/// - [CudaStream::synchronize] __must__ be called after the copy
/// as soon as synchronization is required
pub unsafe fn copy_gpu_to_gpu_async<T>(&self, dest: &mut CudaVec<T>, src: &CudaVec<T>)
Expand All @@ -152,11 +148,66 @@ impl CudaStream {
}
}

/// Asynchronously copies the elements of `src` selected by `range` to the beginning of `dest`.
///
/// `range` is interpreted relative to `src`. If it resolves to an empty selection
/// (inclusive end smaller than start) the function returns without issuing a copy.
///
/// # Panics
///
/// - if the inclusive end of the range is not a valid index of `src`
/// - if the number of selected elements is larger than `dest.len()`
///
/// # Safety
///
/// - [CudaStream::synchronize] __must__ be called after the copy
/// as soon as synchronization is required
pub unsafe fn copy_src_range_gpu_to_gpu_async<R, T>(
    &self,
    range: R,
    dest: &mut CudaVec<T>,
    src: &CudaVec<T>,
) where
    R: std::ops::RangeBounds<usize>,
    T: Numeric,
{
    let bounds = src.range_bounds_to_start_end(range);
    let (start, end) = (*bounds.start(), *bounds.end());

    // An inverted inclusive range selects nothing; past this guard at least one
    // element is copied.
    if start > end {
        return;
    }
    assert!(end < src.len());
    assert!(end - start < dest.len());

    let elem_bytes = std::mem::size_of::<T>();
    // The offset is scaled by the element size, presumably because `as_c_ptr`
    // yields an untyped, byte-addressed pointer (confirm against `CudaVec`).
    let src_ptr = src.as_c_ptr().add(start * elem_bytes);
    // `end` is inclusive, hence the `+ 1` on the element count.
    let byte_count = (end - start + 1) * elem_bytes;
    cuda_memcpy_async_gpu_to_gpu(
        dest.as_mut_c_ptr(),
        src_ptr,
        byte_count as u64,
        self.as_c_ptr(),
    );
}

/// Asynchronously copies the beginning of `src` into the elements of `dest` selected by
/// `range`.
///
/// `range` is interpreted relative to `dest`. If it resolves to an empty selection
/// (inclusive end smaller than start) the function returns without issuing a copy.
///
/// # Panics
///
/// - if the inclusive end of the range is not a valid index of `dest`
/// - if the number of selected elements is larger than `src.len()`
///
/// # Safety
///
/// - [CudaStream::synchronize] __must__ be called after the copy
/// as soon as synchronization is required
pub unsafe fn copy_dest_range_gpu_to_gpu_async<R, T>(
    &self,
    range: R,
    dest: &mut CudaVec<T>,
    src: &CudaVec<T>,
) where
    R: std::ops::RangeBounds<usize>,
    T: Numeric,
{
    let bounds = dest.range_bounds_to_start_end(range);
    let (start, end) = (*bounds.start(), *bounds.end());

    // An inverted inclusive range selects nothing; past this guard at least one
    // element is copied.
    if start > end {
        return;
    }
    assert!(end < dest.len());
    assert!(end - start < src.len());

    let elem_bytes = std::mem::size_of::<T>();
    // The offset is scaled by the element size, presumably because `as_mut_c_ptr`
    // yields an untyped, byte-addressed pointer (confirm against `CudaVec`).
    let dest_ptr = dest.as_mut_c_ptr().add(start * elem_bytes);
    // `end` is inclusive, hence the `+ 1` on the element count.
    let byte_count = (end - start + 1) * elem_bytes;
    cuda_memcpy_async_gpu_to_gpu(
        dest_ptr,
        src.as_c_ptr(),
        byte_count as u64,
        self.as_c_ptr(),
    );
}

/// Copies data from GPU pointer into slice
///
/// # Safety
///
/// - `src` __must__ be a valid pointer to the GPU global memory
/// - [CudaStream::synchronize] __must__ be called as soon as synchronization is
/// required
pub unsafe fn copy_to_cpu_async<T>(&self, dest: &mut [T], src: &CudaVec<T>)
Expand Down
20 changes: 20 additions & 0 deletions tfhe/src/core_crypto/gpu/vec.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::core_crypto::gpu::{CudaDevice, CudaPtr, CudaStream};
use crate::core_crypto::prelude::Numeric;
use std::collections::Bound::{Excluded, Included, Unbounded};
use std::ffi::c_void;
use std::marker::PhantomData;

Expand Down Expand Up @@ -66,6 +67,25 @@ impl<T: Numeric> CudaVec<T> {
/// Returns `true` when the stored length is zero.
pub fn is_empty(&self) -> bool {
    matches!(self.len, 0)
}

/// Converts any `RangeBounds<usize>` into an inclusive `start..=end` index range over this
/// vector.
///
/// Bound mapping:
/// - start: unbounded -> `0`, included -> `start`, excluded -> `start + 1`
/// - end: unbounded -> `self.len() - 1`, included -> `end`, excluded -> `end - 1`
///
/// Some empty selections have no inclusive `usize` end: an exclusive end of `0`
/// (for example `0..0`), or an unbounded end on an empty vector. These are returned as
/// an inverted range whose end is smaller than its start, so callers that test
/// `end < start` treat them as empty instead of as the single element `0..=0`.
///
/// No check against `self.len()` is performed for explicit bounds; callers are
/// responsible for validating the returned indices.
pub(crate) fn range_bounds_to_start_end<R>(&self, range: R) -> std::ops::RangeInclusive<usize>
where
    R: std::ops::RangeBounds<usize>,
{
    let start = match range.start_bound() {
        Unbounded => 0usize,
        Included(start) => *start,
        Excluded(start) => *start + 1,
    };

    // `None` means "no element can be selected": there is no index before 0.
    let end = match range.end_bound() {
        Unbounded => self.len().checked_sub(1),
        Included(end) => Some(*end),
        Excluded(end) => end.checked_sub(1),
    };

    match end {
        Some(end) => start..=end,
        // `start.max(1)` is always greater than 0, so this inclusive range is empty.
        None => start.max(1)..=0,
    }
}
}

// SAFETY
Expand Down
67 changes: 67 additions & 0 deletions tfhe/src/integer/gpu/ciphertext/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,70 @@ impl CudaRadixCiphertextInfo {
.collect(),
}
}

/// Builds the block metadata obtained after adding `num_blocks` trivial zero blocks in
/// front of the existing blocks (the LSB side, per the function name).
///
/// Each added entry has degree 0 and a zero noise level, and takes its message modulus,
/// carry modulus and PBS order from the first existing block.
///
/// # Panics
///
/// Panics if `num_blocks > 0` and `self.blocks` is empty, since there is no block to take
/// the parameters from.
pub(crate) fn after_extend_radix_with_trivial_zero_blocks_lsb(
    &self,
    num_blocks: usize,
) -> Self {
    let mut blocks = Vec::with_capacity(self.blocks.len() + num_blocks);

    // New zero blocks come first; the lookup of the reference block stays inside the
    // closure so that extending by zero blocks never touches an empty list.
    blocks.extend((0..num_blocks).map(|_| {
        let reference = self.blocks.first().unwrap();
        CudaBlockInfo {
            degree: Degree::new(0),
            message_modulus: reference.message_modulus,
            carry_modulus: reference.carry_modulus,
            pbs_order: reference.pbs_order,
            noise_level: NoiseLevel::ZERO,
        }
    }));
    // Then the existing blocks, in their original order.
    blocks.extend(self.blocks.iter().copied());

    Self { blocks }
}

/// Builds the block metadata obtained after adding `num_blocks` trivial zero blocks
/// after the existing blocks (the MSB side, per the function name).
///
/// Each added entry has degree 0 and a zero noise level, and takes its message modulus,
/// carry modulus and PBS order from the first existing block.
///
/// # Panics
///
/// Panics if `num_blocks > 0` and `self.blocks` is empty, since there is no block to take
/// the parameters from.
pub(crate) fn after_extend_radix_with_trivial_zero_blocks_msb(
    &self,
    num_blocks: usize,
) -> Self {
    let mut blocks = Vec::with_capacity(self.blocks.len() + num_blocks);

    // Existing blocks keep their positions at the front.
    blocks.extend(self.blocks.iter().copied());
    // New zero blocks are appended; the lookup of the reference block stays inside the
    // closure so that extending by zero blocks never touches an empty list.
    blocks.extend((0..num_blocks).map(|_| {
        let reference = self.blocks.first().unwrap();
        CudaBlockInfo {
            degree: Degree::new(0),
            message_modulus: reference.message_modulus,
            carry_modulus: reference.carry_modulus,
            pbs_order: reference.pbs_order,
            noise_level: NoiseLevel::ZERO,
        }
    }));

    Self { blocks }
}

/// Builds the block metadata obtained after dropping the first `num_blocks` blocks
/// (the LSB side, per the function name).
///
/// # Panics
///
/// Panics if `num_blocks` is larger than the number of blocks.
pub(crate) fn after_trim_radix_blocks_lsb(&self, num_blocks: usize) -> Self {
    let mut blocks = Vec::with_capacity(self.blocks.len().saturating_sub(num_blocks));
    // Everything from index `num_blocks` onwards is kept.
    let kept = &self.blocks[num_blocks..];
    blocks.extend_from_slice(kept);
    Self { blocks }
}

/// Builds the block metadata obtained after dropping the last `num_blocks` blocks
/// (the MSB side: the sibling `..._msb` extension appends at the end of `blocks`).
///
/// The result keeps the first `self.blocks.len() - num_blocks` entries, in order.
///
/// # Panics
///
/// Panics if `num_blocks` is larger than the number of blocks.
pub(crate) fn after_trim_radix_blocks_msb(&self, num_blocks: usize) -> Self {
    // Number of blocks that survive the trim. The previous code sliced `..num_blocks`,
    // which kept the wrong number of blocks whenever `num_blocks != len - num_blocks`.
    let kept = self
        .blocks
        .len()
        .checked_sub(num_blocks)
        .expect("cannot trim more blocks than the ciphertext contains");

    Self {
        blocks: self.blocks[..kept].to_vec(),
    }
}
}

// #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
Expand All @@ -399,6 +463,9 @@ pub struct CudaRadixCiphertext {
}

impl CudaRadixCiphertext {
pub fn new(d_blocks: CudaLweCiphertextList<u64>, info: CudaRadixCiphertextInfo) -> Self {
Self { d_blocks, info }
}
/// Copies a RadixCiphertext to the GPU memory
///
/// # Example
Expand Down
Loading

0 comments on commit 799829e

Please sign in to comment.