Skip to content

Commit

Permalink
feat(capi): add Cuda support
Browse files Browse the repository at this point in the history
- This adds GPU support in the C API
- Also make ctest (cmake test launcher) print
  test output when it fails
  • Loading branch information
tmontaigu committed Feb 9, 2024
1 parent 8c54c82 commit ece82c5
Show file tree
Hide file tree
Showing 8 changed files with 254 additions and 5 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/aws_tfhe_gpu_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ jobs:
run: |
make test_user_doc_gpu
- name: Test C API
run: |
make test_c_api_gpu
- name: Slack Notification
if: ${{ always() }}
continue-on-error: true
Expand Down
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,13 @@ build_c_api: install_rs_check_toolchain
-p $(TFHE_SPEC)
@"$(MAKE)" symlink_c_libs_without_fingerprint

.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer with GPU support
# Same cargo invocation as build_c_api, with the additional `gpu` feature enabled.
build_c_api_gpu: install_rs_check_toolchain
	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,gpu \
		-p $(TFHE_SPEC)
	@"$(MAKE)" symlink_c_libs_without_fingerprint

.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
Expand Down Expand Up @@ -410,6 +417,10 @@ test_c_api_c: build_c_api
.PHONY: test_c_api # Run all the tests for the C API
test_c_api: test_c_api_rs test_c_api_c

.PHONY: test_c_api_gpu # Run the C tests for the C API with GPU support
# Builds the GPU-enabled C API first, then runs the test script in GPU mode.
test_c_api_gpu: build_c_api_gpu
	./scripts/c_api_tests.sh --gpu

.PHONY: test_shortint_ci # Run the tests for shortint ci
test_shortint_ci: install_rs_build_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
Expand Down
15 changes: 11 additions & 4 deletions scripts/c_api_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ function usage() {
echo
echo "--help Print this message"
echo "--build-only Pass to only build the tests without running them"
echo "--gpu Enable GPU support"
echo
}

BUILD_ONLY=0

WITH_FEATURE_GPU="OFF"
while [ -n "$1" ]
do
case "$1" in
Expand All @@ -24,6 +25,9 @@ do
BUILD_ONLY=1
;;

"--gpu" )
WITH_FEATURE_GPU="ON"
;;
*)
echo "Unknown param : $1"
exit 1
Expand All @@ -40,7 +44,7 @@ mkdir -p "${TFHE_BUILD_DIR}"

cd "${TFHE_BUILD_DIR}"

cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DCARGO_PROFILE="${CARGO_PROFILE}"
cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DCARGO_PROFILE="${CARGO_PROFILE}" -DWITH_FEATURE_GPU="${WITH_FEATURE_GPU}"

make -j

Expand All @@ -55,5 +59,8 @@ if [[ $(uname) == "Darwin" ]]; then
nproc_bin="sysctl -n hw.logicalcpu"
fi

# Let's go parallel
ARGS="-j$(${nproc_bin})" make test
# Run the test suite in parallel, printing the output of failing tests.
# GPU runs execute only the cuda tests; CPU runs execute everything else.
case "${WITH_FEATURE_GPU}" in
    "ON" )
        ctest --output-on-failure --test-dir "." --parallel "$(${nproc_bin})" --tests-regex ".*cuda.*"
        ;;
    * )
        ctest --output-on-failure --test-dir "." --parallel "$(${nproc_bin})" --exclude-regex ".*cuda.*"
        ;;
esac
2 changes: 2 additions & 0 deletions tfhe/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ fn gen_c_api() {
"shortint",
#[cfg(feature = "integer")]
"integer",
#[cfg(feature = "gpu")]
"gpu",
];

let parse_expand_vec = if parse_expand_features_vec.is_empty() {
Expand Down
13 changes: 13 additions & 0 deletions tfhe/c_api_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ if(NOT CARGO_PROFILE)
endif()
set(TFHE_C_API_RELEASE "${CMAKE_CURRENT_SOURCE_DIR}/../../target/${CARGO_PROFILE}")

option(WITH_FEATURE_GPU "Enable if tfhe-rs C API was compiled with the 'gpu' feature activated" OFF)

include_directories(${TFHE_C_API_RELEASE})
# This one is to fetch the dynamic buffer header
include_directories(${TFHE_C_API_RELEASE}/deps)
Expand All @@ -22,6 +24,11 @@ if(APPLE)
endif()
endif()

# When the GPU feature is enabled, locate the CUDA toolkit (version 10.0 or
# newer) and OpenMP. Both are REQUIRED, so configuration stops with an error
# if either one cannot be found.
if (WITH_FEATURE_GPU)
find_package(CUDAToolkit 10.0 REQUIRED)
find_package(OpenMP REQUIRED)
endif()

file(GLOB TEST_CASES test_*.c)
foreach (testsourcefile ${TEST_CASES})
get_filename_component(testname ${testsourcefile} NAME_WLE)
Expand All @@ -34,6 +41,12 @@ foreach (testsourcefile ${TEST_CASES})
)
target_include_directories(${testname} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${testname} LINK_PUBLIC Tfhe TfheDynamicBuffer m pthread dl)

# GPU builds additionally link each test against the CUDA runtime, the C++
# standard library and OpenMP, and define WITH_FEATURE_GPU for the test
# sources (CMake strips the leading -D from the definition).
if (WITH_FEATURE_GPU)
target_link_libraries(${testname} LINK_PUBLIC CUDA::cudart -lstdc++ OpenMP::OpenMP_CXX)
target_compile_definitions(${testname} PUBLIC -DWITH_FEATURE_GPU)
endif()

if(APPLE)
target_link_libraries(${testname} LINK_PUBLIC ${SECURITY_FRAMEWORK})
endif()
Expand Down
103 changes: 103 additions & 0 deletions tfhe/c_api_tests/test_high_level_integers_cuda.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#if defined(WITH_FEATURE_GPU)
#include <tfhe.h>

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdint.h>

// Encrypts two 8-bit values with `client_key`, then checks that the
// homomorphic addition and the homomorphic sum of the two ciphertexts both
// decrypt to `lhs_clear + rhs_clear`.
//
// Every C API call is expected to return 0; this is enforced with assert().
// Returns the status of the last C API call.
int uint8_client_key(const ClientKey *client_key) {
    int ok;
    FheUint8 *lhs = NULL;
    FheUint8 *rhs = NULL;
    FheUint8 *result = NULL;

    uint8_t lhs_clear = 123;
    uint8_t rhs_clear = 14;

    ok = fhe_uint8_try_encrypt_with_client_key_u8(lhs_clear, client_key, &lhs);
    assert(ok == 0);

    ok = fhe_uint8_try_encrypt_with_client_key_u8(rhs_clear, client_key, &rhs);
    assert(ok == 0);

    uint8_t clear;

    // Check addition
    {
        ok = fhe_uint8_add(lhs, rhs, &result);
        assert(ok == 0);

        ok = fhe_uint8_decrypt(result, client_key, &clear);
        assert(ok == 0);

        assert(clear == (lhs_clear + rhs_clear));
    }

    // Check sum
    {
        FheUint8 *sum_result = NULL;
        const FheUint8 *data[2] = {lhs, rhs};
        ok = fhe_uint8_sum(data, 2, &sum_result);
        assert(ok == 0);

        // Decrypt the output of the sum itself; decrypting `result` here would
        // only re-check the addition above.
        clear = 0;
        ok = fhe_uint8_decrypt(sum_result, client_key, &clear);
        assert(ok == 0);

        assert(clear == (lhs_clear + rhs_clear));
        fhe_uint8_destroy(sum_result);
    }

    fhe_uint8_destroy(lhs);
    fhe_uint8_destroy(rhs);
    fhe_uint8_destroy(result);
    return ok;
}


// Test entry point: generates a client key and a compressed server key,
// decompresses the server key to the GPU, sets it as the cuda server key, and
// runs the uint8 checks.
//
// Returns the status reported by the uint8 checks (0 on success).
int main(void) {
    int ok = 0;
    {
        ConfigBuilder *builder;
        Config *config;

        ok = config_builder_default(&builder);
        assert(ok == 0);
        ok = config_builder_build(builder, &config);
        assert(ok == 0);

        ClientKey *client_key = NULL;
        CompressedServerKey *compressed_sks = NULL;
        CudaServerKey *cuda_server_key = NULL;

        ok = client_key_generate(config, &client_key);
        assert(ok == 0);

        ok = compressed_server_key_new(client_key, &compressed_sks);
        assert(ok == 0);

        ok = compressed_server_key_decompress_to_gpu(compressed_sks, &cuda_server_key);
        assert(ok == 0);

        ok = set_cuda_server_key(cuda_server_key);
        assert(ok == 0);

        // Keep the status of the checks instead of discarding it, so that it is
        // reflected in the process exit code.
        ok = uint8_client_key(client_key);
        assert(ok == 0);

        client_key_destroy(client_key);
        compressed_server_key_destroy(compressed_sks);
        cuda_server_key_destroy(cuda_server_key);
    }

    return ok;
}

#else
#include <stdio.h>

// Fallback entry point used when WITH_FEATURE_GPU is not defined: reports
// that GPU support is missing and exits successfully.
int main(void) {
    // puts() appends the trailing newline itself.
    puts("tfhe-rs was not compiled with gpu support");
    return 0;
}
#endif
2 changes: 1 addition & 1 deletion tfhe/cbindgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ usize_is_size_t = true

[defines]
# "target_os = freebsd" = "DEFINE_FREEBSD"
# "feature = serde" = "DEFINE_SERDE"
"feature = gpu" = "WITH_FEATURE_GPU"


[export]
Expand Down
108 changes: 108 additions & 0 deletions tfhe/src/c_api/high_level_api/keys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,32 @@ pub struct PublicKey(pub(crate) crate::high_level_api::PublicKey);
pub struct CompactPublicKey(pub(crate) crate::high_level_api::CompactPublicKey);
pub struct CompressedCompactPublicKey(pub(crate) crate::high_level_api::CompressedCompactPublicKey);
pub struct ServerKey(pub(crate) crate::high_level_api::ServerKey);
/// Compressed version of the ServerKey
///
/// It saves storage space and transfer time.
/// The CompressedServerKey is also the key format that lets the caller
/// select the target hardware of the actual ServerKey when decompressing it.
pub struct CompressedServerKey(pub(crate) crate::high_level_api::CompressedServerKey);

/// ServerKey that lives on a Cuda GPU
#[cfg(feature = "gpu")]
pub struct CudaServerKey(pub(crate) crate::high_level_api::CudaServerKey);

impl_destroy_on_type!(ClientKey);
impl_destroy_on_type!(PublicKey);
impl_destroy_on_type!(CompactPublicKey);
impl_destroy_on_type!(CompressedCompactPublicKey);
impl_destroy_on_type!(ServerKey);
impl_destroy_on_type!(CompressedServerKey);
#[cfg(feature = "gpu")]
impl_destroy_on_type!(CudaServerKey);

impl_serialize_deserialize_on_type!(ClientKey);
impl_serialize_deserialize_on_type!(PublicKey);
impl_serialize_deserialize_on_type!(CompactPublicKey);
impl_serialize_deserialize_on_type!(CompressedCompactPublicKey);
impl_serialize_deserialize_on_type!(ServerKey);
impl_serialize_deserialize_on_type!(CompressedServerKey);

#[no_mangle]
pub unsafe extern "C" fn generate_keys(
Expand Down Expand Up @@ -51,13 +65,107 @@ pub unsafe extern "C" fn set_server_key(server_key: *const ServerKey) -> c_int {
})
}

/// Sets the cuda server key.
///
/// Once a cuda server key is set in a thread, all computations done in
/// that thread will actually happen on the Cuda GPU.
///
/// The key is cloned internally, so this function does not take ownership
/// of `server_key`.
#[cfg(feature = "gpu")]
#[no_mangle]
pub unsafe extern "C" fn set_cuda_server_key(server_key: *const CudaServerKey) -> c_int {
    catch_panic(|| {
        let key_wrapper = get_ref_checked(server_key).unwrap();

        crate::high_level_api::set_server_key(key_wrapper.0.clone());
    })
}

/// Unsets the server key by calling `high_level_api::unset_server_key`.
///
/// The call runs inside `catch_panic`, which produces the returned status code.
#[no_mangle]
pub unsafe extern "C" fn unset_server_key() -> c_int {
catch_panic(|| {
crate::high_level_api::unset_server_key();
})
}

/// Creates a new compressed server key from the given client key.
///
/// `result_server_key` is first reset to null, then set to a newly
/// heap-allocated `CompressedServerKey` once the key has been generated.
/// The client key is only borrowed.
#[no_mangle]
pub unsafe extern "C" fn compressed_server_key_new(
    client_key: *const ClientKey,
    result_server_key: *mut *mut CompressedServerKey,
) -> c_int {
    catch_panic(|| {
        check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
        // Leave a null output behind if anything below panics.
        *result_server_key = std::ptr::null_mut();

        let client_key_ref = get_ref_checked(client_key).unwrap();
        let wrapped_key = CompressedServerKey(crate::high_level_api::CompressedServerKey::new(
            &client_key_ref.0,
        ));

        *result_server_key = Box::into_raw(Box::new(wrapped_key));
    })
}

/// Decompresses the CompressedServerKey to a ServerKey that lives on CPU
///
/// The compressed key is cloned before being decompressed, so the key behind
/// `compressed_server_key` is left untouched.
/// `result_server_key` is first reset to null, then set to a newly
/// heap-allocated `ServerKey`.
#[no_mangle]
pub unsafe extern "C" fn compressed_server_key_decompress(
    compressed_server_key: *const CompressedServerKey,
    result_server_key: *mut *mut ServerKey,
) -> c_int {
    catch_panic(|| {
        check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
        // Leave a null output behind if anything below panics.
        *result_server_key = std::ptr::null_mut();

        let compressed_ref = get_ref_checked(compressed_server_key).unwrap();
        let compressed_copy = compressed_ref.0.clone();
        let wrapped_key = ServerKey(compressed_copy.decompress());

        *result_server_key = Box::into_raw(Box::new(wrapped_key));
    })
}

/// Decompresses the CompressedServerKey to a CudaServerKey that lives on GPU
///
/// The compressed key is only borrowed.
/// `result_server_key` is first reset to null, then set to a newly
/// heap-allocated `CudaServerKey`.
#[cfg(feature = "gpu")]
#[no_mangle]
pub unsafe extern "C" fn compressed_server_key_decompress_to_gpu(
    compressed_server_key: *const CompressedServerKey,
    result_server_key: *mut *mut CudaServerKey,
) -> c_int {
    catch_panic(|| {
        check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
        // Leave a null output behind if anything below panics.
        *result_server_key = std::ptr::null_mut();

        let compressed_ref = get_ref_checked(compressed_server_key).unwrap();
        let wrapped_key = CudaServerKey(compressed_ref.0.decompress_to_gpu());

        *result_server_key = Box::into_raw(Box::new(wrapped_key));
    })
}

/// Generates a client key with the given config
///
/// This function takes ownership of the config,
/// thus the given config pointer should not be used/freed after.
///
/// `result_client_key` is first reset to null, then set to a newly
/// heap-allocated `ClientKey`. Both pointers are validated before use: a null
/// or misaligned pointer makes the closure panic inside `catch_panic` instead
/// of being dereferenced.
#[no_mangle]
pub unsafe extern "C" fn client_key_generate(
    config: *mut super::config::Config,
    result_client_key: *mut *mut ClientKey,
) -> c_int {
    catch_panic(|| {
        check_ptr_is_non_null_and_aligned(result_client_key).unwrap();

        *result_client_key = std::ptr::null_mut();

        // `Box::from_raw` on a null or misaligned pointer is undefined
        // behaviour, so validate `config` before taking ownership of it.
        check_ptr_is_non_null_and_aligned(config).unwrap();
        let config = Box::from_raw(config);

        let cks = crate::high_level_api::ClientKey::generate(config.0);

        *result_client_key = Box::into_raw(Box::new(ClientKey(cks)));
    })
}

#[no_mangle]
pub unsafe extern "C" fn public_key_new(
client_key: *const ClientKey,
Expand Down

0 comments on commit ece82c5

Please sign in to comment.