diff --git a/.github/workflows/aws_tfhe_gpu_tests.yml b/.github/workflows/aws_tfhe_gpu_tests.yml
index 3fb6ef04f1..ba5c102298 100644
--- a/.github/workflows/aws_tfhe_gpu_tests.yml
+++ b/.github/workflows/aws_tfhe_gpu_tests.yml
@@ -94,6 +94,11 @@ jobs:
         run: |
           make test_user_doc_gpu
 
+      - name: Test C API
+        run: |
+          make test_c_api_gpu
+
+
       - name: Slack Notification
         if: ${{ always() }}
         continue-on-error: true
diff --git a/Makefile b/Makefile
index 933962aa0c..c95488a25b 100644
--- a/Makefile
+++ b/Makefile
@@ -307,6 +307,13 @@ build_c_api: install_rs_check_toolchain
 		-p $(TFHE_SPEC)
 	@"$(MAKE)" symlink_c_libs_without_fingerprint
 
+.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer with GPU support
+build_c_api_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,gpu \
+		-p $(TFHE_SPEC)
+	@"$(MAKE)" symlink_c_libs_without_fingerprint
+
 .PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
 build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
@@ -410,6 +417,10 @@ test_c_api_c: build_c_api
 .PHONY: test_c_api # Run all the tests for the C API
 test_c_api: test_c_api_rs test_c_api_c
 
+.PHONY: test_c_api_gpu # Run the C tests for the C API built with GPU support
+test_c_api_gpu: build_c_api_gpu
+	./scripts/c_api_tests.sh --gpu
+
 .PHONY: test_shortint_ci # Run the tests for shortint ci
 test_shortint_ci: install_rs_build_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
diff --git a/scripts/c_api_tests.sh b/scripts/c_api_tests.sh
index e416974348..526254978c 100755
--- a/scripts/c_api_tests.sh
+++ b/scripts/c_api_tests.sh
@@ -7,11 +7,12 @@ function usage() {
     echo
     echo "--help Print this message"
     echo "--build-only Pass to only build the tests without running them"
+    echo "--gpu Enable GPU support"
     echo
 }
 
 BUILD_ONLY=0
-
+WITH_FEATURE_GPU="OFF"
 while [ -n "$1" ]
 do
    case "$1" in
@@ -24,6 +25,9 @@ do
             BUILD_ONLY=1
             ;;
 
+        "--gpu" )
+            WITH_FEATURE_GPU="ON"
+            ;;
         *)
             echo "Unknown param : $1"
             exit 1
@@ -40,7 +44,7 @@ mkdir -p "${TFHE_BUILD_DIR}"
 
 cd "${TFHE_BUILD_DIR}"
 
-cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DCARGO_PROFILE="${CARGO_PROFILE}"
+cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DCARGO_PROFILE="${CARGO_PROFILE}" -DWITH_FEATURE_GPU="${WITH_FEATURE_GPU}"
 
 make -j
 
@@ -55,5 +59,8 @@ if [[ $(uname) == "Darwin" ]]; then
     nproc_bin="sysctl -n hw.logicalcpu"
 fi
 
-# Let's go parallel
-ARGS="-j$(${nproc_bin})" make test
+if [ "${WITH_FEATURE_GPU}" == "ON" ]; then
+    ctest --output-on-failure --test-dir "." --parallel "$(${nproc_bin})" --tests-regex ".*cuda.*"
+else
+    ctest --output-on-failure --test-dir "." --parallel "$(${nproc_bin})" --exclude-regex ".*cuda.*"
+fi
diff --git a/tfhe/build.rs b/tfhe/build.rs
index b8dab7d9da..54deb47e58 100644
--- a/tfhe/build.rs
+++ b/tfhe/build.rs
@@ -55,6 +55,8 @@ fn gen_c_api() {
             "shortint",
             #[cfg(feature = "integer")]
             "integer",
+            #[cfg(feature = "gpu")]
+            "gpu",
         ];
 
         let parse_expand_vec = if parse_expand_features_vec.is_empty() {
diff --git a/tfhe/c_api_tests/CMakeLists.txt b/tfhe/c_api_tests/CMakeLists.txt
index a82d487c7a..4b3df9d433 100644
--- a/tfhe/c_api_tests/CMakeLists.txt
+++ b/tfhe/c_api_tests/CMakeLists.txt
@@ -7,6 +7,8 @@ if(NOT CARGO_PROFILE)
 endif()
 set(TFHE_C_API_RELEASE "${CMAKE_CURRENT_SOURCE_DIR}/../../target/${CARGO_PROFILE}")
 
+option(WITH_FEATURE_GPU "Enable if tfhe-rs C API was compiled with the 'gpu' feature activated" OFF)
+
 include_directories(${TFHE_C_API_RELEASE})
 # This one is to fetch the dynamic buffer header
 include_directories(${TFHE_C_API_RELEASE}/deps)
@@ -22,6 +24,11 @@ if(APPLE)
     endif()
 endif()
 
+if (WITH_FEATURE_GPU)
+    find_package(CUDAToolkit 10.0 REQUIRED)
+    find_package(OpenMP REQUIRED)
+endif()
+
 file(GLOB TEST_CASES test_*.c)
 foreach (testsourcefile ${TEST_CASES})
     get_filename_component(testname ${testsourcefile} NAME_WLE)
@@ -34,6 +41,12 @@ foreach (testsourcefile ${TEST_CASES})
     )
     target_include_directories(${testname} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
     target_link_libraries(${testname} LINK_PUBLIC Tfhe TfheDynamicBuffer m pthread dl)
+
+    if (WITH_FEATURE_GPU)
+        target_link_libraries(${testname} LINK_PUBLIC CUDA::cudart -lstdc++ OpenMP::OpenMP_CXX)
+        target_compile_definitions(${testname} PUBLIC -DWITH_FEATURE_GPU)
+    endif()
+
     if(APPLE)
         target_link_libraries(${testname} LINK_PUBLIC ${SECURITY_FRAMEWORK})
     endif()
diff --git a/tfhe/c_api_tests/test_high_level_integers_cuda.c b/tfhe/c_api_tests/test_high_level_integers_cuda.c
new file mode 100644
index 0000000000..92689f2234
--- /dev/null
+++ b/tfhe/c_api_tests/test_high_level_integers_cuda.c
@@ -0,0 +1,103 @@
+#if defined(WITH_FEATURE_GPU)
+#include <tfhe.h>
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int uint8_client_key(const ClientKey *client_key) {
+  int ok;
+  FheUint8 *lhs = NULL;
+  FheUint8 *rhs = NULL;
+  FheUint8 *result = NULL;
+
+  uint8_t lhs_clear = 123;
+  uint8_t rhs_clear = 14;
+
+  ok = fhe_uint8_try_encrypt_with_client_key_u8(lhs_clear, client_key, &lhs);
+  assert(ok == 0);
+
+  ok = fhe_uint8_try_encrypt_with_client_key_u8(rhs_clear, client_key, &rhs);
+  assert(ok == 0);
+
+  uint8_t clear;
+
+  // Check addition
+  {
+    ok = fhe_uint8_add(lhs, rhs, &result);
+    assert(ok == 0);
+
+    ok = fhe_uint8_decrypt(result, client_key, &clear);
+    assert(ok == 0);
+
+    assert(clear == (lhs_clear + rhs_clear));
+  }
+
+  // Check sum
+  {
+    FheUint8 *sum_result;
+    const FheUint8 *data[2] = {lhs, rhs};
+    ok = fhe_uint8_sum(data, 2, &sum_result);
+    assert(ok == 0);
+
+    clear = 0;
+    ok = fhe_uint8_decrypt(sum_result, client_key, &clear);
+    assert(ok == 0);
+
+    assert(clear == (lhs_clear + rhs_clear));
+    fhe_uint8_destroy(sum_result);
+  }
+
+  fhe_uint8_destroy(lhs);
+  fhe_uint8_destroy(rhs);
+  fhe_uint8_destroy(result);
+  return ok;
+}
+
+
+int main(void) {
+  int ok = 0;
+  {
+    ConfigBuilder *builder;
+    Config *config;
+
+    ok = config_builder_default(&builder);
+    assert(ok == 0);
+    ok = config_builder_build(builder, &config);
+    assert(ok == 0);
+
+    ClientKey *client_key = NULL;
+    CompressedServerKey *compressed_sks = NULL;
+    CudaServerKey *cuda_server_key = NULL;
+
+    ok = client_key_generate(config, &client_key);
+    assert(ok == 0);
+
+    ok = compressed_server_key_new(client_key, &compressed_sks);
+    assert(ok == 0);
+
+    ok = compressed_server_key_decompress_to_gpu(compressed_sks, &cuda_server_key);
+    assert(ok == 0);
+
+    ok = set_cuda_server_key(cuda_server_key);
+    assert(ok == 0);
+
+    uint8_client_key(client_key);
+
+    client_key_destroy(client_key);
+    compressed_server_key_destroy(compressed_sks);
+    cuda_server_key_destroy(cuda_server_key);
+  }
+
+  return ok;
+}
+
+#else
+#include <stdio.h>
+
+int main(void) {
+  fputs("tfhe-rs was not compiled with gpu support\n", stdout);
+  return 0;
+}
+#endif
diff --git a/tfhe/cbindgen.toml b/tfhe/cbindgen.toml
index 033f17399c..1392b8f58a 100644
--- a/tfhe/cbindgen.toml
+++ b/tfhe/cbindgen.toml
@@ -45,7 +45,7 @@ usize_is_size_t = true
 
 [defines]
 # "target_os = freebsd" = "DEFINE_FREEBSD"
-# "feature = serde" = "DEFINE_SERDE"
+"feature = gpu" = "WITH_FEATURE_GPU"
 
 [export]
 
diff --git a/tfhe/src/c_api/high_level_api/keys.rs b/tfhe/src/c_api/high_level_api/keys.rs
index 1eaaac56c1..da04699a3c 100644
--- a/tfhe/src/c_api/high_level_api/keys.rs
+++ b/tfhe/src/c_api/high_level_api/keys.rs
@@ -6,18 +6,32 @@ pub struct PublicKey(pub(crate) crate::high_level_api::PublicKey);
 pub struct CompactPublicKey(pub(crate) crate::high_level_api::CompactPublicKey);
 pub struct CompressedCompactPublicKey(pub(crate) crate::high_level_api::CompressedCompactPublicKey);
 pub struct ServerKey(pub(crate) crate::high_level_api::ServerKey);
+/// Compressed version of the ServerKey
+///
+/// Allows to save storage space and transfer time.
+/// Also, the CompressedServerKey is the key format that allows to select
+/// the target hardware of the actual ServerKey when decompressing it.
+pub struct CompressedServerKey(pub(crate) crate::high_level_api::CompressedServerKey);
+
+/// ServerKey that lives on a Cuda GPU
+#[cfg(feature = "gpu")]
+pub struct CudaServerKey(pub(crate) crate::high_level_api::CudaServerKey);
 
 impl_destroy_on_type!(ClientKey);
 impl_destroy_on_type!(PublicKey);
 impl_destroy_on_type!(CompactPublicKey);
 impl_destroy_on_type!(CompressedCompactPublicKey);
 impl_destroy_on_type!(ServerKey);
+impl_destroy_on_type!(CompressedServerKey);
+#[cfg(feature = "gpu")]
+impl_destroy_on_type!(CudaServerKey);
 
 impl_serialize_deserialize_on_type!(ClientKey);
 impl_serialize_deserialize_on_type!(PublicKey);
 impl_serialize_deserialize_on_type!(CompactPublicKey);
 impl_serialize_deserialize_on_type!(CompressedCompactPublicKey);
 impl_serialize_deserialize_on_type!(ServerKey);
+impl_serialize_deserialize_on_type!(CompressedServerKey);
 
 #[no_mangle]
 pub unsafe extern "C" fn generate_keys(
@@ -51,6 +65,23 @@ pub unsafe extern "C" fn set_server_key(server_key: *const ServerKey) -> c_int {
     })
 }
 
+/// Sets the cuda server key.
+///
+/// Once a cuda server key is set in a thread, all computations done in
+/// that thread will actually happen on the Cuda GPU.
+///
+/// Does not take ownership of the key
+#[cfg(feature = "gpu")]
+#[no_mangle]
+pub unsafe extern "C" fn set_cuda_server_key(server_key: *const CudaServerKey) -> c_int {
+    catch_panic(|| {
+        let server_key = get_ref_checked(server_key).unwrap();
+
+        let cloned = server_key.0.clone();
+        crate::high_level_api::set_server_key(cloned);
+    })
+}
+
 #[no_mangle]
 pub unsafe extern "C" fn unset_server_key() -> c_int {
     catch_panic(|| {
@@ -58,6 +89,85 @@ pub unsafe extern "C" fn unset_server_key() -> c_int {
     })
 }
 
+/// Creates a new compressed server key
+#[no_mangle]
+pub unsafe extern "C" fn compressed_server_key_new(
+    client_key: *const ClientKey,
+    result_server_key: *mut *mut CompressedServerKey,
+) -> c_int {
+    catch_panic(|| {
+        check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
+        *result_server_key = std::ptr::null_mut();
+
+        let cks = get_ref_checked(client_key).unwrap();
+
+        let sks = crate::high_level_api::CompressedServerKey::new(&cks.0);
+
+        *result_server_key = Box::into_raw(Box::new(CompressedServerKey(sks)));
+    })
+}
+
+/// Decompresses the CompressedServerKey to a ServerKey that lives on CPU
+#[no_mangle]
+pub unsafe extern "C" fn compressed_server_key_decompress(
+    compressed_server_key: *const CompressedServerKey,
+    result_server_key: *mut *mut ServerKey,
+) -> c_int {
+    catch_panic(|| {
+        check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
+        *result_server_key = std::ptr::null_mut();
+
+        let c_sks = get_ref_checked(compressed_server_key).unwrap();
+
+        let sks = c_sks.0.clone().decompress();
+
+        *result_server_key = Box::into_raw(Box::new(ServerKey(sks)));
+    })
+}
+
+/// Decompresses the CompressedServerKey to a CudaServerKey that lives on GPU
+#[cfg(feature = "gpu")]
+#[no_mangle]
+pub unsafe extern "C" fn compressed_server_key_decompress_to_gpu(
+    compressed_server_key: *const CompressedServerKey,
+    result_server_key: *mut *mut CudaServerKey,
+) -> c_int {
+    catch_panic(|| {
+        check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
+        *result_server_key = std::ptr::null_mut();
+
+        let c_sks = get_ref_checked(compressed_server_key).unwrap();
+
+        let sks = c_sks.0.decompress_to_gpu();
+
+        *result_server_key = Box::into_raw(Box::new(CudaServerKey(sks)));
+    })
+}
+
+/// Generates a client key with the given config
+///
+/// This function takes ownership of the config,
+/// thus the given config pointer should not be used/freed after.
+#[no_mangle]
+pub unsafe extern "C" fn client_key_generate(
+    config: *mut super::config::Config,
+    result_client_key: *mut *mut ClientKey,
+) -> c_int {
+    catch_panic(|| {
+        check_ptr_is_non_null_and_aligned(result_client_key).unwrap();
+
+        *result_client_key = std::ptr::null_mut();
+
+        // Reject a NULL/misaligned config: `Box::from_raw` on it would be undefined behavior
+        check_ptr_is_non_null_and_aligned(config).unwrap();
+        let config = Box::from_raw(config);
+
+        let cks = crate::high_level_api::ClientKey::generate(config.0);
+
+        *result_client_key = Box::into_raw(Box::new(ClientKey(cks)));
+    })
+}
+
 #[no_mangle]
 pub unsafe extern "C" fn public_key_new(
     client_key: *const ClientKey,