diff --git a/.github/workflows/aws_tfhe_gpu_tests.yml b/.github/workflows/aws_tfhe_gpu_tests.yml
index 3fb6ef04f1..ba5c102298 100644
--- a/.github/workflows/aws_tfhe_gpu_tests.yml
+++ b/.github/workflows/aws_tfhe_gpu_tests.yml
@@ -94,6 +94,11 @@ jobs:
         run: |
           make test_user_doc_gpu
 
+      - name: Test C API
+        run: |
+          make test_c_api_gpu
+
+
       - name: Slack Notification
         if: ${{ always() }}
         continue-on-error: true
diff --git a/Makefile b/Makefile
index 933962aa0c..c95488a25b 100644
--- a/Makefile
+++ b/Makefile
@@ -307,6 +307,13 @@ build_c_api: install_rs_check_toolchain
 		-p $(TFHE_SPEC)
 	@"$(MAKE)" symlink_c_libs_without_fingerprint
 
+.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer with the gpu feature enabled
+build_c_api_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,gpu \
+		-p $(TFHE_SPEC)
+	@"$(MAKE)" symlink_c_libs_without_fingerprint
+
 .PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
 build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
@@ -410,6 +417,10 @@ test_c_api_c: build_c_api
 .PHONY: test_c_api # Run all the tests for the C API
 test_c_api: test_c_api_rs test_c_api_c
 
+.PHONY: test_c_api_gpu # Run the GPU (cuda) C tests for the C API
+test_c_api_gpu: build_c_api_gpu
+	./scripts/c_api_tests.sh --gpu
+
 .PHONY: test_shortint_ci # Run the tests for shortint ci
 test_shortint_ci: install_rs_build_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
diff --git a/scripts/c_api_tests.sh b/scripts/c_api_tests.sh
index e416974348..526254978c 100755
--- a/scripts/c_api_tests.sh
+++ b/scripts/c_api_tests.sh
@@ -7,11 +7,12 @@ function usage() {
     echo
     echo "--help                    Print this message"
     echo "--build-only              Pass to only build the tests without running them"
+    echo "--gpu                     Enable GPU support"
     echo
 }
 
 BUILD_ONLY=0
-
+WITH_FEATURE_GPU="OFF"
 while [ -n "$1" ]
 do
    case "$1" in
@@ -24,6 +25,9 @@ do
             BUILD_ONLY=1
             ;;
 
+        "--gpu" )
+            WITH_FEATURE_GPU="ON"
+            ;;
         *)
             echo "Unknown param : $1"
             exit 1
@@ -40,7 +44,7 @@ mkdir -p "${TFHE_BUILD_DIR}"
 
 cd "${TFHE_BUILD_DIR}"
 
-cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DCARGO_PROFILE="${CARGO_PROFILE}"
+cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DCARGO_PROFILE="${CARGO_PROFILE}" -DWITH_FEATURE_GPU="${WITH_FEATURE_GPU}"
 
 make -j
 
@@ -55,5 +59,8 @@ if [[ $(uname) == "Darwin" ]]; then
     nproc_bin="sysctl -n hw.logicalcpu"
 fi
 
-# Let's go parallel
-ARGS="-j$(${nproc_bin})" make test
+if [ "${WITH_FEATURE_GPU}" == "ON" ]; then
+    ctest --output-on-failure --test-dir "." --parallel "$(${nproc_bin})" --tests-regex ".*cuda.*"
+else
+    ctest --output-on-failure --test-dir "." --parallel "$(${nproc_bin})" --exclude-regex ".*cuda.*"
+fi
diff --git a/tfhe/build.rs b/tfhe/build.rs
index b8dab7d9da..54deb47e58 100644
--- a/tfhe/build.rs
+++ b/tfhe/build.rs
@@ -55,6 +55,8 @@ fn gen_c_api() {
         "shortint",
         #[cfg(feature = "integer")]
         "integer",
+        #[cfg(feature = "gpu")]
+        "gpu",
     ];
 
     let parse_expand_vec = if parse_expand_features_vec.is_empty() {
diff --git a/tfhe/c_api_tests/CMakeLists.txt b/tfhe/c_api_tests/CMakeLists.txt
index a82d487c7a..4b3df9d433 100644
--- a/tfhe/c_api_tests/CMakeLists.txt
+++ b/tfhe/c_api_tests/CMakeLists.txt
@@ -7,6 +7,8 @@ if(NOT CARGO_PROFILE)
 endif()
 set(TFHE_C_API_RELEASE "${CMAKE_CURRENT_SOURCE_DIR}/../../target/${CARGO_PROFILE}")
 
+option(WITH_FEATURE_GPU "Enable if tfhe-rs C API was compiled with the 'gpu' feature activated" OFF)
+
 include_directories(${TFHE_C_API_RELEASE})
 # This one is to fetch the dynamic buffer header
 include_directories(${TFHE_C_API_RELEASE}/deps)
@@ -22,6 +24,11 @@ if(APPLE)
     endif()
 endif()
 
+if (WITH_FEATURE_GPU)
+    find_package(CUDAToolkit 10.0 REQUIRED)
+    find_package(OpenMP REQUIRED)
+endif()
+
 file(GLOB TEST_CASES test_*.c)
 foreach (testsourcefile ${TEST_CASES})
     get_filename_component(testname ${testsourcefile} NAME_WLE)
@@ -34,6 +41,12 @@ foreach (testsourcefile ${TEST_CASES})
     )
     target_include_directories(${testname} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
     target_link_libraries(${testname} LINK_PUBLIC Tfhe TfheDynamicBuffer m pthread dl)
+    # GPU build: extra link deps, plus the WITH_FEATURE_GPU define that the test sources check with #if defined(...)
+    if (WITH_FEATURE_GPU)
+        target_link_libraries(${testname} LINK_PUBLIC CUDA::cudart -lstdc++ OpenMP::OpenMP_CXX)
+        target_compile_definitions(${testname} PRIVATE WITH_FEATURE_GPU)
+    endif()
+
     if(APPLE)
         target_link_libraries(${testname} LINK_PUBLIC ${SECURITY_FRAMEWORK})
     endif()
diff --git a/tfhe/c_api_tests/test_high_level_integers_cuda.c b/tfhe/c_api_tests/test_high_level_integers_cuda.c
new file mode 100644
index 0000000000..92689f2234
--- /dev/null
+++ b/tfhe/c_api_tests/test_high_level_integers_cuda.c
@@ -0,0 +1,103 @@
+#if defined(WITH_FEATURE_GPU)
+#include <tfhe.h>
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdint.h>
+
+int uint8_client_key(const ClientKey *client_key) {
+  // Encrypts two u8 values with client_key, then checks fhe_uint8_add and fhe_uint8_sum on them.
+  int ok;
+  FheUint8 *lhs = NULL;
+  FheUint8 *rhs = NULL;
+  FheUint8 *result = NULL;
+  uint8_t lhs_clear = 123;
+  uint8_t rhs_clear = 14;
+
+  ok = fhe_uint8_try_encrypt_with_client_key_u8(lhs_clear, client_key, &lhs);
+  assert(ok == 0);
+
+  ok = fhe_uint8_try_encrypt_with_client_key_u8(rhs_clear, client_key, &rhs);
+  assert(ok == 0);
+
+  uint8_t clear;
+
+  // Check addition
+  {
+    ok = fhe_uint8_add(lhs, rhs, &result);
+    assert(ok == 0);
+
+    ok = fhe_uint8_decrypt(result, client_key, &clear);
+    assert(ok == 0);
+
+    assert(clear == (lhs_clear + rhs_clear));
+  }
+
+  // Check sum: must decrypt sum_result, not the `result` left over from the addition check
+  {
+    FheUint8 *sum_result = NULL;
+    const FheUint8 *data[2] = {lhs, rhs};
+    ok = fhe_uint8_sum(data, 2, &sum_result);
+    assert(ok == 0);
+
+    clear = 0; // reset so the value decrypted in the addition check cannot satisfy the assert below
+    ok = fhe_uint8_decrypt(sum_result, client_key, &clear);
+    assert(ok == 0);
+
+    assert(clear == (lhs_clear + rhs_clear));
+    fhe_uint8_destroy(sum_result);
+  }
+
+  fhe_uint8_destroy(lhs);
+  fhe_uint8_destroy(rhs);
+  fhe_uint8_destroy(result);
+  return ok;
+}
+
+
+int main(void) {
+  int ok = 0;
+  {
+    ConfigBuilder *builder;
+    Config *config;
+    // Build a default config, derive the keys, move the server key to the GPU, then run the test.
+    ok = config_builder_default(&builder);
+    assert(ok == 0);
+    ok = config_builder_build(builder, &config);
+    assert(ok == 0);
+
+    ClientKey *client_key = NULL;
+    CompressedServerKey *compressed_sks = NULL;
+    CudaServerKey *cuda_server_key = NULL;
+    // client_key_generate takes ownership of config: it must not be used or freed afterwards
+    ok = client_key_generate(config, &client_key);
+    assert(ok == 0);
+
+    ok = compressed_server_key_new(client_key, &compressed_sks);
+    assert(ok == 0);
+
+    ok = compressed_server_key_decompress_to_gpu(compressed_sks, &cuda_server_key);
+    assert(ok == 0);
+
+    ok = set_cuda_server_key(cuda_server_key);
+    assert(ok == 0);
+    // Keep the test's status so the process exit code reflects it instead of discarding it
+    ok = uint8_client_key(client_key);
+
+    client_key_destroy(client_key);
+    compressed_server_key_destroy(compressed_sks);
+    cuda_server_key_destroy(cuda_server_key);
+  }
+
+  return ok;
+}
+
+#else
+#include <stdio.h>
+
+int main(void) {
+  fputs("tfhe-rs was not compiled with gpu support\n", stdout);
+  return 0;
+}
+#endif
diff --git a/tfhe/cbindgen.toml b/tfhe/cbindgen.toml
index 033f17399c..1392b8f58a 100644
--- a/tfhe/cbindgen.toml
+++ b/tfhe/cbindgen.toml
@@ -45,7 +45,7 @@ usize_is_size_t = true
 
 [defines]
 # "target_os = freebsd" = "DEFINE_FREEBSD"
-# "feature = serde" = "DEFINE_SERDE"
+"feature = gpu" = "WITH_FEATURE_GPU"
 
 
 [export]
diff --git a/tfhe/src/c_api/high_level_api/keys.rs b/tfhe/src/c_api/high_level_api/keys.rs
index 1eaaac56c1..da04699a3c 100644
--- a/tfhe/src/c_api/high_level_api/keys.rs
+++ b/tfhe/src/c_api/high_level_api/keys.rs
@@ -6,18 +6,32 @@ pub struct PublicKey(pub(crate) crate::high_level_api::PublicKey);
 pub struct CompactPublicKey(pub(crate) crate::high_level_api::CompactPublicKey);
 pub struct CompressedCompactPublicKey(pub(crate) crate::high_level_api::CompressedCompactPublicKey);
 pub struct ServerKey(pub(crate) crate::high_level_api::ServerKey);
+/// Compressed version of the ServerKey
+///
+/// Allows to save storage space and transfer time.
+/// Also, the CompressedServerKey is the key format that allows to select
+/// the target hardware of the actual ServerKey when decompressing it.
+pub struct CompressedServerKey(pub(crate) crate::high_level_api::CompressedServerKey);
+
+/// ServerKey that lives on a Cuda GPU
+#[cfg(feature = "gpu")]
+pub struct CudaServerKey(pub(crate) crate::high_level_api::CudaServerKey);
 
 impl_destroy_on_type!(ClientKey);
 impl_destroy_on_type!(PublicKey);
 impl_destroy_on_type!(CompactPublicKey);
 impl_destroy_on_type!(CompressedCompactPublicKey);
 impl_destroy_on_type!(ServerKey);
+impl_destroy_on_type!(CompressedServerKey);
+#[cfg(feature = "gpu")]
+impl_destroy_on_type!(CudaServerKey);
 
 impl_serialize_deserialize_on_type!(ClientKey);
 impl_serialize_deserialize_on_type!(PublicKey);
 impl_serialize_deserialize_on_type!(CompactPublicKey);
 impl_serialize_deserialize_on_type!(CompressedCompactPublicKey);
 impl_serialize_deserialize_on_type!(ServerKey);
+impl_serialize_deserialize_on_type!(CompressedServerKey);
 
 #[no_mangle]
 pub unsafe extern "C" fn generate_keys(
@@ -51,6 +65,23 @@ pub unsafe extern "C" fn set_server_key(server_key: *const ServerKey) -> c_int {
     })
 }
 
+/// Sets the cuda server key.
+///
+/// Once a cuda server key is set in a thread, all computations done in
+/// that thread will actually happen on the Cuda GPU.
+///
+/// Does not take ownership of the key: it is cloned before being set.
+#[cfg(feature = "gpu")]
+#[no_mangle]
+pub unsafe extern "C" fn set_cuda_server_key(server_key: *const CudaServerKey) -> c_int {
+    catch_panic(|| {
+        let server_key = get_ref_checked(server_key).unwrap();
+
+        let cloned = server_key.0.clone();
+        crate::high_level_api::set_server_key(cloned);
+    })
+}
+
 #[no_mangle]
 pub unsafe extern "C" fn unset_server_key() -> c_int {
     catch_panic(|| {
@@ -58,6 +89,83 @@ pub unsafe extern "C" fn unset_server_key() -> c_int {
     })
 }
 
+/// Creates a new compressed server key from the given client key (the client key is only borrowed)
+#[no_mangle]
+pub unsafe extern "C" fn compressed_server_key_new(
+    client_key: *const ClientKey,
+    result_server_key: *mut *mut CompressedServerKey,
+) -> c_int {
+    catch_panic(|| {
+        check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
+        *result_server_key = std::ptr::null_mut();
+
+        let cks = get_ref_checked(client_key).unwrap();
+
+        let sks = crate::high_level_api::CompressedServerKey::new(&cks.0);
+
+        *result_server_key = Box::into_raw(Box::new(CompressedServerKey(sks)));
+    })
+}
+
+/// Decompresses a clone of the CompressedServerKey to a ServerKey that lives on CPU (the input is not consumed)
+#[no_mangle]
+pub unsafe extern "C" fn compressed_server_key_decompress(
+    compressed_server_key: *const CompressedServerKey,
+    result_server_key: *mut *mut ServerKey,
+) -> c_int {
+    catch_panic(|| {
+        check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
+        *result_server_key = std::ptr::null_mut();
+
+        let c_sks = get_ref_checked(compressed_server_key).unwrap();
+
+        let sks = c_sks.0.clone().decompress();
+
+        *result_server_key = Box::into_raw(Box::new(ServerKey(sks)));
+    })
+}
+
+/// Decompresses the CompressedServerKey to a CudaServerKey that lives on GPU (the input is only borrowed)
+#[cfg(feature = "gpu")]
+#[no_mangle]
+pub unsafe extern "C" fn compressed_server_key_decompress_to_gpu(
+    compressed_server_key: *const CompressedServerKey,
+    result_server_key: *mut *mut CudaServerKey,
+) -> c_int {
+    catch_panic(|| {
+        check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
+        *result_server_key = std::ptr::null_mut();
+
+        let c_sks = get_ref_checked(compressed_server_key).unwrap();
+
+        let sks = c_sks.0.decompress_to_gpu();
+
+        *result_server_key = Box::into_raw(Box::new(CudaServerKey(sks)));
+    })
+}
+
+/// Generates a client key with the given config
+///
+/// This function takes ownership of the config,
+/// thus the given config pointer should not be used/freed after.
+#[no_mangle]
+pub unsafe extern "C" fn client_key_generate(
+    config: *mut super::config::Config,
+    result_client_key: *mut *mut ClientKey,
+) -> c_int {
+    catch_panic(|| {
+        check_ptr_is_non_null_and_aligned(result_client_key).unwrap();
+        *result_client_key = std::ptr::null_mut();
+
+        // Reject a NULL/misaligned config here: Box::from_raw on such a pointer is undefined behaviour
+        check_ptr_is_non_null_and_aligned(config).unwrap();
+        let config = Box::from_raw(config);
+
+        let cks = crate::high_level_api::ClientKey::generate(config.0);
+        *result_client_key = Box::into_raw(Box::new(ClientKey(cks)));
+    })
+}
+
 #[no_mangle]
 pub unsafe extern "C" fn public_key_new(
     client_key: *const ClientKey,