From b532543df4b52e7fbd4d87a8ee58c73b6f0d8001 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 8 Jan 2025 21:28:46 -0500
Subject: [PATCH 01/13] Bump jinja2 in /scripts/copy_from_upstream in the pip
 group (#2036)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps the pip group in /scripts/copy_from_upstream with 1 update: [jinja2](https://github.com/pallets/jinja).

Updates `jinja2` from 3.1.4 to 3.1.5
- [Release notes](https://github.com/pallets/jinja/releases)
- [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst)
- [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5)

---
updated-dependencies:
- dependency-name: jinja2
  dependency-type: direct:production
  dependency-group: pip
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Pablo Gutiérrez <pablogf@MSI.>
Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 scripts/copy_from_upstream/requirements.in  | 2 +-
 scripts/copy_from_upstream/requirements.txt | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/copy_from_upstream/requirements.in b/scripts/copy_from_upstream/requirements.in
index e303f900f2..67c1f28252 100644
--- a/scripts/copy_from_upstream/requirements.in
+++ b/scripts/copy_from_upstream/requirements.in
@@ -1,7 +1,7 @@
 attrs==20.3.0
 GitPython==3.1.41
 importlib-metadata==3.7.0
-Jinja2==3.1.4
+Jinja2==3.1.5
 markdown-it-py==2.2.0
 MarkupSafe==2.1.3
 mdit-py-plugins==0.3.4
diff --git a/scripts/copy_from_upstream/requirements.txt b/scripts/copy_from_upstream/requirements.txt
index e123830d25..888ef5d839 100644
--- a/scripts/copy_from_upstream/requirements.txt
+++ b/scripts/copy_from_upstream/requirements.txt
@@ -20,9 +20,9 @@ importlib-metadata==3.7.0 \
     --hash=sha256:24499ffde1b80be08284100393955842be4a59c7c16bbf2738aad0e464a8e0aa \
     --hash=sha256:c6af5dbf1126cd959c4a8d8efd61d4d3c83bddb0459a17e554284a077574b614
     # via -r requirements.in
-jinja2==3.1.4 \
-    --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \
-    --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d
+jinja2==3.1.5 \
+    --hash=sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb \
+    --hash=sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb
     # via -r requirements.in
 markdown-it-py==2.2.0 \
     --hash=sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30 \

From 9136dd75028d3c1c2f248e7b6ae2b966bb8be67c Mon Sep 17 00:00:00 2001
From: Daiki Ueno <dueno@redhat.com>
Date: Thu, 16 Jan 2025 05:55:51 +0900
Subject: [PATCH 02/13] Avoid unresolved symbols from libcrypto when compiled
 with OQS_DLOPEN_OPENSSL (#2043)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Do not assume OpenSSL memory functions when libcrypto is dlopened

Otherwise, when the OQS_DLOPEN_OPENSSL is defined but OpenSSL is
used only partially, e.g., with OQS_USE_SHA3_OPENSSL=ON, there will be
some unresolved symbols in the final artifact:

```
$ cmake -GNinja -DBUILD_SHARED_LIBS=ON -DOQS_USE_AES_OPENSSL=ON -DOQS_USE_AES_INSTRUCTIONS=OFF -DOQS_DIST_BUILD=ON -DOQS_USE_SHA3_OPENSSL=ON -DOQS_DLOPEN_OPENSSL=ON -DCMAKE_BUILD_TYPE=Debug -LAH ..
$ ninja
$ nm -g lib/liboqs.so.0.12.1-dev | grep '^[[:space:]]*U '
                 U __assert_fail@GLIBC_2.2.5
                 U CRYPTO_free
                 U CRYPTO_malloc
                 U dlopen@GLIBC_2.34
                 U dlsym@GLIBC_2.34
```

Signed-off-by: Daiki Ueno <dueno@redhat.com>

* Wrap OpenSSL memory functions with OSSL_FUNC

This enables those OpenSSL memory functions can be either resolved at
build time or at run-time through dlopen. Note that we use CRYPTO_*
functions instead of OPENSSL_* as the latter are defined as a macro
and cannot be dynamically resolved.

Signed-off-by: Daiki Ueno <dueno@redhat.com>

---------

Signed-off-by: Daiki Ueno <dueno@redhat.com>
Signed-off-by: Pablo Gutiérrez <pablogf@MSI.>
Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 src/common/common.c         | 35 ++++++++++++---
 src/common/common.h         | 86 +++++++++++++------------------------
 src/common/ossl_functions.h |  6 ++-
 src/common/ossl_helpers.h   |  1 +
 4 files changed, 66 insertions(+), 62 deletions(-)

diff --git a/src/common/common.c b/src/common/common.c
index 795f3f97c9..7f45e37b1b 100644
--- a/src/common/common.c
+++ b/src/common/common.c
@@ -299,8 +299,8 @@ OQS_API void OQS_MEM_secure_free(void *ptr, size_t len) {
 }
 
 OQS_API void OQS_MEM_insecure_free(void *ptr) {
-#if (defined(OQS_USE_OPENSSL) || defined(OQS_DLOPEN_OPENSSL)) && defined(OPENSSL_VERSION_NUMBER)
-	OPENSSL_free(ptr);
+#if defined(OQS_USE_OPENSSL) && defined(OPENSSL_VERSION_NUMBER)
+	OSSL_FUNC(CRYPTO_free)(ptr, OPENSSL_FILE, OPENSSL_LINE);
 #else
 	free(ptr); // IGNORE memory-check
 #endif
@@ -313,7 +313,7 @@ void *OQS_MEM_aligned_alloc(size_t alignment, size_t size) {
 		return NULL;
 	}
 	const size_t offset = alignment - 1 + sizeof(uint8_t);
-	uint8_t *buffer = OPENSSL_malloc(size + offset);
+	uint8_t *buffer = OSSL_FUNC(CRYPTO_malloc)(size + offset, OPENSSL_FILE, OPENSSL_LINE);
 	if (!buffer) {
 		return NULL;
 	}
@@ -321,7 +321,7 @@ void *OQS_MEM_aligned_alloc(size_t alignment, size_t size) {
 	ptrdiff_t diff = ptr - buffer;
 	if (diff > UINT8_MAX) {
 		// Free and return NULL if alignment is too large
-		OPENSSL_free(buffer);
+		OSSL_FUNC(CRYPTO_free)(buffer, OPENSSL_FILE, OPENSSL_LINE);
 		errno = EINVAL;
 		return NULL;
 	}
@@ -396,7 +396,7 @@ void OQS_MEM_aligned_free(void *ptr) {
 #if defined(OQS_USE_OPENSSL)
 	// Use OpenSSL's free function
 	uint8_t *u8ptr = ptr;
-	OPENSSL_free(u8ptr - u8ptr[-1]);
+	OSSL_FUNC(CRYPTO_free)(u8ptr - u8ptr[-1], OPENSSL_FILE, OPENSSL_LINE);
 #elif defined(OQS_HAVE_ALIGNED_ALLOC) || defined(OQS_HAVE_POSIX_MEMALIGN) || defined(OQS_HAVE_MEMALIGN)
 	free(ptr); // IGNORE memory-check
 #elif defined(__MINGW32__) || defined(__MINGW64__)
@@ -410,3 +410,28 @@ void OQS_MEM_aligned_free(void *ptr) {
 	free(u8ptr - u8ptr[-1]); // IGNORE memory-check
 #endif
 }
+
+OQS_API void *OQS_MEM_malloc(size_t size) {
+#if defined(OQS_USE_OPENSSL)
+	return OSSL_FUNC(CRYPTO_malloc)(size, OPENSSL_FILE, OPENSSL_LINE);
+#else
+	return malloc(size); // IGNORE memory-check
+#endif
+}
+
+OQS_API void *OQS_MEM_calloc(size_t num_elements, size_t element_size) {
+#if defined(OQS_USE_OPENSSL)
+	return OSSL_FUNC(CRYPTO_zalloc)(num_elements * element_size,
+	                                OPENSSL_FILE, OPENSSL_LINE);
+#else
+	return calloc(num_elements, element_size); // IGNORE memory-check
+#endif
+}
+
+OQS_API char *OQS_MEM_strdup(const char *str) {
+#if defined(OQS_USE_OPENSSL)
+	return OSSL_FUNC(CRYPTO_strdup)(str, OPENSSL_FILE, OPENSSL_LINE);
+#else
+	return strdup(str); // IGNORE memory-check
+#endif
+}
diff --git a/src/common/common.h b/src/common/common.h
index e264db7147..0dcf448970 100644
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -19,62 +19,6 @@
 extern "C" {
 #endif
 
-/**
- * @brief Memory allocation and deallocation functions.
- *
- * These macros provide a unified interface for memory operations,
- * using OpenSSL functions when OQS_USE_OPENSSL is defined, and
- * standard C library functions otherwise.
- */
-#if (defined(OQS_USE_OPENSSL) || defined(OQS_DLOPEN_OPENSSL)) &&               \
-    defined(OPENSSL_VERSION_NUMBER)
-#include <openssl/crypto.h>
-
-/**
- * Allocates memory of a given size.
- * @param size The size of the memory to be allocated in bytes.
- * @return A pointer to the allocated memory.
- */
-#define OQS_MEM_malloc(size) OPENSSL_malloc(size)
-
-/**
- * Allocates memory for an array of elements of a given size.
- * @param num_elements The number of elements to allocate.
- * @param element_size The size of each element in bytes.
- * @return A pointer to the allocated memory.
- */
-#define OQS_MEM_calloc(num_elements, element_size)                             \
-  OPENSSL_zalloc((num_elements) * (element_size))
-/**
- * Duplicates a string.
- * @param str The string to be duplicated.
- * @return A pointer to the newly allocated string.
- */
-#define OQS_MEM_strdup(str) OPENSSL_strdup(str)
-#else
-/**
- * Allocates memory of a given size.
- * @param size The size of the memory to be allocated in bytes.
- * @return A pointer to the allocated memory.
- */
-#define OQS_MEM_malloc(size) malloc(size) // IGNORE memory-check
-
-/**
- * Allocates memory for an array of elements of a given size.
- * @param num_elements The number of elements to allocate.
- * @param element_size The size of each element in bytes.
- * @return A pointer to the allocated memory.
- */
-#define OQS_MEM_calloc(num_elements, element_size)                             \
-  calloc(num_elements, element_size)    // IGNORE memory-check
-/**
- * Duplicates a string.
- * @param str The string to be duplicated.
- * @return A pointer to the newly allocated string.
- */
-#define OQS_MEM_strdup(str) strdup(str) // IGNORE memory-check
-#endif
-
 /**
  * Macro for terminating the program if x is
  * a null pointer.
@@ -236,6 +180,36 @@ OQS_API void OQS_destroy(void);
  */
 OQS_API const char *OQS_version(void);
 
+/**
+ * @brief Memory allocation and deallocation functions.
+ *
+ * These functions provide a unified interface for memory operations,
+ * using OpenSSL functions when OQS_USE_OPENSSL is defined, and
+ * standard C library functions otherwise.
+ */
+
+/**
+ * Allocates memory of a given size.
+ * @param size The size of the memory to be allocated in bytes.
+ * @return A pointer to the allocated memory.
+ */
+OQS_API void *OQS_MEM_malloc(size_t size);
+
+/**
+ * Allocates memory for an array of elements of a given size.
+ * @param num_elements The number of elements to allocate.
+ * @param element_size The size of each element in bytes.
+ * @return A pointer to the allocated memory.
+ */
+OQS_API void *OQS_MEM_calloc(size_t num_elements, size_t element_size);
+
+/**
+ * Duplicates a string.
+ * @param str The string to be duplicated.
+ * @return A pointer to the newly allocated string.
+ */
+OQS_API char *OQS_MEM_strdup(const char *str);
+
 /**
  * Constant time comparison of byte sequences `a` and `b` of length `len`.
  * Returns 0 if the byte sequences are equal or if `len`=0.
diff --git a/src/common/ossl_functions.h b/src/common/ossl_functions.h
index 7e02898b3c..4779168c27 100644
--- a/src/common/ossl_functions.h
+++ b/src/common/ossl_functions.h
@@ -60,4 +60,8 @@ VOID_FUNC(void, OPENSSL_cleanse, (void *ptr, size_t len), (ptr, len))
 FUNC(int, RAND_bytes, (unsigned char *buf, int num), (buf, num))
 FUNC(int, RAND_poll, (void), ())
 FUNC(int, RAND_status, (void), ())
-VOID_FUNC(void, OPENSSL_thread_stop, (void), ())
\ No newline at end of file
+VOID_FUNC(void, OPENSSL_thread_stop, (void), ())
+FUNC(void *, CRYPTO_malloc, (size_t num, const char *file, int line), (num, file, line))
+FUNC(void *, CRYPTO_zalloc, (size_t num, const char *file, int line), (num, file, line))
+FUNC(char *, CRYPTO_strdup, (const char *str, const char *file, int line), (str, file, line))
+VOID_FUNC(void, CRYPTO_free, (void *ptr, const char *file, int line), (ptr, file, line))
diff --git a/src/common/ossl_helpers.h b/src/common/ossl_helpers.h
index 7587d80f36..1abccea738 100644
--- a/src/common/ossl_helpers.h
+++ b/src/common/ossl_helpers.h
@@ -6,6 +6,7 @@
 extern "C" {
 #endif
 
+#include <openssl/crypto.h>
 #include <openssl/err.h>
 #include <openssl/evp.h>
 #include <openssl/rand.h>

From 6cc9ef18c4e5154dbed6fe4b0681f6429c470aa9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Guti=C3=A9rrez?= <pablogf@uma.es>
Date: Tue, 21 Jan 2025 16:23:03 +0100
Subject: [PATCH 03/13] Added sig_stfl.h path to .Doxyfile INPUT setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 docs/.Doxyfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/.Doxyfile b/docs/.Doxyfile
index c29f2514dc..6fd9dcf2ab 100644
--- a/docs/.Doxyfile
+++ b/docs/.Doxyfile
@@ -957,6 +957,7 @@ INPUT                  = src/common/aes/aes_ops.h \
                          src/common/sha3/sha3x4_ops.h \
                          src/kem/kem.h \
                          src/sig/sig.h \
+                         src/sig_stfl.h \
                          README.md \
                          CONFIGURE.md \
                          CONTRIBUTORS

From 27e3cff107499e4351aa80373a1209afbf95867a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Guti=C3=A9rrez?= <pablogf@uma.es>
Date: Mon, 27 Jan 2025 13:07:23 +0100
Subject: [PATCH 04/13] added sig_stfl path to .Doxyfile INPUT setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 docs/.Doxyfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/.Doxyfile b/docs/.Doxyfile
index 6fd9dcf2ab..a4e173ecf4 100644
--- a/docs/.Doxyfile
+++ b/docs/.Doxyfile
@@ -957,7 +957,7 @@ INPUT                  = src/common/aes/aes_ops.h \
                          src/common/sha3/sha3x4_ops.h \
                          src/kem/kem.h \
                          src/sig/sig.h \
-                         src/sig_stfl.h \
+                         src/sig_stfl/sig_stfl.h \
                          README.md \
                          CONFIGURE.md \
                          CONTRIBUTORS

From d504307308bbbbb2bd1240bd011ef6eec46ca534 Mon Sep 17 00:00:00 2001
From: Spencer Wilson <spencer.wilson@uwaterloo.ca>
Date: Sun, 26 Jan 2025 13:10:19 -0500
Subject: [PATCH 05/13] Update to public Ubuntu 24.04 ARM runner [full tests]
 (#2050)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Spencer Wilson <spencer.wilson@uwaterloo.ca>
Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 .github/actionlint.yaml     | 4 ++--
 .github/workflows/linux.yml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml
index 6ba90d6575..54de09a802 100644
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -1,12 +1,12 @@
 # Labels of self-hosted runner in array of strings.
 
-# NB. oqs-arm64 is not self-hosted but this configuration
+# NB. ubuntu-24.04-arm is not self-hosted but this configuration
 #     is required for liboqs to lint correctly with actionlint v1.7.1
 
 self-hosted-runner:
   # Labels of self-hosted runner in array of string
   labels:
-    - oqs-arm64
+    - ubuntu-24.04-arm
 # Configuration variables in array of strings defined in your repository or organization
 config-variables:
   # - DEFAULT_RUNNER
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index bb412d17c1..7705f034d6 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -13,7 +13,7 @@ jobs:
       matrix:
         include:
           - name: arm64
-            runner: oqs-arm64
+            runner: ubuntu-24.04-arm
             container: openquantumsafe/ci-ubuntu-latest:latest
             PYTEST_ARGS: --maxprocesses=10 --ignore=tests/test_kat_all.py
             CMAKE_ARGS: -DOQS_ENABLE_SIG_STFL_LMS=ON -DOQS_ENABLE_SIG_STFL_XMSS=ON -DOQS_HAZARDOUS_EXPERIMENTAL_ENABLE_SIG_STFL_KEY_SIG_GEN=ON

From d1698a3ea69f96910aa86fe7ec6d437d76f99492 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Guti=C3=A9rrez?= <pablogf@uma.es>
Date: Mon, 27 Jan 2025 16:27:47 +0100
Subject: [PATCH 06/13] Added Doxygen comments of algorithm identifiers until
 XMSSMT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 src/sig_stfl/sig_stfl.h | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/src/sig_stfl/sig_stfl.h b/src/sig_stfl/sig_stfl.h
index 9c85846fc7..c4ae42a60b 100644
--- a/src/sig_stfl/sig_stfl.h
+++ b/src/sig_stfl/sig_stfl.h
@@ -43,44 +43,80 @@ extern "C"
 {
 #endif
 
-/* Algorithm identifier for XMSS-SHA2_10_256 */
+/** Algorithm identifier for XMSS-SHA2_10_256  */
 #define OQS_SIG_STFL_alg_xmss_sha256_h10 "XMSS-SHA2_10_256"
+/** Algorithm identifier for XMSS-SHA2_16_256  */
 #define OQS_SIG_STFL_alg_xmss_sha256_h16 "XMSS-SHA2_16_256"
+/** Algorithm identifier for XMSS-SHA2_20_256  */
 #define OQS_SIG_STFL_alg_xmss_sha256_h20 "XMSS-SHA2_20_256"
+/** Algorithm identifier for XMSS-SHAKE_10_256  */
 #define OQS_SIG_STFL_alg_xmss_shake128_h10 "XMSS-SHAKE_10_256"
+/** Algorithm identifier for XMSS-SHAKE_16_256  */
 #define OQS_SIG_STFL_alg_xmss_shake128_h16 "XMSS-SHAKE_16_256"
+/** Algorithm identifier for XMSS-SHAKE_20_256  */
 #define OQS_SIG_STFL_alg_xmss_shake128_h20 "XMSS-SHAKE_20_256"
+/** Algorithm identifier for XMSS-SHA2_10_512  */
 #define OQS_SIG_STFL_alg_xmss_sha512_h10 "XMSS-SHA2_10_512"
+/** Algorithm identifier for XMSS-SHA2_16_512  */
 #define OQS_SIG_STFL_alg_xmss_sha512_h16 "XMSS-SHA2_16_512"
+/** Algorithm identifier for XMSS-SHA2_20_512  */
 #define OQS_SIG_STFL_alg_xmss_sha512_h20 "XMSS-SHA2_20_512"
+/** Algorithm identifier for XMSS-SHAKE_10_512  */
 #define OQS_SIG_STFL_alg_xmss_shake256_h10 "XMSS-SHAKE_10_512"
+/** Algorithm identifier for XMSS-SHAKE_16_512  */
 #define OQS_SIG_STFL_alg_xmss_shake256_h16 "XMSS-SHAKE_16_512"
+/** Algorithm identifier for XMSS-SHAKE_20_512  */
 #define OQS_SIG_STFL_alg_xmss_shake256_h20 "XMSS-SHAKE_20_512"
+/** Algorithm identifier for XMSS-SHA2_10_192  */
 #define OQS_SIG_STFL_alg_xmss_sha256_h10_192 "XMSS-SHA2_10_192"
+/** Algorithm identifier for XMSS-SHA2_16_192  */
 #define OQS_SIG_STFL_alg_xmss_sha256_h16_192 "XMSS-SHA2_16_192"
+/** Algorithm identifier for XMSS-SHA2_20_192  */
 #define OQS_SIG_STFL_alg_xmss_sha256_h20_192 "XMSS-SHA2_20_192"
+/** Algorithm identifier for XMSS-SHAKE256_10_192  */
 #define OQS_SIG_STFL_alg_xmss_shake256_h10_192 "XMSS-SHAKE256_10_192"
+/** Algorithm identifier for XMSS-SHAKE256_16_192  */
 #define OQS_SIG_STFL_alg_xmss_shake256_h16_192 "XMSS-SHAKE256_16_192"
+/** Algorithm identifier for XMSS-SHAKE256_20_192  */
 #define OQS_SIG_STFL_alg_xmss_shake256_h20_192 "XMSS-SHAKE256_20_192"
+/** Algorithm identifier for XMSS-SHAKE256_10_256  */
 #define OQS_SIG_STFL_alg_xmss_shake256_h10_256 "XMSS-SHAKE256_10_256"
+/** Algorithm identifier for XMSS-SHAKE256_16_256  */
 #define OQS_SIG_STFL_alg_xmss_shake256_h16_256 "XMSS-SHAKE256_16_256"
+/** Algorithm identifier for XMSS-SHAKE256_20_256  */
 #define OQS_SIG_STFL_alg_xmss_shake256_h20_256 "XMSS-SHAKE256_20_256"
 
+/** Algorithm identifier for XMSSMT-SHA2_20/2_256  */
 #define OQS_SIG_STFL_alg_xmssmt_sha256_h20_2 "XMSSMT-SHA2_20/2_256"
+/** Algorithm identifier for XMSSMT-SHA2_20/4_256  */
 #define OQS_SIG_STFL_alg_xmssmt_sha256_h20_4 "XMSSMT-SHA2_20/4_256"
+/** Algorithm identifier for XMSSMT-SHA2_40/2_256  */
 #define OQS_SIG_STFL_alg_xmssmt_sha256_h40_2 "XMSSMT-SHA2_40/2_256"
+/** Algorithm identifier for XMSSMT-SHA2_40/4_256  */
 #define OQS_SIG_STFL_alg_xmssmt_sha256_h40_4 "XMSSMT-SHA2_40/4_256"
+/** Algorithm identifier for XMSSMT-SHA2_40/8_256  */
 #define OQS_SIG_STFL_alg_xmssmt_sha256_h40_8 "XMSSMT-SHA2_40/8_256"
+/** Algorithm identifier for XMSSMT-SHA2_60/3_256  */
 #define OQS_SIG_STFL_alg_xmssmt_sha256_h60_3 "XMSSMT-SHA2_60/3_256"
+/** Algorithm identifier for XMSSMT-SHA2_60/6_256  */
 #define OQS_SIG_STFL_alg_xmssmt_sha256_h60_6 "XMSSMT-SHA2_60/6_256"
+/** Algorithm identifier for XMSSMT-SHA2_60/12_256  */
 #define OQS_SIG_STFL_alg_xmssmt_sha256_h60_12 "XMSSMT-SHA2_60/12_256"
+/** Algorithm identifier for XMSSMT-SHAKE_20/2_256  */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h20_2 "XMSSMT-SHAKE_20/2_256"
+/** Algorithm identifier for XMSSMT-SHAKE_20/4_256  */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h20_4 "XMSSMT-SHAKE_20/4_256"
+/** Algorithm identifier for XMSSMT-SHAKE_40/2_256  */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h40_2 "XMSSMT-SHAKE_40/2_256"
+/** Algorithm identifier for XMSSMT-SHAKE_40/4_256  */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h40_4 "XMSSMT-SHAKE_40/4_256"
+/** Algorithm identifier for XMSSMT-SHAKE_40/8_256  */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h40_8 "XMSSMT-SHAKE_40/8_256"
+/** Algorithm identifier for XMSSMT-SHAKE_60/3_256  */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h60_3 "XMSSMT-SHAKE_60/3_256"
+/** Algorithm identifier for XMSSMT-SHAKE_60/6_256  */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h60_6 "XMSSMT-SHAKE_60/6_256"
+/** Algorithm identifier for XMSSMT-SHAKE_60/12_256  */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h60_12 "XMSSMT-SHAKE_60/12_256"
 
 /* Defined LMS parameter identifiers */

From e14998d77fae9e1794a888ec758b0aef3488838b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Guti=C3=A9rrez?= <pablogf@uma.es>
Date: Mon, 27 Jan 2025 16:30:41 +0100
Subject: [PATCH 07/13] commit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 src/sig_stfl/sig_stfl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sig_stfl/sig_stfl.h b/src/sig_stfl/sig_stfl.h
index c4ae42a60b..4c78758de8 100644
--- a/src/sig_stfl/sig_stfl.h
+++ b/src/sig_stfl/sig_stfl.h
@@ -114,7 +114,7 @@ extern "C"
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h40_8 "XMSSMT-SHAKE_40/8_256"
 /** Algorithm identifier for XMSSMT-SHAKE_60/3_256  */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h60_3 "XMSSMT-SHAKE_60/3_256"
-/** Algorithm identifier for XMSSMT-SHAKE_60/6_256  */
+/** Algorithm identifier for XMSSMT-SHAKE_60/6_256 */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h60_6 "XMSSMT-SHAKE_60/6_256"
 /** Algorithm identifier for XMSSMT-SHAKE_60/12_256  */
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h60_12 "XMSSMT-SHAKE_60/12_256"

From 6bb26035f4306124abbe6571af57067fb364bb03 Mon Sep 17 00:00:00 2001
From: Steven I Reeves <sreeves@nvidia.com>
Date: Mon, 27 Jan 2025 15:17:05 -0800
Subject: [PATCH 08/13] NVIDIA: Adding cuPQC as a backend for ML-KEM. (#2044)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Adding cuPQC as a backend for ML-KEM.

Signed-off-by: Steven Reeves <sreeves@nvidia.com>

* Fixing transposition error that left out OQS_USE_CUPQC in CMake system.

Signed-off-by: Steven Reeves <sreeves@nvidia.com>

* Add CMake dependent options for cupqc. Fixed formatting in kem_ml_kem_####.c and kem/family/kem_scheme.c

Signed-off-by: Steven Reeves <sreeves@nvidia.com>

* Move cupqc_ml-kem source files to correctly named dir

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Stop piggybacking on pqcrystals-kyber-standard and move cupqc_ml-kem metadata to separate upstream repo

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Update licensing information

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Update PLATFORMS.md

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Fix kem_family cmakelists template

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Run copy_from_upsream.py and pull updated upstream

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Add cupqc build test to basic.yml

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Move cupqc build test from basic.yml to linux.yml

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Fix error in linux.yml

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* fixup! Fix error in linux.yml

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Redo cupqc build check

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Supply default CUDA arch to cupqc-buildcheck configuration stage

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Specify CUDAXX in cupqc-buildcheck

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

* Make cuPQC_DIR explicit in cupqc-buildcheck

Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>

---------

Signed-off-by: Steven Reeves <sreeves@nvidia.com>
Signed-off-by: Pravek Sharma <sharmapravek@gmail.com>
Co-authored-by: Pravek Sharma <sharmapravek@gmail.com>
Signed-off-by: Pablo Gutiérrez <pablogf@MSI.>
Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 .CMake/alg_support.cmake                      |  18 ++
 .github/workflows/linux.yml                   |  13 ++
 CMakeLists.txt                                |  11 ++
 CONFIGURE.md                                  |   8 +
 PLATFORMS.md                                  |   1 +
 docs/algorithms/kem/ml_kem.md                 |   7 +
 docs/algorithms/kem/ml_kem.yml                |  34 ++++
 .../add_enable_by_alg_conditional.fragment    |  12 ++
 .../copy_from_upstream/copy_from_upstream.py  |  34 ++--
 .../copy_from_upstream/copy_from_upstream.yml |  12 ++
 .../src/kem/family/CMakeLists.txt             |   9 +
 .../src/kem/family/kem_scheme.c               |  28 ++-
 .../update_upstream_alg_docs.py               |  78 ++++----
 src/CMakeLists.txt                            |   5 +
 src/kem/ml_kem/CMakeLists.txt                 |  24 +++
 .../cupqc_ml-kem-1024_cuda/cupqc_ml-kem.cu    | 172 ++++++++++++++++++
 .../cupqc_ml-kem-512_cuda/cupqc_ml-kem.cu     | 172 ++++++++++++++++++
 .../cupqc_ml-kem-768_cuda/cupqc_ml-kem.cu     | 172 ++++++++++++++++++
 src/kem/ml_kem/kem_ml_kem_1024.c              |  17 ++
 src/kem/ml_kem/kem_ml_kem_512.c               |  17 ++
 src/kem/ml_kem/kem_ml_kem_768.c               |  17 ++
 src/oqsconfig.h.cmake                         |   5 +
 22 files changed, 805 insertions(+), 61 deletions(-)
 create mode 100644 src/kem/ml_kem/cupqc_ml-kem-1024_cuda/cupqc_ml-kem.cu
 create mode 100644 src/kem/ml_kem/cupqc_ml-kem-512_cuda/cupqc_ml-kem.cu
 create mode 100644 src/kem/ml_kem/cupqc_ml-kem-768_cuda/cupqc_ml-kem.cu

diff --git a/.CMake/alg_support.cmake b/.CMake/alg_support.cmake
index 9afa6e4b15..96677676ed 100644
--- a/.CMake/alg_support.cmake
+++ b/.CMake/alg_support.cmake
@@ -338,18 +338,36 @@ if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCT
 endif()
 endif()
 
+if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
+if(OQS_USE_CUPQC)
+    cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_512_cuda "" ON "OQS_ENABLE_KEM_ml_kem_512" OFF)
+endif()
+endif()
+
 if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
 if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
     cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_768" OFF)
 endif()
 endif()
 
+if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
+if(OQS_USE_CUPQC)
+    cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_cuda "" ON "OQS_ENABLE_KEM_ml_kem_768" OFF)
+endif()
+endif()
+
 if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
 if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
     cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_1024" OFF)
 endif()
 endif()
 
+if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
+if(OQS_USE_CUPQC)
+    cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_cuda "" ON "OQS_ENABLE_KEM_ml_kem_1024" OFF)
+endif()
+endif()
+
 
 if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux")
 if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 7705f034d6..025bd5406e 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -188,6 +188,19 @@ jobs:
                                                 --numprocesses=auto \
                                                 --ignore=tests/test_code_conventions.py ${{ matrix.PYTEST_ARGS }}"
 
+  cupqc-buildcheck:
+    name: Check that code builds with OQS_USE_CUPQC=ON
+    runs-on: ubuntu-latest
+    container: openquantumsafe/ci-ubuntu-latest:latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # pin@v4
+      - name: Configure
+        run: mkdir build && cd build && cuPQC_DIR=/cupqc/cupqc/cupqc-pkg-0.2.0/cmake/ CUDACXX=/usr/local/cuda-12.6/bin/nvcc cmake -GNinja -DOQS_USE_CUPQC=ON -DCMAKE_CUDA_ARCHITECTURES=80 .. && cmake -LA -N ..
+      - name: Build code
+        run: ninja
+        working-directory: build
+
   linux_cross_compile:
     runs-on: ubuntu-latest
     container: openquantumsafe/ci-ubuntu-latest:latest
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 114961ed7f..1b4c2b1af6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,6 +27,7 @@ option(OQS_LIBJADE_BUILD "Enable formally verified implementation of supported a
 option(OQS_PERMIT_UNSUPPORTED_ARCHITECTURE "Permit compilation on an an unsupported architecture." OFF)
 option(OQS_STRICT_WARNINGS "Enable all compiler warnings." OFF)
 option(OQS_EMBEDDED_BUILD "Compile liboqs for an Embedded environment without a full standard library." OFF)
+option(OQS_USE_CUPQC "Utilize cuPQC as the backend for supported PQC algorithms." OFF)
 
 # Libfuzzer isn't supported on gcc
 if('${CMAKE_C_COMPILER_ID}' STREQUAL 'Clang')
@@ -140,6 +141,16 @@ else()
     message(FATAL_ERROR "Unknown or unsupported processor: " ${CMAKE_SYSTEM_PROCESSOR} ". Override by setting OQS_PERMIT_UNSUPPORTED_ARCHITECTURE=ON")
 endif()
 
+if(${OQS_USE_CUPQC})
+    # CMAKE's CUDA language requires CMAKE 3.18
+    cmake_minimum_required (VERSION 3.18)
+    enable_language(CUDA)
+    if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+      set(CMAKE_CUDA_ARCHITECTURES 80 90)
+    endif()
+    find_package(cuPQC 0.2.0 REQUIRED)
+endif()
+
 if (NOT ((CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") AND (ARCH_X86_64 STREQUAL "ON")) AND (OQS_LIBJADE_BUILD STREQUAL "ON"))
     message(FATAL_ERROR "Building liboqs with libjade implementations from libjade is only supported on Linux and Darwin on x86_64.")
 endif()
diff --git a/CONFIGURE.md b/CONFIGURE.md
index 1d00565f31..02a8da2015 100644
--- a/CONFIGURE.md
+++ b/CONFIGURE.md
@@ -13,6 +13,7 @@ The following options can be passed to CMake before the build file generation pr
 - [OQS_DIST_BUILD](#OQS_DIST_BUILD)
 - [OQS_USE_CPUFEATURE_INSTRUCTIONS](#OQS_USE_CPUFEATURE_INSTRUCTIONS)
 - [OQS_USE_OPENSSL](#OQS_USE_OPENSSL)
+- [OQS_USE_CUPQC](#OQS_USE_CUPQC)
 - [OQS_OPT_TARGET](#OQS_OPT_TARGET)
 - [OQS_SPEED_USE_ARM_PMU](#OQS_SPEED_USE_ARM_PMU)
 - [USE_SANITIZER](#USE_SANITIZER)
@@ -124,6 +125,13 @@ Dynamically load OpenSSL through `dlopen`. When using liboqs from other cryptogr
 
 Only has an effect if the system supports `dlopen` and ELF binary format, such as Linux or BSD family.
 
+### OQS_USE_CUPQC
+
+Can be `ON` or `OFF`.  When `ON`, use NVIDIA's cuPQC library where able (currently just ML-KEM).  When this option is enabled, liboqs may not run correctly on machines that lack supported GPUs. To download cuPQC follow the instructions at (https://developer.nvidia.com/cupqc-download/). Detailed descriptions of the API, requirements, and installation guide are in the cuPQC documentation (https://docs.nvidia.com/cuda/cupqc/index.html). While the code shipped by liboqs required to use cuPQC is licensed under Apache 2.0 the cuPQC SDK comes with its own license agreement (https://docs.nvidia.com/cuda/cupqc/license.html). 
+
+**Default**: `OFF`
+
+
 ## Stateful Hash Based Signatures 
 
 XMSS and LMS are the two supported Hash-Based Signatures schemes.
diff --git a/PLATFORMS.md b/PLATFORMS.md
index f1b3fc5ebd..544edd9319 100644
--- a/PLATFORMS.md
+++ b/PLATFORMS.md
@@ -63,3 +63,4 @@ In this policy, the words "must" and "must not" specify absolute requirements th
 - ppc641e for Ubuntu (Focal)
 - s390x for Ubuntu (Focal)
 - loongarch64 for Debian Linux (trixie)
+- NVIDIA GPU architectures 70, 75, 80, 86, 89, and 90 with a x86_64 CPU for Linux
diff --git a/docs/algorithms/kem/ml_kem.md b/docs/algorithms/kem/ml_kem.md
index d1806517ba..eeaf299dde 100644
--- a/docs/algorithms/kem/ml_kem.md
+++ b/docs/algorithms/kem/ml_kem.md
@@ -9,6 +9,10 @@
 - **Primary Source**<a name="primary-source"></a>:
   - **Source**: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd with copy_from_upstream patches
   - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0
+- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd with copy_from_upstream patches
+  - **cupqc-cuda**:<a name="cupqc-cuda"></a>
+      - **Source**: https://github.com/praveksharma/cupqc-mlkem/commit/b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e
+      - **Implementation license (SPDX-Identifier)**: Apache-2.0
 
 
 ## Parameter set summary
@@ -25,6 +29,7 @@
 |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------|
 | [Primary Source](#primary-source) | ref                      | All                         | All                             | None                    | True                               | True                                           | False                 |
 | [Primary Source](#primary-source) | avx2                     | x86\_64                     | Linux,Darwin                    | AVX2,BMI2,POPCNT        | True                               | True                                           | False                 |
+|     [cupqc-cuda](#cupqc-cuda)     | cuda                     | CUDA                        | Linux,Darwin                    | None                    | False                              | False                                          | False                 |
 
 Are implementations chosen based on runtime CPU feature detection? **Yes**.
 
@@ -36,6 +41,7 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
 |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
 | [Primary Source](#primary-source) | ref                      | All                         | All                             | None                    | True                               | True                                           | False                |
 | [Primary Source](#primary-source) | avx2                     | x86\_64                     | Linux,Darwin                    | AVX2,BMI2,POPCNT        | True                               | True                                           | False                |
+|     [cupqc-cuda](#cupqc-cuda)     | cuda                     | CUDA                        | Linux,Darwin                    | None                    | False                              | False                                          | False                |
 
 Are implementations chosen based on runtime CPU feature detection? **Yes**.
 
@@ -45,6 +51,7 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
 |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
 | [Primary Source](#primary-source) | ref                      | All                         | All                             | None                    | True                               | True                                           | False                |
 | [Primary Source](#primary-source) | avx2                     | x86\_64                     | Linux,Darwin                    | AVX2,BMI2,POPCNT        | True                               | True                                           | False                |
+|     [cupqc-cuda](#cupqc-cuda)     | cuda                     | CUDA                        | Linux,Darwin                    | None                    | False                              | False                                          | False                |
 
 Are implementations chosen based on runtime CPU feature detection? **Yes**.
 
diff --git a/docs/algorithms/kem/ml_kem.yml b/docs/algorithms/kem/ml_kem.yml
index 81ef2b6c4a..498617ff45 100644
--- a/docs/algorithms/kem/ml_kem.yml
+++ b/docs/algorithms/kem/ml_kem.yml
@@ -20,6 +20,10 @@ primary-upstream:
   source: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd
     with copy_from_upstream patches
   spdx-license-identifier: CC0-1.0 or Apache-2.0
+optimized-upstreams:
+  cupqc-cuda:
+    source: https://github.com/praveksharma/cupqc-mlkem/commit/b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e
+    spdx-license-identifier: Apache-2.0
 parameter-sets:
 - name: ML-KEM-512
   claimed-nist-level: 1
@@ -54,6 +58,16 @@ parameter-sets:
     no-secret-dependent-branching-claimed: true
     no-secret-dependent-branching-checked-by-valgrind: true
     large-stack-usage: false
+  - upstream: cupqc-cuda
+    upstream-id: cuda
+    supported-platforms:
+    - architecture: CUDA
+      operating_systems:
+      - Linux
+      - Darwin
+    no-secret-dependent-branching-claimed: false
+    no-secret-dependent-branching-checked-by-valgrind: false
+    large-stack-usage: false
 - name: ML-KEM-768
   claimed-nist-level: 3
   claimed-security: IND-CCA2
@@ -87,6 +101,16 @@ parameter-sets:
     no-secret-dependent-branching-claimed: true
     no-secret-dependent-branching-checked-by-valgrind: true
     large-stack-usage: false
+  - upstream: cupqc-cuda
+    upstream-id: cuda
+    supported-platforms:
+    - architecture: CUDA
+      operating_systems:
+      - Linux
+      - Darwin
+    no-secret-dependent-branching-claimed: false
+    no-secret-dependent-branching-checked-by-valgrind: false
+    large-stack-usage: false
 - name: ML-KEM-1024
   claimed-nist-level: 5
   claimed-security: IND-CCA2
@@ -120,3 +144,13 @@ parameter-sets:
     no-secret-dependent-branching-claimed: true
     no-secret-dependent-branching-checked-by-valgrind: true
     large-stack-usage: false
+  - upstream: cupqc-cuda
+    upstream-id: cuda
+    supported-platforms:
+    - architecture: CUDA
+      operating_systems:
+      - Linux
+      - Darwin
+    no-secret-dependent-branching-claimed: false
+    no-secret-dependent-branching-checked-by-valgrind: false
+    large-stack-usage: false
diff --git a/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg_conditional.fragment b/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg_conditional.fragment
index daed5514c0..0830c024fd 100644
--- a/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg_conditional.fragment
+++ b/scripts/copy_from_upstream/.CMake/alg_support.cmake/add_enable_by_alg_conditional.fragment
@@ -11,6 +11,18 @@ if(OQS_DIST_X86_64_BUILD OR ({% for flag in platform['required_flags'] -%} OQS_U
 {%- endif %}
 endif()
 {% if platform['operating_systems'] %}endif()
+{% endif -%}
+            {%- endfor -%}
+            {%- for platform in impl['supported_platforms'] if platform['architecture'] == 'CUDA' %}
+{% if platform['operating_systems'] %}if(CMAKE_SYSTEM_NAME MATCHES "{{ platform['operating_systems']|join('|') }}")
+{% endif -%}
+if(OQS_USE_CUPQC)
+    cmake_dependent_option(OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }}_{{ impl['name']  }} "" ON "OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }}" OFF)
+{%- if 'alias_scheme' in scheme %}
+    cmake_dependent_option(OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }}_{{ impl['name']  }} "" ON "OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }}" OFF)
+{%- endif %}
+endif()
+{% if platform['operating_systems'] %}endif()
 {% endif -%}
             {%- endfor -%}
             {%- for platform in impl['supported_platforms'] if platform['architecture'] == 'ARM64_V8' %}
diff --git a/scripts/copy_from_upstream/copy_from_upstream.py b/scripts/copy_from_upstream/copy_from_upstream.py
index 400ecc57a0..46968fa33c 100755
--- a/scripts/copy_from_upstream/copy_from_upstream.py
+++ b/scripts/copy_from_upstream/copy_from_upstream.py
@@ -495,14 +495,15 @@ def handle_implementation(impl, family, scheme, dst_basedir):
         else:
             # determine list of files to copy:
             if 'sources' in i:
-                srcs = i['sources'].split(" ")
-                for s in srcs:
-                    # Copy recursively only in case of directories not with plain files to avoid copying over symbolic links
-                    if os.path.isfile(os.path.join(origfolder, s)):
-                        subprocess.run(['cp', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
-                    else:
-                        subprocess.run(
-                            ['cp', '-r', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
+                if i['sources']:
+                    srcs = i['sources'].split(" ")
+                    for s in srcs:
+                        # Copy recursively only in case of directories not with plain files to avoid copying over symbolic links
+                        if os.path.isfile(os.path.join(origfolder, s)):
+                            subprocess.run(['cp', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
+                        else:
+                            subprocess.run(
+                                ['cp', '-r', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
             else:
                 subprocess.run(['cp', '-pr', os.path.join(origfolder, '.'), srcfolder])
                 # raise Exception("Malformed YML file: No sources listed to copy. Check upstream YML file." )
@@ -598,14 +599,15 @@ def process_families(instructions, basedir, with_kat, with_generator, with_libja
                             # when provided to the compiler; OQS uses the term ARM_NEON
                             if req['architecture'] == 'arm_8':
                                 req['architecture'] = 'ARM64_V8'
-                            if req['architecture'] == 'ARM64_V8' and 'asimd' in req['required_flags']:
-                                req['required_flags'].remove('asimd')
-                                req['required_flags'].append('arm_neon')
-                            if req['architecture'] == 'ARM64_V8' and 'sha3' in req['required_flags']:
-                                req['required_flags'].remove('sha3')
-                                req['required_flags'].append('arm_sha3')
-                            impl['required_flags'] = req['required_flags']
-                            family['all_required_flags'].update(req['required_flags'])
+                            if 'required_flags' in req:
+                                if req['architecture'] == 'ARM64_V8' and 'asimd' in req['required_flags']:
+                                    req['required_flags'].remove('asimd')
+                                    req['required_flags'].append('arm_neon')
+                                if req['architecture'] == 'ARM64_V8' and 'sha3' in req['required_flags']:
+                                    req['required_flags'].remove('sha3')
+                                    req['required_flags'].append('arm_sha3')
+                                impl['required_flags'] = req['required_flags']
+                                family['all_required_flags'].update(req['required_flags'])
                     except KeyError as ke:
                         if (impl['name'] != family['default_implementation']):
                             print("No required flags found for %s (KeyError %s on impl %s)" % (
diff --git a/scripts/copy_from_upstream/copy_from_upstream.yml b/scripts/copy_from_upstream/copy_from_upstream.yml
index f80f0979d5..23d1f3a22d 100644
--- a/scripts/copy_from_upstream/copy_from_upstream.yml
+++ b/scripts/copy_from_upstream/copy_from_upstream.yml
@@ -38,6 +38,14 @@ upstreams:
     kem_meta_path: '{pretty_name_full}_META.yml'
     kem_scheme_path: '.'
     patches: [pqcrystals-ml_kem.patch]
+  - 
+    name: cupqc
+    git_url: https://github.com/praveksharma/cupqc-mlkem.git
+    git_branch: main
+    git_commit: b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e
+    kem_meta_path: '{pretty_name_full}_META.yml'
+    kem_scheme_path: '.'
+    patches: []
   -
     name: pqcrystals-dilithium
     git_url: https://github.com/pq-crystals/dilithium.git
@@ -166,6 +174,10 @@ kems:
   -
     name: ml_kem
     default_implementation: ref
+    arch_specific_implementations:
+                                      cuda: cuda
+    arch_specific_upstream_locations:
+                                      cuda: cupqc
     upstream_location: pqcrystals-kyber-standard
     schemes:
       -
diff --git a/scripts/copy_from_upstream/src/kem/family/CMakeLists.txt b/scripts/copy_from_upstream/src/kem/family/CMakeLists.txt
index ca9d41eac0..bd648d101d 100644
--- a/scripts/copy_from_upstream/src/kem/family/CMakeLists.txt
+++ b/scripts/copy_from_upstream/src/kem/family/CMakeLists.txt
@@ -33,11 +33,19 @@ if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}{%- if 'alias_scheme' in
     target_compile_options({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PUBLIC {{ impl['compile_opts'] }})
            {%- endif -%}
 
+        {%- elif impl['name'] == 'cuda' %}
+
+if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}_{{ impl['name'] }}{%- if 'alias_scheme' in scheme %} OR OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}{%- endif %})
+    add_library({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} OBJECT {{ impl['upstream']['name'] }}_{{ scheme['pqclean_scheme'] }}_{{ impl['name'] }}/cupqc_ml-kem.cu)
+    target_link_libraries({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} cupqc)
+    set_property(TARGET {{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PROPERTY CUDA_ARCHITECTURES OFF)
+    target_compile_options({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE {{ impl['compile_opts'] }})
         {%- else %}
 
 if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}_{{ impl['name'] }}{%- if 'alias_scheme' in scheme %} OR OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}{%- endif %})
     add_library({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} OBJECT {% for source_file in impl['sources']|sort -%}{{ impl['upstream']['name'] }}_{{ scheme['pqclean_scheme'] }}_{{ impl['name'] }}/{{ source_file }}{%- if not loop.last %} {% endif -%}{%- endfor -%})
         {%- endif %}
+        {%- if impl['name'] != 'cuda' %}
     target_include_directories({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE ${CMAKE_CURRENT_LIST_DIR}/{{ impl['upstream']['name'] }}_{{ scheme['pqclean_scheme'] }}_{{ impl['name'] }})
     target_include_directories({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
         {%- if impl['name'] != scheme['default_implementation'] and impl['required_flags'] -%}
@@ -60,6 +68,7 @@ if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}_{{ impl['name'] }}{%- if
         target_compile_definitions({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE old_gas_syntax)
     endif()
     	{%- endif %}
+        {%- endif %}{# cupqc #}
     set(_{{ family|upper }}_OBJS ${_{{ family|upper }}_OBJS} $<TARGET_OBJECTS:{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}>)
 endif()
     {%- endfor -%}
diff --git a/scripts/copy_from_upstream/src/kem/family/kem_scheme.c b/scripts/copy_from_upstream/src/kem/family/kem_scheme.c
index 108078ffcd..630aee1389 100644
--- a/scripts/copy_from_upstream/src/kem/family/kem_scheme.c
+++ b/scripts/copy_from_upstream/src/kem/family/kem_scheme.c
@@ -93,7 +93,9 @@ extern int {{ scheme['metadata']['default_dec_signature']  }}(uint8_t *ss, const
     {%- endfor %}
 
     {%- for impl in scheme['metadata']['implementations'] if impl['name'] != scheme['default_implementation'] %}
-
+{% if impl['name'] == 'cuda'%}
+#if defined(OQS_USE_CUPQC)
+        {%- endif %}
 #if defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}){%- endif %}
         {%- if impl['signature_keypair'] %}
 extern int {{ impl['signature_keypair'] }}(uint8_t *pk, uint8_t *sk);
@@ -113,6 +115,9 @@ extern int {{ impl['signature_dec'] }}(uint8_t *ss, const uint8_t *ct, const uin
 extern int PQCLEAN_{{ scheme['pqclean_scheme_c']|upper }}_{{ impl['name']|upper }}_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
         {%- endif %}
 #endif
+        {%- if impl['name'] == 'cuda'%}
+#endif /* OQS_USE_CUPQC */
+        {%- endif %}
     {%- endfor %}
 
 {%- if libjade_implementation is defined and scheme['libjade_implementation'] %}
@@ -166,7 +171,12 @@ OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_keypair(uint8_t *
 {% endfor -%}
 #else /*OQS_LIBJADE_BUILD && (OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['scheme'] }} {%- if 'alias_scheme' in scheme %} || OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}{%- endif %})*/
 {%- endif %}
-    {%- for impl in scheme['metadata']['implementations'] if impl['name'] != scheme['default_implementation'] %}
+    {%- for impl in scheme['metadata']['implementations'] if impl['name'] == 'cuda' %}
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }})
+	return (OQS_STATUS) {{ impl['signature_keypair'] }}(public_key, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} */
+    {%- endfor %}
+    {%- for impl in scheme['metadata']['implementations'] if (impl['name'] != scheme['default_implementation'] and impl['name'] != 'cuda') %}
     {%- if loop.first %}
 #if defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}){%- endif %}
     {%- else %}
@@ -240,7 +250,12 @@ OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_encaps(uint8_t *c
 {% endfor -%}
 #else /*OQS_LIBJADE_BUILD && (OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['scheme'] }} {%- if 'alias_scheme' in scheme %} || OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}{%- endif %})*/
 {%- endif %}
-    {%- for impl in scheme['metadata']['implementations'] if impl['name'] != scheme['default_implementation'] %}
+    {%- for impl in scheme['metadata']['implementations'] if impl['name'] == 'cuda' %}
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }})
+	return (OQS_STATUS) {{ impl['signature_enc'] }}(ciphertext, shared_secret, public_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} */
+    {%- endfor %}
+    {%- for impl in scheme['metadata']['implementations'] if (impl['name'] != scheme['default_implementation'] and impl['name'] != 'cuda') %}
     {%- if loop.first %}
 #if defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}){%- endif %}
     {%- else %}
@@ -314,7 +329,12 @@ OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_decaps(uint8_t *s
 {% endfor -%}
 #else /*OQS_LIBJADE_BUILD && (OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['scheme'] }} {%- if 'alias_scheme' in scheme %} || OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}{%- endif %})*/
 {%- endif %}
-    {%- for impl in scheme['metadata']['implementations'] if impl['name'] != scheme['default_implementation'] %}
+    {%- for impl in scheme['metadata']['implementations'] if impl['name'] == 'cuda' %}
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }})
+	return (OQS_STATUS) {{ impl['signature_dec'] }}(shared_secret, ciphertext, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} */
+    {%- endfor %}
+    {%- for impl in scheme['metadata']['implementations'] if (impl['name'] != scheme['default_implementation'] and impl['name'] != 'cuda') %}
     {%- if loop.first %}
 #if defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}){%- endif %}
     {%- else %}
diff --git a/scripts/copy_from_upstream/update_upstream_alg_docs.py b/scripts/copy_from_upstream/update_upstream_alg_docs.py
index 33483067e2..ba765b84c1 100755
--- a/scripts/copy_from_upstream/update_upstream_alg_docs.py
+++ b/scripts/copy_from_upstream/update_upstream_alg_docs.py
@@ -95,8 +95,43 @@ def update_upstream_kem_alg_docs(liboqs_root, kems, upstream_info, write_changes
             oqs_yaml_path = os.path.join(liboqs_root, 'docs', 'algorithms', 'kem', '{}.yml'.format(kem['name']))
             if os.path.isfile(oqs_yaml_path):
                oqs_yaml = load_yaml(oqs_yaml_path)
+
+            upstream_base_url = ui['git_url'][:-len(".git")]
+            # upstream is special: We will take the upstream git commit information 
+            # (possibly with added patch comment) as it is what drove the update
+
+            # Need to check if yml is of old format. If so, update to new format
+            if 'primary-upstream' not in oqs_yaml:
+                print("Updating format of {}. Please double check ordering of yaml file".format(scheme['pretty_name_full']))
+                lhs = oqs_yaml['upstream']
+                oqs_yaml['primary-upstream'] = dict()
+                oqs_yaml['primary-upstream']['spdx-license-identifier'] = oqs_yaml['spdx-license-identifier']
+                for i in range(len(oqs_yaml['parameter-sets'])):
+                    for j in range(len(oqs_yaml['parameter-sets'][i]['implementations'])):
+                        oqs_yaml['parameter-sets'][i]['implementations'][j]['upstream'] = 'primary-upstream'
             else:
-                continue
+                lhs = oqs_yaml['primary-upstream']['source']
+            oqs_yaml['primary-upstream']['source'] = rhs_if_not_equal(lhs, ("{}/commit/{}"+patches_done).format(upstream_base_url, ui['git_commit']), "primary-upstream")
+            if 'upstream' in oqs_yaml:
+                del oqs_yaml['upstream']
+                del oqs_yaml['spdx-license-identifier']
+            
+            if ouis:
+                for upstream in ouis:
+                    optimized_upstream_base_url = ouis[upstream]['git_url'][:-len(".git")]
+                    optimized_patches_done=""
+                    if 'patches' in ouis[upstream]:
+                        for patchfilename in ouis[upstream]['patches']:
+                            if kem['name'] in patchfilename:
+                                optimized_patches_done=" with copy_from_upstream patches"
+                    if 'optimized-upstreams' in oqs_yaml and upstream in oqs_yaml['optimized-upstreams']:
+                        lhs = oqs_yaml['optimized-upstreams'][upstream]['source']
+                    else:
+                        lhs = ''
+                        oqs_yaml['optimized-upstreams'] = oqs_yaml.get('optimized-upstreams', dict())
+                        oqs_yaml['optimized-upstreams'][upstream] = oqs_yaml['optimized-upstreams'].get(upstream, dict())
+                    git_commit = ouis[upstream]['git_commit']
+                    oqs_yaml['optimized-upstreams'][upstream]['source'] = rhs_if_not_equal(lhs, ("{}/commit/{}"+optimized_patches_done).format(optimized_upstream_base_url, git_commit), "optimized-upstreams")
 
             # We cannot assume that the ordering of "parameter-sets"
             # in the OQS YAML files matches that of copy_from_upstream.yml
@@ -111,45 +146,6 @@ def update_upstream_kem_alg_docs(liboqs_root, kems, upstream_info, write_changes
                 oqs_yaml['type'] = rhs_if_not_equal(oqs_yaml['type'], upstream_yaml['type'], "type")
                 oqs_yaml['principal-submitters'] = rhs_if_not_equal(oqs_yaml['principal-submitters'], upstream_yaml['principal-submitters'], "principal-submitters")
 
-                upstream_base_url = ui['git_url'][:-len(".git")]
-                # upstream is special: We will take the upstream git commit information 
-                # (possibly with added patch comment) as it is what drove the update
-
-                # Need to check if yml is of old format. If so, update to new format
-                if 'primary-upstream' not in oqs_yaml:
-                    print("Updating format of {}. Please double check ordering of yaml file".format(scheme['pretty_name_full']))
-                    lhs = oqs_yaml['upstream']
-                    oqs_yaml['primary-upstream'] = dict()
-                    oqs_yaml['primary-upstream']['spdx-license-identifier'] = oqs_yaml['spdx-license-identifier']
-                    for i in range(len(oqs_yaml['parameter-sets'])):
-                        for j in range(len(oqs_yaml['parameter-sets'][i]['implementations'])):
-                            oqs_yaml['parameter-sets'][i]['implementations'][j]['upstream'] = 'primary-upstream'
-                else:
-                    lhs = oqs_yaml['primary-upstream']['source']
-                oqs_yaml['primary-upstream']['source'] = rhs_if_not_equal(lhs, ("{}/commit/{}"+patches_done).format(upstream_base_url, ui['git_commit']), "primary-upstream")
-                if 'upstream' in oqs_yaml:
-                    del oqs_yaml['upstream']
-                    del oqs_yaml['spdx-license-identifier']
-
-                if ouis:
-                    for upstream in ouis:
-                        optimized_upstream_base_url = ouis[upstream]['git_url'][:-len(".git")]
-                        for patchfilename in ouis[upstream]['patches']:
-                            if kem['name'] in patchfilename:
-                                patches_done=" with copy_from_upstream patches"
-                        patches_done=""
-                        if 'patches' in ouis[upstream]:
-                            for patchfilename in ouis[upstream]['patches']:
-                                if kem['name'] in patchfilename:
-                                    patches_done=" with copy_from_upstream patches"
-                        if 'optimized-upstreams' in oqs_yaml and upstream in oqs_yaml['optimized-upstreams']:
-                            lhs = oqs_yaml['optimized-upstreams'][upstream]['source']
-                        else:
-                            lhs = ''
-                        git_commit = ouis[upstream]['git_commit']
-                        oqs_yaml['optimized-upstreams'][upstream]['source'] = rhs_if_not_equal(lhs, ("{}/commit/{}"+patches_done).format(optimized_upstream_base_url, git_commit), "optimized-upstreams")
-
-
                 if 'auxiliary-submitters' in upstream_yaml:
                         oqs_yaml['auxiliary-submitters'] = rhs_if_not_equal(oqs_yaml['auxiliary-submitters'] if 'auxiliary-submitters' in oqs_yaml else '', upstream_yaml['auxiliary-submitters'], "auxiliary-submitters")
 
@@ -204,7 +200,7 @@ def update_upstream_kem_alg_docs(liboqs_root, kems, upstream_info, write_changes
                                     upstream_impl['supported_platforms'][i]['architecture'] = 'ARM64_V8'
                                     if 'asimd' in upstream_impl['supported_platforms'][i]['required_flags']:
                                         upstream_impl['supported_platforms'][i]['required_flags'].remove('asimd')
-                                if not upstream_impl['supported_platforms'][i]['required_flags']:
+                                if 'required_flags' in upstream_impl['supported_platforms'][i] and not upstream_impl['supported_platforms'][i]['required_flags']:
                                     del upstream_impl['supported_platforms'][i]['required_flags']
 
                             impl['supported-platforms'] = rhs_if_not_equal(impl['supported-platforms'], upstream_impl['supported_platforms'], "supported-platforms")
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a6bca7d998..8f0ac14b6b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -99,6 +99,11 @@ if(${OQS_USE_OPENSSL})
     target_link_libraries(oqs-internal PRIVATE ${OPENSSL_CRYPTO_LIBRARY})
   endif()
 endif()
+if(${OQS_USE_CUPQC})
+    set_property(TARGET oqs PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+    target_link_libraries(oqs PRIVATE cupqc)
+    target_link_options(oqs PRIVATE $<DEVICE_LINK: -dlto>)
+endif()
 
 target_include_directories(oqs
                            PUBLIC
diff --git a/src/kem/ml_kem/CMakeLists.txt b/src/kem/ml_kem/CMakeLists.txt
index 14cc9b850d..8af79b6d05 100644
--- a/src/kem/ml_kem/CMakeLists.txt
+++ b/src/kem/ml_kem/CMakeLists.txt
@@ -23,6 +23,14 @@ if(OQS_ENABLE_KEM_ml_kem_512_avx2)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_512_avx2>)
 endif()
 
+if(OQS_ENABLE_KEM_ml_kem_512_cuda)
+    add_library(ml_kem_512_cuda OBJECT cupqc_ml-kem-512_cuda/cupqc_ml-kem.cu)
+    target_link_libraries(ml_kem_512_cuda cupqc)
+    set_property(TARGET ml_kem_512_cuda PROPERTY CUDA_ARCHITECTURES OFF)
+    target_compile_options(ml_kem_512_cuda PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-rdc=true -dlto -arch=compute_70>)
+    set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_512_cuda>)
+endif()
+
 if(OQS_ENABLE_KEM_ml_kem_768)
     add_library(ml_kem_768_ref OBJECT kem_ml_kem_768.c pqcrystals-kyber-standard_ml-kem-768_ref/cbd.c pqcrystals-kyber-standard_ml-kem-768_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-768_ref/kem.c pqcrystals-kyber-standard_ml-kem-768_ref/ntt.c pqcrystals-kyber-standard_ml-kem-768_ref/poly.c pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-768_ref/reduce.c pqcrystals-kyber-standard_ml-kem-768_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-768_ref/verify.c)
     target_compile_options(ml_kem_768_ref PUBLIC -DKYBER_K=3)
@@ -41,6 +49,14 @@ if(OQS_ENABLE_KEM_ml_kem_768_avx2)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_768_avx2>)
 endif()
 
+if(OQS_ENABLE_KEM_ml_kem_768_cuda)
+    add_library(ml_kem_768_cuda OBJECT cupqc_ml-kem-768_cuda/cupqc_ml-kem.cu)
+    target_link_libraries(ml_kem_768_cuda cupqc)
+    set_property(TARGET ml_kem_768_cuda PROPERTY CUDA_ARCHITECTURES OFF)
+    target_compile_options(ml_kem_768_cuda PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-rdc=true -dlto -arch=compute_70>)
+    set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_768_cuda>)
+endif()
+
 if(OQS_ENABLE_KEM_ml_kem_1024)
     add_library(ml_kem_1024_ref OBJECT kem_ml_kem_1024.c pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.c pqcrystals-kyber-standard_ml-kem-1024_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-1024_ref/kem.c pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.c pqcrystals-kyber-standard_ml-kem-1024_ref/poly.c pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.c pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-1024_ref/verify.c)
     target_compile_options(ml_kem_1024_ref PUBLIC -DKYBER_K=4)
@@ -59,4 +75,12 @@ if(OQS_ENABLE_KEM_ml_kem_1024_avx2)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_1024_avx2>)
 endif()
 
+if(OQS_ENABLE_KEM_ml_kem_1024_cuda)
+    add_library(ml_kem_1024_cuda OBJECT cupqc_ml-kem-1024_cuda/cupqc_ml-kem.cu)
+    target_link_libraries(ml_kem_1024_cuda cupqc)
+    set_property(TARGET ml_kem_1024_cuda PROPERTY CUDA_ARCHITECTURES OFF)
+    target_compile_options(ml_kem_1024_cuda PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-rdc=true -dlto -arch=compute_70>)
+    set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_1024_cuda>)
+endif()
+
 set(ML_KEM_OBJS ${_ML_KEM_OBJS} PARENT_SCOPE)
diff --git a/src/kem/ml_kem/cupqc_ml-kem-1024_cuda/cupqc_ml-kem.cu b/src/kem/ml_kem/cupqc_ml-kem-1024_cuda/cupqc_ml-kem.cu
new file mode 100644
index 0000000000..188e2f100d
--- /dev/null
+++ b/src/kem/ml_kem/cupqc_ml-kem-1024_cuda/cupqc_ml-kem.cu
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2025 Nvidia Corporation
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+**/
+
+#include <cupqc.hpp>
+#include <stdexcept>
+#include <oqs/oqsconfig.h>
+
+using namespace cupqc;
+
+// Checks the return value from a CUDA API function
+#define CUDA_CHECK(err) \
+    if (err != cudaSuccess) { failure = true; goto cleanup; }
+
+template<class MLKEM_Keygen>
+__global__ void keygen_kernel(uint8_t *pk, uint8_t *sk, uint8_t *workspace, uint8_t *randombytes) {
+    __shared__ uint8_t smem_ptr[MLKEM_Keygen::shared_memory_size];
+    MLKEM_Keygen().execute(pk, sk, randombytes, workspace, smem_ptr);
+}
+
+template<class MLKEM_Base>
+int keypair(uint8_t *pk, uint8_t *sk) {
+    using MLKEM_Keygen = decltype(MLKEM_Base() + Function<function::Keygen>());
+
+    bool failure = false;
+    uint8_t *workspace = nullptr, *randombytes=nullptr;
+    uint8_t *d_pk = nullptr, *d_sk = nullptr;
+
+    // Allocate device workspaces
+    try {
+        workspace   = make_workspace<MLKEM_Keygen>(1);
+        randombytes = get_entropy<MLKEM_Keygen>(1);
+    } catch (const std::runtime_error& ex) {
+        failure = true;
+        goto cleanup;
+    }
+    CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Keygen::public_key_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Keygen::secret_key_size));
+
+    // Run routine
+    keygen_kernel<MLKEM_Keygen><<<1, MLKEM_Keygen::BlockDim>>>(d_pk, d_sk, workspace, randombytes);
+
+    // Copy data back to the host
+    CUDA_CHECK(cudaMemcpy(pk, d_pk, MLKEM_Keygen::public_key_size, cudaMemcpyDefault));
+    CUDA_CHECK(cudaMemcpy(sk, d_sk, MLKEM_Keygen::secret_key_size, cudaMemcpyDefault));
+
+cleanup:
+    // Free device memory
+    if (d_pk != nullptr) cudaFree(d_pk);
+    if (d_sk != nullptr) cudaFree(d_sk);
+    if (workspace != nullptr) destroy_workspace(workspace);
+    if (randombytes != nullptr) release_entropy(randombytes);
+
+    return failure ? -1 : 0;
+}
+
+template<class MLKEM_Encaps>
+__global__ void encaps_kernel(uint8_t *ct, uint8_t *ss, const uint8_t *pk, uint8_t *workspace, uint8_t *randombytes) {
+    __shared__ uint8_t smem_ptr[MLKEM_Encaps::shared_memory_size];
+    MLKEM_Encaps().execute(ct, ss, pk, randombytes, workspace, smem_ptr);
+}
+
+template<class MLKEM_Base>
+int encaps(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
+    using MLKEM_Encaps = decltype(MLKEM_Base() + Function<function::Encaps>());
+
+    bool failure = false;
+    uint8_t *workspace = nullptr, *randombytes=nullptr;
+    uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_pk = nullptr;
+
+    // Allocate device workspaces
+    try {
+        workspace   = make_workspace<MLKEM_Encaps>(1);
+        randombytes = get_entropy<MLKEM_Encaps>(1);
+    } catch (const std::runtime_error& ex) {
+        failure = true;
+        goto cleanup;
+    }
+    CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Encaps::ciphertext_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Encaps::shared_secret_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Encaps::public_key_size));
+
+    // Copy data to GPU
+    CUDA_CHECK(cudaMemcpy(d_pk, pk, MLKEM_Encaps::public_key_size, cudaMemcpyDefault));
+
+    // Run routine
+    encaps_kernel<MLKEM_Encaps><<<1, MLKEM_Encaps::BlockDim>>>(d_ct, d_ss, d_pk, workspace, randombytes);
+
+    // Copy data back to the host
+    CUDA_CHECK(cudaMemcpy(ct, d_ct, MLKEM_Encaps::ciphertext_size, cudaMemcpyDefault));
+    CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Encaps::shared_secret_size, cudaMemcpyDefault));
+
+cleanup:
+    // Free device memory
+    if (d_ct != nullptr) cudaFree(d_ct);
+    if (d_ss != nullptr) cudaFree(d_ss);
+    if (d_pk != nullptr) cudaFree(d_pk);
+    if (workspace != nullptr) destroy_workspace(workspace);
+    if (randombytes != nullptr) release_entropy(randombytes);
+
+    return failure ? -1 : 0;
+}
+
+template<class MLKEM_Decaps>
+__global__ void decaps_kernel(uint8_t *ss, const uint8_t *ct, const uint8_t *sk, uint8_t *workspace) {
+    __shared__ uint8_t smem_ptr[MLKEM_Decaps::shared_memory_size];
+    MLKEM_Decaps().execute(ss, ct, sk, workspace, smem_ptr);
+}
+
+template<class MLKEM_Base>
+int decaps(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
+    using MLKEM_Decaps = decltype(MLKEM_Base() + Function<function::Decaps>());
+
+    bool failure = false;
+    uint8_t *workspace = nullptr;
+    uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_sk = nullptr;
+
+    // Allocate device workspaces
+    try {
+        workspace = make_workspace<MLKEM_Decaps>(1);
+    } catch (const std::runtime_error& ex) {
+        failure = true;
+        goto cleanup;
+    }
+    CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Decaps::ciphertext_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Decaps::shared_secret_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Decaps::secret_key_size));
+
+    // Copy data to GPU
+    CUDA_CHECK(cudaMemcpy(d_sk, sk, MLKEM_Decaps::secret_key_size, cudaMemcpyDefault));
+    CUDA_CHECK(cudaMemcpy(d_ct, ct, MLKEM_Decaps::ciphertext_size, cudaMemcpyDefault));
+
+    // Run routine
+    decaps_kernel<MLKEM_Decaps><<<1, MLKEM_Decaps::BlockDim>>>(d_ss, d_ct, d_sk, workspace);
+
+    // Copy data back to the host
+    CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Decaps::shared_secret_size, cudaMemcpyDefault));
+
+cleanup:
+    // Free device memory
+    if (d_ct != nullptr) cudaFree(d_ct);
+    if (d_ss != nullptr) cudaFree(d_ss);
+    if (d_sk != nullptr) cudaFree(d_sk);
+    if (workspace != nullptr) destroy_workspace(workspace);
+
+    return failure ? -1 : 0;
+}
+
+extern "C" {
+    using KEM_1024 = decltype(ML_KEM_1024() + Block());
+
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
+    int cupqc_ml_kem_1024_keypair(uint8_t *pk, uint8_t *sk) {
+        return keypair<KEM_1024>(pk, sk);
+    }
+    int cupqc_ml_kem_1024_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
+        return encaps<KEM_1024>(ct, ss, pk);
+    }
+    int cupqc_ml_kem_1024_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
+        return decaps<KEM_1024>(ss, ct, sk);
+    }
+#endif
+}
diff --git a/src/kem/ml_kem/cupqc_ml-kem-512_cuda/cupqc_ml-kem.cu b/src/kem/ml_kem/cupqc_ml-kem-512_cuda/cupqc_ml-kem.cu
new file mode 100644
index 0000000000..48ca5ca31a
--- /dev/null
+++ b/src/kem/ml_kem/cupqc_ml-kem-512_cuda/cupqc_ml-kem.cu
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2025 Nvidia Corporation
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+**/
+
+#include <cupqc.hpp>
+#include <stdexcept>
+#include <oqs/oqsconfig.h>
+
+using namespace cupqc;
+
+// Checks the return value from a CUDA API function
+#define CUDA_CHECK(err) \
+    if (err != cudaSuccess) { failure = true; goto cleanup; }
+
+template<class MLKEM_Keygen>
+__global__ void keygen_kernel(uint8_t *pk, uint8_t *sk, uint8_t *workspace, uint8_t *randombytes) {
+    __shared__ uint8_t smem_ptr[MLKEM_Keygen::shared_memory_size];
+    MLKEM_Keygen().execute(pk, sk, randombytes, workspace, smem_ptr);
+}
+
+template<class MLKEM_Base>
+int keypair(uint8_t *pk, uint8_t *sk) {
+    using MLKEM_Keygen = decltype(MLKEM_Base() + Function<function::Keygen>());
+
+    bool failure = false;
+    uint8_t *workspace = nullptr, *randombytes=nullptr;
+    uint8_t *d_pk = nullptr, *d_sk = nullptr;
+
+    // Allocate device workspaces
+    try {
+        workspace   = make_workspace<MLKEM_Keygen>(1);
+        randombytes = get_entropy<MLKEM_Keygen>(1);
+    } catch (const std::runtime_error& ex) {
+        failure = true;
+        goto cleanup;
+    }
+    CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Keygen::public_key_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Keygen::secret_key_size));
+
+    // Run routine
+    keygen_kernel<MLKEM_Keygen><<<1, MLKEM_Keygen::BlockDim>>>(d_pk, d_sk, workspace, randombytes);
+
+    // Copy data back to the host
+    CUDA_CHECK(cudaMemcpy(pk, d_pk, MLKEM_Keygen::public_key_size, cudaMemcpyDefault));
+    CUDA_CHECK(cudaMemcpy(sk, d_sk, MLKEM_Keygen::secret_key_size, cudaMemcpyDefault));
+
+cleanup:
+    // Free device memory
+    if (d_pk != nullptr) cudaFree(d_pk);
+    if (d_sk != nullptr) cudaFree(d_sk);
+    if (workspace != nullptr) destroy_workspace(workspace);
+    if (randombytes != nullptr) release_entropy(randombytes);
+
+    return failure ? -1 : 0;
+}
+
+template<class MLKEM_Encaps>
+__global__ void encaps_kernel(uint8_t *ct, uint8_t *ss, const uint8_t *pk, uint8_t *workspace, uint8_t *randombytes) {
+    __shared__ uint8_t smem_ptr[MLKEM_Encaps::shared_memory_size];
+    MLKEM_Encaps().execute(ct, ss, pk, randombytes, workspace, smem_ptr);
+}
+
+template<class MLKEM_Base>
+int encaps(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
+    using MLKEM_Encaps = decltype(MLKEM_Base() + Function<function::Encaps>());
+
+    bool failure = false;
+    uint8_t *workspace = nullptr, *randombytes=nullptr;
+    uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_pk = nullptr;
+
+    // Allocate device workspaces
+    try {
+        workspace   = make_workspace<MLKEM_Encaps>(1);
+        randombytes = get_entropy<MLKEM_Encaps>(1);
+    } catch (const std::runtime_error& ex) {
+        failure = true;
+        goto cleanup;
+    }
+    CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Encaps::ciphertext_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Encaps::shared_secret_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Encaps::public_key_size));
+
+    // Copy data to GPU
+    CUDA_CHECK(cudaMemcpy(d_pk, pk, MLKEM_Encaps::public_key_size, cudaMemcpyDefault));
+
+    // Run routine
+    encaps_kernel<MLKEM_Encaps><<<1, MLKEM_Encaps::BlockDim>>>(d_ct, d_ss, d_pk, workspace, randombytes);
+
+    // Copy data back to the host
+    CUDA_CHECK(cudaMemcpy(ct, d_ct, MLKEM_Encaps::ciphertext_size, cudaMemcpyDefault));
+    CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Encaps::shared_secret_size, cudaMemcpyDefault));
+
+cleanup:
+    // Free device memory
+    if (d_ct != nullptr) cudaFree(d_ct);
+    if (d_ss != nullptr) cudaFree(d_ss);
+    if (d_pk != nullptr) cudaFree(d_pk);
+    if (workspace != nullptr) destroy_workspace(workspace);
+    if (randombytes != nullptr) release_entropy(randombytes);
+
+    return failure ? -1 : 0;
+}
+
+template<class MLKEM_Decaps>
+__global__ void decaps_kernel(uint8_t *ss, const uint8_t *ct, const uint8_t *sk, uint8_t *workspace) {
+    __shared__ uint8_t smem_ptr[MLKEM_Decaps::shared_memory_size];
+    MLKEM_Decaps().execute(ss, ct, sk, workspace, smem_ptr);
+}
+
+template<class MLKEM_Base>
+int decaps(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
+    using MLKEM_Decaps = decltype(MLKEM_Base() + Function<function::Decaps>());
+
+    bool failure = false;
+    uint8_t *workspace = nullptr;
+    uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_sk = nullptr;
+
+    // Allocate device workspaces
+    try {
+        workspace = make_workspace<MLKEM_Decaps>(1);
+    } catch (const std::runtime_error& ex) {
+        failure = true;
+        goto cleanup;
+    }
+    CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Decaps::ciphertext_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Decaps::shared_secret_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Decaps::secret_key_size));
+
+    // Copy data to GPU
+    CUDA_CHECK(cudaMemcpy(d_sk, sk, MLKEM_Decaps::secret_key_size, cudaMemcpyDefault));
+    CUDA_CHECK(cudaMemcpy(d_ct, ct, MLKEM_Decaps::ciphertext_size, cudaMemcpyDefault));
+
+    // Run routine
+    decaps_kernel<MLKEM_Decaps><<<1, MLKEM_Decaps::BlockDim>>>(d_ss, d_ct, d_sk, workspace);
+
+    // Copy data back to the host
+    CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Decaps::shared_secret_size, cudaMemcpyDefault));
+
+cleanup:
+    // Free device memory
+    if (d_ct != nullptr) cudaFree(d_ct);
+    if (d_ss != nullptr) cudaFree(d_ss);
+    if (d_sk != nullptr) cudaFree(d_sk);
+    if (workspace != nullptr) destroy_workspace(workspace);
+
+    return failure ? -1 : 0;
+}
+
+extern "C" {
+    using KEM_512  = decltype(ML_KEM_512()  + Block());
+
+#if defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
+    int cupqc_ml_kem_512_keypair(uint8_t *pk, uint8_t *sk) {
+        return keypair<KEM_512>(pk, sk);
+    }
+    int cupqc_ml_kem_512_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
+        return encaps<KEM_512>(ct, ss, pk);
+    }
+    int cupqc_ml_kem_512_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
+        return decaps<KEM_512>(ss, ct, sk);
+    }
+#endif
+}
diff --git a/src/kem/ml_kem/cupqc_ml-kem-768_cuda/cupqc_ml-kem.cu b/src/kem/ml_kem/cupqc_ml-kem-768_cuda/cupqc_ml-kem.cu
new file mode 100644
index 0000000000..594c1f4c24
--- /dev/null
+++ b/src/kem/ml_kem/cupqc_ml-kem-768_cuda/cupqc_ml-kem.cu
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2025 Nvidia Corporation
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+**/
+
+#include <cupqc.hpp>
+#include <stdexcept>
+#include <oqs/oqsconfig.h>
+
+using namespace cupqc;
+
+// Checks the return value from a CUDA API function
+#define CUDA_CHECK(err) \
+    if (err != cudaSuccess) { failure = true; goto cleanup; }
+
+template<class MLKEM_Keygen>
+__global__ void keygen_kernel(uint8_t *pk, uint8_t *sk, uint8_t *workspace, uint8_t *randombytes) {
+    __shared__ uint8_t smem_ptr[MLKEM_Keygen::shared_memory_size];
+    MLKEM_Keygen().execute(pk, sk, randombytes, workspace, smem_ptr);
+}
+
+template<class MLKEM_Base>
+int keypair(uint8_t *pk, uint8_t *sk) {
+    using MLKEM_Keygen = decltype(MLKEM_Base() + Function<function::Keygen>());
+
+    bool failure = false;
+    uint8_t *workspace = nullptr, *randombytes=nullptr;
+    uint8_t *d_pk = nullptr, *d_sk = nullptr;
+
+    // Allocate device workspaces
+    try {
+        workspace   = make_workspace<MLKEM_Keygen>(1);
+        randombytes = get_entropy<MLKEM_Keygen>(1);
+    } catch (const std::runtime_error& ex) {
+        failure = true;
+        goto cleanup;
+    }
+    CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Keygen::public_key_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Keygen::secret_key_size));
+
+    // Run routine
+    keygen_kernel<MLKEM_Keygen><<<1, MLKEM_Keygen::BlockDim>>>(d_pk, d_sk, workspace, randombytes);
+
+    // Copy data back to the host
+    CUDA_CHECK(cudaMemcpy(pk, d_pk, MLKEM_Keygen::public_key_size, cudaMemcpyDefault));
+    CUDA_CHECK(cudaMemcpy(sk, d_sk, MLKEM_Keygen::secret_key_size, cudaMemcpyDefault));
+
+cleanup:
+    // Free device memory
+    if (d_pk != nullptr) cudaFree(d_pk);
+    if (d_sk != nullptr) cudaFree(d_sk);
+    if (workspace != nullptr) destroy_workspace(workspace);
+    if (randombytes != nullptr) release_entropy(randombytes);
+
+    return failure ? -1 : 0;
+}
+
+template<class MLKEM_Encaps>
+__global__ void encaps_kernel(uint8_t *ct, uint8_t *ss, const uint8_t *pk, uint8_t *workspace, uint8_t *randombytes) {
+    __shared__ uint8_t smem_ptr[MLKEM_Encaps::shared_memory_size];
+    MLKEM_Encaps().execute(ct, ss, pk, randombytes, workspace, smem_ptr);
+}
+
+template<class MLKEM_Base>
+int encaps(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
+    using MLKEM_Encaps = decltype(MLKEM_Base() + Function<function::Encaps>());
+
+    bool failure = false;
+    uint8_t *workspace = nullptr, *randombytes=nullptr;
+    uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_pk = nullptr;
+
+    // Allocate device workspaces
+    try {
+        workspace   = make_workspace<MLKEM_Encaps>(1);
+        randombytes = get_entropy<MLKEM_Encaps>(1);
+    } catch (const std::runtime_error& ex) {
+        failure = true;
+        goto cleanup;
+    }
+    CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Encaps::ciphertext_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Encaps::shared_secret_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Encaps::public_key_size));
+
+    // Copy data to GPU
+    CUDA_CHECK(cudaMemcpy(d_pk, pk, MLKEM_Encaps::public_key_size, cudaMemcpyDefault));
+
+    // Run routine
+    encaps_kernel<MLKEM_Encaps><<<1, MLKEM_Encaps::BlockDim>>>(d_ct, d_ss, d_pk, workspace, randombytes);
+
+    // Copy data back to the host
+    CUDA_CHECK(cudaMemcpy(ct, d_ct, MLKEM_Encaps::ciphertext_size, cudaMemcpyDefault));
+    CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Encaps::shared_secret_size, cudaMemcpyDefault));
+
+cleanup:
+    // Free device memory
+    if (d_ct != nullptr) cudaFree(d_ct);
+    if (d_ss != nullptr) cudaFree(d_ss);
+    if (d_pk != nullptr) cudaFree(d_pk);
+    if (workspace != nullptr) destroy_workspace(workspace);
+    if (randombytes != nullptr) release_entropy(randombytes);
+
+    return failure ? -1 : 0;
+}
+
+template<class MLKEM_Decaps>
+__global__ void decaps_kernel(uint8_t *ss, const uint8_t *ct, const uint8_t *sk, uint8_t *workspace) {
+    __shared__ uint8_t smem_ptr[MLKEM_Decaps::shared_memory_size];
+    MLKEM_Decaps().execute(ss, ct, sk, workspace, smem_ptr);
+}
+
+template<class MLKEM_Base>
+int decaps(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
+    using MLKEM_Decaps = decltype(MLKEM_Base() + Function<function::Decaps>());
+
+    bool failure = false;
+    uint8_t *workspace = nullptr;
+    uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_sk = nullptr;
+
+    // Allocate device workspaces
+    try {
+        workspace = make_workspace<MLKEM_Decaps>(1);
+    } catch (const std::runtime_error& ex) {
+        failure = true;
+        goto cleanup;
+    }
+    CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Decaps::ciphertext_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Decaps::shared_secret_size));
+    CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Decaps::secret_key_size));
+
+    // Copy data to GPU
+    CUDA_CHECK(cudaMemcpy(d_sk, sk, MLKEM_Decaps::secret_key_size, cudaMemcpyDefault));
+    CUDA_CHECK(cudaMemcpy(d_ct, ct, MLKEM_Decaps::ciphertext_size, cudaMemcpyDefault));
+
+    // Run routine
+    decaps_kernel<MLKEM_Decaps><<<1, MLKEM_Decaps::BlockDim>>>(d_ss, d_ct, d_sk, workspace);
+
+    // Copy data back to the host
+    CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Decaps::shared_secret_size, cudaMemcpyDefault));
+
+cleanup:
+    // Free device memory
+    if (d_ct != nullptr) cudaFree(d_ct);
+    if (d_ss != nullptr) cudaFree(d_ss);
+    if (d_sk != nullptr) cudaFree(d_sk);
+    if (workspace != nullptr) destroy_workspace(workspace);
+
+    return failure ? -1 : 0;
+}
+
+extern "C" {
+    using KEM_768  = decltype(ML_KEM_768()  + Block());
+
+#if defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
+    int cupqc_ml_kem_768_keypair(uint8_t *pk, uint8_t *sk) {
+        return keypair<KEM_768>(pk, sk);
+    }
+    int cupqc_ml_kem_768_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
+        return encaps<KEM_768>(ct, ss, pk);
+    }
+    int cupqc_ml_kem_768_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
+        return decaps<KEM_768>(ss, ct, sk);
+    }
+#endif
+}
diff --git a/src/kem/ml_kem/kem_ml_kem_1024.c b/src/kem/ml_kem/kem_ml_kem_1024.c
index bc533aef9e..1e471af58a 100644
--- a/src/kem/ml_kem/kem_ml_kem_1024.c
+++ b/src/kem/ml_kem/kem_ml_kem_1024.c
@@ -40,7 +40,18 @@ extern int pqcrystals_ml_kem_1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8
 extern int pqcrystals_ml_kem_1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
 #endif
 
+#if defined(OQS_USE_CUPQC)
+#if defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
+extern int cupqc_ml_kem_1024_keypair(uint8_t *pk, uint8_t *sk);
+extern int cupqc_ml_kem_1024_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+extern int cupqc_ml_kem_1024_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+#endif
+#endif /* OQS_USE_CUPQC */
+
 OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_keypair(uint8_t *public_key, uint8_t *secret_key) {
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
+	return (OQS_STATUS) cupqc_ml_kem_1024_keypair(public_key, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */
 #if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2)
 #if defined(OQS_DIST_BUILD)
 	if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
@@ -57,6 +68,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_keypair(uint8_t *public_key, uint8_t *sec
 }
 
 OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
+	return (OQS_STATUS) cupqc_ml_kem_1024_enc(ciphertext, shared_secret, public_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */
 #if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2)
 #if defined(OQS_DIST_BUILD)
 	if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
@@ -73,6 +87,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_encaps(uint8_t *ciphertext, uint8_t *shar
 }
 
 OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
+	return (OQS_STATUS) cupqc_ml_kem_1024_dec(shared_secret, ciphertext, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */
 #if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2)
 #if defined(OQS_DIST_BUILD)
 	if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
diff --git a/src/kem/ml_kem/kem_ml_kem_512.c b/src/kem/ml_kem/kem_ml_kem_512.c
index f2dcde53d2..41805f91be 100644
--- a/src/kem/ml_kem/kem_ml_kem_512.c
+++ b/src/kem/ml_kem/kem_ml_kem_512.c
@@ -40,7 +40,18 @@ extern int pqcrystals_ml_kem_512_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_
 extern int pqcrystals_ml_kem_512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
 #endif
 
+#if defined(OQS_USE_CUPQC)
+#if defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
+extern int cupqc_ml_kem_512_keypair(uint8_t *pk, uint8_t *sk);
+extern int cupqc_ml_kem_512_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+extern int cupqc_ml_kem_512_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+#endif
+#endif /* OQS_USE_CUPQC */
+
 OQS_API OQS_STATUS OQS_KEM_ml_kem_512_keypair(uint8_t *public_key, uint8_t *secret_key) {
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
+	return (OQS_STATUS) cupqc_ml_kem_512_keypair(public_key, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */
 #if defined(OQS_ENABLE_KEM_ml_kem_512_avx2)
 #if defined(OQS_DIST_BUILD)
 	if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
@@ -57,6 +68,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_512_keypair(uint8_t *public_key, uint8_t *secr
 }
 
 OQS_API OQS_STATUS OQS_KEM_ml_kem_512_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
+	return (OQS_STATUS) cupqc_ml_kem_512_enc(ciphertext, shared_secret, public_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */
 #if defined(OQS_ENABLE_KEM_ml_kem_512_avx2)
 #if defined(OQS_DIST_BUILD)
 	if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
@@ -73,6 +87,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_512_encaps(uint8_t *ciphertext, uint8_t *share
 }
 
 OQS_API OQS_STATUS OQS_KEM_ml_kem_512_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
+	return (OQS_STATUS) cupqc_ml_kem_512_dec(shared_secret, ciphertext, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */
 #if defined(OQS_ENABLE_KEM_ml_kem_512_avx2)
 #if defined(OQS_DIST_BUILD)
 	if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
diff --git a/src/kem/ml_kem/kem_ml_kem_768.c b/src/kem/ml_kem/kem_ml_kem_768.c
index 14eb6ba404..11a7421b20 100644
--- a/src/kem/ml_kem/kem_ml_kem_768.c
+++ b/src/kem/ml_kem/kem_ml_kem_768.c
@@ -40,7 +40,18 @@ extern int pqcrystals_ml_kem_768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_
 extern int pqcrystals_ml_kem_768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
 #endif
 
+#if defined(OQS_USE_CUPQC)
+#if defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
+extern int cupqc_ml_kem_768_keypair(uint8_t *pk, uint8_t *sk);
+extern int cupqc_ml_kem_768_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+extern int cupqc_ml_kem_768_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+#endif
+#endif /* OQS_USE_CUPQC */
+
 OQS_API OQS_STATUS OQS_KEM_ml_kem_768_keypair(uint8_t *public_key, uint8_t *secret_key) {
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
+	return (OQS_STATUS) cupqc_ml_kem_768_keypair(public_key, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */
 #if defined(OQS_ENABLE_KEM_ml_kem_768_avx2)
 #if defined(OQS_DIST_BUILD)
 	if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
@@ -57,6 +68,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_768_keypair(uint8_t *public_key, uint8_t *secr
 }
 
 OQS_API OQS_STATUS OQS_KEM_ml_kem_768_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
+	return (OQS_STATUS) cupqc_ml_kem_768_enc(ciphertext, shared_secret, public_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */
 #if defined(OQS_ENABLE_KEM_ml_kem_768_avx2)
 #if defined(OQS_DIST_BUILD)
 	if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
@@ -73,6 +87,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_768_encaps(uint8_t *ciphertext, uint8_t *share
 }
 
 OQS_API OQS_STATUS OQS_KEM_ml_kem_768_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
+#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
+	return (OQS_STATUS) cupqc_ml_kem_768_dec(shared_secret, ciphertext, secret_key);
+#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */
 #if defined(OQS_ENABLE_KEM_ml_kem_768_avx2)
 #if defined(OQS_DIST_BUILD)
 	if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
diff --git a/src/oqsconfig.h.cmake b/src/oqsconfig.h.cmake
index 967c35e64e..eb21d7b003 100644
--- a/src/oqsconfig.h.cmake
+++ b/src/oqsconfig.h.cmake
@@ -69,6 +69,8 @@
 
 #cmakedefine OQS_ENABLE_SHA3_xkcp_low_avx2 1
 
+#cmakedefine01 OQS_USE_CUPQC
+
 #cmakedefine OQS_ENABLE_KEM_BIKE 1
 #cmakedefine OQS_ENABLE_KEM_bike_l1 1
 #cmakedefine OQS_ENABLE_KEM_bike_l3 1
@@ -129,10 +131,13 @@
 #cmakedefine OQS_ENABLE_KEM_ML_KEM 1
 #cmakedefine OQS_ENABLE_KEM_ml_kem_512 1
 #cmakedefine OQS_ENABLE_KEM_ml_kem_512_avx2 1
+#cmakedefine OQS_ENABLE_KEM_ml_kem_512_cuda 1
 #cmakedefine OQS_ENABLE_KEM_ml_kem_768 1
 #cmakedefine OQS_ENABLE_KEM_ml_kem_768_avx2 1
+#cmakedefine OQS_ENABLE_KEM_ml_kem_768_cuda 1
 #cmakedefine OQS_ENABLE_KEM_ml_kem_1024 1
 #cmakedefine OQS_ENABLE_KEM_ml_kem_1024_avx2 1
+#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_cuda 1
 
 #cmakedefine OQS_ENABLE_SIG_DILITHIUM 1
 #cmakedefine OQS_ENABLE_SIG_dilithium_2 1

From 1128006fe085753261986d58753d65a4fb91932d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Guti=C3=A9rrez?= <pablogf@uma.es>
Date: Tue, 28 Jan 2025 11:33:19 +0100
Subject: [PATCH 09/13] added all algorithm identifiers Doxyfile comments  for
 sig_stfl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 src/sig_stfl/sig_stfl.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/src/sig_stfl/sig_stfl.h b/src/sig_stfl/sig_stfl.h
index 4c78758de8..977320ce98 100644
--- a/src/sig_stfl/sig_stfl.h
+++ b/src/sig_stfl/sig_stfl.h
@@ -120,49 +120,82 @@ extern "C"
 #define OQS_SIG_STFL_alg_xmssmt_shake128_h60_12 "XMSSMT-SHAKE_60/12_256"
 
 /* Defined LMS parameter identifiers */
+/** Algorithm identifier for LMS-SHA256_H5_W1  */
 #define OQS_SIG_STFL_alg_lms_sha256_h5_w1 "LMS_SHA256_H5_W1" //"5/1"
+/** Algorithm identifier for LMS-SHA256_H5_W2  */
 #define OQS_SIG_STFL_alg_lms_sha256_h5_w2 "LMS_SHA256_H5_W2" //"5/2"
+/** Algorithm identifier for LMS-SHA256_H5_W4  */
 #define OQS_SIG_STFL_alg_lms_sha256_h5_w4 "LMS_SHA256_H5_W4" //"5/4"
+/** Algorithm identifier for LMS-SHA256_H5_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h5_w8 "LMS_SHA256_H5_W8" //"5/8"
 
+/** Algorithm identifier for LMS-SHA256_H10_W1  */
 #define OQS_SIG_STFL_alg_lms_sha256_h10_w1 "LMS_SHA256_H10_W1" //"10/1"
+/** Algorithm identifier for LMS-SHA256_H10_W2  */
 #define OQS_SIG_STFL_alg_lms_sha256_h10_w2 "LMS_SHA256_H10_W2" //"10/2"
+/** Algorithm identifier for LMS-SHA256_H10_W4  */
 #define OQS_SIG_STFL_alg_lms_sha256_h10_w4 "LMS_SHA256_H10_W4" //"10/4"
+/** Algorithm identifier for LMS-SHA256_H10_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h10_w8 "LMS_SHA256_H10_W8" //"10/8"
 
+/** Algorithm identifier for LMS-SHA256_H15_W1  */
 #define OQS_SIG_STFL_alg_lms_sha256_h15_w1 "LMS_SHA256_H15_W1" //"15/1"
+/** Algorithm identifier for LMS-SHA256_H15_W2  */
 #define OQS_SIG_STFL_alg_lms_sha256_h15_w2 "LMS_SHA256_H15_W2" //"15/2"
+/** Algorithm identifier for LMS-SHA256_H15_W4  */
 #define OQS_SIG_STFL_alg_lms_sha256_h15_w4 "LMS_SHA256_H15_W4" //"15/4"
+/** Algorithm identifier for LMS-SHA256_H15_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h15_w8 "LMS_SHA256_H15_W8" //"15/8"
 
+/** Algorithm identifier for LMS-SHA256_H20_W1  */
 #define OQS_SIG_STFL_alg_lms_sha256_h20_w1 "LMS_SHA256_H20_W1" //"20/1"
+/** Algorithm identifier for LMS-SHA256_H20_W2  */
 #define OQS_SIG_STFL_alg_lms_sha256_h20_w2 "LMS_SHA256_H20_W2" //"20/2"
+/** Algorithm identifier for LMS-SHA256_H20_W4  */
 #define OQS_SIG_STFL_alg_lms_sha256_h20_w4 "LMS_SHA256_H20_W4" //"20/4"
+/** Algorithm identifier for LMS-SHA256_H20_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h20_w8 "LMS_SHA256_H20_W8" //"20/8"
 
+/** Algorithm identifier for LMS-SHA256_H25_W1  */
 #define OQS_SIG_STFL_alg_lms_sha256_h25_w1 "LMS_SHA256_H25_W1" //"25/1"
+/** Algorithm identifier for LMS-SHA256_H25_W2  */
 #define OQS_SIG_STFL_alg_lms_sha256_h25_w2 "LMS_SHA256_H25_W2" //"25/2"
+/** Algorithm identifier for LMS-SHA256_H25_W4  */
 #define OQS_SIG_STFL_alg_lms_sha256_h25_w4 "LMS_SHA256_H25_W4" //"25/4"
+/** Algorithm identifier for LMS-SHA256_H25_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h25_w8 "LMS_SHA256_H25_W8" //"25/8"
 
 // 2-Level LMS
+/** Algorithm identifier for LMS-SHA256_H5_W8_H5_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h5_w8_h5_w8 "LMS_SHA256_H5_W8_H5_W8" //"5/8, 5/8"
 
 // RFC 6554
+/** Algorithm identifier for LMS-SHA256_H10_W4_H5_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h10_w4_h5_w8 "LMS_SHA256_H10_W4_H5_W8" //"10/4, 5/8"
 
+/** Algorithm identifier for LMS-SHA256_H10_W8_H5_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h10_w8_h5_w8 "LMS_SHA256_H10_W8_H5_W8"   //"10/8, 5/8"
+/** Algorithm identifier for LMS-SHA256_H10_W2_H10_W2  */
 #define OQS_SIG_STFL_alg_lms_sha256_h10_w2_h10_w2 "LMS_SHA256_H10_W2_H10_W2" //"10/2, 10/2"
+/** Algorithm identifier for LMS-SHA256_H10_W4_H10_W4  */
 #define OQS_SIG_STFL_alg_lms_sha256_h10_w4_h10_w4 "LMS_SHA256_H10_W4_H10_W4" //"10/4, 10/4"
+/** Algorithm identifier for LMS-SHA256_H10_W8_H10_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h10_w8_h10_w8 "LMS_SHA256_H10_W8_H10_W8" //"10/8, 10/8"
 
+/** Algorithm identifier for LMS-SHA256_H15_W8_H5_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h15_w8_h5_w8 "LMS_SHA256_H15_W8_H5_W8"   //"15/8, 5/8"
+/** Algorithm identifier for LMS-SHA256_H15_W8_H10_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h15_w8_h10_w8 "LMS_SHA256_H15_W8_H10_W8" //"15/8, 10/8"
+/** Algorithm identifier for LMS-SHA256_H15_W8_H15_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h15_w8_h15_w8 "LMS_SHA256_H15_W8_H15_W8" //"15/8, 15/8"
 
+/** Algorithm identifier for LMS-SHA256_H20_W8_H5_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h20_w8_h5_w8 "LMS_SHA256_H20_W8_H5_W8"   //"20/8, 5/8"
+/** Algorithm identifier for LMS-SHA256_H20_W8_H10_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h20_w8_h10_w8 "LMS_SHA256_H20_W8_H10_W8" //"20/8, 10/8"
+/** Algorithm identifier for LMS-SHA256_H20_W8_H15_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h20_w8_h15_w8 "LMS_SHA256_H20_W8_H15_W8" //"20/8, 15/8"
+/** Algorithm identifier for LMS-SHA256_H20_W8_H20_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h20_w8_h20_w8 "LMS_SHA256_H20_W8_H20_W8" //"20/8, 20/8"
 
 /*

From 6e569f2c00b0d216aa3d424a7d0412953a27be95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Guti=C3=A9rrez?= <pablogf@uma.es>
Date: Tue, 28 Jan 2025 15:42:44 +0100
Subject: [PATCH 10/13] added additional Doxygen comments to sig_stfl.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 src/sig_stfl/sig_stfl.h | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/sig_stfl/sig_stfl.h b/src/sig_stfl/sig_stfl.h
index 977320ce98..4dc27da183 100644
--- a/src/sig_stfl/sig_stfl.h
+++ b/src/sig_stfl/sig_stfl.h
@@ -198,8 +198,8 @@ extern "C"
 /** Algorithm identifier for LMS-SHA256_H20_W8_H20_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h20_w8_h20_w8 "LMS_SHA256_H20_W8_H20_W8" //"20/8, 20/8"
 
-/*
- * Total number of stateful variants defined above, used to create the tracking array
+/** 
+ * Total number of stateful variants defined above, used to create the tracking array 
  */
 #define OQS_SIG_STFL_algs_length 70
 
@@ -258,12 +258,15 @@ OQS_API int OQS_SIG_STFL_alg_count(void);
 OQS_API int OQS_SIG_STFL_alg_is_enabled(const char *method_name);
 
 #ifndef OQS_ALLOW_STFL_KEY_AND_SIG_GEN
+
+/** Signature schemes object */
 typedef struct OQS_SIG OQS_SIG;
+
+/** Stateful signature scheme object */
 #define OQS_SIG_STFL OQS_SIG
 #else
-/**
- * Stateful signature scheme object
- */
+
+/** Stateful signature scheme object */
 typedef struct OQS_SIG_STFL {
 
 	/**
@@ -372,16 +375,16 @@ typedef struct OQS_SIG_STFL {
 
 typedef struct OQS_SIG_STFL_SECRET_KEY {
 
-	/* The (maximum) length, in bytes, of secret keys for this signature scheme. */
+	/** The (maximum) length, in bytes, of secret keys for this signature scheme. */
 	size_t length_secret_key;
 
-	/* The variant-specific secret key data must be allocated at the initialization. */
+	/** The variant-specific secret key data must be allocated at the initialization. */
 	void *secret_key_data;
 
-	/* The mutual exclusion struct */
+	/** The mutual exclusion struct */
 	void *mutex;
 
-	/* Application-managed data related to secure storage of secret key data */
+	/** Application-managed data related to secure storage of secret key data */
 	void *context;
 
 	/**
@@ -551,6 +554,7 @@ OQS_API OQS_STATUS OQS_SIG_STFL_verify(const OQS_SIG_STFL *sig, const uint8_t *m
  * The remaining signatures are the number of signatures available before the private key runs out of its total signature and expires.
  *
  * @param[in] sig The OQS_SIG_STFL object representing the signature scheme.
+ * @param[in] remain The number of remaining signatures.
  * @param[in] secret_key The secret key object.
  * @return OQS_SUCCESS or OQS_ERROR
  */
@@ -588,7 +592,7 @@ OQS_API OQS_SIG_STFL_SECRET_KEY *OQS_SIG_STFL_SECRET_KEY_new(const char *method_
 /**
  * Free an OQS_SIG_STFL_SECRET_KEY object that was constructed by OQS_SECRET_KEY_new.
  *
- * @param[in] sig The OQS_SIG_STFL_SECRET_KEY object to free.
+ * @param[in] sk The OQS_SIG_STFL_SECRET_KEY object to free.
  * @return OQS_SUCCESS if successful, or OQS_ERROR if the object cannot be freed.
  */
 OQS_API void OQS_SIG_STFL_SECRET_KEY_free(OQS_SIG_STFL_SECRET_KEY *sk);

From 2b1ca4da2a934f77d91620fb94a7e5a93eef17b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Guti=C3=A9rrez?= <pablogf@uma.es>
Date: Tue, 28 Jan 2025 16:08:40 +0100
Subject: [PATCH 11/13] fixed formatting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 src/sig_stfl/sig_stfl.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/sig_stfl/sig_stfl.h b/src/sig_stfl/sig_stfl.h
index 4dc27da183..9c8aef2b48 100644
--- a/src/sig_stfl/sig_stfl.h
+++ b/src/sig_stfl/sig_stfl.h
@@ -198,9 +198,7 @@ extern "C"
 /** Algorithm identifier for LMS-SHA256_H20_W8_H20_W8  */
 #define OQS_SIG_STFL_alg_lms_sha256_h20_w8_h20_w8 "LMS_SHA256_H20_W8_H20_W8" //"20/8, 20/8"
 
-/** 
- * Total number of stateful variants defined above, used to create the tracking array 
- */
+/** Total number of stateful variants defined above, used to create the tracking array */
 #define OQS_SIG_STFL_algs_length 70
 
 typedef struct OQS_SIG_STFL_SECRET_KEY OQS_SIG_STFL_SECRET_KEY;

From 0f1a9813c5654b5c9cd6f5e2a2d831456874da84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Guti=C3=A9rrez?= <pablogf@uma.es>
Date: Tue, 28 Jan 2025 16:21:58 +0100
Subject: [PATCH 12/13] fixed return types errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>
---
 src/sig_stfl/sig_stfl.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/sig_stfl/sig_stfl.h b/src/sig_stfl/sig_stfl.h
index 9c8aef2b48..3e391c3db4 100644
--- a/src/sig_stfl/sig_stfl.h
+++ b/src/sig_stfl/sig_stfl.h
@@ -591,7 +591,6 @@ OQS_API OQS_SIG_STFL_SECRET_KEY *OQS_SIG_STFL_SECRET_KEY_new(const char *method_
  * Free an OQS_SIG_STFL_SECRET_KEY object that was constructed by OQS_SECRET_KEY_new.
  *
  * @param[in] sk The OQS_SIG_STFL_SECRET_KEY object to free.
- * @return OQS_SUCCESS if successful, or OQS_ERROR if the object cannot be freed.
  */
 OQS_API void OQS_SIG_STFL_SECRET_KEY_free(OQS_SIG_STFL_SECRET_KEY *sk);
 
@@ -603,7 +602,6 @@ OQS_API void OQS_SIG_STFL_SECRET_KEY_free(OQS_SIG_STFL_SECRET_KEY *sk);
  *
  * @param[in] sk Pointer to the secret key object whose lock function is to be set.
  * @param[in] lock Function pointer to the locking routine provided by the application.
- * @return None.
  *
  * @note It's not required to set the lock and unlock functions in a single-threaded environment.
  *
@@ -621,7 +619,6 @@ OQS_API void OQS_SIG_STFL_SECRET_KEY_SET_lock(OQS_SIG_STFL_SECRET_KEY *sk, lock_
  *
  * @param[in] sk Pointer to the secret key object whose unlock function is to be set.
  * @param[in] unlock Function pointer to the unlock routine provided by the application.
- * @return None.
  *
  * @note It's not required to set the lock and unlock functions in a single-threaded environment.
  *
@@ -638,7 +635,6 @@ OQS_API void OQS_SIG_STFL_SECRET_KEY_SET_unlock(OQS_SIG_STFL_SECRET_KEY *sk, unl
  *
  * @param[in] sk A pointer to the secret key that the mutex functionality will protect.
  * @param[in] mutex A function pointer to the desired concurrency control mechanism.
- * @return None.
  *
  * @note It's not required to set the lock and unlock functions in a single-threaded environment.
  *
@@ -700,7 +696,6 @@ OQS_STATUS OQS_SIG_STFL_SECRET_KEY_unlock(OQS_SIG_STFL_SECRET_KEY *sk);
  * @param[in] context Application-specific context that assists in the storage of secret key data.
  *                    This context is managed by the application, which allocates it, keeps track of it,
  *                    and deallocates it as necessary.
- * @return None.
  */
 OQS_API void OQS_SIG_STFL_SECRET_KEY_SET_store_cb(OQS_SIG_STFL_SECRET_KEY *sk, secure_store_sk store_cb, void *context);
 

From a22f7ea3da61b0748f41f8bab9b42227b7ab1360 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Guti=C3=A9rrez?= <pablogf@uma.es>
Date: Wed, 29 Jan 2025 10:41:53 +0100
Subject: [PATCH 13/13] included sig_stfl API Doxygen documentation [full
 tests]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pablo Gutiérrez <pablogf@uma.es>