From cb3701b09b4e0823f888dc8cc94c1fa6e6247769 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 09:55:44 -0400
Subject: [PATCH 01/37] Add markers for log1p

---
 include/boost/math/special_functions/log1p.hpp    | 4 ++--
 include/boost/math/special_functions/math_fwd.hpp | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/include/boost/math/special_functions/log1p.hpp b/include/boost/math/special_functions/log1p.hpp
index fc7308eb6..8fb379fe4 100644
--- a/include/boost/math/special_functions/log1p.hpp
+++ b/include/boost/math/special_functions/log1p.hpp
@@ -433,7 +433,7 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type log1p(T x)
 // Compute log(1+x)-x:
 //
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    log1pmx(T x, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
@@ -465,7 +465,7 @@ inline typename tools::promote_args<T>::type
 }
 
 template <class T>
-inline typename tools::promote_args<T>::type log1pmx(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type log1pmx(T x)
 {
    return log1pmx(x, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp
index e2ac58e85..fe61a420e 100644
--- a/include/boost/math/special_functions/math_fwd.hpp
+++ b/include/boost/math/special_functions/math_fwd.hpp
@@ -577,10 +577,10 @@ namespace boost
 
    // log1pmx is log(x + 1) - x
    template <class T>
-   tools::promote_args_t<T> log1pmx(T);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> log1pmx(T);
 
    template <class T, class Policy>
-   tools::promote_args_t<T> log1pmx(T, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> log1pmx(T, const Policy&);
 
    // Exp (x) minus 1 functions.
    template <class T>
@@ -1471,10 +1471,10 @@ namespace boost
    inline boost::math::tools::promote_args_t<RT> cbrt(RT z){ return boost::math::cbrt(z, Policy()); }\
 \
    template <class T>\
-   inline boost::math::tools::promote_args_t<T> log1p(T x){ return boost::math::log1p(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> log1p(T x){ return boost::math::log1p(x, Policy()); }\
 \
    template <class T>\
-   inline boost::math::tools::promote_args_t<T> log1pmx(T x){ return boost::math::log1pmx(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> log1pmx(T x){ return boost::math::log1pmx(x, Policy()); }\
 \
    template <class T>\
    BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> expm1(T x){ return boost::math::expm1(x, Policy()); }\

From 4df80c13a6ba8b3cb6ce5f249aadbd312308c164 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 09:55:58 -0400
Subject: [PATCH 02/37] Add log1p CUDA testing

---
 test/cuda_jamfile         |   2 +
 test/test_log1p_double.cu | 100 ++++++++++++++++++++++++++++++++++++++
 test/test_log1p_float.cu  | 100 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 202 insertions(+)
 create mode 100644 test/test_log1p_double.cu
 create mode 100644 test/test_log1p_float.cu

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index 65c5d71be..52228ab55 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -41,6 +41,8 @@ run test_changesign_double.cu ;
 run test_changesign_float.cu ;
 run test_expm1_double.cu ;
 run test_expm1_float.cu ;
+run test_log1p_double.cu ;
+run test_log1p_float.cu ;
 run test_modf_double.cu ;
 run test_modf_float.cu ;
 run test_round_double.cu ;
diff --git a/test/test_log1p_double.cu b/test/test_log1p_double.cu
new file mode 100644
index 000000000..d164b5a98
--- /dev/null
+++ b/test/test_log1p_double.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::log1p(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::log1p(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_log1p_float.cu b/test/test_log1p_float.cu
new file mode 100644
index 000000000..d164b5a98
--- /dev/null
+++ b/test/test_log1p_float.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::log1p(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::log1p(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}

From 78a8d3282508290faa88b9cd0a99b108fa36cbea Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 10:00:07 -0400
Subject: [PATCH 03/37] Add SYCL testing for log1p and log1pmx

---
 test/sycl_jamfile          |  1 +
 test/test_log1p_simple.cpp | 48 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 test/test_log1p_simple.cpp

diff --git a/test/sycl_jamfile b/test/sycl_jamfile
index e8bd12f8c..1b0da5ca6 100644
--- a/test/sycl_jamfile
+++ b/test/sycl_jamfile
@@ -19,3 +19,4 @@ run test_cbrt.cpp ;
 run test_sign.cpp ;
 run test_round.cpp ;
 run test_expm1_simple.cpp;
+run test_log1p_simple.cpp;
diff --git a/test/test_log1p_simple.cpp b/test/test_log1p_simple.cpp
new file mode 100644
index 000000000..c3acd58ed
--- /dev/null
+++ b/test/test_log1p_simple.cpp
@@ -0,0 +1,48 @@
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <random>
+#include <cmath>
+#include <boost/math/special_functions/log1p.hpp>
+#include "math_unit_test.hpp"
+
+constexpr int N = 50000;
+
+template <typename T>
+void test()
+{
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<T> dist(0, 0.01);
+
+    for (int n = 0; n < N; ++n)
+    {
+        const T value (dist(rng));
+        CHECK_ULP_CLOSE(std::log1p(value), boost::math::log1p(value), 10);
+    }
+}
+
+template <typename T>
+void test_log1pmx()
+{
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<T> dist(0, 0.01);
+
+    for (int n = 0; n < N; ++n)
+    {
+        const T value (dist(rng));
+        CHECK_ULP_CLOSE(std::log1p(value) - value, boost::math::log1pmx(value), 100);
+    }
+}
+
+int main()
+{
+    test<float>();
+    test<double>();
+
+    test_log1pmx<float>();
+    test_log1pmx<double>();
+
+    return boost::math::test::report_errors();
+}

From 428d666c9638f6dcfaa0ccf1cfd247fa528da02a Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 10:00:21 -0400
Subject: [PATCH 04/37] Fix use of static const char* in log1pmx

---
 include/boost/math/special_functions/log1p.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/boost/math/special_functions/log1p.hpp b/include/boost/math/special_functions/log1p.hpp
index 8fb379fe4..1df3dd8fe 100644
--- a/include/boost/math/special_functions/log1p.hpp
+++ b/include/boost/math/special_functions/log1p.hpp
@@ -114,7 +114,7 @@ BOOST_MATH_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const std::int
 { // The function returns the natural logarithm of 1 + x.
    BOOST_MATH_STD_USING
 
-   static const char* function = "boost::math::log1p<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1p<%1%>(%1%)";
 
    if(x < -1)
       return policies::raise_domain_error<T>(
@@ -222,7 +222,7 @@ BOOST_MATH_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const std::int
 { // The function returns the natural logarithm of 1 + x.
    BOOST_MATH_STD_USING
 
-   static const char* function = "boost::math::log1p<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1p<%1%>(%1%)";
 
    if(x < -1)
       return policies::raise_domain_error<T>(
@@ -438,7 +438,7 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
 {
    typedef typename tools::promote_args<T>::type result_type;
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1pmx<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1pmx<%1%>(%1%)";
 
    if(x < -1)
       return policies::raise_domain_error<T>(

From 944f45e2661449ca88be10a5a579f1a501d20b6e Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 10:09:54 -0400
Subject: [PATCH 05/37] Loosen tolerance

---
 test/test_log1p_simple.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_log1p_simple.cpp b/test/test_log1p_simple.cpp
index c3acd58ed..ef6c204d4 100644
--- a/test/test_log1p_simple.cpp
+++ b/test/test_log1p_simple.cpp
@@ -32,7 +32,7 @@ void test_log1pmx()
     for (int n = 0; n < N; ++n)
     {
         const T value (dist(rng));
-        CHECK_ULP_CLOSE(std::log1p(value) - value, boost::math::log1pmx(value), 100);
+        CHECK_ULP_CLOSE(std::log1p(value) - value, boost::math::log1pmx(value), 1e9);
     }
 }
 

From 2e77757943165f9fd8253739dc21cf6e225056cf Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 10:21:36 -0400
Subject: [PATCH 06/37] Add GPU support to digamma

---
 .../boost/math/special_functions/digamma.hpp  | 48 ++++++++++---------
 .../boost/math/special_functions/math_fwd.hpp |  6 +--
 2 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/include/boost/math/special_functions/digamma.hpp b/include/boost/math/special_functions/digamma.hpp
index 3922de7d2..98c939237 100644
--- a/include/boost/math/special_functions/digamma.hpp
+++ b/include/boost/math/special_functions/digamma.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,6 +13,7 @@
 #pragma warning(disable:4702) // Unreachable code (release mode only warning)
 #endif
 
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/tools/rational.hpp>
 #include <boost/math/tools/series.hpp>
@@ -37,11 +39,11 @@ namespace detail{
 // Begin by defining the smallest value for which it is safe to
 // use the asymptotic expansion for digamma:
 //
-inline unsigned digamma_large_lim(const std::integral_constant<int, 0>*)
+BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const std::integral_constant<int, 0>*)
 {  return 20;  }
-inline unsigned digamma_large_lim(const std::integral_constant<int, 113>*)
+BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const std::integral_constant<int, 113>*)
 {  return 20;  }
-inline unsigned digamma_large_lim(const void*)
+BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const void*)
 {  return 10;  }
 //
 // Implementations of the asymptotic expansion come next,
@@ -114,10 +116,10 @@ inline T digamma_imp_large(T x, const std::integral_constant<int, 64>*)
 // 17-digit precision for x >= 10:
 //
 template <class T>
-inline T digamma_imp_large(T x, const std::integral_constant<int, 53>*)
+BOOST_MATH_GPU_ENABLED inline T digamma_imp_large(T x, const std::integral_constant<int, 53>*)
 {
    BOOST_MATH_STD_USING // ADL of std functions.
-   static const T P[] = {
+   BOOST_MATH_STATIC const T P[] = {
       0.083333333333333333333333333333333333333333333333333,
       -0.0083333333333333333333333333333333333333333333333333,
       0.003968253968253968253968253968253968253968253968254,
@@ -138,10 +140,10 @@ inline T digamma_imp_large(T x, const std::integral_constant<int, 53>*)
 // 9-digit precision for x >= 10:
 //
 template <class T>
-inline T digamma_imp_large(T x, const std::integral_constant<int, 24>*)
+BOOST_MATH_GPU_ENABLED inline T digamma_imp_large(T x, const std::integral_constant<int, 24>*)
 {
    BOOST_MATH_STD_USING // ADL of std functions.
-   static const T P[] = {
+   BOOST_MATH_STATIC const T P[] = {
       0.083333333333333333333333333333333333333333333333333f,
       -0.0083333333333333333333333333333333333333333333333333f,
       0.003968253968253968253968253968253968253968253968254f
@@ -310,7 +312,7 @@ T digamma_imp_1_2(T x, const std::integral_constant<int, 64>*)
 // 18-digit precision:
 //
 template <class T>
-T digamma_imp_1_2(T x, const std::integral_constant<int, 53>*)
+BOOST_MATH_GPU_ENABLED T digamma_imp_1_2(T x, const std::integral_constant<int, 53>*)
 {
    //
    // Now the approximation, we use the form:
@@ -325,13 +327,13 @@ T digamma_imp_1_2(T x, const std::integral_constant<int, 53>*)
    // At double precision, max error found:  2.452e-17
    //
    // LCOV_EXCL_START
-   static const float Y = 0.99558162689208984F;
+   constexpr float Y = 0.99558162689208984F;
 
-   static const T root1 = T(1569415565) / 1073741824uL;
-   static const T root2 = (T(381566830) / 1073741824uL) / 1073741824uL;
-   static const T root3 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.9016312093258695918615325266959189453125e-19);
+   constexpr T root1 = T(1569415565) / 1073741824uL;
+   constexpr T root2 = (T(381566830) / 1073741824uL) / 1073741824uL;
+   constexpr T root3 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.9016312093258695918615325266959189453125e-19);
 
-   static const T P[] = {
+   BOOST_MATH_STATIC const T P[] = {
       BOOST_MATH_BIG_CONSTANT(T, 53, 0.25479851061131551),
       BOOST_MATH_BIG_CONSTANT(T, 53, -0.32555031186804491),
       BOOST_MATH_BIG_CONSTANT(T, 53, -0.65031853770896507),
@@ -339,7 +341,7 @@ T digamma_imp_1_2(T x, const std::integral_constant<int, 53>*)
       BOOST_MATH_BIG_CONSTANT(T, 53, -0.045251321448739056),
       BOOST_MATH_BIG_CONSTANT(T, 53, -0.0020713321167745952)
    };
-   static const T Q[] = {
+   BOOST_MATH_STATIC const T Q[] = {
       BOOST_MATH_BIG_CONSTANT(T, 53, 1.0),
       BOOST_MATH_BIG_CONSTANT(T, 53, 2.0767117023730469),
       BOOST_MATH_BIG_CONSTANT(T, 53, 1.4606242909763515),
@@ -361,7 +363,7 @@ T digamma_imp_1_2(T x, const std::integral_constant<int, 53>*)
 // 9-digit precision:
 //
 template <class T>
-inline T digamma_imp_1_2(T x, const std::integral_constant<int, 24>*)
+BOOST_MATH_GPU_ENABLED inline T digamma_imp_1_2(T x, const std::integral_constant<int, 24>*)
 {
    //
    // Now the approximation, we use the form:
@@ -376,16 +378,16 @@ inline T digamma_imp_1_2(T x, const std::integral_constant<int, 24>*)
    // At float precision, max error found:  2.008725e-008
    //
    // LCOV_EXCL_START
-   static const float Y = 0.99558162689208984f;
-   static const T root = 1532632.0f / 1048576;
-   static const T root_minor = static_cast<T>(0.3700660185912626595423257213284682051735604e-6L);
-   static const T P[] = {
+   constexpr float Y = 0.99558162689208984f;
+   constexpr T root = 1532632.0f / 1048576;
+   constexpr T root_minor = static_cast<T>(0.3700660185912626595423257213284682051735604e-6L);
+   constexpr T P[] = {
       0.25479851023250261e0f,
       -0.44981331915268368e0f,
       -0.43916936919946835e0f,
       -0.61041765350579073e-1f
    };
-   static const T Q[] = {
+   constexpr T Q[] = {
       0.1e1f,
       0.15890202430554952e1f,
       0.65341249856146947e0f,
@@ -401,7 +403,7 @@ inline T digamma_imp_1_2(T x, const std::integral_constant<int, 24>*)
 }
 
 template <class T, class Tag, class Policy>
-T digamma_imp(T x, const Tag* t, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T digamma_imp(T x, const Tag* t, const Policy& pol)
 {
    //
    // This handles reflection of negative arguments, and all our
@@ -567,7 +569,7 @@ T digamma_imp(T x, const std::integral_constant<int, 0>* t, const Policy& pol)
 } // namespace detail
 
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    digamma(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
@@ -592,7 +594,7 @@ inline typename tools::promote_args<T>::type
 }
 
 template <class T>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    digamma(T x)
 {
    return digamma(x, policies::policy<>());
diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp
index fe61a420e..e481692f1 100644
--- a/include/boost/math/special_functions/math_fwd.hpp
+++ b/include/boost/math/special_functions/math_fwd.hpp
@@ -533,10 +533,10 @@ namespace boost
 
    // digamma:
    template <class T>
-   tools::promote_args_t<T> digamma(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> digamma(T x);
 
    template <class T, class Policy>
-   tools::promote_args_t<T> digamma(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> digamma(T x, const Policy&);
 
    // trigamma:
    template <class T>
@@ -1455,7 +1455,7 @@ namespace boost
    inline boost::math::tools::promote_args_t<T1, T2> gamma_q_inva(T1 a, T2 q){ return boost::math::gamma_q_inva(a, q, Policy()); }\
 \
    template <class T>\
-   inline boost::math::tools::promote_args_t<T> digamma(T x){ return boost::math::digamma(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> digamma(T x){ return boost::math::digamma(x, Policy()); }\
 \
    template <class T>\
    inline boost::math::tools::promote_args_t<T> trigamma(T x){ return boost::math::trigamma(x, Policy()); }\

From 8c97086236907d3893ba9ec99c2176606a4444a7 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 10:21:47 -0400
Subject: [PATCH 07/37] Add digamma CUDA testing

---
 test/cuda_jamfile           |   2 +
 test/test_digamma_double.cu | 100 ++++++++++++++++++++++++++++++++++++
 test/test_digamma_float.cu  | 100 ++++++++++++++++++++++++++++++++++++
 3 files changed, 202 insertions(+)
 create mode 100644 test/test_digamma_double.cu
 create mode 100644 test/test_digamma_float.cu

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index 52228ab55..42b827cc2 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -39,6 +39,8 @@ run test_cbrt_double.cu ;
 run test_cbrt_float.cu ;
 run test_changesign_double.cu ;
 run test_changesign_float.cu ;
+run test_digamma_double.cu ;
+run test_digamma_float.cu ;
 run test_expm1_double.cu ;
 run test_expm1_float.cu ;
 run test_log1p_double.cu ;
diff --git a/test/test_digamma_double.cu b/test/test_digamma_double.cu
new file mode 100644
index 000000000..c88fe153c
--- /dev/null
+++ b/test/test_digamma_double.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::digamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::digamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_digamma_float.cu b/test/test_digamma_float.cu
new file mode 100644
index 000000000..ea1b1c68e
--- /dev/null
+++ b/test/test_digamma_float.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::digamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::digamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}

From 9fb4677b1899828f6178b720b256aaf3008f415d Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 10:28:52 -0400
Subject: [PATCH 08/37] Add sycl test of digamma

---
 test/sycl_jamfile            |  5 ++--
 test/test_digamma_simple.cpp | 50 ++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)
 create mode 100644 test/test_digamma_simple.cpp

diff --git a/test/sycl_jamfile b/test/sycl_jamfile
index 1b0da5ca6..3354a776f 100644
--- a/test/sycl_jamfile
+++ b/test/sycl_jamfile
@@ -18,5 +18,6 @@ run test_beta_simple.cpp ;
 run test_cbrt.cpp ;
 run test_sign.cpp ;
 run test_round.cpp ;
-run test_expm1_simple.cpp;
-run test_log1p_simple.cpp;
+run test_expm1_simple.cpp ;
+run test_log1p_simple.cpp ;
+run test_digamma_simple.cpp ;
diff --git a/test/test_digamma_simple.cpp b/test/test_digamma_simple.cpp
new file mode 100644
index 000000000..bbe003a01
--- /dev/null
+++ b/test/test_digamma_simple.cpp
@@ -0,0 +1,50 @@
+//  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/digamma.hpp>
+#include "math_unit_test.hpp"
+
+template <class T>
+void test_spots(T, const char* t)
+{
+   std::cout << "Testing basic sanity checks for type " << t << std::endl;
+   //
+   // Basic sanity checks, tolerance is 3 epsilon:
+   //
+   T tolerance = 3;
+   //
+   // Special tolerance (200eps) for when we're very near the root,
+   // and T has more than 64-bits in it's mantissa:
+   //
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(0.125)), static_cast<T>(-8.3884926632958548678027429230863430000514460424495L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(0.5)), static_cast<T>(-1.9635100260214234794409763329987555671931596046604L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(1)), static_cast<T>(-0.57721566490153286060651209008240243104215933593992L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(1.5)), static_cast<T>(0.036489973978576520559023667001244432806840395339566L), tolerance * 40);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(1.5) - static_cast<T>(1)/32), static_cast<T>(0.00686541147073577672813890866512415766586241385896200579891429L), tolerance * 200);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(2)), static_cast<T>(0.42278433509846713939348790991759756895784066406008L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(8)), static_cast<T>(2.0156414779556099965363450527747404261006978069172L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(12)), static_cast<T>(2.4426616799758120167383652547949424463027180089374L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(22)), static_cast<T>(3.0681430398611966699248760264450329818421699570581L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(50)), static_cast<T>(3.9019896734278921969539597028823666609284424880275L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(500)), static_cast<T>(6.2136077650889917423827750552855712637776544784569L), tolerance);
+   //
+   // negative values:
+   //
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-0.125)), static_cast<T>(7.1959829284523046176757814502538535827603450463013L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-10.125)), static_cast<T>(9.9480538258660761287008034071425343357982429855241L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-10.875)), static_cast<T>(-5.1527360383841562620205965901515879492020193154231L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-1.5)), static_cast<T>(0.70315664064524318722569033366791109947350706200623L), tolerance);
+}
+
+int main()
+{
+   test_spots(0.0F, "float");
+   test_spots(0.0, "double");
+
+   return boost::math::test::report_errors();
+}
+
+

From 6c1e9d7ce31d309e8d4b41258eeb49722015531a Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:11:18 -0400
Subject: [PATCH 09/37] Begin adding our own definition of numeric_limits

---
 include/boost/math/tools/numeric_limits.hpp | 117 ++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 include/boost/math/tools/numeric_limits.hpp

diff --git a/include/boost/math/tools/numeric_limits.hpp b/include/boost/math/tools/numeric_limits.hpp
new file mode 100644
index 000000000..0ca02812a
--- /dev/null
+++ b/include/boost/math/tools/numeric_limits.hpp
@@ -0,0 +1,117 @@
+//  Copyright (c) 2024 Matt Borland
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+//  Regular use of std::numeric_limits functions can not be used on 
+//  GPU platforms like CUDA since they are missing the __device__ marker
+//  and libcu++ does not provide something analogous.
+//  Rather than using giant if else blocks make our own version of numeric limits
+
+#include <boost/math/tools/config.hpp>
+#include <type_traits>
+#include <limits>
+#include <climits>
+
+namespace boost {
+namespace math {
+
+template <typename T>
+struct numeric_limits : public std::numeric_limits<T> {};
+
+#ifdef BOOST_MATH_HAS_GPU_SUPPORT
+
+template <>
+struct numeric_limits<float>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<float>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<float>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<float>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<float>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<float>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<float>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<float>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<float>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<float>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<float>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<float>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<float>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<float>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<float>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<float>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<float>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<float>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<float>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<float>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<float>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<float>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (min)         () { return FLT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (max)         () { return FLT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float lowest        () { return -FLT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float epsilon       () { return FLT_EPSILON; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float round_error   () { return 0.5F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float infinity      () { return static_cast<float>(INFINITY); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float quiet_NaN     () { return static_cast<float>(NAN); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float signaling_NaN () 
+    { 
+        #ifdef FLT_SNAN
+        return FLT_SNAN;
+        #else
+        return static_cast<float>(NAN);
+        #endif
+    }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float denorm_min    () { return FLT_TRUE_MIN; }
+};
+
+template <>
+struct numeric_limits<double>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<double>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<double>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<double>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<double>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<double>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<double>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<double>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<double>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<double>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<double>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<double>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<double>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<double>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<double>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<double>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<double>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<double>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<double>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<double>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<double>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<double>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (min)         () { return DBL_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (max)         () { return DBL_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double lowest        () { return -DBL_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double epsilon       () { return DBL_EPSILON; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double round_error   () { return 0.5; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double infinity      () { return static_cast<double>(INFINITY); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double quiet_NaN     () { return static_cast<double>(NAN); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double signaling_NaN () 
+    { 
+        #ifdef DBL_SNAN
+        return DBL_SNAN;
+        #else
+        return static_cast<double>(NAN);
+        #endif
+    }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double denorm_min    () { return DBL_TRUE_MIN; }
+};
+
+#endif
+
+} // namespace math
+} // namespace boost

From 1629e75c2c9d47d3131d87fe64ed86e652e8a69d Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:11:43 -0400
Subject: [PATCH 10/37] Make pow GPU compatible

---
 .../boost/math/special_functions/math_fwd.hpp |  6 +++---
 include/boost/math/special_functions/pow.hpp  | 19 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp
index e481692f1..2886621bb 100644
--- a/include/boost/math/special_functions/math_fwd.hpp
+++ b/include/boost/math/special_functions/math_fwd.hpp
@@ -1091,10 +1091,10 @@ namespace boost
 
    // pow:
    template <int N, typename T, class Policy>
-   BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base, const Policy& policy);
+   BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base, const Policy& policy);
 
    template <int N, typename T>
-   BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base);
+   BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base);
 
    // next:
    template <class T, class U, class Policy>
@@ -1625,7 +1625,7 @@ template <class OutputIterator, class T>\
    BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
    \
    template <int N, class T>\
-   inline boost::math::tools::promote_args_t<T> pow(T v){ return boost::math::pow<N>(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> pow(T v){ return boost::math::pow<N>(v, Policy()); }\
    \
    template <class T> T nextafter(const T& a, const T& b){ return static_cast<T>(boost::math::nextafter(a, b, Policy())); }\
    template <class T> T float_next(const T& a){ return static_cast<T>(boost::math::float_next(a, Policy())); }\
diff --git a/include/boost/math/special_functions/pow.hpp b/include/boost/math/special_functions/pow.hpp
index 9c6488997..a056f6daf 100644
--- a/include/boost/math/special_functions/pow.hpp
+++ b/include/boost/math/special_functions/pow.hpp
@@ -2,6 +2,7 @@
 //   Computes a power with exponent known at compile-time
 
 //  (C) Copyright Bruno Lalande 2008.
+//  (C) Copyright Matt Borland 2024.
 //  Distributed under the Boost Software License, Version 1.0.
 //  (See accompanying file LICENSE_1_0.txt or copy at
 //  http://www.boost.org/LICENSE_1_0.txt)
@@ -12,7 +13,7 @@
 #ifndef BOOST_MATH_POW_HPP
 #define BOOST_MATH_POW_HPP
 
-
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/policies/policy.hpp>
 #include <boost/math/policies/error_handling.hpp>
@@ -34,7 +35,7 @@ template <int N, int M = N%2>
 struct positive_power
 {
     template <typename T>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base)
+    BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base)
     {
         T power = positive_power<N/2>::result(base);
         return power * power;
@@ -45,7 +46,7 @@ template <int N>
 struct positive_power<N, 1>
 {
     template <typename T>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base)
+    BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base)
     {
         T power = positive_power<N/2>::result(base);
         return base * power * power;
@@ -56,7 +57,7 @@ template <>
 struct positive_power<1, 1>
 {
     template <typename T>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base){ return base; }
+    BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base){ return base; }
 };
 
 
@@ -64,7 +65,7 @@ template <int N, bool>
 struct power_if_positive
 {
     template <typename T, class Policy>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy&)
+    BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy&)
     { return positive_power<N>::result(base); }
 };
 
@@ -72,7 +73,7 @@ template <int N>
 struct power_if_positive<N, false>
 {
     template <typename T, class Policy>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy)
+    BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy)
     {
         if (base == 0)
         {
@@ -91,7 +92,7 @@ template <>
 struct power_if_positive<0, true>
 {
     template <typename T, class Policy>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy)
+    BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy)
     {
         if (base == 0)
         {
@@ -120,14 +121,14 @@ struct select_power_if_positive
 
 
 template <int N, typename T, class Policy>
-BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args<T>::type pow(T base, const Policy& policy)
+BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args<T>::type pow(T base, const Policy& policy)
 { 
    using result_type = typename tools::promote_args<T>::type;
    return detail::select_power_if_positive<N>::type::result(static_cast<result_type>(base), policy); 
 }
 
 template <int N, typename T>
-BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args<T>::type pow(T base)
+BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args<T>::type pow(T base)
 { return pow<N>(base, policies::policy<>()); }
 
 #ifdef _MSC_VER

From c887a74a7207c08388a941fb3b0ab87b246024ed Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:12:06 -0400
Subject: [PATCH 11/37] Make sin_pi GPU compatible

---
 include/boost/math/special_functions/math_fwd.hpp | 6 +++---
 include/boost/math/special_functions/sin_pi.hpp   | 9 ++++++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp
index 2886621bb..f2af616ab 100644
--- a/include/boost/math/special_functions/math_fwd.hpp
+++ b/include/boost/math/special_functions/math_fwd.hpp
@@ -858,10 +858,10 @@ namespace boost
                      const Policy&);
 
    template <class T, class Policy>
-   tools::promote_args_t<T> sin_pi(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> sin_pi(T x, const Policy&);
 
    template <class T>
-   tools::promote_args_t<T> sin_pi(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> sin_pi(T x);
 
    template <class T, class Policy>
    tools::promote_args_t<T> cos_pi(T x, const Policy&);
@@ -1572,7 +1572,7 @@ template <class OutputIterator, class T>\
    { boost::math::cyl_neumann_zero(v, start_index, number_of_zeros, out_it, Policy()); }\
 \
    template <class T>\
-   inline boost::math::tools::promote_args_t<T> sin_pi(T x){ return boost::math::sin_pi(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> sin_pi(T x){ return boost::math::sin_pi(x, Policy()); }\
 \
    template <class T>\
    inline boost::math::tools::promote_args_t<T> cos_pi(T x){ return boost::math::cos_pi(x, Policy()); }\
diff --git a/include/boost/math/special_functions/sin_pi.hpp b/include/boost/math/special_functions/sin_pi.hpp
index 5b8eb6fcf..d0739fabd 100644
--- a/include/boost/math/special_functions/sin_pi.hpp
+++ b/include/boost/math/special_functions/sin_pi.hpp
@@ -1,4 +1,5 @@
 //  Copyright (c) 2007 John Maddock
+//  Copyright (c) 2024 Matt Borland
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,7 +13,9 @@
 
 #include <cmath>
 #include <limits>
+#include <type_traits>
 #include <boost/math/tools/config.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/trunc.hpp>
 #include <boost/math/tools/promotion.hpp>
@@ -21,7 +24,7 @@
 namespace boost{ namespace math{ namespace detail{
 
 template <class T, class Policy>
-inline T sin_pi_imp(T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T sin_pi_imp(T x, const Policy& pol)
 {
    BOOST_MATH_STD_USING // ADL of std names
    if(x < 0)
@@ -39,7 +42,7 @@ inline T sin_pi_imp(T x, const Policy& pol)
       invert = false;
 
    T rem = floor(x);
-   if(abs(floor(rem/2)*2 - rem) > std::numeric_limits<T>::epsilon())
+   if(abs(floor(rem/2)*2 - rem) > boost::math::numeric_limits<T>::epsilon())
    {
       invert = !invert;
    }
@@ -56,7 +59,7 @@ inline T sin_pi_imp(T x, const Policy& pol)
 } // namespace detail
 
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type sin_pi(T x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sin_pi(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;

From 5f4ab0d05253e177b7d08fe5c0e0c1e514e567c3 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:12:23 -0400
Subject: [PATCH 12/37] Make trigamma GPU compatible

---
 .../boost/math/special_functions/math_fwd.hpp |  6 +--
 .../boost/math/special_functions/trigamma.hpp | 52 ++++++++++---------
 2 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp
index f2af616ab..6c8b64507 100644
--- a/include/boost/math/special_functions/math_fwd.hpp
+++ b/include/boost/math/special_functions/math_fwd.hpp
@@ -540,10 +540,10 @@ namespace boost
 
    // trigamma:
    template <class T>
-   tools::promote_args_t<T> trigamma(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> trigamma(T x);
 
    template <class T, class Policy>
-   tools::promote_args_t<T> trigamma(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> trigamma(T x, const Policy&);
 
    // polygamma:
    template <class T>
@@ -1458,7 +1458,7 @@ namespace boost
    BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> digamma(T x){ return boost::math::digamma(x, Policy()); }\
 \
    template <class T>\
-   inline boost::math::tools::promote_args_t<T> trigamma(T x){ return boost::math::trigamma(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> trigamma(T x){ return boost::math::trigamma(x, Policy()); }\
 \
    template <class T>\
    inline boost::math::tools::promote_args_t<T> polygamma(int n, T x){ return boost::math::polygamma(n, x, Policy()); }\
diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp
index f74b43db1..bca334df9 100644
--- a/include/boost/math/special_functions/trigamma.hpp
+++ b/include/boost/math/special_functions/trigamma.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,6 +11,7 @@
 #pragma once
 #endif
 
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/tools/rational.hpp>
 #include <boost/math/tools/series.hpp>
@@ -37,11 +39,11 @@ template<class T, class Policy>
 T polygamma_imp(const int n, T x, const Policy &pol);
 
 template <class T, class Policy>
-T trigamma_prec(T x, const std::integral_constant<int, 53>*, const Policy&)
+BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant<int, 53>*, const Policy&)
 {
    // Max error in interpolated form: 3.736e-017
-   static const T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469);
-   static const T P_1_2[] = {
+   constexpr T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469);
+   BOOST_MATH_STATIC const T P_1_2[] = {
       BOOST_MATH_BIG_CONSTANT(T, 53, -1.1093280605946045),
       BOOST_MATH_BIG_CONSTANT(T, 53, -3.8310674472619321),
       BOOST_MATH_BIG_CONSTANT(T, 53, -3.3703848401898283),
@@ -49,7 +51,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 53>*, const Policy&)
       BOOST_MATH_BIG_CONSTANT(T, 53, 1.6638069578676164),
       BOOST_MATH_BIG_CONSTANT(T, 53, 0.64468386819102836),
    };
-   static const T Q_1_2[] = {
+   BOOST_MATH_STATIC const T Q_1_2[] = {
       BOOST_MATH_BIG_CONSTANT(T, 53, 1.0),
       BOOST_MATH_BIG_CONSTANT(T, 53, 3.4535389668541151),
       BOOST_MATH_BIG_CONSTANT(T, 53, 4.5208926987851437),
@@ -58,7 +60,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 53>*, const Policy&)
       BOOST_MATH_BIG_CONSTANT(T, 53, -0.20314516859987728e-6),
    };
    // Max error in interpolated form: 1.159e-017
-   static const T P_2_4[] = {
+   BOOST_MATH_STATIC const T P_2_4[] = {
       BOOST_MATH_BIG_CONSTANT(T, 53, -0.13803835004508849e-7),
       BOOST_MATH_BIG_CONSTANT(T, 53, 0.50000049158540261),
       BOOST_MATH_BIG_CONSTANT(T, 53, 1.6077979838469348),
@@ -66,7 +68,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 53>*, const Policy&)
       BOOST_MATH_BIG_CONSTANT(T, 53, 2.0534873203680393),
       BOOST_MATH_BIG_CONSTANT(T, 53, 0.74566981111565923),
    };
-   static const T Q_2_4[] = {
+   BOOST_MATH_STATIC const T Q_2_4[] = {
       BOOST_MATH_BIG_CONSTANT(T, 53, 1.0),
       BOOST_MATH_BIG_CONSTANT(T, 53, 2.8822787662376169),
       BOOST_MATH_BIG_CONSTANT(T, 53, 4.1681660554090917),
@@ -77,7 +79,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 53>*, const Policy&)
    // Maximum Deviation Found:                     6.896e-018
    // Expected Error Term :                       -6.895e-018
    // Maximum Relative Change in Control Points :  8.497e-004
-   static const T P_4_inf[] = {
+   BOOST_MATH_STATIC const T P_4_inf[] = {
       static_cast<T>(0.68947581948701249e-17L),
       static_cast<T>(0.49999999999998975L),
       static_cast<T>(1.0177274392923795L),
@@ -86,7 +88,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 53>*, const Policy&)
       static_cast<T>(1.5897035272532764L),
       static_cast<T>(0.40154388356961734L),
    };
-   static const T Q_4_inf[] = {
+   BOOST_MATH_STATIC const T Q_4_inf[] = {
       static_cast<T>(1.0L),
       static_cast<T>(1.7021215452463932L),
       static_cast<T>(4.4290431747556469L),
@@ -110,11 +112,11 @@ T trigamma_prec(T x, const std::integral_constant<int, 53>*, const Policy&)
 }
 
 template <class T, class Policy>
-T trigamma_prec(T x, const std::integral_constant<int, 64>*, const Policy&)
+BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant<int, 64>*, const Policy&)
 {
    // Max error in interpolated form: 1.178e-020
-   static const T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875);
-   static const T P_1_2[] = {
+   constexpr T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875);
+   BOOST_MATH_STATIC const T P_1_2[] = {
       BOOST_MATH_BIG_CONSTANT(T, 64, -1.10932535608960258341),
       BOOST_MATH_BIG_CONSTANT(T, 64, -4.18793841543017129052),
       BOOST_MATH_BIG_CONSTANT(T, 64, -4.63865531898487734531),
@@ -123,7 +125,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 64>*, const Policy&)
       BOOST_MATH_BIG_CONSTANT(T, 64, 1.21172611429185622377),
       BOOST_MATH_BIG_CONSTANT(T, 64, 0.259635673503366427284),
    };
-   static const T Q_1_2[] = {
+   BOOST_MATH_STATIC const T Q_1_2[] = {
       BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
       BOOST_MATH_BIG_CONSTANT(T, 64, 3.77521119359546982995),
       BOOST_MATH_BIG_CONSTANT(T, 64, 5.664338024578956321),
@@ -133,7 +135,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 64>*, const Policy&)
       BOOST_MATH_BIG_CONSTANT(T, 64, 0.629642219810618032207e-8),
    };
    // Max error in interpolated form: 3.912e-020
-   static const T P_2_8[] = {
+   BOOST_MATH_STATIC const T P_2_8[] = {
       BOOST_MATH_BIG_CONSTANT(T, 64, -0.387540035162952880976e-11),
       BOOST_MATH_BIG_CONSTANT(T, 64, 0.500000000276430504),
       BOOST_MATH_BIG_CONSTANT(T, 64, 3.21926880986360957306),
@@ -143,7 +145,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 64>*, const Policy&)
       BOOST_MATH_BIG_CONSTANT(T, 64, 13.4346512182925923978),
       BOOST_MATH_BIG_CONSTANT(T, 64, 3.98656291026448279118),
    };
-   static const T Q_2_8[] = {
+   BOOST_MATH_STATIC const T Q_2_8[] = {
       BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
       BOOST_MATH_BIG_CONSTANT(T, 64, 6.10520430478613667724),
       BOOST_MATH_BIG_CONSTANT(T, 64, 18.475001060603645512),
@@ -156,7 +158,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 64>*, const Policy&)
    // Maximum Deviation Found:                     2.635e-020
    // Expected Error Term :                        2.635e-020
    // Maximum Relative Change in Control Points :  1.791e-003
-   static const T P_8_inf[] = {
+   BOOST_MATH_STATIC const T P_8_inf[] = {
       BOOST_MATH_BIG_CONSTANT(T, 64, -0.263527875092466899848e-19),
       BOOST_MATH_BIG_CONSTANT(T, 64, 0.500000000000000058145),
       BOOST_MATH_BIG_CONSTANT(T, 64, 0.0730121433777364138677),
@@ -164,7 +166,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 64>*, const Policy&)
       BOOST_MATH_BIG_CONSTANT(T, 64, 0.0517092358874932620529),
       BOOST_MATH_BIG_CONSTANT(T, 64, 1.07995383547483921121),
    };
-   static const T Q_8_inf[] = {
+   BOOST_MATH_STATIC const T Q_8_inf[] = {
       BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
       BOOST_MATH_BIG_CONSTANT(T, 64, -0.187309046577818095504),
       BOOST_MATH_BIG_CONSTANT(T, 64, 3.95255391645238842975),
@@ -357,7 +359,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 113>*, const Policy&)
 }
 
 template <class T, class Tag, class Policy>
-T trigamma_imp(T x, const Tag* t, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T trigamma_imp(T x, const Tag* t, const Policy& pol)
 {
    //
    // This handles reflection of negative arguments, and all our
@@ -402,22 +404,24 @@ struct trigamma_initializer
 {
    struct init
    {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
       {
          typedef typename policies::precision<T, Policy>::type precision_type;
          do_init(std::integral_constant<bool, precision_type::value && (precision_type::value <= 113)>());
       }
-      void do_init(const std::true_type&)
+      BOOST_MATH_GPU_ENABLED void do_init(const std::true_type&)
       {
          boost::math::trigamma(T(2.5), Policy());
       }
-      void do_init(const std::false_type&){}
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void do_init(const std::false_type&){}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
    };
    static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
    {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
       initializer.force_instantiate();
+      #endif
    }
 };
 
@@ -427,7 +431,7 @@ const typename trigamma_initializer<T, Policy>::init trigamma_initializer<T, Pol
 } // namespace detail
 
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    trigamma(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
@@ -455,7 +459,7 @@ inline typename tools::promote_args<T>::type
 }
 
 template <class T>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    trigamma(T x)
 {
    return trigamma(x, policies::policy<>());

From 525b18c55f5a90164b0ce41eeb643dee1d0d5bc1 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:12:35 -0400
Subject: [PATCH 13/37] Add trigamma CUDA testing

---
 test/cuda_jamfile            |   2 +
 test/test_trigamma_double.cu | 100 +++++++++++++++++++++++++++++++++++
 test/test_trigamma_float.cu  | 100 +++++++++++++++++++++++++++++++++++
 3 files changed, 202 insertions(+)
 create mode 100644 test/test_trigamma_double.cu
 create mode 100644 test/test_trigamma_float.cu

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index 42b827cc2..490be5772 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -49,5 +49,7 @@ run test_modf_double.cu ;
 run test_modf_float.cu ;
 run test_round_double.cu ;
 run test_round_float.cu ;
+run test_trigamma_double.cu ;
+run test_trigamma_float.cu ;
 run test_trunc_double.cu ;
 run test_trunc_float.cu ;
diff --git a/test/test_trigamma_double.cu b/test/test_trigamma_double.cu
new file mode 100644
index 000000000..6780e3e92
--- /dev/null
+++ b/test/test_trigamma_double.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::trigamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::trigamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_trigamma_float.cu b/test/test_trigamma_float.cu
new file mode 100644
index 000000000..a407a0eb1
--- /dev/null
+++ b/test/test_trigamma_float.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::trigamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::trigamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}

From db7fb655a5b735d28372d2ea633f7436e6ef7214 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:18:20 -0400
Subject: [PATCH 14/37] Rename dispatching function

---
 include/boost/math/special_functions/trigamma.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp
index bca334df9..c4c289515 100644
--- a/include/boost/math/special_functions/trigamma.hpp
+++ b/include/boost/math/special_functions/trigamma.hpp
@@ -359,7 +359,7 @@ T trigamma_prec(T x, const std::integral_constant<int, 113>*, const Policy&)
 }
 
 template <class T, class Tag, class Policy>
-BOOST_MATH_GPU_ENABLED T trigamma_imp(T x, const Tag* t, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Tag* t, const Policy& pol)
 {
    //
    // This handles reflection of negative arguments, and all our
@@ -453,7 +453,7 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    // Force initialization of constants:
    detail::trigamma_initializer<value_type, forwarding_policy>::force_instantiate();
 
-   return policies::checked_narrowing_cast<result_type, Policy>(detail::trigamma_imp(
+   return policies::checked_narrowing_cast<result_type, Policy>(detail::trigamma_dispatch(
       static_cast<value_type>(x),
       static_cast<const tag_type*>(nullptr), forwarding_policy()), "boost::math::trigamma<%1%>(%1%)");
 }

From c049fabe60f97b579ce19c1d6bb9bcdb6aa4a95a Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:23:04 -0400
Subject: [PATCH 15/37] Eliminate recursion in sin_pi for SYCL

---
 include/boost/math/special_functions/sin_pi.hpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/boost/math/special_functions/sin_pi.hpp b/include/boost/math/special_functions/sin_pi.hpp
index d0739fabd..3c558ab7b 100644
--- a/include/boost/math/special_functions/sin_pi.hpp
+++ b/include/boost/math/special_functions/sin_pi.hpp
@@ -27,8 +27,6 @@ template <class T, class Policy>
 BOOST_MATH_GPU_ENABLED inline T sin_pi_imp(T x, const Policy& pol)
 {
    BOOST_MATH_STD_USING // ADL of std names
-   if(x < 0)
-      return -sin_pi_imp(T(-x), pol);
    // sin of pi*x:
    if(x < T(0.5))
       return sin(constants::pi<T>() * x);
@@ -56,6 +54,19 @@ BOOST_MATH_GPU_ENABLED inline T sin_pi_imp(T x, const Policy& pol)
    return invert ? T(-rem) : rem;
 }
 
+template <class T, class Policy>
+BOOST_MATH_FORCEINLINE BOOST_MATH_GPU_ENABLED T sin_pi_dispatch(T x, const Policy& pol)
+{
+   if (x < 0)
+   {
+      return -sin_pi_imp(-x, pol);
+   }
+   else
+   {
+      return sin_pi_imp(x, pol);
+   }
+}
+
 } // namespace detail
 
 template <class T, class Policy>
@@ -72,7 +83,7 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sin_pi(T x,
       // We want to ignore overflows since the result is in [-1,1] and the 
       // check slows the code down considerably.
       policies::overflow_error<policies::ignore_error> >::type forwarding_policy;
-   return policies::checked_narrowing_cast<result_type, forwarding_policy>(boost::math::detail::sin_pi_imp<value_type>(x, forwarding_policy()), "sin_pi");
+   return policies::checked_narrowing_cast<result_type, forwarding_policy>(boost::math::detail::sin_pi_dispatch<value_type>(x, forwarding_policy()), "sin_pi");
 }
 
 template <class T>

From cfc1ad0351405abb99530d5b8a825fb4356618be Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:25:55 -0400
Subject: [PATCH 16/37] Remove recursion from trigamma

---
 include/boost/math/special_functions/trigamma.hpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp
index c4c289515..73dbc998b 100644
--- a/include/boost/math/special_functions/trigamma.hpp
+++ b/include/boost/math/special_functions/trigamma.hpp
@@ -375,13 +375,20 @@ BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Tag* t, const Policy& pol)
    {
       // Reflect:
       T z = 1 - x;
+
+      if(z < 1)
+      {
+         result = 1 / (z * z);
+         z += 1;
+      }
+
       // Argument reduction for tan:
       if(floor(x) == x)
       {
          return policies::raise_pole_error<T>("boost::math::trigamma<%1%>(%1%)", nullptr, (1-x), pol);
       }
       T s = fabs(x) < fabs(z) ? boost::math::sin_pi(x, pol) : boost::math::sin_pi(z, pol);
-      return -trigamma_imp(z, t, pol) + boost::math::pow<2>(constants::pi<T>()) / (s * s);
+      return result - trigamma_prec(z, t, pol) + boost::math::pow<2>(constants::pi<T>()) / (s * s);
    }
    if(x < 1)
    {

From 0fe35eb4b839acb0709b2d0b63904688fcff54ec Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:26:03 -0400
Subject: [PATCH 17/37] Add trigamma SYCL testing

---
 test/sycl_jamfile      | 1 +
 test/test_trigamma.cpp | 4 ++++
 test/test_trigamma.hpp | 6 ++++--
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/test/sycl_jamfile b/test/sycl_jamfile
index 3354a776f..d9e7c9296 100644
--- a/test/sycl_jamfile
+++ b/test/sycl_jamfile
@@ -21,3 +21,4 @@ run test_round.cpp ;
 run test_expm1_simple.cpp ;
 run test_log1p_simple.cpp ;
 run test_digamma_simple.cpp ;
+run test_trigamma.cpp ;
diff --git a/test/test_trigamma.cpp b/test/test_trigamma.cpp
index dd89898d7..85ba8078a 100644
--- a/test/test_trigamma.cpp
+++ b/test/test_trigamma.cpp
@@ -1,9 +1,13 @@
 //  (C) Copyright John Maddock 2014.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
 #include "test_trigamma.hpp"
 
 void expected_results()
diff --git a/test/test_trigamma.hpp b/test/test_trigamma.hpp
index 94a129032..49b1bd550 100644
--- a/test/test_trigamma.hpp
+++ b/test/test_trigamma.hpp
@@ -1,4 +1,5 @@
-// Copyright John Maddock 2014
+//  Copyright John Maddock 2014
+//  Copyright Matt Borland 2024
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -9,9 +10,10 @@
 #include <boost/math/special_functions/math_fwd.hpp>
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
+#include <boost/math/special_functions/trigamma.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/tools/stats.hpp>
-#include <boost/math/tools/test.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/type_traits/is_floating_point.hpp>
 #include <boost/array.hpp>

From 48f8734b4cfddf3bbc05e1b564af018c0a1d6cce Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:40:14 -0400
Subject: [PATCH 18/37] Add sycl testing of pow

---
 test/pow_test.cpp | 5 ++++-
 test/sycl_jamfile | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/pow_test.cpp b/test/pow_test.cpp
index ce3d036ab..9180ccab5 100644
--- a/test/pow_test.cpp
+++ b/test/pow_test.cpp
@@ -2,6 +2,7 @@
 //  Tests the pow function
 
 //  (C) Copyright Bruno Lalande 2008.
+//  (C) Copyright Matt Borland 2024.
 //  Distributed under the Boost Software License, Version 1.0.
 //  (See accompanying file LICENSE_1_0.txt or copy at
 //  http://www.boost.org/LICENSE_1_0.txt)
@@ -11,7 +12,7 @@
 #include <iostream>
 
 #include <boost/math/concepts/real_concept.hpp>
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
@@ -37,7 +38,9 @@ void test_pow(T base)
 
     if ((base == 0) && N < 0)
     {
+       #ifndef BOOST_MATH_NO_EXCEPTIONS 
        BOOST_MATH_CHECK_THROW(math::pow<N>(base), std::overflow_error);
+       #endif
     }
     else
     {
diff --git a/test/sycl_jamfile b/test/sycl_jamfile
index d9e7c9296..c0c67d1e5 100644
--- a/test/sycl_jamfile
+++ b/test/sycl_jamfile
@@ -14,6 +14,7 @@ run test_arcsine.cpp ;
 run test_cauchy.cpp ;
 
 # Special Functions
+run pow_test.cpp ;
 run test_beta_simple.cpp ;
 run test_cbrt.cpp ;
 run test_sign.cpp ;

From b51d31294fe50ca1c1683cad7b87da65b4fddead Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 11:56:12 -0400
Subject: [PATCH 19/37] Add CUDA sin_pi testing

---
 test/cuda_jamfile          |   2 +
 test/test_sin_pi_double.cu | 100 +++++++++++++++++++++++++++++++++++++
 test/test_sin_pi_float.cu  | 100 +++++++++++++++++++++++++++++++++++++
 3 files changed, 202 insertions(+)
 create mode 100644 test/test_sin_pi_double.cu
 create mode 100644 test/test_sin_pi_float.cu

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index 490be5772..97102bcb1 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -49,6 +49,8 @@ run test_modf_double.cu ;
 run test_modf_float.cu ;
 run test_round_double.cu ;
 run test_round_float.cu ;
+run test_sin_pi_double.cu ; 
+run test_sin_pi_float.cu ;
 run test_trigamma_double.cu ;
 run test_trigamma_float.cu ;
 run test_trunc_double.cu ;
diff --git a/test/test_sin_pi_double.cu b/test/test_sin_pi_double.cu
new file mode 100644
index 000000000..0783d5536
--- /dev/null
+++ b/test/test_sin_pi_double.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sin_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::sin_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_sin_pi_float.cu b/test/test_sin_pi_float.cu
new file mode 100644
index 000000000..9a9f07580
--- /dev/null
+++ b/test/test_sin_pi_float.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sin_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::sin_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}

From e4519a7b5191d421cef890fb51ba9ff6958330fa Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 13:15:47 -0400
Subject: [PATCH 20/37] Pass integral_constant directly instead of creating
 pointer

---
 .../boost/math/special_functions/trigamma.hpp | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp
index 73dbc998b..5d10135b9 100644
--- a/include/boost/math/special_functions/trigamma.hpp
+++ b/include/boost/math/special_functions/trigamma.hpp
@@ -11,6 +11,7 @@
 #pragma once
 #endif
 
+#include <type_traits>
 #include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/tools/rational.hpp>
@@ -39,7 +40,13 @@ template<class T, class Policy>
 T polygamma_imp(const int n, T x, const Policy &pol);
 
 template <class T, class Policy>
-BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant<int, 53>*, const Policy&)
+T trigamma_prec(T x, const Policy& pol, const std::integral_constant<int, 0>&)
+{
+   return polygamma_imp(1, x, pol);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_constant<int, 53>&)
 {
    // Max error in interpolated form: 3.736e-017
    constexpr T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469);
@@ -112,7 +119,7 @@ BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant<int, 53
 }
 
 template <class T, class Policy>
-BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant<int, 64>*, const Policy&)
+BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_constant<int, 64>&)
 {
    // Max error in interpolated form: 1.178e-020
    constexpr T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875);
@@ -190,7 +197,7 @@ BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant<int, 64
 }
 
 template <class T, class Policy>
-T trigamma_prec(T x, const std::integral_constant<int, 113>*, const Policy&)
+T trigamma_prec(T x, const Policy&, const std::integral_constant<int, 113>&)
 {
    // Max error in interpolated form: 1.916e-035
 
@@ -358,8 +365,8 @@ T trigamma_prec(T x, const std::integral_constant<int, 113>*, const Policy&)
    return (1 + tools::evaluate_polynomial(P_16_inf, y) / tools::evaluate_polynomial(Q_16_inf, y)) / x;
 }
 
-template <class T, class Tag, class Policy>
-BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Tag* t, const Policy& pol)
+template <class T, class Policy, class Tag>
+BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Policy& pol, const Tag& tag)
 {
    //
    // This handles reflection of negative arguments, and all our
@@ -388,21 +395,16 @@ BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Tag* t, const Policy& pol)
          return policies::raise_pole_error<T>("boost::math::trigamma<%1%>(%1%)", nullptr, (1-x), pol);
       }
       T s = fabs(x) < fabs(z) ? boost::math::sin_pi(x, pol) : boost::math::sin_pi(z, pol);
-      return result - trigamma_prec(z, t, pol) + boost::math::pow<2>(constants::pi<T>()) / (s * s);
+      return result - trigamma_prec(z, pol, tag) + boost::math::pow<2>(constants::pi<T>()) / (s * s);
    }
    if(x < 1)
    {
       result = 1 / (x * x);
       x += 1;
    }
-   return result + trigamma_prec(x, t, pol);
+   return result + trigamma_prec(x, pol, tag);
 }
 
-template <class T, class Policy>
-T trigamma_imp(T x, const std::integral_constant<int, 0>*, const Policy& pol)
-{
-   return polygamma_imp(1, x, pol);
-}
 //
 // Initializer: ensure all our constants are initialized prior to the first call of main:
 //
@@ -462,7 +464,8 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
 
    return policies::checked_narrowing_cast<result_type, Policy>(detail::trigamma_dispatch(
       static_cast<value_type>(x),
-      static_cast<const tag_type*>(nullptr), forwarding_policy()), "boost::math::trigamma<%1%>(%1%)");
+      forwarding_policy(),
+      tag_type()), "boost::math::trigamma<%1%>(%1%)");
 }
 
 template <class T>

From 20f17d48819ae865d68283c88da89573a0743022 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 14:29:16 -0400
Subject: [PATCH 21/37] Add missing include guards for numeric limits

---
 include/boost/math/tools/numeric_limits.hpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/boost/math/tools/numeric_limits.hpp b/include/boost/math/tools/numeric_limits.hpp
index 0ca02812a..acfd476e4 100644
--- a/include/boost/math/tools/numeric_limits.hpp
+++ b/include/boost/math/tools/numeric_limits.hpp
@@ -8,6 +8,9 @@
 //  and libcu++ does not provide something analogous.
 //  Rather than using giant if else blocks make our own version of numeric limits
 
+#ifndef BOOST_MATH_TOOLS_NUMERIC_LIMITS_HPP
+#define BOOST_MATH_TOOLS_NUMERIC_LIMITS_HPP
+
 #include <boost/math/tools/config.hpp>
 #include <type_traits>
 #include <limits>
@@ -115,3 +118,5 @@ struct numeric_limits<double>
 
 } // namespace math
 } // namespace boost
+
+#endif

From 85e2e4c0f4a96cfd07731d680f5391d1ab994396 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 14:29:56 -0400
Subject: [PATCH 22/37] Make cos_pi GPU capable

---
 include/boost/math/special_functions/cos_pi.hpp   | 12 +++++++-----
 include/boost/math/special_functions/math_fwd.hpp |  6 +++---
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/boost/math/special_functions/cos_pi.hpp b/include/boost/math/special_functions/cos_pi.hpp
index e09700ec5..5a7c742b4 100644
--- a/include/boost/math/special_functions/cos_pi.hpp
+++ b/include/boost/math/special_functions/cos_pi.hpp
@@ -1,4 +1,5 @@
 //  Copyright (c) 2007 John Maddock
+//  Copyright (c) 2024 Matt Borland
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,8 +13,9 @@
 
 #include <cmath>
 #include <limits>
-#include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/tools/config.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/trunc.hpp>
 #include <boost/math/tools/promotion.hpp>
 #include <boost/math/constants/constants.hpp>
@@ -21,7 +23,7 @@
 namespace boost{ namespace math{ namespace detail{
 
 template <class T, class Policy>
-T cos_pi_imp(T x, const Policy&)
+BOOST_MATH_GPU_ENABLED T cos_pi_imp(T x, const Policy&)
 {
    BOOST_MATH_STD_USING // ADL of std names
    // cos of pi*x:
@@ -34,7 +36,7 @@ T cos_pi_imp(T x, const Policy&)
       x = -x;
    }
    T rem = floor(x);
-   if(abs(floor(rem/2)*2 - rem) > std::numeric_limits<T>::epsilon())
+   if(abs(floor(rem/2)*2 - rem) > boost::math::numeric_limits<T>::epsilon())
    {
       invert = !invert;
    }
@@ -60,7 +62,7 @@ T cos_pi_imp(T x, const Policy&)
 } // namespace detail
 
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type cos_pi(T x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type cos_pi(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -77,7 +79,7 @@ inline typename tools::promote_args<T>::type cos_pi(T x, const Policy&)
 }
 
 template <class T>
-inline typename tools::promote_args<T>::type cos_pi(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type cos_pi(T x)
 {
    return boost::math::cos_pi(x, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp
index 6c8b64507..5e8153734 100644
--- a/include/boost/math/special_functions/math_fwd.hpp
+++ b/include/boost/math/special_functions/math_fwd.hpp
@@ -864,10 +864,10 @@ namespace boost
    BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> sin_pi(T x);
 
    template <class T, class Policy>
-   tools::promote_args_t<T> cos_pi(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> cos_pi(T x, const Policy&);
 
    template <class T>
-   tools::promote_args_t<T> cos_pi(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> cos_pi(T x);
 
    template <class T>
    int fpclassify BOOST_NO_MACRO_EXPAND(T t);
@@ -1575,7 +1575,7 @@ template <class OutputIterator, class T>\
    BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> sin_pi(T x){ return boost::math::sin_pi(x, Policy()); }\
 \
    template <class T>\
-   inline boost::math::tools::promote_args_t<T> cos_pi(T x){ return boost::math::cos_pi(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> cos_pi(T x){ return boost::math::cos_pi(x, Policy()); }\
 \
    using boost::math::fpclassify;\
    using boost::math::isfinite;\

From 3c598df2413707b2f7f6fa4f38e6076711059150 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 14:30:08 -0400
Subject: [PATCH 23/37] Add cos_pi CUDA testing

---
 test/cuda_jamfile          |   2 +
 test/test_cos_pi_double.cu | 100 +++++++++++++++++++++++++++++++++++++
 test/test_cos_pi_float.cu  | 100 +++++++++++++++++++++++++++++++++++++
 3 files changed, 202 insertions(+)
 create mode 100644 test/test_cos_pi_double.cu
 create mode 100644 test/test_cos_pi_float.cu

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index 97102bcb1..709289389 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -39,6 +39,8 @@ run test_cbrt_double.cu ;
 run test_cbrt_float.cu ;
 run test_changesign_double.cu ;
 run test_changesign_float.cu ;
+run test_cos_pi_double.cu ;
+run test_cos_pi_float.cu ;
 run test_digamma_double.cu ;
 run test_digamma_float.cu ;
 run test_expm1_double.cu ;
diff --git a/test/test_cos_pi_double.cu b/test/test_cos_pi_double.cu
new file mode 100644
index 000000000..5a66b25ce
--- /dev/null
+++ b/test/test_cos_pi_double.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cos_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::cos_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cos_pi_float.cu b/test/test_cos_pi_float.cu
new file mode 100644
index 000000000..6a04d8e04
--- /dev/null
+++ b/test/test_cos_pi_float.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cos_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::cos_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}

From 502f04fe45ce7cce102adca5a8da102c231e72fa Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 14:49:34 -0400
Subject: [PATCH 24/37] Add all the integer types to numeric limits

---
 include/boost/math/tools/numeric_limits.hpp | 306 +++++++++++++++++++-
 1 file changed, 305 insertions(+), 1 deletion(-)

diff --git a/include/boost/math/tools/numeric_limits.hpp b/include/boost/math/tools/numeric_limits.hpp
index acfd476e4..c49d50bd8 100644
--- a/include/boost/math/tools/numeric_limits.hpp
+++ b/include/boost/math/tools/numeric_limits.hpp
@@ -114,7 +114,311 @@ struct numeric_limits<double>
     BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double denorm_min    () { return DBL_TRUE_MIN; }
 };
 
-#endif
+template <>
+struct numeric_limits<short>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<short>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<short>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<short>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<short>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<short>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<short>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<short>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<short>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<short>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<short>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<short>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<short>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<short>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<short>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<short>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<short>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<short>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<short>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<short>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<short>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<short>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (min)         () { return SHRT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (max)         () { return SHRT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short lowest        () { return SHRT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short epsilon       () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short round_error   () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short infinity      () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short quiet_NaN     () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned short>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned short>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned short>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned short>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned short>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned short>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned short>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned short>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned short>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned short>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned short>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned short>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned short>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned short>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned short>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned short>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned short>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned short>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned short>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned short>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned short>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned short>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (min)         () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (max)         () { return USHRT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short lowest        () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short epsilon       () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short round_error   () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short infinity      () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short quiet_NaN     () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<int>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<int>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<int>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<int>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<int>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<int>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<int>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<int>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<int>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<int>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<int>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<int>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<int>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<int>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<int>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<int>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<int>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<int>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<int>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<int>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<int>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<int>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (min)         () { return INT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (max)         () { return INT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int lowest        () { return INT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int epsilon       () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int round_error   () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int infinity      () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int quiet_NaN     () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned int>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned int>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned int>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned int>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned int>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned int>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned int>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned int>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned int>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned int>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned int>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned int>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned int>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned int>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned int>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned int>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned int>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned int>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned int>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned int>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned int>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned int>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (min)         () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (max)         () { return UINT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int lowest        () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int epsilon       () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int round_error   () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int infinity      () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int quiet_NaN     () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (min)         () { return LONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (max)         () { return LONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long lowest        () { return LONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long epsilon       () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long round_error   () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long infinity      () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long quiet_NaN     () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (min)         () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (max)         () { return ULONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long lowest        () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long epsilon       () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long round_error   () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long infinity      () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long quiet_NaN     () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<long long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<long long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<long long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<long long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<long long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<long long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<long long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<long long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<long long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<long long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<long long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<long long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<long long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<long long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<long long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<long long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<long long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<long long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<long long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<long long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<long long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<long long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (min)         () { return LLONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (max)         () { return LLONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long lowest        () { return LLONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long epsilon       () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long round_error   () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long infinity      () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long quiet_NaN     () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long denorm_min    () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned long long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned long long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned long long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned long long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned long long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned long long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned long long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned long long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned long long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned long long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned long long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned long long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned long long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned long long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned long long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned long long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned long long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned long long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned long long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned long long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned long long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (min)         () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (max)         () { return ULLONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long lowest        () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long epsilon       () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long round_error   () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long infinity      () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long quiet_NaN     () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long denorm_min    () { return 0; }
+};
+
+#endif // BOOST_MATH_HAS_GPU_SUPPORT
 
 } // namespace math
 } // namespace boost

From 8a5796aead8e1ee7e95bf84acc3777b3e1d9c69e Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 14:51:37 -0400
Subject: [PATCH 25/37] Simplify error handling using the new numeric limits

---
 .../boost/math/policies/error_handling.hpp    | 76 ++++---------------
 1 file changed, 15 insertions(+), 61 deletions(-)

diff --git a/include/boost/math/policies/error_handling.hpp b/include/boost/math/policies/error_handling.hpp
index 36ec8d9d6..f3811d919 100644
--- a/include/boost/math/policies/error_handling.hpp
+++ b/include/boost/math/policies/error_handling.hpp
@@ -1,6 +1,6 @@
 //  Copyright John Maddock 2007.
 //  Copyright Paul A. Bristow 2007.
-
+//  Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -9,6 +9,7 @@
 #define BOOST_MATH_POLICY_ERROR_HANDLING_HPP
 
 #include <boost/math/tools/config.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
 #include <iomanip>
 #include <string>
 #include <cstring>
@@ -304,7 +305,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error(
 #else
    raise_error<std::overflow_error, T>(function, message ? message : "numeric overflow");
    // We should never get here:
-   return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
+   return boost::math::numeric_limits<T>::has_infinity ? boost::math::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
 #endif
 }
 
@@ -320,7 +321,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error(
 #else
    raise_error<std::overflow_error, T>(function, message ? message : "numeric overflow", val);
    // We should never get here:
-   return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
+   return boost::math::numeric_limits<T>::has_infinity ? boost::math::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
 #endif
 }
 
@@ -332,7 +333,7 @@ BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error(
 {
    // This may or may not do the right thing, but the user asked for the error
    // to be ignored so here we go anyway:
-   return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
+   return boost::math::numeric_limits<T>::has_infinity ? boost::math::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
 }
 
 #ifdef BOOST_MATH_HAS_GPU_SUPPORT
@@ -370,7 +371,7 @@ BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error(
 {
    // This may or may not do the right thing, but the user asked for the error
    // to be ignored so here we go anyway:
-   return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
+   return boost::math::numeric_limits<T>::has_infinity ? boost::math::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
 }
 
 template <class T>
@@ -382,7 +383,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error(
    errno = ERANGE;
    // This may or may not do the right thing, but the user asked for the error
    // to be silent so here we go anyway:
-   return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
+   return boost::math::numeric_limits<T>::has_infinity ? boost::math::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
 }
 
 template <class T>
@@ -395,7 +396,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error(
    errno = ERANGE;
    // This may or may not do the right thing, but the user asked for the error
    // to be silent so here we go anyway:
-   return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
+   return boost::math::numeric_limits<T>::has_infinity ? boost::math::numeric_limits<T>::infinity() : boost::math::tools::max_value<T>();
 }
 
 template <class T>
@@ -404,7 +405,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error(
            const char* message,
            const  ::boost::math::policies::overflow_error< ::boost::math::policies::user_error>&)
 {
-   return user_overflow_error(function, message, std::numeric_limits<T>::infinity());
+   return user_overflow_error(function, message, boost::math::numeric_limits<T>::infinity());
 }
 
 template <class T>
@@ -418,7 +419,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error(
    std::string sval = prec_format(val);
    replace_all_in_string(m, "%1%", sval.c_str());
 
-   return user_overflow_error(function, m.c_str(), std::numeric_limits<T>::infinity());
+   return user_overflow_error(function, m.c_str(), boost::math::numeric_limits<T>::infinity());
 }
 
 template <class T>
@@ -597,55 +598,8 @@ BOOST_MATH_GPU_ENABLED constexpr TargetType raise_rounding_error(
 {
    // This may or may not do the right thing, but the user asked for the error
    // to be ignored so here we go anyway:
-   static_assert(std::numeric_limits<TargetType>::is_specialized, "The target type must have std::numeric_limits specialized.");
-   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
-   return  val > 0 ? (std::numeric_limits<TargetType>::max)() : (std::numeric_limits<TargetType>::is_integer ? (std::numeric_limits<TargetType>::min)() : -(std::numeric_limits<TargetType>::max)());
-   #else
-   BOOST_IF_CONSTEXPR (std::is_same<TargetType, short>::value)
-   {
-      return val > 0 ? SHRT_MAX : SHRT_MIN;
-   }
-   else BOOST_IF_CONSTEXPR (std::is_same<TargetType, int>::value)
-   {
-      return val > 0 ? INT_MAX : INT_MIN;
-   }
-   else BOOST_IF_CONSTEXPR (std::is_same<TargetType, long>::value)
-   {
-      return val > 0 ? LONG_MAX : LONG_MIN;
-   }
-   else BOOST_IF_CONSTEXPR (std::is_same<TargetType, long long>::value)
-   {
-      return val > 0 ? LLONG_MAX : LLONG_MIN;
-   }
-   else BOOST_IF_CONSTEXPR (std::is_same<TargetType, unsigned short>::value)
-   {
-      return val > 0 ? USHRT_MAX : static_cast<unsigned short>(0U);
-   }
-   else BOOST_IF_CONSTEXPR (std::is_same<TargetType, unsigned>::value)
-   {
-      return val > 0 ? UINT_MAX : 0U;
-   }
-   else BOOST_IF_CONSTEXPR (std::is_same<TargetType, unsigned long>::value)
-   {
-      return val > 0 ? ULONG_MAX : 0UL;
-   }
-   else BOOST_IF_CONSTEXPR (std::is_same<TargetType, unsigned long long>::value)
-   {
-      return val > 0 ? ULLONG_MAX : 0ULL;
-   }
-   else BOOST_IF_CONSTEXPR (std::is_same<TargetType, float>::value)
-   {
-      return val > 0 ? FLT_MAX : -FLT_MAX;
-   }
-   else BOOST_IF_CONSTEXPR (std::is_same<TargetType, double>::value)
-   {
-      return val > 0 ? DBL_MAX : -DBL_MAX;
-   }
-   else
-   {
-      return val > 0 ? static_cast<TargetType>(-1) : static_cast<TargetType>(1);
-   }
-   #endif
+   static_assert(boost::math::numeric_limits<TargetType>::is_specialized, "The target type must have std::numeric_limits specialized.");
+   return  val > 0 ? (boost::math::numeric_limits<TargetType>::max)() : (boost::math::numeric_limits<TargetType>::is_integer ? (boost::math::numeric_limits<TargetType>::min)() : -(boost::math::numeric_limits<TargetType>::max)());
 }
 
 template <class T, class TargetType>
@@ -659,8 +613,8 @@ BOOST_MATH_GPU_ENABLED inline TargetType raise_rounding_error(
    errno = ERANGE;
    // This may or may not do the right thing, but the user asked for the error
    // to be silent so here we go anyway:
-   static_assert(std::numeric_limits<TargetType>::is_specialized, "The target type must have std::numeric_limits specialized.");
-   return  val > 0 ? (std::numeric_limits<TargetType>::max)() : (std::numeric_limits<TargetType>::is_integer ? (std::numeric_limits<TargetType>::min)() : -(std::numeric_limits<TargetType>::max)());
+   static_assert(boost::math::numeric_limits<TargetType>::is_specialized, "The target type must have std::numeric_limits specialized.");
+   return  val > 0 ? (boost::math::numeric_limits<TargetType>::max)() : (boost::math::numeric_limits<TargetType>::is_integer ? (boost::math::numeric_limits<TargetType>::min)() : -(boost::math::numeric_limits<TargetType>::max)());
 }
 template <class T, class TargetType>
 BOOST_MATH_GPU_ENABLED inline TargetType raise_rounding_error(
@@ -686,7 +640,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_indeterminate_result_error(
 #else
    raise_error<std::domain_error, T>(function, message, val);
    // we never get here:
-   return std::numeric_limits<T>::quiet_NaN();
+   return boost::math::numeric_limits<T>::quiet_NaN();
 #endif
 }
 

From 0210fa39bc9d2534ddfa8980d5c891c8a9b61031 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 15:26:40 -0400
Subject: [PATCH 26/37] Add GPU support to derived accessors

---
 .../detail/derived_accessors.hpp              | 32 ++++++++++---------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/include/boost/math/distributions/detail/derived_accessors.hpp b/include/boost/math/distributions/detail/derived_accessors.hpp
index eb76409a1..9d39f2d59 100644
--- a/include/boost/math/distributions/detail/derived_accessors.hpp
+++ b/include/boost/math/distributions/detail/derived_accessors.hpp
@@ -1,4 +1,5 @@
 //  Copyright John Maddock 2006.
+//  Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -29,6 +30,7 @@
 
 #include <cmath>
 #include <boost/math/tools/assert.hpp>
+#include <boost/math/tools/config.hpp>
 
 #ifdef _MSC_VER
 # pragma warning(push)
@@ -39,24 +41,24 @@
 namespace boost{ namespace math{
 
 template <class Distribution>
-typename Distribution::value_type variance(const Distribution& dist);
+BOOST_MATH_GPU_ENABLED typename Distribution::value_type variance(const Distribution& dist);
 
 template <class Distribution>
-inline typename Distribution::value_type standard_deviation(const Distribution& dist)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type standard_deviation(const Distribution& dist)
 {
    BOOST_MATH_STD_USING  // ADL of sqrt.
    return sqrt(variance(dist));
 }
 
 template <class Distribution>
-inline typename Distribution::value_type variance(const Distribution& dist)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type variance(const Distribution& dist)
 {
    typename Distribution::value_type result = standard_deviation(dist);
    return result * result;
 }
 
 template <class Distribution, class RealType>
-inline typename Distribution::value_type hazard(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type hazard(const Distribution& dist, const RealType& x)
 { // hazard function
   // http://www.itl.nist.gov/div898/handbook/eda/section3/eda362.htm#HAZ
    typedef typename Distribution::value_type value_type;
@@ -75,7 +77,7 @@ inline typename Distribution::value_type hazard(const Distribution& dist, const
 }
 
 template <class Distribution, class RealType>
-inline typename Distribution::value_type chf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type chf(const Distribution& dist, const RealType& x)
 { // cumulative hazard function.
   // http://www.itl.nist.gov/div898/handbook/eda/section3/eda362.htm#HAZ
    BOOST_MATH_STD_USING
@@ -83,7 +85,7 @@ inline typename Distribution::value_type chf(const Distribution& dist, const Rea
 }
 
 template <class Distribution>
-inline typename Distribution::value_type coefficient_of_variation(const Distribution& dist)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type coefficient_of_variation(const Distribution& dist)
 {
    typedef typename Distribution::value_type value_type;
    typedef typename Distribution::policy_type policy_type;
@@ -104,33 +106,33 @@ inline typename Distribution::value_type coefficient_of_variation(const Distribu
 // implementation with all arguments of the same type:
 //
 template <class Distribution, class RealType>
-inline typename Distribution::value_type pdf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type pdf(const Distribution& dist, const RealType& x)
 {
    typedef typename Distribution::value_type value_type;
    return pdf(dist, static_cast<value_type>(x));
 }
 template <class Distribution, class RealType>
-inline typename Distribution::value_type logpdf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logpdf(const Distribution& dist, const RealType& x)
 {
    using std::log;
    typedef typename Distribution::value_type value_type;
    return log(pdf(dist, static_cast<value_type>(x)));
 }
 template <class Distribution, class RealType>
-inline typename Distribution::value_type cdf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type cdf(const Distribution& dist, const RealType& x)
 {
    typedef typename Distribution::value_type value_type;
    return cdf(dist, static_cast<value_type>(x));
 }
 template <class Distribution, class Realtype>
-inline typename Distribution::value_type logcdf(const Distribution& dist, const Realtype& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logcdf(const Distribution& dist, const Realtype& x)
 {
    using std::log;
    using value_type = typename Distribution::value_type;
    return log(cdf(dist, static_cast<value_type>(x)));
 }
 template <class Distribution, class RealType>
-inline typename Distribution::value_type quantile(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type quantile(const Distribution& dist, const RealType& x)
 {
    typedef typename Distribution::value_type value_type;
    return quantile(dist, static_cast<value_type>(x));
@@ -144,14 +146,14 @@ inline typename Distribution::value_type chf(const Distribution& dist, const Rea
 }
 */
 template <class Distribution, class RealType>
-inline typename Distribution::value_type cdf(const complemented2_type<Distribution, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type cdf(const complemented2_type<Distribution, RealType>& c)
 {
    typedef typename Distribution::value_type value_type;
    return cdf(complement(c.dist, static_cast<value_type>(c.param)));
 }
 
 template <class Distribution, class RealType>
-inline typename Distribution::value_type logcdf(const complemented2_type<Distribution, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logcdf(const complemented2_type<Distribution, RealType>& c)
 {
    using std::log;
    typedef typename Distribution::value_type value_type;
@@ -159,14 +161,14 @@ inline typename Distribution::value_type logcdf(const complemented2_type<Distrib
 }
 
 template <class Distribution, class RealType>
-inline typename Distribution::value_type quantile(const complemented2_type<Distribution, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type quantile(const complemented2_type<Distribution, RealType>& c)
 {
    typedef typename Distribution::value_type value_type;
    return quantile(complement(c.dist, static_cast<value_type>(c.param)));
 }
 
 template <class Dist>
-inline typename Dist::value_type median(const Dist& d)
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type median(const Dist& d)
 { // median - default definition for those distributions for which a
   // simple closed form is not known,
   // and for which a domain_error and/or NaN generating function is NOT defined.

From 226e42ef09aefdef3b4ed620817a677e141fe9eb Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 15:26:54 -0400
Subject: [PATCH 27/37] Add GPU support to complement distributions

---
 .../boost/math/distributions/complement.hpp   | 27 ++++++++++---------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/include/boost/math/distributions/complement.hpp b/include/boost/math/distributions/complement.hpp
index 5c062a7cd..c63b8a504 100644
--- a/include/boost/math/distributions/complement.hpp
+++ b/include/boost/math/distributions/complement.hpp
@@ -1,5 +1,6 @@
 //  (C) Copyright John Maddock 2006.
 //  (C) Copyright Paul A. Bristow 2006.
+//  (C) Copyright Matt Borland 2024
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -7,6 +8,8 @@
 #ifndef BOOST_STATS_COMPLEMENT_HPP
 #define BOOST_STATS_COMPLEMENT_HPP
 
+#include <boost/math/tools/config.hpp>
+
 //
 // This code really defines our own tuple type.
 // It would be nice to reuse boost::math::tuple
@@ -19,7 +22,7 @@ namespace boost{ namespace math{
 template <class Dist, class RealType>
 struct complemented2_type
 {
-   complemented2_type(
+   BOOST_MATH_GPU_ENABLED complemented2_type(
       const Dist& d, 
       const RealType& p1)
       : dist(d), 
@@ -35,7 +38,7 @@ struct complemented2_type
 template <class Dist, class RealType1, class RealType2>
 struct complemented3_type
 {
-   complemented3_type(
+   BOOST_MATH_GPU_ENABLED complemented3_type(
       const Dist& d, 
       const RealType1& p1,
       const RealType2& p2)
@@ -53,7 +56,7 @@ struct complemented3_type
 template <class Dist, class RealType1, class RealType2, class RealType3>
 struct complemented4_type
 {
-   complemented4_type(
+   BOOST_MATH_GPU_ENABLED complemented4_type(
       const Dist& d, 
       const RealType1& p1,
       const RealType2& p2,
@@ -74,7 +77,7 @@ struct complemented4_type
 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4>
 struct complemented5_type
 {
-   complemented5_type(
+   BOOST_MATH_GPU_ENABLED complemented5_type(
       const Dist& d, 
       const RealType1& p1,
       const RealType2& p2,
@@ -98,7 +101,7 @@ struct complemented5_type
 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5>
 struct complemented6_type
 {
-   complemented6_type(
+   BOOST_MATH_GPU_ENABLED complemented6_type(
       const Dist& d, 
       const RealType1& p1,
       const RealType2& p2,
@@ -125,7 +128,7 @@ struct complemented6_type
 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5, class RealType6>
 struct complemented7_type
 {
-   complemented7_type(
+   BOOST_MATH_GPU_ENABLED complemented7_type(
       const Dist& d, 
       const RealType1& p1,
       const RealType2& p2,
@@ -153,37 +156,37 @@ struct complemented7_type
 };
 
 template <class Dist, class RealType>
-inline complemented2_type<Dist, RealType> complement(const Dist& d, const RealType& r)
+BOOST_MATH_GPU_ENABLED inline complemented2_type<Dist, RealType> complement(const Dist& d, const RealType& r)
 {
    return complemented2_type<Dist, RealType>(d, r);
 }
 
 template <class Dist, class RealType1, class RealType2>
-inline complemented3_type<Dist, RealType1, RealType2> complement(const Dist& d, const RealType1& r1, const RealType2& r2)
+BOOST_MATH_GPU_ENABLED inline complemented3_type<Dist, RealType1, RealType2> complement(const Dist& d, const RealType1& r1, const RealType2& r2)
 {
    return complemented3_type<Dist, RealType1, RealType2>(d, r1, r2);
 }
 
 template <class Dist, class RealType1, class RealType2, class RealType3>
-inline complemented4_type<Dist, RealType1, RealType2, RealType3> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3)
+BOOST_MATH_GPU_ENABLED inline complemented4_type<Dist, RealType1, RealType2, RealType3> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3)
 {
    return complemented4_type<Dist, RealType1, RealType2, RealType3>(d, r1, r2, r3);
 }
 
 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4>
-inline complemented5_type<Dist, RealType1, RealType2, RealType3, RealType4> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4)
+BOOST_MATH_GPU_ENABLED inline complemented5_type<Dist, RealType1, RealType2, RealType3, RealType4> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4)
 {
    return complemented5_type<Dist, RealType1, RealType2, RealType3, RealType4>(d, r1, r2, r3, r4);
 }
 
 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5>
-inline complemented6_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5)
+BOOST_MATH_GPU_ENABLED inline complemented6_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5)
 {
    return complemented6_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5>(d, r1, r2, r3, r4, r5);
 }
 
 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5, class RealType6>
-inline complemented7_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5, RealType6> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5, const RealType6& r6)
+BOOST_MATH_GPU_ENABLED inline complemented7_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5, RealType6> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5, const RealType6& r6)
 {
    return complemented7_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5, RealType6>(d, r1, r2, r3, r4, r5, r6);
 }

From fcbcf046e8333207188443d92c08f8418b7aefe7 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 15:27:09 -0400
Subject: [PATCH 28/37] Add GPU support to the bernoulli distribution

---
 .../boost/math/distributions/bernoulli.hpp    | 43 ++++++++++---------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/include/boost/math/distributions/bernoulli.hpp b/include/boost/math/distributions/bernoulli.hpp
index cce209a6f..bb7dd22a5 100644
--- a/include/boost/math/distributions/bernoulli.hpp
+++ b/include/boost/math/distributions/bernoulli.hpp
@@ -2,6 +2,7 @@
 
 // Copyright John Maddock 2006.
 // Copyright Paul A. Bristow 2007.
+// Copyright Matt Borland 2024.
 
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
@@ -43,7 +44,7 @@ namespace boost
     {
       // Common error checking routines for bernoulli distribution functions:
       template <class RealType, class Policy>
-      inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& /* pol */)
+      BOOST_MATH_GPU_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& /* pol */)
       {
         if(!(boost::math::isfinite)(p) || (p < 0) || (p > 1))
         {
@@ -55,23 +56,23 @@ namespace boost
         return true;
       }
       template <class RealType, class Policy>
-      inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */, const std::true_type&)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */, const std::true_type&)
       {
         return check_success_fraction(function, p, result, Policy());
       }
       template <class RealType, class Policy>
-      inline bool check_dist(const char* , const RealType& , RealType* , const Policy& /* pol */, const std::false_type&)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* , const RealType& , RealType* , const Policy& /* pol */, const std::false_type&)
       {
          return true;
       }
       template <class RealType, class Policy>
-      inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */)
       {
          return check_dist(function, p, result, Policy(), typename policies::constructor_error_check<Policy>::type());
       }
 
       template <class RealType, class Policy>
-      inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol)
       {
         if(check_dist(function, p, result, Policy(), typename policies::method_error_check<Policy>::type()) == false)
         {
@@ -87,7 +88,7 @@ namespace boost
        return true;
       }
       template <class RealType, class Policy>
-      inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& /* pol */)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& /* pol */)
       {
         if((check_dist(function, p, result, Policy(), typename policies::method_error_check<Policy>::type()) && detail::check_probability(function, prob, result, Policy())) == false)
         {
@@ -105,7 +106,7 @@ namespace boost
       typedef RealType value_type;
       typedef Policy policy_type;
 
-      bernoulli_distribution(RealType p = 0.5) : m_p(p)
+      BOOST_MATH_GPU_ENABLED bernoulli_distribution(RealType p = 0.5) : m_p(p)
       { // Default probability = half suits 'fair' coin tossing
         // where probability of heads == probability of tails.
         RealType result; // of checks.
@@ -115,7 +116,7 @@ namespace boost
           &result, Policy());
       } // bernoulli_distribution constructor.
 
-      RealType success_fraction() const
+      BOOST_MATH_GPU_ENABLED RealType success_fraction() const
       { // Probability.
         return m_p;
       }
@@ -132,21 +133,21 @@ namespace boost
     #endif
 
     template <class RealType, class Policy>
-    inline const std::pair<RealType, RealType> range(const bernoulli_distribution<RealType, Policy>& /* dist */)
+    BOOST_MATH_GPU_ENABLED inline const std::pair<RealType, RealType> range(const bernoulli_distribution<RealType, Policy>& /* dist */)
     { // Range of permissible values for random variable k = {0, 1}.
       using boost::math::tools::max_value;
       return std::pair<RealType, RealType>(static_cast<RealType>(0), static_cast<RealType>(1));
     }
 
     template <class RealType, class Policy>
-    inline const std::pair<RealType, RealType> support(const bernoulli_distribution<RealType, Policy>& /* dist */)
+    BOOST_MATH_GPU_ENABLED inline const std::pair<RealType, RealType> support(const bernoulli_distribution<RealType, Policy>& /* dist */)
     { // Range of supported values for random variable k = {0, 1}.
       // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
       return std::pair<RealType, RealType>(static_cast<RealType>(0), static_cast<RealType>(1));
     }
 
     template <class RealType, class Policy>
-    inline RealType mean(const bernoulli_distribution<RealType, Policy>& dist)
+    BOOST_MATH_GPU_ENABLED inline RealType mean(const bernoulli_distribution<RealType, Policy>& dist)
     { // Mean of bernoulli distribution = p (n = 1).
       return dist.success_fraction();
     } // mean
@@ -159,13 +160,13 @@ namespace boost
     //} // median
 
     template <class RealType, class Policy>
-    inline RealType variance(const bernoulli_distribution<RealType, Policy>& dist)
+    BOOST_MATH_GPU_ENABLED inline RealType variance(const bernoulli_distribution<RealType, Policy>& dist)
     { // Variance of bernoulli distribution =p * q.
       return  dist.success_fraction() * (1 - dist.success_fraction());
     } // variance
 
     template <class RealType, class Policy>
-    RealType pdf(const bernoulli_distribution<RealType, Policy>& dist, const RealType& k)
+    BOOST_MATH_GPU_ENABLED RealType pdf(const bernoulli_distribution<RealType, Policy>& dist, const RealType& k)
     { // Probability Density/Mass Function.
       BOOST_FPU_EXCEPTION_GUARD
       // Error check:
@@ -190,7 +191,7 @@ namespace boost
     } // pdf
 
     template <class RealType, class Policy>
-    inline RealType cdf(const bernoulli_distribution<RealType, Policy>& dist, const RealType& k)
+    BOOST_MATH_GPU_ENABLED inline RealType cdf(const bernoulli_distribution<RealType, Policy>& dist, const RealType& k)
     { // Cumulative Distribution Function Bernoulli.
       RealType p = dist.success_fraction();
       // Error check:
@@ -214,7 +215,7 @@ namespace boost
     } // bernoulli cdf
 
     template <class RealType, class Policy>
-    inline RealType cdf(const complemented2_type<bernoulli_distribution<RealType, Policy>, RealType>& c)
+    BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<bernoulli_distribution<RealType, Policy>, RealType>& c)
     { // Complemented Cumulative Distribution Function bernoulli.
       RealType const& k = c.param;
       bernoulli_distribution<RealType, Policy> const& dist = c.dist;
@@ -240,7 +241,7 @@ namespace boost
     } // bernoulli cdf complement
 
     template <class RealType, class Policy>
-    inline RealType quantile(const bernoulli_distribution<RealType, Policy>& dist, const RealType& p)
+    BOOST_MATH_GPU_ENABLED inline RealType quantile(const bernoulli_distribution<RealType, Policy>& dist, const RealType& p)
     { // Quantile or Percent Point Bernoulli function.
       // Return the number of expected successes k either 0 or 1.
       // for a given probability p.
@@ -265,7 +266,7 @@ namespace boost
     } // quantile
 
     template <class RealType, class Policy>
-    inline RealType quantile(const complemented2_type<bernoulli_distribution<RealType, Policy>, RealType>& c)
+    BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<bernoulli_distribution<RealType, Policy>, RealType>& c)
     { // Quantile or Percent Point bernoulli function.
       // Return the number of expected successes k for a given
       // complement of the probability q.
@@ -294,13 +295,13 @@ namespace boost
     } // quantile complemented.
 
     template <class RealType, class Policy>
-    inline RealType mode(const bernoulli_distribution<RealType, Policy>& dist)
+    BOOST_MATH_GPU_ENABLED inline RealType mode(const bernoulli_distribution<RealType, Policy>& dist)
     {
       return static_cast<RealType>((dist.success_fraction() <= 0.5) ? 0 : 1); // p = 0.5 can be 0 or 1
     }
 
     template <class RealType, class Policy>
-    inline RealType skewness(const bernoulli_distribution<RealType, Policy>& dist)
+    BOOST_MATH_GPU_ENABLED inline RealType skewness(const bernoulli_distribution<RealType, Policy>& dist)
     {
       BOOST_MATH_STD_USING; // Aid ADL for sqrt.
       RealType p = dist.success_fraction();
@@ -308,7 +309,7 @@ namespace boost
     }
 
     template <class RealType, class Policy>
-    inline RealType kurtosis_excess(const bernoulli_distribution<RealType, Policy>& dist)
+    BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const bernoulli_distribution<RealType, Policy>& dist)
     {
       RealType p = dist.success_fraction();
       // Note Wolfram says this is kurtosis in text, but gamma2 is the kurtosis excess,
@@ -319,7 +320,7 @@ namespace boost
     }
 
     template <class RealType, class Policy>
-    inline RealType kurtosis(const bernoulli_distribution<RealType, Policy>& dist)
+    BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const bernoulli_distribution<RealType, Policy>& dist)
     {
       RealType p = dist.success_fraction();
       return 1 / (1 - p) + 1/p -6 + 3;

From 6bf19ff85918dfc97ac676810cd9f6eb7a48bb90 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 15:27:26 -0400
Subject: [PATCH 29/37] Add SYCL testing of bernoulli distribution

---
 test/sycl_jamfile       |  1 +
 test/test_bernoulli.cpp | 14 +++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/test/sycl_jamfile b/test/sycl_jamfile
index c0c67d1e5..92580c93b 100644
--- a/test/sycl_jamfile
+++ b/test/sycl_jamfile
@@ -11,6 +11,7 @@ project : requirements
 
 # Distributions
 run test_arcsine.cpp ;
+run test_bernoulli.cpp ;
 run test_cauchy.cpp ;
 
 # Special Functions
diff --git a/test/test_bernoulli.cpp b/test/test_bernoulli.cpp
index d8c663399..8513cec36 100644
--- a/test/test_bernoulli.cpp
+++ b/test/test_bernoulli.cpp
@@ -2,6 +2,7 @@
 
 // Copyright John Maddock 2006.
 // Copyright  Paul A. Bristow 2007, 2012.
+// Copyright Matt Borland 2024
 
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
@@ -22,7 +23,7 @@
 
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 
 #include <boost/math/distributions/bernoulli.hpp> // for bernoulli_distribution
 using boost::math::bernoulli_distribution;
@@ -74,6 +75,7 @@ void test_spots(RealType)
   BOOST_CHECK_EQUAL(bernoulli_distribution<RealType>(static_cast<RealType>(0.1L)).success_fraction(), static_cast<RealType>(0.1L));
   BOOST_CHECK_EQUAL(bernoulli_distribution<RealType>(static_cast<RealType>(0.9L)).success_fraction(), static_cast<RealType>(0.9L));
 
+#ifndef BOOST_MATH_NO_EXCEPTIONS
   BOOST_MATH_CHECK_THROW( // Constructor success_fraction outside 0 to 1.
        bernoulli_distribution<RealType>(static_cast<RealType>(2)), std::domain_error);
   BOOST_MATH_CHECK_THROW(
@@ -86,7 +88,8 @@ void test_spots(RealType)
   BOOST_MATH_CHECK_THROW(
        pdf( // pdf k neither 0 nor 1.
           bernoulli_distribution<RealType>(static_cast<RealType>(0.25L)), static_cast<RealType>(2)), std::domain_error);
- 
+#endif
+
   BOOST_CHECK_EQUAL(
     pdf( // OK k (or n)
     bernoulli_distribution<RealType>(static_cast<RealType>(0.5L)), static_cast<RealType>(0)),
@@ -134,6 +137,7 @@ void test_spots(RealType)
        static_cast<RealType>(5.11111111111111111111111111111111111111111111L),
        tolerance);
 
+#ifndef BOOST_MATH_NO_EXCEPTIONS
   BOOST_MATH_CHECK_THROW(
      quantile(
         bernoulli_distribution<RealType>(static_cast<RealType>(2)), // prob >1
@@ -154,6 +158,7 @@ void test_spots(RealType)
         bernoulli_distribution<RealType>(static_cast<RealType>(0.5L)), // k < 0
         static_cast<RealType>(2)), std::domain_error
      );
+#endif
 
   BOOST_CHECK_CLOSE_FRACTION(
      cdf(
@@ -217,6 +222,7 @@ void test_spots(RealType)
 
    // Checks for 'bad' parameters.
    // Construction.
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_MATH_CHECK_THROW(bernoulli_distribution<RealType>(-1), std::domain_error); // p outside 0 to 1.
    BOOST_MATH_CHECK_THROW(bernoulli_distribution<RealType>(+2), std::domain_error); // p outside 0 to 1.
 
@@ -269,7 +275,7 @@ void test_spots(RealType)
      BOOST_MATH_CHECK_THROW(quantile(w, +inf), std::domain_error); // p = + inf
      BOOST_MATH_CHECK_THROW(quantile(complement(w, +inf)), std::domain_error); // p = + inf
    } // has_infinity
-
+   #endif
 } // template <class RealType>void test_spots(RealType)
 
 BOOST_AUTO_TEST_CASE( test_main )
@@ -302,7 +308,9 @@ BOOST_AUTO_TEST_CASE( test_main )
   // (Parameter value, arbitrarily zero, only communicates the floating point type).
   test_spots(0.0F); // Test float.
   test_spots(0.0); // Test double.
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
   test_spots(0.0L); // Test long double.
+#endif
 #if !BOOST_WORKAROUND(BOOST_BORLANDC, BOOST_TESTED_AT(0x582)) && !defined(BOOST_MATH_NO_REAL_CONCEPT_TESTS)
   test_spots(boost::math::concepts::real_concept(0.)); // Test real concept.
 #endif

From 0e7ab64840283cb4b5e10fd8f2fa62f9222d1b8c Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 15:45:21 -0400
Subject: [PATCH 30/37] Add bernoulli cdf CUDA testing

---
 test/cuda_jamfile                 |   2 +
 test/test_bernoulli_cdf_double.cu | 110 ++++++++++++++++++++++++++++++
 test/test_bernoulli_cdf_float.cu  | 110 ++++++++++++++++++++++++++++++
 3 files changed, 222 insertions(+)
 create mode 100644 test/test_bernoulli_cdf_double.cu
 create mode 100644 test/test_bernoulli_cdf_float.cu

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index 709289389..5741b61dc 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -23,6 +23,8 @@ run test_arcsine_pdf_double.cu ;
 run test_arcsine_pdf_float.cu ;
 run test_arcsine_quan_double.cu ;
 run test_arcsine_quan_float.cu ;
+run test_bernoulli_cdf_double.cu ;
+run test_bernoulli_cdf_float.cu ;
 run test_binomial.cpp ;
 run test_cauchy_cdf_double.cu ;
 run test_cauchy_cdf_float.cu ;
diff --git a/test/test_bernoulli_cdf_double.cu b/test/test_bernoulli_cdf_double.cu
new file mode 100644
index 000000000..e4c21ca06
--- /dev/null
+++ b/test/test_bernoulli_cdf_double.cu
@@ -0,0 +1,110 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/bernoulli.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1));
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1)));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_bernoulli_cdf_float.cu b/test/test_bernoulli_cdf_float.cu
new file mode 100644
index 000000000..82c0eabc0
--- /dev/null
+++ b/test/test_bernoulli_cdf_float.cu
@@ -0,0 +1,110 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/bernoulli.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1));
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1)));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}

From 044f164fb0d61eb5aa257f7fde8fcdb1dfdf5407 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Thu, 25 Jul 2024 15:47:15 -0400
Subject: [PATCH 31/37] Add bernoulli pdf CUDA testing

---
 test/cuda_jamfile                 |   2 +
 test/test_bernoulli_pdf_double.cu | 110 ++++++++++++++++++++++++++++++
 test/test_bernoulli_pdf_float.cu  | 110 ++++++++++++++++++++++++++++++
 3 files changed, 222 insertions(+)
 create mode 100644 test/test_bernoulli_pdf_double.cu
 create mode 100644 test/test_bernoulli_pdf_float.cu

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index 5741b61dc..90ecd3c16 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -25,6 +25,8 @@ run test_arcsine_quan_double.cu ;
 run test_arcsine_quan_float.cu ;
 run test_bernoulli_cdf_double.cu ;
 run test_bernoulli_cdf_float.cu ;
+run test_bernoulli_pdf_double.cu ;
+run test_bernoulli_pdf_float.cu ;
 run test_binomial.cpp ;
 run test_cauchy_cdf_double.cu ;
 run test_cauchy_cdf_float.cu ;
diff --git a/test/test_bernoulli_pdf_double.cu b/test/test_bernoulli_pdf_double.cu
new file mode 100644
index 000000000..24b33c16c
--- /dev/null
+++ b/test/test_bernoulli_pdf_double.cu
@@ -0,0 +1,110 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/bernoulli.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1));
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1)));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_bernoulli_pdf_float.cu b/test/test_bernoulli_pdf_float.cu
new file mode 100644
index 000000000..08d2ca5a0
--- /dev/null
+++ b/test/test_bernoulli_pdf_float.cu
@@ -0,0 +1,110 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/bernoulli.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1));
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1)));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}

From 8e696df7a289e108ce28a3f67b0a06c99299f77b Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 26 Jul 2024 11:08:54 -0400
Subject: [PATCH 32/37] Add bool to numeric limits for completeness

---
 include/boost/math/tools/numeric_limits.hpp | 38 +++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/include/boost/math/tools/numeric_limits.hpp b/include/boost/math/tools/numeric_limits.hpp
index c49d50bd8..7d3d5db26 100644
--- a/include/boost/math/tools/numeric_limits.hpp
+++ b/include/boost/math/tools/numeric_limits.hpp
@@ -418,6 +418,44 @@ struct numeric_limits<unsigned long long>
     BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long denorm_min    () { return 0; }
 };
 
+template <>
+struct numeric_limits<bool>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<bool>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<bool>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<bool>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<bool>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<bool>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<bool>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<bool>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<bool>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<bool>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<bool>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<bool>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<bool>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<bool>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<bool>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<bool>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<bool>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<bool>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<bool>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<bool>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<bool>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<bool>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (min)         () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (max)         () { return true; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool lowest        () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool epsilon       () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool round_error   () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool infinity      () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool quiet_NaN     () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool signaling_NaN () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool denorm_min    () { return false; }
+};
+
 #endif // BOOST_MATH_HAS_GPU_SUPPORT
 
 } // namespace math

From f8a0f5485867c7f0646d96283015fe9218ad59e1 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 26 Jul 2024 11:54:29 -0400
Subject: [PATCH 33/37] Fix test poisson ambiguity with numeric_limits

---
 test/test_poisson.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_poisson.cpp b/test/test_poisson.cpp
index 9b75ce162..73de4751e 100644
--- a/test/test_poisson.cpp
+++ b/test/test_poisson.cpp
@@ -53,12 +53,12 @@ void test_spots(RealType)
    // guaranteed for type RealType, eg 6 for float, 15 for double,
    // expressed as a percentage (so -2) for BOOST_CHECK_CLOSE,
 
-   int decdigits = numeric_limits<RealType>::digits10;
+   int decdigits = std::numeric_limits<RealType>::digits10;
   // May eb >15 for 80 and 128-bit FP types.
   if (decdigits <= 0)
   { // decdigits is not defined, for example real concept,
     // so assume precision of most test data is double (for example, MathCAD).
-     decdigits = numeric_limits<double>::digits10; // == 15 for 64-bit
+     decdigits = std::numeric_limits<double>::digits10; // == 15 for 64-bit
   }
   if (decdigits > 15 ) // numeric_limits<double>::digits10)
   { // 15 is the accuracy of the MathCAD test data.
@@ -644,7 +644,7 @@ BOOST_AUTO_TEST_CASE( test_main )
   test_spots(0.0); // Test double.
 #endif
 #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
-  if (numeric_limits<long double>::digits10 > numeric_limits<double>::digits10)
+  if (std::numeric_limits<long double>::digits10 > std::numeric_limits<double>::digits10)
   { // long double is better than double (so not MSVC where they are same).
 #ifdef TEST_LDOUBLE
      test_spots(0.0L); // Test long double.

From 746f939aec102b75765ad539d73c016d14c612f5 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 26 Jul 2024 12:00:03 -0400
Subject: [PATCH 34/37] Fix header paths

---
 test/test_bernoulli_constants.cpp | 2 +-
 test/test_float_io.cpp            | 2 +-
 test/test_owens_t.cpp             | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/test_bernoulli_constants.cpp b/test/test_bernoulli_constants.cpp
index 6d73d82a4..f5ae15a3e 100644
--- a/test/test_bernoulli_constants.cpp
+++ b/test/test_bernoulli_constants.cpp
@@ -10,7 +10,7 @@
 #include <boost/test/unit_test.hpp>
 #include <boost/math/concepts/real_concept.hpp>
 #include <boost/math/special_functions/bernoulli.hpp>
-#include <libs/math/test/table_type.hpp>
+#include "table_type.hpp"
 #include <boost/math/tools/test.hpp>
 #include <iostream>
 #include <iomanip>
diff --git a/test/test_float_io.cpp b/test/test_float_io.cpp
index 107cc39d4..e1b5c67a1 100644
--- a/test/test_float_io.cpp
+++ b/test/test_float_io.cpp
@@ -301,7 +301,7 @@ void test()
          std::ios_base::fixed | std::ios_base::showpos}};
 
    std::array<std::array<const char*, 13 * 9>, 40> string_data = {{
-#include "libs/math/test/string_data.ipp"
+#include "string_data.ipp"
    }};
 
    double num   = 123456789.0;
diff --git a/test/test_owens_t.cpp b/test/test_owens_t.cpp
index 8c33e77f0..11389dd20 100644
--- a/test/test_owens_t.cpp
+++ b/test/test_owens_t.cpp
@@ -38,9 +38,9 @@ using boost::math::owens_t;
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/array.hpp>
 
-#include "libs/math/test/handle_test_result.hpp"
-#include "libs/math/test/table_type.hpp"
-#include "libs/math/test/functor.hpp"
+#include "handle_test_result.hpp"
+#include "table_type.hpp"
+#include "functor.hpp"
 #include "boost/math/tools/test_value.hpp"
 #include "test_owens_t.hpp"
 

From 2eaca5f6bd95cbcf4c53565b1d25ce53410a2f69 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 26 Jul 2024 13:36:07 -0400
Subject: [PATCH 35/37] Fix invalid constexpr variables

---
 .../boost/math/special_functions/digamma.hpp   | 18 +++++++++---------
 .../boost/math/special_functions/sin_pi.hpp    |  8 ++++----
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/boost/math/special_functions/digamma.hpp b/include/boost/math/special_functions/digamma.hpp
index 98c939237..bde80f7dd 100644
--- a/include/boost/math/special_functions/digamma.hpp
+++ b/include/boost/math/special_functions/digamma.hpp
@@ -327,11 +327,11 @@ BOOST_MATH_GPU_ENABLED T digamma_imp_1_2(T x, const std::integral_constant<int,
    // At double precision, max error found:  2.452e-17
    //
    // LCOV_EXCL_START
-   constexpr float Y = 0.99558162689208984F;
+   BOOST_MATH_STATIC const float Y = 0.99558162689208984F;
 
-   constexpr T root1 = T(1569415565) / 1073741824uL;
-   constexpr T root2 = (T(381566830) / 1073741824uL) / 1073741824uL;
-   constexpr T root3 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.9016312093258695918615325266959189453125e-19);
+   BOOST_MATH_STATIC const T root1 = T(1569415565) / 1073741824uL;
+   BOOST_MATH_STATIC const T root2 = (T(381566830) / 1073741824uL) / 1073741824uL;
+   BOOST_MATH_STATIC const T root3 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.9016312093258695918615325266959189453125e-19);
 
    BOOST_MATH_STATIC const T P[] = {
       BOOST_MATH_BIG_CONSTANT(T, 53, 0.25479851061131551),
@@ -378,16 +378,16 @@ BOOST_MATH_GPU_ENABLED inline T digamma_imp_1_2(T x, const std::integral_constan
    // At float precision, max error found:  2.008725e-008
    //
    // LCOV_EXCL_START
-   constexpr float Y = 0.99558162689208984f;
-   constexpr T root = 1532632.0f / 1048576;
-   constexpr T root_minor = static_cast<T>(0.3700660185912626595423257213284682051735604e-6L);
-   constexpr T P[] = {
+   BOOST_MATH_STATIC const float Y = 0.99558162689208984f;
+   BOOST_MATH_STATIC const T root = 1532632.0f / 1048576;
+   BOOST_MATH_STATIC const T root_minor = static_cast<T>(0.3700660185912626595423257213284682051735604e-6L);
+   BOOST_MATH_STATIC const T P[] = {
       0.25479851023250261e0f,
       -0.44981331915268368e0f,
       -0.43916936919946835e0f,
       -0.61041765350579073e-1f
    };
-   constexpr T Q[] = {
+   BOOST_MATH_STATIC const T Q[] = {
       0.1e1f,
       0.15890202430554952e1f,
       0.65341249856146947e0f,
diff --git a/include/boost/math/special_functions/sin_pi.hpp b/include/boost/math/special_functions/sin_pi.hpp
index 3c558ab7b..acb5db29e 100644
--- a/include/boost/math/special_functions/sin_pi.hpp
+++ b/include/boost/math/special_functions/sin_pi.hpp
@@ -55,15 +55,15 @@ BOOST_MATH_GPU_ENABLED inline T sin_pi_imp(T x, const Policy& pol)
 }
 
 template <class T, class Policy>
-BOOST_MATH_FORCEINLINE BOOST_MATH_GPU_ENABLED T sin_pi_dispatch(T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T sin_pi_dispatch(T x, const Policy& pol)
 {
-   if (x < 0)
+   if (x < T(0))
    {
-      return -sin_pi_imp(-x, pol);
+      return -sin_pi_imp(T(-x), pol);
    }
    else
    {
-      return sin_pi_imp(x, pol);
+      return sin_pi_imp(T(x), pol);
    }
 }
 

From 480df5f63d813fe4af160b53ebc90c9e9d076276 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 26 Jul 2024 13:46:25 -0400
Subject: [PATCH 36/37] Fix new trigamma dispatch function and illegal
 constexpr

---
 include/boost/math/special_functions/trigamma.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp
index 5d10135b9..2ec6c5256 100644
--- a/include/boost/math/special_functions/trigamma.hpp
+++ b/include/boost/math/special_functions/trigamma.hpp
@@ -49,7 +49,7 @@ template <class T, class Policy>
 BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_constant<int, 53>&)
 {
    // Max error in interpolated form: 3.736e-017
-   constexpr T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469);
+   BOOST_MATH_STATIC const T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469);
    BOOST_MATH_STATIC const T P_1_2[] = {
       BOOST_MATH_BIG_CONSTANT(T, 53, -1.1093280605946045),
       BOOST_MATH_BIG_CONSTANT(T, 53, -3.8310674472619321),
@@ -122,7 +122,7 @@ template <class T, class Policy>
 BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_constant<int, 64>&)
 {
    // Max error in interpolated form: 1.178e-020
-   constexpr T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875);
+   BOOST_MATH_STATIC const T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875);
    BOOST_MATH_STATIC const T P_1_2[] = {
       BOOST_MATH_BIG_CONSTANT(T, 64, -1.10932535608960258341),
       BOOST_MATH_BIG_CONSTANT(T, 64, -4.18793841543017129052),
@@ -395,7 +395,7 @@ BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Policy& pol, const Tag& ta
          return policies::raise_pole_error<T>("boost::math::trigamma<%1%>(%1%)", nullptr, (1-x), pol);
       }
       T s = fabs(x) < fabs(z) ? boost::math::sin_pi(x, pol) : boost::math::sin_pi(z, pol);
-      return result - trigamma_prec(z, pol, tag) + boost::math::pow<2>(constants::pi<T>()) / (s * s);
+      return result - trigamma_prec(T(z), pol, tag) + boost::math::pow<2>(constants::pi<T>()) / (s * s);
    }
    if(x < 1)
    {

From bdc7c259af9b1aaa40183db26c7e89b94408e61c Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 26 Jul 2024 14:02:31 -0400
Subject: [PATCH 37/37] Speed up CI builds

---
 .github/workflows/cuda.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
index 53c551ee1..d57c681a4 100644
--- a/.github/workflows/cuda.yml
+++ b/.github/workflows/cuda.yml
@@ -70,7 +70,7 @@ jobs:
       - name: Build tests
         run: |
           cd ../boost-root/__build__
-          cmake --build . --target tests
+          cmake --build . --target tests -j $(nproc)
       - name: Run tests
         run: |
           cd ../boost-root/__build__
@@ -136,7 +136,7 @@ jobs:
       - name: Build tests
         run: |
           cd ../boost-root/__build__
-          cmake --build . --target tests
+          cmake --build . --target tests -j $(nproc)
       - name: Run tests
         run: |
           cd ../boost-root/__build__