diff --git a/include/boost/math/distributions/cauchy.hpp b/include/boost/math/distributions/cauchy.hpp
index d914cca77..15a015838 100644
--- a/include/boost/math/distributions/cauchy.hpp
+++ b/include/boost/math/distributions/cauchy.hpp
@@ -13,6 +13,7 @@
 #pragma warning(disable : 4127) // conditional expression is constant
 #endif
 
+#include <boost/math/tools/config.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/math/distributions/complement.hpp>
@@ -30,7 +31,7 @@ namespace detail
 {
 
 template <class RealType, class Policy>
-RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealType& x, bool complement)
+BOOST_MATH_GPU_ENABLED RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealType& x, bool complement)
 {
    //
    // This calculates the cdf of the Cauchy distribution and/or its complement.
@@ -54,7 +55,7 @@ RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealTy
    // to get the result.
    //
    BOOST_MATH_STD_USING // for ADL of std functions
-   static const char* function = "boost::math::cdf(cauchy<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(cauchy<%1%>&, %1%)";
    RealType result = 0;
    RealType location = dist.location();
    RealType scale = dist.scale();
@@ -66,6 +67,16 @@ RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealTy
    {
       return result;
    }
+   #ifdef BOOST_MATH_HAS_GPU_SUPPORT
+   if(x > tools::max_value<RealType>())
+   {
+      return static_cast<RealType>((complement) ? 0 : 1);
+   }
+   if(x < -tools::max_value<RealType>())
+   {
+      return static_cast<RealType>((complement) ? 1 : 0);
+   }
+   #else
    if(std::numeric_limits<RealType>::has_infinity && x == std::numeric_limits<RealType>::infinity())
    { // cdf +infinity is unity.
      return static_cast<RealType>((complement) ? 0 : 1);
@@ -74,6 +85,7 @@ RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealTy
    { // cdf -infinity is zero.
      return static_cast<RealType>((complement) ? 1 : 0);
    }
+   #endif
    if(false == detail::check_x(function, x, &result, Policy()))
    { // Catches x == NaN
       return result;
@@ -88,7 +100,7 @@ RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealTy
 } // cdf
 
 template <class RealType, class Policy>
-RealType quantile_imp(
+BOOST_MATH_GPU_ENABLED RealType quantile_imp(
       const cauchy_distribution<RealType, Policy>& dist,
       const RealType& p,
       bool complement)
@@ -101,7 +113,7 @@ RealType quantile_imp(
    // mid-point of the distribution.  This is either added or subtracted
    // from the location parameter depending on whether `complement` is true.
    //
-   static const char* function = "boost::math::quantile(cauchy<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(cauchy<%1%>&, %1%)";
    BOOST_MATH_STD_USING // for ADL of std functions
 
    RealType result = 0;
@@ -151,20 +163,20 @@ class cauchy_distribution
    typedef RealType value_type;
    typedef Policy policy_type;
 
-   cauchy_distribution(RealType l_location = 0, RealType l_scale = 1)
+   BOOST_MATH_GPU_ENABLED cauchy_distribution(RealType l_location = 0, RealType l_scale = 1)
       : m_a(l_location), m_hg(l_scale)
    {
-    static const char* function = "boost::math::cauchy_distribution<%1%>::cauchy_distribution";
+    constexpr auto function = "boost::math::cauchy_distribution<%1%>::cauchy_distribution";
      RealType result;
      detail::check_location(function, l_location, &result, Policy());
      detail::check_scale(function, l_scale, &result, Policy());
    } // cauchy_distribution
 
-   RealType location()const
+   BOOST_MATH_GPU_ENABLED RealType location()const
    {
       return m_a;
    }
-   RealType scale()const
+   BOOST_MATH_GPU_ENABLED RealType scale()const
    {
       return m_hg;
    }
@@ -184,13 +196,15 @@ cauchy_distribution(RealType,RealType)->cauchy_distribution<typename boost::math
 #endif
 
 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> range(const cauchy_distribution<RealType, Policy>&)
+BOOST_MATH_GPU_ENABLED inline const std::pair<RealType, RealType> range(const cauchy_distribution<RealType, Policy>&)
 { // Range of permissible values for random variable x.
-  if (std::numeric_limits<RealType>::has_infinity)
+  #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+  BOOST_MATH_IF_CONSTEXPR (std::numeric_limits<RealType>::has_infinity)
   { 
      return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
   }
   else
+  #endif
   { // Can only use max_value.
    using boost::math::tools::max_value;
    return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
@@ -198,14 +212,16 @@ inline const std::pair<RealType, RealType> range(const cauchy_distribution<RealT
 }
 
 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> support(const cauchy_distribution<RealType, Policy>& )
+BOOST_MATH_GPU_ENABLED inline const std::pair<RealType, RealType> support(const cauchy_distribution<RealType, Policy>& )
 { // Range of supported values for random variable x.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
-  if (std::numeric_limits<RealType>::has_infinity)
+  #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+  BOOST_MATH_IF_CONSTEXPR (std::numeric_limits<RealType>::has_infinity)
   { 
      return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
   }
   else
+  #endif
   { // Can only use max_value.
      using boost::math::tools::max_value;
      return std::pair<RealType, RealType>(-tools::max_value<RealType>(), max_value<RealType>()); // - to + max.
@@ -213,19 +229,19 @@ inline const std::pair<RealType, RealType> support(const cauchy_distribution<Rea
 }
 
 template <class RealType, class Policy>
-inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
 {  
    BOOST_MATH_STD_USING  // for ADL of std functions
 
-   static const char* function = "boost::math::pdf(cauchy<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(cauchy<%1%>&, %1%)";
    RealType result = 0;
    RealType location = dist.location();
    RealType scale = dist.scale();
-   if(false == detail::check_scale("boost::math::pdf(cauchy<%1%>&, %1%)", scale, &result, Policy()))
+   if(false == detail::check_scale(function, scale, &result, Policy()))
    {
       return result;
    }
-   if(false == detail::check_location("boost::math::pdf(cauchy<%1%>&, %1%)", location, &result, Policy()))
+   if(false == detail::check_location(function, location, &result, Policy()))
    {
       return result;
    }
@@ -250,31 +266,31 @@ inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const Rea
 } // pdf
 
 template <class RealType, class Policy>
-inline RealType cdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
 {
    return detail::cdf_imp(dist, x, false);
 } // cdf
 
 template <class RealType, class Policy>
-inline RealType quantile(const cauchy_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const cauchy_distribution<RealType, Policy>& dist, const RealType& p)
 {
    return detail::quantile_imp(dist, p, false);
 } // quantile
 
 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
 {
    return detail::cdf_imp(c.dist, c.param, true);
 } //  cdf complement
 
 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
 {
    return detail::quantile_imp(c.dist, c.param, true);
 } // quantile complement
 
 template <class RealType, class Policy>
-inline RealType mean(const cauchy_distribution<RealType, Policy>&)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const cauchy_distribution<RealType, Policy>&)
 {  // There is no mean:
    typedef typename Policy::assert_undefined_type assert_type;
    static_assert(assert_type::value == 0, "assert type is undefined");
@@ -287,7 +303,7 @@ inline RealType mean(const cauchy_distribution<RealType, Policy>&)
 }
 
 template <class RealType, class Policy>
-inline RealType variance(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no variance:
    typedef typename Policy::assert_undefined_type assert_type;
@@ -301,18 +317,19 @@ inline RealType variance(const cauchy_distribution<RealType, Policy>& /*dist*/)
 }
 
 template <class RealType, class Policy>
-inline RealType mode(const cauchy_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const cauchy_distribution<RealType, Policy>& dist)
 {
    return dist.location();
 }
 
 template <class RealType, class Policy>
-inline RealType median(const cauchy_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const cauchy_distribution<RealType, Policy>& dist)
 {
    return dist.location();
 }
+
 template <class RealType, class Policy>
-inline RealType skewness(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no skewness:
    typedef typename Policy::assert_undefined_type assert_type;
@@ -326,7 +343,7 @@ inline RealType skewness(const cauchy_distribution<RealType, Policy>& /*dist*/)
 }
 
 template <class RealType, class Policy>
-inline RealType kurtosis(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no kurtosis:
    typedef typename Policy::assert_undefined_type assert_type;
@@ -340,7 +357,7 @@ inline RealType kurtosis(const cauchy_distribution<RealType, Policy>& /*dist*/)
 }
 
 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no kurtosis excess:
    typedef typename Policy::assert_undefined_type assert_type;
@@ -354,7 +371,7 @@ inline RealType kurtosis_excess(const cauchy_distribution<RealType, Policy>& /*d
 }
 
 template <class RealType, class Policy>
-inline RealType entropy(const cauchy_distribution<RealType, Policy> & dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const cauchy_distribution<RealType, Policy> & dist)
 {
    using std::log;
    return log(2*constants::two_pi<RealType>()*dist.scale());
diff --git a/include/boost/math/distributions/detail/common_error_handling.hpp b/include/boost/math/distributions/detail/common_error_handling.hpp
index f03f2c49b..9922396d5 100644
--- a/include/boost/math/distributions/detail/common_error_handling.hpp
+++ b/include/boost/math/distributions/detail/common_error_handling.hpp
@@ -1,5 +1,6 @@
 // Copyright John Maddock 2006, 2007.
 // Copyright Paul A. Bristow 2006, 2007, 2012.
+// Copyright Matt Borland 2024
 
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
@@ -9,6 +10,7 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_COMMON_ERROR_HANDLING_HPP
 #define BOOST_MATH_DISTRIBUTIONS_COMMON_ERROR_HANDLING_HPP
 
+#include <boost/math/tools/config.hpp>
 #include <boost/math/policies/error_handling.hpp>
 #include <boost/math/special_functions/fpclassify.hpp>
 // using boost::math::isfinite;
@@ -23,7 +25,7 @@ namespace boost{ namespace math{ namespace detail
 {
 
 template <class RealType, class Policy>
-inline bool check_probability(const char* function, RealType const& prob, RealType* result, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool check_probability(const char* function, RealType const& prob, RealType* result, const Policy& pol)
 {
    if((prob < 0) || (prob > 1) || !(boost::math::isfinite)(prob))
    {
@@ -36,7 +38,7 @@ inline bool check_probability(const char* function, RealType const& prob, RealTy
 }
 
 template <class RealType, class Policy>
-inline bool check_df(const char* function, RealType const& df, RealType* result, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool check_df(const char* function, RealType const& df, RealType* result, const Policy& pol)
 { //  df > 0 but NOT +infinity allowed.
    if((df <= 0) || !(boost::math::isfinite)(df))
    {
@@ -49,7 +51,7 @@ inline bool check_df(const char* function, RealType const& df, RealType* result,
 }
 
 template <class RealType, class Policy>
-inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealType* result, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealType* result, const Policy& pol)
 {  // df > 0 or +infinity are allowed.
    if( (df <= 0) || (boost::math::isnan)(df) )
    { // is bad df <= 0 or NaN or -infinity.
@@ -63,7 +65,7 @@ inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealTy
 
 
 template <class RealType, class Policy>
-inline bool check_scale(
+BOOST_MATH_GPU_ENABLED inline bool check_scale(
       const char* function,
       RealType scale,
       RealType* result,
@@ -80,7 +82,7 @@ inline bool check_scale(
 }
 
 template <class RealType, class Policy>
-inline bool check_location(
+BOOST_MATH_GPU_ENABLED inline bool check_location(
       const char* function,
       RealType location,
       RealType* result,
@@ -97,7 +99,7 @@ inline bool check_location(
 }
 
 template <class RealType, class Policy>
-inline bool check_x(
+BOOST_MATH_GPU_ENABLED inline bool check_x(
       const char* function,
       RealType x,
       RealType* result,
@@ -118,7 +120,7 @@ inline bool check_x(
 } // bool check_x
 
 template <class RealType, class Policy>
-inline bool check_x_not_NaN(
+BOOST_MATH_GPU_ENABLED inline bool check_x_not_NaN(
   const char* function,
   RealType x,
   RealType* result,
@@ -138,7 +140,7 @@ inline bool check_x_not_NaN(
 } // bool check_x_not_NaN
 
 template <class RealType, class Policy>
-inline bool check_x_gt0(
+BOOST_MATH_GPU_ENABLED inline bool check_x_gt0(
       const char* function,
       RealType x,
       RealType* result,
@@ -159,7 +161,7 @@ inline bool check_x_gt0(
 } // bool check_x_gt0
 
 template <class RealType, class Policy>
-inline bool check_positive_x(
+BOOST_MATH_GPU_ENABLED inline bool check_positive_x(
       const char* function,
       RealType x,
       RealType* result,
@@ -179,13 +181,18 @@ inline bool check_positive_x(
 }
 
 template <class RealType, class Policy>
-inline bool check_non_centrality(
+BOOST_MATH_GPU_ENABLED inline bool check_non_centrality(
       const char* function,
       RealType ncp,
       RealType* result,
       const Policy& pol)
 {
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT // host build: keep the original function-local static
    static const RealType upper_limit = static_cast<RealType>((std::numeric_limits<long long>::max)()) - boost::math::policies::get_max_root_iterations<Policy>();
+   #else // GPU build: constexpr local built from the standard <climits> macro (LLONG_MAX, not the GNU-only LONG_LONG_MAX)
+   constexpr RealType upper_limit = static_cast<RealType>(LLONG_MAX) - boost::math::policies::get_max_root_iterations<Policy>();
+   #endif
+
    if((ncp < 0) || !(boost::math::isfinite)(ncp) || ncp > upper_limit)
    {
       *result = policies::raise_domain_error<RealType>(
@@ -197,7 +204,7 @@ inline bool check_non_centrality(
 }
 
 template <class RealType, class Policy>
-inline bool check_finite(
+BOOST_MATH_GPU_ENABLED inline bool check_finite(
       const char* function,
       RealType x,
       RealType* result,
diff --git a/include/boost/math/policies/error_handling.hpp b/include/boost/math/policies/error_handling.hpp
index 1329dba10..36ec8d9d6 100644
--- a/include/boost/math/policies/error_handling.hpp
+++ b/include/boost/math/policies/error_handling.hpp
@@ -598,7 +598,54 @@ BOOST_MATH_GPU_ENABLED constexpr TargetType raise_rounding_error(
    // This may or may not do the right thing, but the user asked for the error
    // to be ignored so here we go anyway:
    static_assert(std::numeric_limits<TargetType>::is_specialized, "The target type must have std::numeric_limits specialized.");
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT // host build: saturate via std::numeric_limits
    return  val > 0 ? (std::numeric_limits<TargetType>::max)() : (std::numeric_limits<TargetType>::is_integer ? (std::numeric_limits<TargetType>::min)() : -(std::numeric_limits<TargetType>::max)());
+   #else // GPU build: saturate using the <climits>/<cfloat> macros, selected by TargetType
+   BOOST_MATH_IF_CONSTEXPR (std::is_same<TargetType, short>::value)
+   {
+      return val > 0 ? SHRT_MAX : SHRT_MIN;
+   }
+   else BOOST_MATH_IF_CONSTEXPR (std::is_same<TargetType, int>::value)
+   {
+      return val > 0 ? INT_MAX : INT_MIN;
+   }
+   else BOOST_MATH_IF_CONSTEXPR (std::is_same<TargetType, long>::value)
+   {
+      return val > 0 ? LONG_MAX : LONG_MIN;
+   }
+   else BOOST_MATH_IF_CONSTEXPR (std::is_same<TargetType, long long>::value)
+   {
+      return val > 0 ? LLONG_MAX : LLONG_MIN;
+   }
+   else BOOST_MATH_IF_CONSTEXPR (std::is_same<TargetType, unsigned short>::value)
+   {
+      return val > 0 ? USHRT_MAX : static_cast<unsigned short>(0U);
+   }
+   else BOOST_MATH_IF_CONSTEXPR (std::is_same<TargetType, unsigned>::value)
+   {
+      return val > 0 ? UINT_MAX : 0U;
+   }
+   else BOOST_MATH_IF_CONSTEXPR (std::is_same<TargetType, unsigned long>::value)
+   {
+      return val > 0 ? ULONG_MAX : 0UL;
+   }
+   else BOOST_MATH_IF_CONSTEXPR (std::is_same<TargetType, unsigned long long>::value)
+   {
+      return val > 0 ? ULLONG_MAX : 0ULL;
+   }
+   else BOOST_MATH_IF_CONSTEXPR (std::is_same<TargetType, float>::value)
+   {
+      return val > 0 ? FLT_MAX : -FLT_MAX;
+   }
+   else BOOST_MATH_IF_CONSTEXPR (std::is_same<TargetType, double>::value)
+   {
+      return val > 0 ? DBL_MAX : -DBL_MAX;
+   }
+   else // no limit macro for this TargetType: return +1 / -1 so the sign still follows val
+   {
+      return val > 0 ? static_cast<TargetType>(1) : static_cast<TargetType>(-1);
+   }
+   #endif
 }
 
 template <class T, class TargetType>
diff --git a/include/boost/math/policies/policy.hpp b/include/boost/math/policies/policy.hpp
index a819a4cb6..f12837a2a 100644
--- a/include/boost/math/policies/policy.hpp
+++ b/include/boost/math/policies/policy.hpp
@@ -45,13 +45,6 @@ namespace policies{
 #  ifndef BOOST_MATH_PROMOTE_DOUBLE_POLICY
 #     define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
 #  endif
-#endif
-
-//
-// Refined support for sycl since it does not support errno
-//
-
-#ifdef BOOST_MATH_ENABLE_SYCL
 #  ifndef BOOST_MATH_DOMAIN_ERROR_POLICY
 #    define BOOST_MATH_DOMAIN_ERROR_POLICY ignore_error
 #  endif
diff --git a/include/boost/math/special_functions/cbrt.hpp b/include/boost/math/special_functions/cbrt.hpp
index 77cd5f0ae..2488bb5d4 100644
--- a/include/boost/math/special_functions/cbrt.hpp
+++ b/include/boost/math/special_functions/cbrt.hpp
@@ -10,6 +10,7 @@
 #pragma once
 #endif
 
+#include <boost/math/tools/config.hpp>
 #include <boost/math/tools/rational.hpp>
 #include <boost/math/policies/error_handling.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
@@ -38,7 +39,7 @@ struct largest_cbrt_int_type
 };
 
 template <typename T, typename Policy>
-T cbrt_imp(T z, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T cbrt_imp(T z, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    //
@@ -51,7 +52,7 @@ T cbrt_imp(T z, const Policy& pol)
    // Expected Error Term:                         -1.231e-006
    // Maximum Relative Change in Control Points:   5.982e-004
    //
-   static const T P[] = { 
+   BOOST_MATH_STATIC const T P[] = { 
       static_cast<T>(0.37568269008611818),
       static_cast<T>(1.3304968705558024),
       static_cast<T>(-1.4897101632445036),
@@ -59,7 +60,7 @@ T cbrt_imp(T z, const Policy& pol)
       static_cast<T>(-0.6398703759826468),
       static_cast<T>(0.13584489959258635),
    };
-   static const T correction[] = {
+   BOOST_MATH_STATIC const T correction[] = {
       static_cast<T>(0.62996052494743658238360530363911),  // 2^-2/3
       static_cast<T>(0.79370052598409973737585281963615),  // 2^-1/3
       static_cast<T>(1),
@@ -154,7 +155,7 @@ T cbrt_imp(T z, const Policy& pol)
 } // namespace detail
 
 template <typename T, typename Policy>
-inline typename tools::promote_args<T>::type cbrt(T z, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type cbrt(T z, const Policy& pol)
 {
    using result_type = typename tools::promote_args<T>::type;
    using value_type = typename policies::evaluation<result_type, Policy>::type;
@@ -162,7 +163,7 @@ inline typename tools::promote_args<T>::type cbrt(T z, const Policy& pol)
 }
 
 template <typename T>
-inline typename tools::promote_args<T>::type cbrt(T z)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type cbrt(T z)
 {
    return cbrt(z, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/detail/fp_traits.hpp b/include/boost/math/special_functions/detail/fp_traits.hpp
index 2947a32a2..015ea9cd3 100644
--- a/include/boost/math/special_functions/detail/fp_traits.hpp
+++ b/include/boost/math/special_functions/detail/fp_traits.hpp
@@ -4,6 +4,7 @@
 #define BOOST_MATH_FP_TRAITS_HPP
 
 // Copyright (c) 2006 Johan Rade
+// Copyright (c) 2024 Matt Borland
 
 // Distributed under the Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -24,6 +25,7 @@ With these techniques, the code could be simplified.
 #include <cstdint>
 #include <limits>
 #include <type_traits>
+#include <boost/math/tools/config.hpp>
 #include <boost/math/tools/is_standalone.hpp>
 #include <boost/math/tools/assert.hpp>
 
@@ -202,14 +204,14 @@ template<> struct fp_traits_non_native<float, single_precision>
 {
     typedef ieee_copy_all_bits_tag method;
 
-    static constexpr uint32_t sign        = 0x80000000u;
-    static constexpr uint32_t exponent    = 0x7f800000;
-    static constexpr uint32_t flag        = 0x00000000;
-    static constexpr uint32_t significand = 0x007fffff;
+    BOOST_MATH_STATIC constexpr uint32_t sign        = 0x80000000u;
+    BOOST_MATH_STATIC constexpr uint32_t exponent    = 0x7f800000;
+    BOOST_MATH_STATIC constexpr uint32_t flag        = 0x00000000;
+    BOOST_MATH_STATIC constexpr uint32_t significand = 0x007fffff;
 
     typedef uint32_t bits;
-    static void get_bits(float x, uint32_t& a) { std::memcpy(&a, &x, 4); }
-    static void set_bits(float& x, uint32_t a) { std::memcpy(&x, &a, 4); }
+    BOOST_MATH_GPU_ENABLED static void get_bits(float x, uint32_t& a) { std::memcpy(&a, &x, 4); }
+    BOOST_MATH_GPU_ENABLED static void set_bits(float& x, uint32_t a) { std::memcpy(&x, &a, 4); }
 };
 
 // ieee_tag version, double (64 bits) ----------------------------------------------
@@ -250,15 +252,15 @@ template<> struct fp_traits_non_native<double, double_precision>
 {
     typedef ieee_copy_all_bits_tag method;
 
-    static constexpr uint64_t sign     = static_cast<uint64_t>(0x80000000u) << 32;
-    static constexpr uint64_t exponent = static_cast<uint64_t>(0x7ff00000) << 32;
-    static constexpr uint64_t flag     = 0;
-    static constexpr uint64_t significand
+    BOOST_MATH_STATIC constexpr uint64_t sign     = static_cast<uint64_t>(0x80000000u) << 32;
+    BOOST_MATH_STATIC constexpr uint64_t exponent = static_cast<uint64_t>(0x7ff00000) << 32;
+    BOOST_MATH_STATIC constexpr uint64_t flag     = 0;
+    BOOST_MATH_STATIC constexpr uint64_t significand
         = (static_cast<uint64_t>(0x000fffff) << 32) + static_cast<uint64_t>(0xffffffffu);
 
     typedef uint64_t bits;
-    static void get_bits(double x, uint64_t& a) { std::memcpy(&a, &x, 8); }
-    static void set_bits(double& x, uint64_t a) { std::memcpy(&x, &a, 8); }
+    BOOST_MATH_GPU_ENABLED static void get_bits(double x, uint64_t& a) { std::memcpy(&a, &x, 8); }
+    BOOST_MATH_GPU_ENABLED static void set_bits(double& x, uint64_t a) { std::memcpy(&x, &a, 8); }
 };
 
 #endif
@@ -330,10 +332,10 @@ struct fp_traits_non_native<long double, extended_double_precision>
 {
     typedef ieee_copy_leading_bits_tag method;
 
-    static constexpr uint32_t sign        = 0x80000000u;
-    static constexpr uint32_t exponent    = 0x7fff0000;
-    static constexpr uint32_t flag        = 0x00008000;
-    static constexpr uint32_t significand = 0x00007fff;
+    BOOST_MATH_STATIC constexpr uint32_t sign        = 0x80000000u;
+    BOOST_MATH_STATIC constexpr uint32_t exponent    = 0x7fff0000;
+    BOOST_MATH_STATIC constexpr uint32_t flag        = 0x00008000;
+    BOOST_MATH_STATIC constexpr uint32_t significand = 0x00007fff;
 
     typedef uint32_t bits;
 
@@ -381,10 +383,10 @@ struct fp_traits_non_native<long double, extended_double_precision>
 {
     typedef ieee_copy_leading_bits_tag method;
 
-    static constexpr uint32_t sign        = 0x80000000u;
-    static constexpr uint32_t exponent    = 0x7ff00000;
-    static constexpr uint32_t flag        = 0x00000000;
-    static constexpr uint32_t significand = 0x000fffff;
+    BOOST_MATH_STATIC constexpr uint32_t sign        = 0x80000000u;
+    BOOST_MATH_STATIC constexpr uint32_t exponent    = 0x7ff00000;
+    BOOST_MATH_STATIC constexpr uint32_t flag        = 0x00000000;
+    BOOST_MATH_STATIC constexpr uint32_t significand = 0x000fffff;
 
     typedef uint32_t bits;
 
@@ -399,7 +401,7 @@ struct fp_traits_non_native<long double, extended_double_precision>
     }
 
 private:
-    static constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12;
+    BOOST_MATH_STATIC constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12;
 };
 
 
@@ -419,10 +421,10 @@ struct fp_traits_non_native<long double, extended_double_precision>
 {
     typedef ieee_copy_leading_bits_tag method;
 
-    static constexpr uint32_t sign        = 0x80000000u;
-    static constexpr uint32_t exponent    = 0x7fff0000;
-    static constexpr uint32_t flag        = 0x00008000;
-    static constexpr uint32_t significand = 0x00007fff;
+    BOOST_MATH_STATIC constexpr uint32_t sign        = 0x80000000u;
+    BOOST_MATH_STATIC constexpr uint32_t exponent    = 0x7fff0000;
+    BOOST_MATH_STATIC constexpr uint32_t flag        = 0x00008000;
+    BOOST_MATH_STATIC constexpr uint32_t significand = 0x00007fff;
 
     // copy 1st, 2nd, 5th and 6th byte. 3rd and 4th byte are padding.
 
@@ -455,10 +457,10 @@ struct fp_traits_non_native<long double, extended_double_precision>
 {
     typedef ieee_copy_leading_bits_tag method;
 
-    static constexpr uint32_t sign        = 0x80000000u;
-    static constexpr uint32_t exponent    = 0x7fff0000;
-    static constexpr uint32_t flag        = 0x00000000;
-    static constexpr uint32_t significand = 0x0000ffff;
+    BOOST_MATH_STATIC constexpr uint32_t sign        = 0x80000000u;
+    BOOST_MATH_STATIC constexpr uint32_t exponent    = 0x7fff0000;
+    BOOST_MATH_STATIC constexpr uint32_t flag        = 0x00000000;
+    BOOST_MATH_STATIC constexpr uint32_t significand = 0x0000ffff;
 
     typedef uint32_t bits;
 
@@ -473,7 +475,7 @@ struct fp_traits_non_native<long double, extended_double_precision>
     }
 
 private:
-    static constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12;
+    BOOST_MATH_STATIC constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12;
 };
 
 #endif
@@ -553,7 +555,8 @@ struct select_native<long double>
    && !defined(BOOST_MATH_DISABLE_STD_FPCLASSIFY)\
    && !defined(__INTEL_COMPILER)\
    && !defined(sun)\
-   && !defined(__VXWORKS__)
+   && !defined(__VXWORKS__)\
+   && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
 #  define BOOST_MATH_USE_STD_FPCLASSIFY
 #endif
 
diff --git a/include/boost/math/special_functions/detail/round_fwd.hpp b/include/boost/math/special_functions/detail/round_fwd.hpp
index c58459e36..7d69f8b9c 100644
--- a/include/boost/math/special_functions/detail/round_fwd.hpp
+++ b/include/boost/math/special_functions/detail/round_fwd.hpp
@@ -1,4 +1,5 @@
 // Copyright John Maddock 2008.
+// Copyright Matt Borland 2024
 
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
@@ -21,53 +22,53 @@ namespace boost
    { 
 
    template <class T, class Policy>
-   typename tools::promote_args<T>::type trunc(const T& v, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename tools::promote_args<T>::type trunc(const T& v, const Policy& pol);
    template <class T>
-   typename tools::promote_args<T>::type trunc(const T& v);
+   BOOST_MATH_GPU_ENABLED typename tools::promote_args<T>::type trunc(const T& v);
    template <class T, class Policy>
-   int itrunc(const T& v, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED int itrunc(const T& v, const Policy& pol);
    template <class T>
-   int itrunc(const T& v);
+   BOOST_MATH_GPU_ENABLED int itrunc(const T& v);
    template <class T, class Policy>
-   long ltrunc(const T& v, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED long ltrunc(const T& v, const Policy& pol);
    template <class T>
-   long ltrunc(const T& v);
+   BOOST_MATH_GPU_ENABLED long ltrunc(const T& v);
    template <class T, class Policy>
-   long long lltrunc(const T& v, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED long long lltrunc(const T& v, const Policy& pol);
    template <class T>
-   long long lltrunc(const T& v);
+   BOOST_MATH_GPU_ENABLED long long lltrunc(const T& v);
    template <class T, class Policy>
-   typename tools::promote_args<T>::type round(const T& v, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v, const Policy& pol);
    template <class T>
-   typename tools::promote_args<T>::type round(const T& v);
+   BOOST_MATH_GPU_ENABLED typename tools::promote_args<T>::type round(const T& v);
    template <class T, class Policy>
-   int iround(const T& v, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED int iround(const T& v, const Policy& pol);
    template <class T>
-   int iround(const T& v);
+   BOOST_MATH_GPU_ENABLED int iround(const T& v);
    template <class T, class Policy>
-   long lround(const T& v, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED long lround(const T& v, const Policy& pol);
    template <class T>
-   long lround(const T& v);
+   BOOST_MATH_GPU_ENABLED long lround(const T& v);
    template <class T, class Policy>
-   long long llround(const T& v, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED long long llround(const T& v, const Policy& pol);
    template <class T>
-   long long llround(const T& v);
+   BOOST_MATH_GPU_ENABLED long long llround(const T& v);
    template <class T, class Policy>
-   T modf(const T& v, T* ipart, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED T modf(const T& v, T* ipart, const Policy& pol);
    template <class T>
-   T modf(const T& v, T* ipart);
+   BOOST_MATH_GPU_ENABLED T modf(const T& v, T* ipart);
    template <class T, class Policy>
-   T modf(const T& v, int* ipart, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED T modf(const T& v, int* ipart, const Policy& pol);
    template <class T>
-   T modf(const T& v, int* ipart);
+   BOOST_MATH_GPU_ENABLED T modf(const T& v, int* ipart);
    template <class T, class Policy>
-   T modf(const T& v, long* ipart, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED T modf(const T& v, long* ipart, const Policy& pol);
    template <class T>
-   T modf(const T& v, long* ipart);
+   BOOST_MATH_GPU_ENABLED T modf(const T& v, long* ipart);
    template <class T, class Policy>
-   T modf(const T& v, long long* ipart, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED T modf(const T& v, long long* ipart, const Policy& pol);
    template <class T>
-   T modf(const T& v, long long* ipart);
+   BOOST_MATH_GPU_ENABLED T modf(const T& v, long long* ipart);
    }
 }
 
diff --git a/include/boost/math/special_functions/ellint_1.hpp b/include/boost/math/special_functions/ellint_1.hpp
index dfc1815f7..f7fbbce40 100644
--- a/include/boost/math/special_functions/ellint_1.hpp
+++ b/include/boost/math/special_functions/ellint_1.hpp
@@ -1,5 +1,6 @@
 //  Copyright (c) 2006 Xiaogang Zhang
 //  Copyright (c) 2006 John Maddock
+//  Copyright (c) 2024 Matt Borland
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -18,6 +19,7 @@
 #pragma once
 #endif
 
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/ellint_rf.hpp>
 #include <boost/math/constants/constants.hpp>
@@ -36,13 +38,13 @@ typename tools::promote_args<T1, T2>::type ellint_1(T1 k, T2 phi, const Policy&
 namespace detail{
 
 template <typename T, typename Policy>
-T ellint_k_imp(T k, const Policy& pol, std::integral_constant<int, 0> const&);
+BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_constant<int, 0> const&);
 template <typename T, typename Policy>
-T ellint_k_imp(T k, const Policy& pol, std::integral_constant<int, 1> const&);
+BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_constant<int, 1> const&);
 template <typename T, typename Policy>
-T ellint_k_imp(T k, const Policy& pol, std::integral_constant<int, 2> const&);
+BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_constant<int, 2> const&);
 template <typename T, typename Policy>
-T ellint_k_imp(T k, const Policy& pol, T one_minus_k2);
+BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, T one_minus_k2);
 
 // Elliptic integral (Legendre form) of the first kind
 template <typename T, typename Policy>
@@ -760,7 +762,7 @@ BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_cons
 }
 
 template <typename T, typename Policy>
-BOOST_MATH_FORCEINLINE typename tools::promote_args<T>::type ellint_1(T k, const Policy& pol, const std::true_type&)
+typename tools::promote_args<T>::type ellint_1(T k, const Policy& pol, const std::true_type&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -776,7 +778,7 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args<T>::type ellint_1(T k, const
 }
 
 template <class T1, class T2>
-BOOST_MATH_FORCEINLINE typename tools::promote_args<T1, T2>::type ellint_1(T1 k, T2 phi, const std::false_type&)
+typename tools::promote_args<T1, T2>::type ellint_1(T1 k, T2 phi, const std::false_type&)
 {
    return boost::math::ellint_1(k, phi, policies::policy<>());
 }
@@ -785,14 +787,14 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args<T1, T2>::type ellint_1(T1 k,
 
 // Complete elliptic integral (Legendre form) of the first kind
 template <typename T>
-BOOST_MATH_FORCEINLINE typename tools::promote_args<T>::type ellint_1(T k)
+typename tools::promote_args<T>::type ellint_1(T k)
 {
    return ellint_1(k, policies::policy<>());
 }
 
 // Elliptic integral (Legendre form) of the first kind
 template <class T1, class T2, class Policy>
-BOOST_MATH_FORCEINLINE typename tools::promote_args<T1, T2>::type ellint_1(T1 k, T2 phi, const Policy& pol)  // LCOV_EXCL_LINE gcc misses this but sees the function body, strange!
+typename tools::promote_args<T1, T2>::type ellint_1(T1 k, T2 phi, const Policy& pol)  // LCOV_EXCL_LINE gcc misses this but sees the function body, strange!
 {
    typedef typename tools::promote_args<T1, T2>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -800,7 +802,7 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args<T1, T2>::type ellint_1(T1 k,
 }
 
 template <class T1, class T2>
-BOOST_MATH_FORCEINLINE typename tools::promote_args<T1, T2>::type ellint_1(T1 k, T2 phi)
+typename tools::promote_args<T1, T2>::type ellint_1(T1 k, T2 phi)
 {
    typedef typename policies::is_policy<T2>::type tag_type;
    return detail::ellint_1(k, phi, tag_type());
diff --git a/include/boost/math/special_functions/ellint_2.hpp b/include/boost/math/special_functions/ellint_2.hpp
index b09cdd490..5e2552cec 100644
--- a/include/boost/math/special_functions/ellint_2.hpp
+++ b/include/boost/math/special_functions/ellint_2.hpp
@@ -1,5 +1,6 @@
 //  Copyright (c) 2006 Xiaogang Zhang
 //  Copyright (c) 2006 John Maddock
+//  Copyright (c) 2024 Matt Borland
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -38,11 +39,11 @@ typename tools::promote_args<T1, T2>::type ellint_2(T1 k, T2 phi, const Policy&
 namespace detail{
 
 template <typename T, typename Policy>
-T ellint_e_imp(T k, const Policy& pol, const std::integral_constant<int, 0>&);
+BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const std::integral_constant<int, 0>&);
 template <typename T, typename Policy>
-T ellint_e_imp(T k, const Policy& pol, const std::integral_constant<int, 1>&);
+BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const std::integral_constant<int, 1>&);
 template <typename T, typename Policy>
-T ellint_e_imp(T k, const Policy& pol, const std::integral_constant<int, 2>&);
+BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const std::integral_constant<int, 2>&);
 
 // Elliptic integral (Legendre form) of the second kind
 template <typename T, typename Policy>
@@ -701,7 +702,7 @@ BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, std::integral_cons
 }
 
 template <typename T, typename Policy>
-BOOST_MATH_FORCEINLINE typename tools::promote_args<T>::type ellint_2(T k, const Policy& pol, const std::true_type&)
+typename tools::promote_args<T>::type ellint_2(T k, const Policy& pol, const std::true_type&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -714,7 +715,7 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args<T>::type ellint_2(T k, const
 
 // Elliptic integral (Legendre form) of the second kind
 template <class T1, class T2>
-BOOST_MATH_FORCEINLINE typename tools::promote_args<T1, T2>::type ellint_2(T1 k, T2 phi, const std::false_type&)
+typename tools::promote_args<T1, T2>::type ellint_2(T1 k, T2 phi, const std::false_type&)
 {
    return boost::math::ellint_2(k, phi, policies::policy<>());
 }
@@ -723,21 +724,21 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args<T1, T2>::type ellint_2(T1 k,
 
 // Complete elliptic integral (Legendre form) of the second kind
 template <typename T>
-BOOST_MATH_FORCEINLINE typename tools::promote_args<T>::type ellint_2(T k)
+typename tools::promote_args<T>::type ellint_2(T k)
 {
    return ellint_2(k, policies::policy<>());
 }
 
 // Elliptic integral (Legendre form) of the second kind
 template <class T1, class T2>
-BOOST_MATH_FORCEINLINE typename tools::promote_args<T1, T2>::type ellint_2(T1 k, T2 phi)
+typename tools::promote_args<T1, T2>::type ellint_2(T1 k, T2 phi)
 {
    typedef typename policies::is_policy<T2>::type tag_type;
    return detail::ellint_2(k, phi, tag_type());
 }
 
 template <class T1, class T2, class Policy>
-BOOST_MATH_FORCEINLINE typename tools::promote_args<T1, T2>::type ellint_2(T1 k, T2 phi, const Policy& pol)  // LCOV_EXCL_LINE gcc misses this but sees the function body, strange!
+typename tools::promote_args<T1, T2>::type ellint_2(T1 k, T2 phi, const Policy& pol)  // LCOV_EXCL_LINE gcc misses this but sees the function body, strange!
 {
    typedef typename tools::promote_args<T1, T2>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
diff --git a/include/boost/math/special_functions/expm1.hpp b/include/boost/math/special_functions/expm1.hpp
index eec635603..a5af573cd 100644
--- a/include/boost/math/special_functions/expm1.hpp
+++ b/include/boost/math/special_functions/expm1.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -294,18 +295,18 @@ inline typename tools::promote_args<T>::type expm1(T x, const Policy& /* pol */)
 
 #if defined(BOOST_HAS_EXPM1) && !(defined(__osf__) && defined(__DECCXX_VER))
 #  ifdef BOOST_MATH_USE_C99
-inline float expm1(float x, const policies::policy<>&){ return ::expm1f(x); }
+BOOST_MATH_GPU_ENABLED inline float expm1(float x, const policies::policy<>&){ return ::expm1f(x); }
 #     ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
 inline long double expm1(long double x, const policies::policy<>&){ return ::expm1l(x); }
 #     endif
 #  else
 inline float expm1(float x, const policies::policy<>&){ return static_cast<float>(::expm1(x)); }
 #  endif
-inline double expm1(double x, const policies::policy<>&){ return ::expm1(x); }
+BOOST_MATH_GPU_ENABLED inline double expm1(double x, const policies::policy<>&){ return ::expm1(x); }
 #endif
 
 template <class T>
-inline typename tools::promote_args<T>::type expm1(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type expm1(T x)
 {
    return expm1(x, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp
index 6119c8e86..e2ac58e85 100644
--- a/include/boost/math/special_functions/math_fwd.hpp
+++ b/include/boost/math/special_functions/math_fwd.hpp
@@ -4,6 +4,7 @@
 
 // Copyright Paul A. Bristow 2006.
 // Copyright John Maddock 2006.
+// Copyright Matt Borland 2024.
 
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
@@ -26,6 +27,7 @@
 #include <vector>
 #include <complex>
 #include <type_traits>
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/detail/round_fwd.hpp>
 #include <boost/math/tools/promotion.hpp> // for argument promotion.
 #include <boost/math/policies/policy.hpp>
@@ -561,10 +563,10 @@ namespace boost
 
    // cbrt - cube root.
    template <class RT>
-   tools::promote_args_t<RT> cbrt(RT z);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT> cbrt(RT z);
 
    template <class RT, class Policy>
-   tools::promote_args_t<RT> cbrt(RT z, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT> cbrt(RT z, const Policy&);
 
    // log1p is log(x + 1)
    template <class T>
@@ -582,10 +584,10 @@ namespace boost
 
    // Exp (x) minus 1 functions.
    template <class T>
-   tools::promote_args_t<T> expm1(T);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> expm1(T);
 
    template <class T, class Policy>
-   tools::promote_args_t<T> expm1(T, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> expm1(T, const Policy&);
 
    // Power - 1
    template <class T1, class T2>
@@ -883,16 +885,18 @@ namespace boost
    bool isnormal BOOST_NO_MACRO_EXPAND(T t);
 
    template<class T>
-   int signbit BOOST_NO_MACRO_EXPAND(T x);
+   BOOST_MATH_GPU_ENABLED int signbit BOOST_NO_MACRO_EXPAND(T x);
 
    template <class T>
-   int sign BOOST_NO_MACRO_EXPAND(const T& z);
+   BOOST_MATH_GPU_ENABLED int sign BOOST_NO_MACRO_EXPAND(const T& z);
 
    template <class T, class U>
-   typename tools::promote_args_permissive<T, U>::type copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y);
+   BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive<T, U>::type 
+   copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y);
 
    template <class T>
-   typename tools::promote_args_permissive<T>::type changesign BOOST_NO_MACRO_EXPAND(const T& z);
+   BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive<T>::type 
+   changesign BOOST_NO_MACRO_EXPAND(const T& z);
 
    // Exponential integrals:
    namespace detail{
@@ -1191,13 +1195,13 @@ namespace boost
 #define BOOST_MATH_DETAIL_LL_FUNC(Policy)\
    \
    template <class T>\
-   inline T modf(const T& v, long long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
    \
    template <class T>\
-   inline long long lltrunc(const T& v){ using boost::math::lltrunc; return lltrunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v){ using boost::math::lltrunc; return lltrunc(v, Policy()); }\
    \
    template <class T>\
-   inline long long llround(const T& v){ using boost::math::llround; return llround(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long long llround(const T& v){ using boost::math::llround; return llround(v, Policy()); }\
 
 #  define BOOST_MATH_DETAIL_11_FUNC(Policy)\
    template <class T, class U, class V>\
@@ -1473,7 +1477,7 @@ namespace boost
    inline boost::math::tools::promote_args_t<T> log1pmx(T x){ return boost::math::log1pmx(x, Policy()); }\
 \
    template <class T>\
-   inline boost::math::tools::promote_args_t<T> expm1(T x){ return boost::math::expm1(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> expm1(T x){ return boost::math::expm1(x, Policy()); }\
 \
    template <class T1, class T2>\
    inline boost::math::tools::promote_args_t<T1, T2> \
@@ -1594,31 +1598,31 @@ template <class OutputIterator, class T>\
    inline boost::math::tools::promote_args_t<T> zeta(T s){ return boost::math::zeta(s, Policy()); }\
    \
    template <class T>\
-   inline T round(const T& v){ using boost::math::round; return round(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T round(const T& v){ using boost::math::round; return round(v, Policy()); }\
    \
    template <class T>\
-   inline int iround(const T& v){ using boost::math::iround; return iround(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline int iround(const T& v){ using boost::math::iround; return iround(v, Policy()); }\
    \
    template <class T>\
-   inline long lround(const T& v){ using boost::math::lround; return lround(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long lround(const T& v){ using boost::math::lround; return lround(v, Policy()); }\
    \
    template <class T>\
-   inline T trunc(const T& v){ using boost::math::trunc; return trunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T trunc(const T& v){ using boost::math::trunc; return trunc(v, Policy()); }\
    \
    template <class T>\
-   inline int itrunc(const T& v){ using boost::math::itrunc; return itrunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v){ using boost::math::itrunc; return itrunc(v, Policy()); }\
    \
    template <class T>\
-   inline long ltrunc(const T& v){ using boost::math::ltrunc; return ltrunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v){ using boost::math::ltrunc; return ltrunc(v, Policy()); }\
    \
    template <class T>\
-   inline T modf(const T& v, T* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
    \
    template <class T>\
-   inline T modf(const T& v, int* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
    \
    template <class T>\
-   inline T modf(const T& v, long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
    \
    template <int N, class T>\
    inline boost::math::tools::promote_args_t<T> pow(T v){ return boost::math::pow<N>(v, Policy()); }\
diff --git a/include/boost/math/special_functions/modf.hpp b/include/boost/math/special_functions/modf.hpp
index 75e6be9f4..e08945dca 100644
--- a/include/boost/math/special_functions/modf.hpp
+++ b/include/boost/math/special_functions/modf.hpp
@@ -1,4 +1,5 @@
 //  Copyright John Maddock 2007.
+//  Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -17,49 +18,49 @@
 namespace boost{ namespace math{
 
 template <class T, class Policy>
-inline T modf(const T& v, T* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart, const Policy& pol)
 {
    *ipart = trunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, T* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline T modf(const T& v, int* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart, const Policy& pol)
 {
    *ipart = itrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, int* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline T modf(const T& v, long* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart, const Policy& pol)
 {
    *ipart = ltrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, long* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline T modf(const T& v, long long* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart, const Policy& pol)
 {
    *ipart = lltrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, long long* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/round.hpp b/include/boost/math/special_functions/round.hpp
index e74acba85..3df0145b6 100644
--- a/include/boost/math/special_functions/round.hpp
+++ b/include/boost/math/special_functions/round.hpp
@@ -30,7 +30,7 @@ namespace boost{ namespace math{
 namespace detail{
 
 template <class T, class Policy>
-inline tools::promote_args_t<T> round(const T& v, const Policy& pol, const std::false_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v, const Policy& pol, const std::false_type&)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
@@ -65,7 +65,7 @@ inline tools::promote_args_t<T> round(const T& v, const Policy& pol, const std::
    }
 }
 template <class T, class Policy>
-inline tools::promote_args_t<T> round(const T& v, const Policy&, const std::true_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v, const Policy&, const std::true_type&)
 {
    return v;
 }
@@ -73,12 +73,12 @@ inline tools::promote_args_t<T> round(const T& v, const Policy&, const std::true
 } // namespace detail
 
 template <class T, class Policy>
-inline tools::promote_args_t<T> round(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v, const Policy& pol)
 {
    return detail::round(v, pol, std::integral_constant<bool, detail::is_integer_for_rounding<T>::value>());
 }
 template <class T>
-inline tools::promote_args_t<T> round(const T& v)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v)
 {
    return round(v, policies::policy<>());
 }
@@ -103,7 +103,7 @@ inline int iround(const T& v, const Policy& pol)
 
    result_type r = boost::math::round(v, pol);
 
-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
@@ -127,7 +127,7 @@ inline int iround(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);
 
    if (r >= max_val || r < -max_val)
    {
@@ -138,20 +138,20 @@ inline int iround(const T& v, const Policy& pol)
    return static_cast<int>(r);
 }
 template <class T>
-inline int iround(const T& v)
+BOOST_MATH_GPU_ENABLED inline int iround(const T& v)
 {
    return iround(v, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline long lround(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long lround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
 
    result_type r = boost::math::round(v, pol);
    
-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
@@ -175,7 +175,7 @@ inline long lround(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);
 
    if (r >= max_val || r < -max_val)
    {
@@ -186,20 +186,20 @@ inline long lround(const T& v, const Policy& pol)
    return static_cast<long>(r);
 }
 template <class T>
-inline long lround(const T& v)
+BOOST_MATH_GPU_ENABLED inline long lround(const T& v)
 {
    return lround(v, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline long long llround(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long long llround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = boost::math::tools::promote_args_t<T>;
 
    result_type r = boost::math::round(v, pol);
 
-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
@@ -223,7 +223,7 @@ inline long long llround(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);
 
    if (r >= max_val || r < -max_val)
    {
@@ -234,7 +234,7 @@ inline long long llround(const T& v, const Policy& pol)
    return static_cast<long long>(r);
 }
 template <class T>
-inline long long llround(const T& v)
+BOOST_MATH_GPU_ENABLED inline long long llround(const T& v)
 {
    return llround(v, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/sign.hpp b/include/boost/math/special_functions/sign.hpp
index 8f9fc4793..cd7fdfb2a 100644
--- a/include/boost/math/special_functions/sign.hpp
+++ b/include/boost/math/special_functions/sign.hpp
@@ -1,6 +1,7 @@
 //  (C) Copyright John Maddock 2006.
 //  (C) Copyright Johan Rade 2006.
 //  (C) Copyright Paul A. Bristow 2011 (added changesign).
+//  (C) Copyright Matt Borland 2024.
 
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
@@ -35,13 +36,13 @@ namespace detail {
     // signed zero or NaN.
 
     template<class T>
-    inline int signbit_impl(T x, generic_tag<true> const&)
+    BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, generic_tag<true> const&)
     {
         return x < 0;
     }
 
     template<class T> 
-    inline int signbit_impl(T x, generic_tag<false> const&)
+    BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, generic_tag<false> const&)
     {
         return x < 0;
     }
@@ -91,13 +92,13 @@ namespace detail {
     // signed zero or NaN.
 
     template<class T>
-    inline T (changesign_impl)(T x, generic_tag<true> const&)
+    BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, generic_tag<true> const&)
     {
         return -x;
     }
 
     template<class T>
-    inline T (changesign_impl)(T x, generic_tag<false> const&)
+    BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, generic_tag<false> const&)
     {
         return -x;
     }
@@ -124,7 +125,7 @@ namespace detail {
 #endif
 
     template<class T>
-    inline T changesign_impl(T x, ieee_copy_all_bits_tag const&)
+    BOOST_MATH_GPU_ENABLED inline T changesign_impl(T x, ieee_copy_all_bits_tag const&)
     {
         typedef typename fp_traits<T>::sign_change_type traits;
 
@@ -136,7 +137,7 @@ namespace detail {
     }
 
     template<class T>
-    inline T (changesign_impl)(T x, ieee_copy_leading_bits_tag const&)
+    BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, ieee_copy_leading_bits_tag const&)
     {
         typedef typename fp_traits<T>::sign_change_type traits;
 
@@ -150,7 +151,8 @@ namespace detail {
 
 }   // namespace detail
 
-template<class T> int (signbit)(T x)
+template<class T> 
+BOOST_MATH_GPU_ENABLED int (signbit)(T x)
 { 
    typedef typename detail::fp_traits<T>::type traits;
    typedef typename traits::method method;
@@ -160,12 +162,13 @@ template<class T> int (signbit)(T x)
 }
 
 template <class T>
-inline int sign BOOST_NO_MACRO_EXPAND(const T& z)
+BOOST_MATH_GPU_ENABLED inline int sign BOOST_NO_MACRO_EXPAND(const T& z)
 {
    return (z == 0) ? 0 : (boost::math::signbit)(z) ? -1 : 1;
 }
 
-template <class T> typename tools::promote_args_permissive<T>::type (changesign)(const T& x)
+template <class T> 
+BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive<T>::type (changesign)(const T& x)
 { //!< \brief return unchanged binary pattern of x, except for change of sign bit. 
    typedef typename detail::fp_traits<T>::sign_change_type traits;
    typedef typename traits::method method;
@@ -176,7 +179,7 @@ template <class T> typename tools::promote_args_permissive<T>::type (changesign)
 }
 
 template <class T, class U>
-inline typename tools::promote_args_permissive<T, U>::type 
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args_permissive<T, U>::type 
    copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y)
 {
    BOOST_MATH_STD_USING
diff --git a/include/boost/math/special_functions/trunc.hpp b/include/boost/math/special_functions/trunc.hpp
index a084de560..4288225f1 100644
--- a/include/boost/math/special_functions/trunc.hpp
+++ b/include/boost/math/special_functions/trunc.hpp
@@ -27,7 +27,7 @@
 namespace boost{ namespace math{ namespace detail{
 
 template <class T, class Policy>
-inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol, const std::false_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol, const std::false_type&)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
@@ -39,20 +39,21 @@ inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol, const std::
 }
 
 template <class T, class Policy>
-inline tools::promote_args_t<T> trunc(const T& v, const Policy&, const std::true_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v, const Policy&, const std::true_type&)
 {
    return v;
 }
 
-}
+} // namespace detail
 
 template <class T, class Policy>
-inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol)
 {
    return detail::trunc(v, pol, std::integral_constant<bool, detail::is_integer_for_rounding<T>::value>());
 }
+
 template <class T>
-inline tools::promote_args_t<T> trunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v)
 {
    return trunc(v, policies::policy<>());
 }
@@ -70,13 +71,13 @@ inline tools::promote_args_t<T> trunc(const T& v)
 // https://stackoverflow.com/questions/27442885/syntax-error-with-stdnumeric-limitsmax
 //
 template <class T, class Policy>
-inline int itrunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
    result_type r = boost::math::trunc(v, pol);
 
-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
@@ -100,7 +101,7 @@ inline int itrunc(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);
 
    if (r >= max_val || r < -max_val)
    {
@@ -110,20 +111,21 @@ inline int itrunc(const T& v, const Policy& pol)
 
    return static_cast<int>(r);
 }
+
 template <class T>
-inline int itrunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v)
 {
    return itrunc(v, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline long ltrunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
    result_type r = boost::math::trunc(v, pol);
 
-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
@@ -147,7 +149,7 @@ inline long ltrunc(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);
 
    if (r >= max_val || r < -max_val)
    {
@@ -157,20 +159,21 @@ inline long ltrunc(const T& v, const Policy& pol)
 
    return static_cast<long>(r);
 }
+
 template <class T>
-inline long ltrunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v)
 {
    return ltrunc(v, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline long long lltrunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
    result_type r = boost::math::trunc(v, pol);
 
-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
@@ -194,7 +197,7 @@ inline long long lltrunc(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);
 
    if (r >= max_val || r < -max_val)
    {
@@ -204,21 +207,22 @@ inline long long lltrunc(const T& v, const Policy& pol)
 
    return static_cast<long long>(r);
 }
+
 template <class T>
-inline long long lltrunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v)
 {
    return lltrunc(v, policies::policy<>());
 }
 
 template <class T, class Policy>
-inline typename std::enable_if<std::is_constructible<int, T>::value, int>::type
+BOOST_MATH_GPU_ENABLED inline typename std::enable_if<std::is_constructible<int, T>::value, int>::type
    iconvert(const T& v, const Policy&)
 {
    return static_cast<int>(v);
 }
 
 template <class T, class Policy>
-inline typename std::enable_if<!std::is_constructible<int, T>::value, int>::type
+BOOST_MATH_GPU_ENABLED inline typename std::enable_if<!std::is_constructible<int, T>::value, int>::type
    iconvert(const T& v, const Policy& pol)
 {
    using boost::math::itrunc;
@@ -226,14 +230,14 @@ inline typename std::enable_if<!std::is_constructible<int, T>::value, int>::type
 }
 
 template <class T, class Policy>
-inline typename std::enable_if<std::is_constructible<long, T>::value, long>::type
+BOOST_MATH_GPU_ENABLED inline typename std::enable_if<std::is_constructible<long, T>::value, long>::type
    lconvert(const T& v, const Policy&)
 {
    return static_cast<long>(v);
 }
 
 template <class T, class Policy>
-inline typename std::enable_if<!std::is_constructible<long, T>::value, long>::type
+BOOST_MATH_GPU_ENABLED inline typename std::enable_if<!std::is_constructible<long, T>::value, long>::type
    lconvert(const T& v, const Policy& pol)
 {
    using boost::math::ltrunc;
@@ -241,14 +245,14 @@ inline typename std::enable_if<!std::is_constructible<long, T>::value, long>::ty
 }
 
 template <class T, class Policy>
-inline typename std::enable_if<std::is_constructible<long long, T>::value, long long>::type
+BOOST_MATH_GPU_ENABLED inline typename std::enable_if<std::is_constructible<long long, T>::value, long long>::type
    llconvertert(const T& v, const Policy&)
 {
    return static_cast<long long>(v);
 }
 
 template <class T, class Policy>
-inline typename std::enable_if<!std::is_constructible<long long, T>::value, long long>::type
+BOOST_MATH_GPU_ENABLED inline typename std::enable_if<!std::is_constructible<long long, T>::value, long long>::type
    llconvertert(const T& v, const Policy& pol)
 {
    using boost::math::lltrunc;
diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp
index e5b339483..e7f3e54fd 100644
--- a/include/boost/math/tools/config.hpp
+++ b/include/boost/math/tools/config.hpp
@@ -699,6 +699,7 @@ namespace boost{ namespace math{
 
 // spir64 does not support long double
 #  define BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+#  define BOOST_MATH_NO_REAL_CONCEPT_TESTS
 
 #  undef BOOST_MATH_FORCEINLINE
 #  define BOOST_MATH_FORCEINLINE inline
@@ -731,15 +732,15 @@ BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_min(const T& a, const T& b) { return
 template <class T>
 BOOST_MATH_GPU_ENABLED constexpr T cuda_safe_max(const T& a, const T& b) { return a > b ? a : b; }
 
-#define BOOST_MATH_GPU_SAFE_SWAP(a, b) gpu_safe_swap(a, b);
-#define BOOST_MATH_GPU_SAFE_MIN(a, b) gpu_safe_min(a, b);
-#define BOOST_MATH_GPU_SAFE_MAX(a, b) gpu_safe_max(a, b);
+#define BOOST_MATH_GPU_SAFE_SWAP(a, b) gpu_safe_swap(a, b)
+#define BOOST_MATH_GPU_SAFE_MIN(a, b) gpu_safe_min(a, b)
+#define BOOST_MATH_GPU_SAFE_MAX(a, b) gpu_safe_max(a, b)
 
 #else
 
-#define BOOST_MATH_GPU_SAFE_SWAP(a, b) std::swap(a, b);
-#define BOOST_MATH_GPU_SAFE_MIN(a, b) (std::min)(a, b);
-#define BOOST_MATH_GPU_SAFE_MAX(a, b) (std::max)(a, b);
+#define BOOST_MATH_GPU_SAFE_SWAP(a, b) std::swap(a, b)
+#define BOOST_MATH_GPU_SAFE_MIN(a, b) (std::min)(a, b)
+#define BOOST_MATH_GPU_SAFE_MAX(a, b) (std::max)(a, b)
 
 #endif
 
@@ -749,13 +750,20 @@ BOOST_MATH_GPU_ENABLED constexpr T cuda_safe_max(const T& a, const T& b) { retur
 #if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L
 #  define BOOST_MATH_STATIC_CONSTEXPR inline constexpr
 #  define BOOST_MATH_STATIC static
+#  ifndef BOOST_MATH_HAS_GPU_SUPPORT // host-only build: function-local constants stay static
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE static
+#  else
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE
+#  endif
 #else
 #  ifndef BOOST_MATH_HAS_GPU_SUPPORT
 #    define BOOST_MATH_STATIC_CONSTEXPR static constexpr
 #    define BOOST_MATH_STATIC static
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE static // host-only build: same choice as the branch above
 #  else
 #    define BOOST_MATH_STATIC_CONSTEXPR constexpr
 #    define BOOST_MATH_STATIC constexpr
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE // GPU build: expands to nothing, as in the branch above
 #  endif
 #endif
 
diff --git a/include_private/boost/math/tools/test.hpp b/include_private/boost/math/tools/test.hpp
index 10f6143e2..8f4db18c1 100644
--- a/include_private/boost/math/tools/test.hpp
+++ b/include_private/boost/math/tools/test.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -253,6 +254,7 @@ test_result<Real> test_hetero(const A& a, F1 test_func, F2 expect_func)
    return result;
 }
 
+#ifndef BOOST_MATH_NO_EXCEPTIONS
 template <class Val, class Exception>
 void test_check_throw(Val, Exception)
 {
@@ -293,6 +295,7 @@ void test_check_throw(Val v, boost::math::rounding_error const*)
       BOOST_CHECK((v == boost::math::tools::max_value<Val>()) || (v == -boost::math::tools::max_value<Val>()));
    }
 }
+#endif
 
 } // namespace tools
 } // namespace math
@@ -303,7 +306,7 @@ void test_check_throw(Val v, boost::math::rounding_error const*)
   // exception-free testing support, ideally we'd only define this in our tests,
   // but to keep things simple we really need it somewhere that's always included:
   //
-#ifdef BOOST_NO_EXCEPTIONS
+#ifdef BOOST_MATH_NO_EXCEPTIONS
 #  define BOOST_MATH_CHECK_THROW(x, ExceptionType) boost::math::tools::test_check_throw(x, static_cast<ExceptionType const*>(nullptr));
 #else
 #  define BOOST_MATH_CHECK_THROW(x, y) BOOST_CHECK_THROW(x, y)
diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index f4740a210..65c5d71be 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -24,8 +24,26 @@ run test_arcsine_pdf_float.cu ;
 run test_arcsine_quan_double.cu ;
 run test_arcsine_quan_float.cu ;
 run test_binomial.cpp ;
+run test_cauchy_cdf_double.cu ;
+run test_cauchy_cdf_float.cu ;
+run test_cauchy_pdf_double.cu ;
+run test_cauchy_pdf_float.cu ;
+run test_cauchy_quan_double.cu ;
+run test_cauchy_quan_float.cu ;
 
 # Special Functions
 # run test_beta_simple.cpp ;
 run test_beta_double.cu ;
-run test_beta_float.cu ;
\ No newline at end of file
+run test_beta_float.cu ;
+run test_cbrt_double.cu ;
+run test_cbrt_float.cu ;
+run test_changesign_double.cu ;
+run test_changesign_float.cu ;
+run test_expm1_double.cu ;
+run test_expm1_float.cu ;
+run test_modf_double.cu ;
+run test_modf_float.cu ;
+run test_round_double.cu ;
+run test_round_float.cu ;
+run test_trunc_double.cu ;
+run test_trunc_float.cu ;
diff --git a/test/handle_test_result.hpp b/test/handle_test_result.hpp
index e909d6458..66bfe557b 100644
--- a/test/handle_test_result.hpp
+++ b/test/handle_test_result.hpp
@@ -6,8 +6,8 @@
 #ifndef BOOST_MATH_HANDLE_TEST_RESULT
 #define BOOST_MATH_HANDLE_TEST_RESULT
 
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/tools/stats.hpp>
-#include <boost/math/tools/test.hpp>
 #include <boost/math/tools/precision.hpp>
 #include <boost/regex.hpp>
 #include <boost/test/test_tools.hpp>
diff --git a/test/sycl_jamfile b/test/sycl_jamfile
index 679bae0dc..e8bd12f8c 100644
--- a/test/sycl_jamfile
+++ b/test/sycl_jamfile
@@ -11,6 +11,11 @@ project : requirements
 
 # Distributions
 run test_arcsine.cpp ;
+run test_cauchy.cpp ;
 
 # Special Functions
 run test_beta_simple.cpp ;
+run test_cbrt.cpp ;
+run test_sign.cpp ;
+run test_round.cpp ;
+run test_expm1_simple.cpp;
diff --git a/test/test_cauchy.cpp b/test/test_cauchy.cpp
index 002690e5f..75c8c4f1e 100644
--- a/test/test_cauchy.cpp
+++ b/test/test_cauchy.cpp
@@ -18,17 +18,17 @@
 
 // #define BOOST_MATH_ASSERT_UNDEFINED_POLICY false 
 // To compile even if Cauchy mean is used.
-#include <boost/math/tools/test.hpp>
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 #include <boost/math/distributions/cauchy.hpp>
     using boost::math::cauchy_distribution;
 
-#include "test_out_of_range.hpp"
-
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 
+
+#include "test_out_of_range.hpp"
+
 #include <iostream>
    using std::cout;
    using std::endl;
@@ -38,11 +38,11 @@ void test_spots(RealType T)
 {
   // Check some bad parameters to construct the distribution,
 #ifndef BOOST_NO_EXCEPTIONS
-  BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution<RealType> nbad1(0, 0), std::domain_error); // zero scale.
-  BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution<RealType> nbad1(0, -1), std::domain_error); // negative scale (shape).
+  BOOST_CHECK_THROW(boost::math::cauchy_distribution<RealType> nbad1(0, 0), std::domain_error); // zero scale.
+  BOOST_CHECK_THROW(boost::math::cauchy_distribution<RealType> nbad1(0, -1), std::domain_error); // negative scale (shape).
 #else
-  BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution<RealType>(0, 0), std::domain_error); // zero scale.
-  BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution<RealType>(0, -1), std::domain_error); // negative scale (shape).
+  BOOST_CHECK_THROW(boost::math::cauchy_distribution<RealType>(0, 0), std::domain_error); // zero scale.
+  BOOST_CHECK_THROW(boost::math::cauchy_distribution<RealType>(0, -1), std::domain_error); // negative scale (shape).
 #endif
   cauchy_distribution<RealType> C01;
 
@@ -667,35 +667,35 @@ void test_spots(RealType T)
    // To compile even if Cauchy mean is used.
    // See policy reference, mathematically undefined function policies
    //
-   //BOOST_MATH_CHECK_THROW(
+   //BOOST_CHECK_THROW(
    //    mean(dist),
    //    std::domain_error);
-   //BOOST_MATH_CHECK_THROW(
+   //BOOST_CHECK_THROW(
    //    variance(dist),
    //    std::domain_error);
-   //BOOST_MATH_CHECK_THROW(
+   //BOOST_CHECK_THROW(
    //    standard_deviation(dist),
    //    std::domain_error);
-   //BOOST_MATH_CHECK_THROW(
+   //BOOST_CHECK_THROW(
    //    kurtosis(dist),
    //    std::domain_error);
-   //BOOST_MATH_CHECK_THROW(
+   //BOOST_CHECK_THROW(
    //    kurtosis_excess(dist),
    //    std::domain_error);
-   //BOOST_MATH_CHECK_THROW(
+   //BOOST_CHECK_THROW(
    //    skewness(dist),
    //    std::domain_error);
 
-   BOOST_MATH_CHECK_THROW(
+   BOOST_CHECK_THROW(
        quantile(dist, RealType(0.0)),
        std::overflow_error);
-   BOOST_MATH_CHECK_THROW(
+   BOOST_CHECK_THROW(
        quantile(dist, RealType(1.0)),
        std::overflow_error);
-   BOOST_MATH_CHECK_THROW(
+   BOOST_CHECK_THROW(
        quantile(complement(dist, RealType(0.0))),
        std::overflow_error);
-   BOOST_MATH_CHECK_THROW(
+   BOOST_CHECK_THROW(
        quantile(complement(dist, RealType(1.0))),
        std::overflow_error);
 
@@ -705,7 +705,7 @@ void test_spots(RealType T)
 
 } // template <class RealType>void test_spots(RealType)
 
-BOOST_AUTO_TEST_CASE( test_main )
+BOOST_AUTO_TEST_CASE(test_main)
 {
   BOOST_MATH_CONTROL_FP;
    // Check that can generate cauchy distribution using the two convenience methods:
diff --git a/test/test_cauchy_cdf_double.cu b/test/test_cauchy_cdf_double.cu
new file mode 100644
index 000000000..dc99cbe33
--- /dev/null
+++ b/test/test_cauchy_cdf_double.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: each thread evaluates the CDF of a default-constructed Cauchy
+ * distribution at in1[i] and stores it in out[i]; threads with i >= numElements do nothing.
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test over random inputs and compares against host-computed cdf values.
+ */
+int main(void)
+{
+  try{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the kernel with enough blocks to cover every element (ceiling division)
+    int threadsPerBlock = 512;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the reference values on the host
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // Fail if any device result is more than 100 eps away from the host result
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    return EXIT_FAILURE; // an exception is a test failure, not a pass
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_cdf_float.cu b/test/test_cauchy_cdf_float.cu
new file mode 100644
index 000000000..dc99cbe33
--- /dev/null
+++ b/test/test_cauchy_cdf_float.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type; // single-precision variant (test_cauchy_cdf_float.cu)
+
+/**
+ * CUDA kernel: each thread evaluates the CDF of a default-constructed Cauchy
+ * distribution at in1[i] and stores it in out[i]; threads with i >= numElements do nothing.
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test over random inputs and compares against host-computed cdf values.
+ */
+int main(void)
+{
+  try{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the kernel with enough blocks to cover every element (ceiling division)
+    int threadsPerBlock = 512;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the reference values on the host
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // Fail if any device result is more than 100 eps away from the host result
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    return EXIT_FAILURE; // an exception is a test failure, not a pass
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_pdf_double.cu b/test/test_cauchy_pdf_double.cu
new file mode 100644
index 000000000..7a7fe5ba6
--- /dev/null
+++ b/test/test_cauchy_pdf_double.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: each thread evaluates the PDF of a default-constructed Cauchy
+ * distribution at in1[i] and stores it in out[i]; threads with i >= numElements do nothing.
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test over random inputs and compares against host-computed pdf values.
+ */
+int main(void)
+{
+  try{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the kernel with enough blocks to cover every element (ceiling division)
+    int threadsPerBlock = 512;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the reference values on the host
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // Fail if any device result is more than 100 eps away from the host result
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    return EXIT_FAILURE; // an exception is a test failure, not a pass
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_pdf_float.cu b/test/test_cauchy_pdf_float.cu
new file mode 100644
index 000000000..5ec3b604b
--- /dev/null
+++ b/test/test_cauchy_pdf_float.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: each thread evaluates the PDF of a default-constructed Cauchy
+ * distribution at in1[i] and stores it in out[i]; threads with i >= numElements do nothing.
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test over random inputs and compares against host-computed pdf values.
+ */
+int main(void)
+{
+  try{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the kernel with enough blocks to cover every element (ceiling division)
+    int threadsPerBlock = 512;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the reference values on the host
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // Fail if any device result is more than 100 eps away from the host result
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    return EXIT_FAILURE; // an exception is a test failure, not a pass
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_quan_double.cu b/test/test_cauchy_quan_double.cu
new file mode 100644
index 000000000..21f4b4dda
--- /dev/null
+++ b/test/test_cauchy_quan_double.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: each thread evaluates the quantile of a default-constructed Cauchy
+ * distribution at in1[i] and stores it in out[i]; threads with i >= numElements do nothing.
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test over random inputs and compares against host-computed quantile values.
+ */
+int main(void)
+{
+  try{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the kernel with enough blocks to cover every element (ceiling division)
+    int threadsPerBlock = 512;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the reference values on the host
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(quantile(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // Fail if any device result is more than 100 eps away from the host result
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    return EXIT_FAILURE; // an exception is a test failure, not a pass
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_quan_float.cu b/test/test_cauchy_quan_float.cu
new file mode 100644
index 000000000..b6bed1520
--- /dev/null
+++ b/test/test_cauchy_quan_float.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: each thread with index i < numElements stores in out[i] the quantile of a
+ * default-constructed boost::math::cauchy_distribution<float_type> at probability in1[i].
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    // Index of this thread across the whole (x-dimension) launch.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host driver: run cuda_test on random probabilities and compare with the host quantile.
+ */
+int main(void)
+{
+  try{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed, echoed to stdout
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input buffer; filled on the host and handed to the kernel through get()
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Output buffer; written by the kernel and read back on the host
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Fill the input from the default-constructed generator and distribution
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch cuda_test with enough 512-thread blocks to cover every element (ceiling division)
+    int threadsPerBlock = 512;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the same quantile call evaluated on the CPU, timed with the stopwatch
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(quantile(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the host value exceeds 100
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    return EXIT_FAILURE; // an exception means the comparison never completed, so do not report success
+  }
+  return 0;
+}
diff --git a/test/test_cbrt.cpp b/test/test_cbrt.cpp
index 8b36a765d..6abb9bd88 100644
--- a/test/test_cbrt.cpp
+++ b/test/test_cbrt.cpp
@@ -9,7 +9,10 @@
 #  pragma warning (disable : 4224)
 #endif
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp> // include /libs/math/src/
+#endif
+
 #include "test_cbrt.hpp"
 
 #include <boost/math/special_functions/cbrt.hpp> // Added to avoid link failure missing cbrt variants.
diff --git a/test/test_cbrt.hpp b/test/test_cbrt.hpp
index f606a5840..77e4aed51 100644
--- a/test/test_cbrt.hpp
+++ b/test/test_cbrt.hpp
@@ -1,5 +1,6 @@
 // Copyright John Maddock 2006.
 // Copyright Paul A. Bristow 2007, 2009
+// Copyright Matt Borland 2024
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -9,7 +10,6 @@
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/tools/stats.hpp>
-#include <boost/math/tools/test.hpp>
 #include <boost/type_traits/is_floating_point.hpp>
 #include <boost/array.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
@@ -93,7 +93,9 @@ void test_cbrt(T, const char* name)
    }
    BOOST_IF_CONSTEXPR(std::numeric_limits<T>::has_quiet_NaN)
    {
+      #ifndef BOOST_MATH_NO_EXCEPTIONS
       BOOST_CHECK_THROW(boost::math::cbrt(std::numeric_limits<T>::quiet_NaN()), std::domain_error);
+      #endif
    }
 
 }
diff --git a/test/test_cbrt_double.cu b/test/test_cbrt_double.cu
new file mode 100644
index 000000000..cc2c32685
--- /dev/null
+++ b/test/test_cbrt_double.cu
@@ -0,0 +1,99 @@
+
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: each thread with index i < numElements stores
+ * boost::math::cbrt(in[i]) in out[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Index of this thread across the whole (x-dimension) launch.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cbrt(in[i]);
+    }
+}
+
+/**
+ * Host driver: run cuda_test on rand()-derived inputs and compare with host boost::math::cbrt.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed, echoed to stdout
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input buffer; filled on the host and handed to the kernel through get()
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Output buffer; written by the kernel and read back on the host
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Fill the input with values in [0, 1] derived from rand()
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch cuda_test with enough 1024-thread blocks to cover every element (ceiling division)
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+    // Only now inspect the CUDA error state, after the launch and synchronize above
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the same function evaluated on the CPU, timed with the stopwatch
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::cbrt(input_vector[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the host value exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cbrt_float.cu b/test/test_cbrt_float.cu
new file mode 100644
index 000000000..a4e98cce8
--- /dev/null
+++ b/test/test_cbrt_float.cu
@@ -0,0 +1,99 @@
+
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: each thread with index i < numElements stores
+ * boost::math::cbrt(in[i]) in out[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Index of this thread across the whole (x-dimension) launch.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cbrt(in[i]);
+    }
+}
+
+/**
+ * Host driver: run cuda_test on rand()-derived inputs and compare with host boost::math::cbrt.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed, echoed to stdout
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input buffer; filled on the host and handed to the kernel through get()
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Output buffer; written by the kernel and read back on the host
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Fill the input with values in [0, 1] derived from rand()
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch cuda_test with enough 1024-thread blocks to cover every element (ceiling division)
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+    // Only now inspect the CUDA error state, after the launch and synchronize above
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the same function evaluated on the CPU, timed with the stopwatch
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::cbrt(input_vector[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the host value exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_changesign_double.cu b/test/test_changesign_double.cu
new file mode 100644
index 000000000..bfb2ade1e
--- /dev/null
+++ b/test/test_changesign_double.cu
@@ -0,0 +1,111 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/sign.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: each thread with index i < numElements stores
+ * boost::math::changesign(in[i]) in out[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Index of this thread across the whole (x-dimension) launch.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::changesign(in[i]);
+    }
+}
+
+/**
+ * Host driver: run cuda_test on mixed-sign inputs and compare with host boost::math::changesign.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed, echoed to stdout
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Input buffer; filled on the host and handed to the kernel through get()
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Output buffer; written by the kernel and read back on the host
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Fill the input: rand()-derived values in [0, 1], with zero and +/-infinity mixed in
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+           h_A[i] = 0;
+           break;
+        case 2:
+           h_A[i] = std::numeric_limits<float_type>::infinity();
+           break;
+        case 3:
+           h_A[i] = -std::numeric_limits<float_type>::infinity();
+           break;
+        }
+        if(i % 2) // negate every odd element; the previous `i % 1` was always 0, so nothing was negated
+           h_A[i] = -h_A[i];
+    }
+
+    // Launch cuda_test with enough 1024-thread blocks to cover every element (ceiling division)
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the same function evaluated on the CPU, timed with the stopwatch
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::changesign(h_A[i]));
+    double t = w.elapsed();
+    // Device and host results are compared with !=, so any difference fails the test
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_changesign_float.cu b/test/test_changesign_float.cu
new file mode 100644
index 000000000..d7e1764bd
--- /dev/null
+++ b/test/test_changesign_float.cu
@@ -0,0 +1,111 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/sign.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: each thread with index i < numElements stores
+ * boost::math::changesign(in[i]) in out[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Index of this thread across the whole (x-dimension) launch.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::changesign(in[i]);
+    }
+}
+
+/**
+ * Host driver: run cuda_test on mixed-sign inputs and compare with host boost::math::changesign.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed, echoed to stdout
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Input buffer; filled on the host and handed to the kernel through get()
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Output buffer; written by the kernel and read back on the host
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Fill the input: rand()-derived values in [0, 1], with zero and +/-infinity mixed in
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+           h_A[i] = 0;
+           break;
+        case 2:
+           h_A[i] = std::numeric_limits<float_type>::infinity();
+           break;
+        case 3:
+           h_A[i] = -std::numeric_limits<float_type>::infinity();
+           break;
+        }
+        if(i % 2) // negate every odd element; the previous `i % 1` was always 0, so nothing was negated
+           h_A[i] = -h_A[i];
+    }
+
+    // Launch cuda_test with enough 1024-thread blocks to cover every element (ceiling division)
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the same function evaluated on the CPU, timed with the stopwatch
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::changesign(h_A[i]));
+    double t = w.elapsed();
+    // Device and host results are compared with !=, so any difference fails the test
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_expm1_double.cu b/test/test_expm1_double.cu
new file mode 100644
index 000000000..cfed7d840
--- /dev/null
+++ b/test/test_expm1_double.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: each thread with index i < numElements stores
+ * boost::math::expm1(in[i]) in out[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Index of this thread across the whole (x-dimension) launch.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in[i]);
+    }
+}
+
+/**
+ * Host driver: run cuda_test on rand()-derived inputs and compare with host boost::math::expm1.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed, echoed to stdout
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input buffer; filled on the host and handed to the kernel through get()
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Output buffer; written by the kernel and read back on the host
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Fill the input with values in [0, 1] derived from rand()
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch cuda_test with enough 1024-thread blocks to cover every element (ceiling division)
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+    // Only now inspect the CUDA error state, after the launch and synchronize above
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the same function evaluated on the CPU, timed with the stopwatch
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::expm1(input_vector[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the host value exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_expm1_float.cu b/test/test_expm1_float.cu
new file mode 100644
index 000000000..3d439b887
--- /dev/null
+++ b/test/test_expm1_float.cu
@@ -0,0 +1,100 @@
+
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: each thread with index i < numElements stores
+ * boost::math::expm1(in[i]) in out[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Index of this thread across the whole (x-dimension) launch.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in[i]);
+    }
+}
+
+/**
+ * Host driver: run cuda_test on rand()-derived inputs and compare with host boost::math::expm1.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed, echoed to stdout
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input buffer; filled on the host and handed to the kernel through get()
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Output buffer; written by the kernel and read back on the host
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Fill the input with values in [0, 1] derived from rand()
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch cuda_test with enough 1024-thread blocks to cover every element (ceiling division)
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+    // Only now inspect the CUDA error state, after the launch and synchronize above
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the same function evaluated on the CPU, timed with the stopwatch
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::expm1(input_vector[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the host value exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_expm1_simple.cpp b/test/test_expm1_simple.cpp
new file mode 100644
index 000000000..00513ea40
--- /dev/null
+++ b/test/test_expm1_simple.cpp
@@ -0,0 +1,32 @@
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <random>
+#include <cmath>
+#include <boost/math/special_functions/expm1.hpp>
+#include "math_unit_test.hpp"
+
+constexpr int N = 50000;
+
+template <typename T>
+void test()
+{
+    // Fixed seed: every run checks the same N samples drawn from [0, 0.01) against std::expm1.
+    std::mt19937_64 engine(42);
+    std::uniform_real_distribution<T> small_args(0, 0.01);
+    for (int remaining = N; remaining > 0; --remaining)
+    {
+        const T value = small_args(engine);
+        CHECK_ULP_CLOSE(std::expm1(value), boost::math::expm1(value), 10);
+    }
+}
+
+int main()
+{
+    test<float>();
+    test<double>();
+    // The process exit status is whatever report_errors() returns for the checks run above.
+    return boost::math::test::report_errors();
+}
diff --git a/test/test_modf_double.cu b/test/test_modf_double.cu
new file mode 100644
index 000000000..06e65c106
--- /dev/null
+++ b/test/test_modf_double.cu
@@ -0,0 +1,105 @@
+
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: each thread with index i < numElements stores in out[i] the sum of the
+ * values returned by four boost::math::modf overloads applied to in[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Index of this thread across the whole (x-dimension) launch.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part);
+    }
+}
+
+/**
+ * Host driver: run cuda_test on rand()-derived inputs and compare with host boost::math::modf.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed, echoed to stdout
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Input buffer; filled on the host and handed to the kernel through get()
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Output buffer; written by the kernel and read back on the host
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Fill the input with values in [0, 1] derived from rand()
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch cuda_test with enough 1024-thread blocks to cover every element (ceiling division)
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the kernel adds four modf return values, so compare against 4x one call
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type fract;
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(4 * boost::math::modf(h_A[i], &fract));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the host value exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
+
diff --git a/test/test_modf_float.cu b/test/test_modf_float.cu
new file mode 100644
index 000000000..06e65c106
--- /dev/null
+++ b/test/test_modf_float.cu
@@ -0,0 +1,105 @@
+
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: each thread with index i < numElements stores in out[i] the sum of the
+ * values returned by four boost::math::modf overloads applied to in[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Index of this thread across the whole (x-dimension) launch.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part);
+    }
+}
+
+/**
+ * Host driver: run cuda_test on rand()-derived inputs and compare with host boost::math::modf.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed, echoed to stdout
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Input buffer; filled on the host and handed to the kernel through get()
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Output buffer; written by the kernel and read back on the host
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Fill the input with values in [0, 1] derived from rand()
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch cuda_test with enough 1024-thread blocks to cover every element (ceiling division)
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the kernel adds four modf return values, so compare against 4x one call
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type fract;
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(4 * boost::math::modf(h_A[i], &fract));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the host value exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
+
diff --git a/test/test_round.cpp b/test/test_round.cpp
index 95ff4d234..e363efd56 100644
--- a/test/test_round.cpp
+++ b/test/test_round.cpp
@@ -3,12 +3,15 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp>
+#endif
 
 #include <boost/math/concepts/real_concept.hpp>
 #define BOOST_TEST_MAIN
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/test/unit_test.hpp>
-#include <boost/math/tools/test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/special_functions/round.hpp>
 #include <boost/math/special_functions/next.hpp>
@@ -222,6 +225,7 @@ void test_round(T, const char* name )
    //
    // Finish off by testing the error handlers:
    //
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_MATH_CHECK_THROW(iround(static_cast<T>(1e20)), boost::math::rounding_error);
    BOOST_MATH_CHECK_THROW(iround(static_cast<T>(-1e20)), boost::math::rounding_error);
    BOOST_MATH_CHECK_THROW(lround(static_cast<T>(1e20)), boost::math::rounding_error);
@@ -314,6 +318,7 @@ void test_round(T, const char* name )
       BOOST_MATH_CHECK_THROW(llround(static_cast<T>((std::numeric_limits<boost::long_long_type>::min)()) - 1), boost::math::rounding_error);
    }
 #endif
+   #endif
    //
    // try non-throwing error handlers:
    //
diff --git a/test/test_round_double.cu b/test/test_round_double.cu
new file mode 100644
index 000000000..3dae4342d
--- /dev/null
+++ b/test/test_round_double.cu
@@ -0,0 +1,98 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: for every i < numElements, writes to out[i] the sum of
+ * round, iround, lround and llround (all from boost::math) applied to in[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Global index of this thread along x; each thread handles at most one element.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test over random inputs and checks each output against a host-computed value.
+ */
+int main(void)
+{
+    // Receives the status returned by cudaGetLastError() once the kernel has run
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed; despite the banner text, no vector addition is performed
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffer that the kernel reads
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output buffer that the kernel writes
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Fill the input with pseudo-random values in [0, 1], so every expected rounding is 0 or 1
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch configuration: ceiling division so the 1024-thread blocks cover every element
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w; // NOTE(review): presumably starts timing at construction -- confirm in stopwatch.hpp
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize(); // wait for the kernel to finish before reading the timer and h_C
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the check below expects the kernel's four-way sum to equal 4 * round(x)
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(4 * boost::math::round(h_A[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the reference exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
+
diff --git a/test/test_round_float.cu b/test/test_round_float.cu
new file mode 100644
index 000000000..45dd14c03
--- /dev/null
+++ b/test/test_round_float.cu
@@ -0,0 +1,98 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: for every i < numElements, writes to out[i] the sum of
+ * round, iround, lround and llround (all from boost::math) applied to in[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Global index of this thread along x; each thread handles at most one element.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test over random inputs and checks each output against a host-computed value.
+ */
+int main(void)
+{
+    // Receives the status returned by cudaGetLastError() once the kernel has run
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed; despite the banner text, no vector addition is performed
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffer that the kernel reads
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output buffer that the kernel writes
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Fill the input with pseudo-random values in [0, 1], so every expected rounding is 0 or 1
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch configuration: ceiling division so the 1024-thread blocks cover every element
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w; // NOTE(review): presumably starts timing at construction -- confirm in stopwatch.hpp
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize(); // wait for the kernel to finish before reading the timer and h_C
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the check below expects the kernel's four-way sum to equal 4 * round(x)
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(4 * boost::math::round(h_A[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the reference exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
+
diff --git a/test/test_sign.cpp b/test/test_sign.cpp
index 864d2dd12..530a60d50 100644
--- a/test/test_sign.cpp
+++ b/test/test_sign.cpp
@@ -1,5 +1,6 @@
-#define BOOST_TEST_MAIN// Copyright John Maddock 2008
+// Copyright John Maddock 2008
 //  (C) Copyright Paul A. Bristow 2011 (added tests for changesign)
+// Copyright Matt Borland 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -147,7 +148,9 @@ BOOST_AUTO_TEST_CASE( test_main )
    test_spots(0.0, "double"); // Test double. OK at decdigits 7, tolerance = 1e07 %
    // long double support for the sign functions is considered "core" so we always test it
    // even when long double support is turned off via BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
    test_spots(0.0L, "long double"); // Test long double.
+#endif
 #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
    test_spots(boost::math::concepts::real_concept(0), "real_concept"); // Test real_concept.
 #endif
diff --git a/test/test_trunc_double.cu b/test/test_trunc_double.cu
new file mode 100644
index 000000000..5a2d7b622
--- /dev/null
+++ b/test/test_trunc_double.cu
@@ -0,0 +1,97 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/trunc.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: for every i < numElements, writes to out[i] the sum of
+ * trunc, itrunc, ltrunc and lltrunc (all from boost::math) applied to in[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Global index of this thread along x; each thread handles at most one element.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test over random inputs and checks each output against a host-computed value.
+ */
+int main(void)
+{
+    // Receives the status returned by cudaGetLastError() once the kernel has run
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed; despite the banner text, no vector addition is performed
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffer that the kernel reads
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output buffer that the kernel writes
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Fill the input with pseudo-random values in [0, 1]; these truncate to 0 unless exactly 1
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch configuration: ceiling division so the 1024-thread blocks cover every element
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w; // NOTE(review): presumably starts timing at construction -- confirm in stopwatch.hpp
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize(); // wait for the kernel to finish before reading the timer and h_C
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the check below expects the kernel's four-way sum to equal 4 * trunc(x)
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(4 * boost::math::trunc(h_A[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the reference exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_trunc_float.cu b/test/test_trunc_float.cu
new file mode 100644
index 000000000..d6fe4d352
--- /dev/null
+++ b/test/test_trunc_float.cu
@@ -0,0 +1,97 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/trunc.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: for every i < numElements, writes to out[i] the sum of
+ * trunc, itrunc, ltrunc and lltrunc (all from boost::math) applied to in[i].
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    // Global index of this thread along x; each thread handles at most one element.
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test over random inputs and checks each output against a host-computed value.
+ */
+int main(void)
+{
+    // Receives the status returned by cudaGetLastError() once the kernel has run
+    cudaError_t err = cudaSuccess;
+
+    // Number of elements processed; despite the banner text, no vector addition is performed
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffer that the kernel reads
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output buffer that the kernel writes
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Fill the input with pseudo-random values in [0, 1]; these truncate to 0 unless exactly 1
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch configuration: ceiling division so the 1024-thread blocks cover every element
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w; // NOTE(review): presumably starts timing at construction -- confirm in stopwatch.hpp
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize(); // wait for the kernel to finish before reading the timer and h_C
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Host reference: the check below expects the kernel's four-way sum to equal 4 * trunc(x)
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(4 * boost::math::trunc(h_A[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the reference exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}