From cb3701b09b4e0823f888dc8cc94c1fa6e6247769 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 09:55:44 -0400 Subject: [PATCH 01/37] Add markers for log1p --- include/boost/math/special_functions/log1p.hpp | 4 ++-- include/boost/math/special_functions/math_fwd.hpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/boost/math/special_functions/log1p.hpp b/include/boost/math/special_functions/log1p.hpp index fc7308eb6..8fb379fe4 100644 --- a/include/boost/math/special_functions/log1p.hpp +++ b/include/boost/math/special_functions/log1p.hpp @@ -433,7 +433,7 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type log1p(T x) // Compute log(1+x)-x: // template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type log1pmx(T x, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -465,7 +465,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type log1pmx(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type log1pmx(T x) { return log1pmx(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp index e2ac58e85..fe61a420e 100644 --- a/include/boost/math/special_functions/math_fwd.hpp +++ b/include/boost/math/special_functions/math_fwd.hpp @@ -577,10 +577,10 @@ namespace boost // log1pmx is log(x + 1) - x template - tools::promote_args_t log1pmx(T); + BOOST_MATH_GPU_ENABLED tools::promote_args_t log1pmx(T); template - tools::promote_args_t log1pmx(T, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t log1pmx(T, const Policy&); // Exp (x) minus 1 functions. template @@ -1471,10 +1471,10 @@ namespace boost inline boost::math::tools::promote_args_t cbrt(RT z){ return boost::math::cbrt(z, Policy()); }\ \ template \ - inline boost::math::tools::promote_args_t log1p(T x){ return boost::math::log1p(x, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t log1p(T x){ return boost::math::log1p(x, Policy()); }\ \ template \ - inline boost::math::tools::promote_args_t log1pmx(T x){ return boost::math::log1pmx(x, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t log1pmx(T x){ return boost::math::log1pmx(x, Policy()); }\ \ template \ BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t expm1(T x){ return boost::math::expm1(x, Policy()); }\ From 4df80c13a6ba8b3cb6ce5f249aadbd312308c164 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 09:55:58 -0400 Subject: [PATCH 02/37] Add log1p CUDA testing --- test/cuda_jamfile | 2 + test/test_log1p_double.cu | 100 ++++++++++++++++++++++++++++++++++++++ test/test_log1p_float.cu | 100 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 test/test_log1p_double.cu create mode 100644 test/test_log1p_float.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 65c5d71be..52228ab55 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -41,6 +41,8 @@ run test_changesign_double.cu ; run test_changesign_float.cu ; run test_expm1_double.cu ; run test_expm1_float.cu ; +run test_log1p_double.cu ; +run test_log1p_float.cu ; run test_modf_double.cu ; run test_modf_float.cu ; run test_round_double.cu ; diff --git a/test/test_log1p_double.cu b/test/test_log1p_double.cu new file mode 100644 index 000000000..d164b5a98 --- /dev/null +++ b/test/test_log1p_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::log1p(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::log1p(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_log1p_float.cu b/test/test_log1p_float.cu new file mode 100644 index 000000000..d164b5a98 --- /dev/null +++ b/test/test_log1p_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::log1p(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::log1p(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 78a8d3282508290faa88b9cd0a99b108fa36cbea Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 10:00:07 -0400 Subject: [PATCH 03/37] Add SYCL testing for log1p and log1pmx --- test/sycl_jamfile | 1 + test/test_log1p_simple.cpp | 48 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 test/test_log1p_simple.cpp diff --git a/test/sycl_jamfile b/test/sycl_jamfile index e8bd12f8c..1b0da5ca6 100644 --- a/test/sycl_jamfile +++ b/test/sycl_jamfile @@ -19,3 +19,4 @@ run test_cbrt.cpp ; run test_sign.cpp ; run test_round.cpp ; run test_expm1_simple.cpp; +run test_log1p_simple.cpp; diff --git a/test/test_log1p_simple.cpp b/test/test_log1p_simple.cpp new file mode 100644 index 000000000..c3acd58ed --- /dev/null +++ b/test/test_log1p_simple.cpp @@ -0,0 +1,48 @@ +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include "math_unit_test.hpp" + +constexpr int N = 50000; + +template +void test() +{ + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0, 0.01); + + for (int n = 0; n < N; ++n) + { + const T value (dist(rng)); + CHECK_ULP_CLOSE(std::log1p(value), boost::math::log1p(value), 10); + } +} + +template +void test_log1pmx() +{ + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0, 0.01); + + for (int n = 0; n < N; ++n) + { + const T value (dist(rng)); + CHECK_ULP_CLOSE(std::log1p(value) - value, boost::math::log1pmx(value), 100); + } +} + +int main() +{ + test(); + test(); + + test_log1pmx(); + test_log1pmx(); + + return boost::math::test::report_errors(); +} From 428d666c9638f6dcfaa0ccf1cfd247fa528da02a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 10:00:21 -0400 Subject: [PATCH 04/37] Fix use of static const char* in log1pmx --- include/boost/math/special_functions/log1p.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/boost/math/special_functions/log1p.hpp b/include/boost/math/special_functions/log1p.hpp index 8fb379fe4..1df3dd8fe 100644 --- a/include/boost/math/special_functions/log1p.hpp +++ b/include/boost/math/special_functions/log1p.hpp @@ -114,7 +114,7 @@ BOOST_MATH_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const std::int { // The function returns the natural logarithm of 1 + x. BOOST_MATH_STD_USING - static const char* function = "boost::math::log1p<%1%>(%1%)"; + constexpr auto function = "boost::math::log1p<%1%>(%1%)"; if(x < -1) return policies::raise_domain_error( @@ -222,7 +222,7 @@ BOOST_MATH_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const std::int { // The function returns the natural logarithm of 1 + x. BOOST_MATH_STD_USING - static const char* function = "boost::math::log1p<%1%>(%1%)"; + constexpr auto function = "boost::math::log1p<%1%>(%1%)"; if(x < -1) return policies::raise_domain_error( @@ -438,7 +438,7 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type { typedef typename tools::promote_args::type result_type; BOOST_MATH_STD_USING - static const char* function = "boost::math::log1pmx<%1%>(%1%)"; + constexpr auto function = "boost::math::log1pmx<%1%>(%1%)"; if(x < -1) return policies::raise_domain_error( From 944f45e2661449ca88be10a5a579f1a501d20b6e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 10:09:54 -0400 Subject: [PATCH 05/37] Loosen tolerance --- test/test_log1p_simple.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_log1p_simple.cpp b/test/test_log1p_simple.cpp index c3acd58ed..ef6c204d4 100644 --- a/test/test_log1p_simple.cpp +++ b/test/test_log1p_simple.cpp @@ -32,7 +32,7 @@ void test_log1pmx() for (int n = 0; n < N; ++n) { const T value (dist(rng)); - CHECK_ULP_CLOSE(std::log1p(value) - value, boost::math::log1pmx(value), 100); + CHECK_ULP_CLOSE(std::log1p(value) - value, boost::math::log1pmx(value), 1e9); } } From 2e77757943165f9fd8253739dc21cf6e225056cf Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 10:21:36 -0400 Subject: [PATCH 06/37] Add GPU support to digamma --- .../boost/math/special_functions/digamma.hpp | 48 ++++++++++--------- .../boost/math/special_functions/math_fwd.hpp | 6 +-- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/include/boost/math/special_functions/digamma.hpp b/include/boost/math/special_functions/digamma.hpp index 3922de7d2..98c939237 100644 --- a/include/boost/math/special_functions/digamma.hpp +++ b/include/boost/math/special_functions/digamma.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -12,6 +13,7 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif +#include #include #include #include @@ -37,11 +39,11 @@ namespace detail{ // Begin by defining the smallest value for which it is safe to // use the asymptotic expansion for digamma: // -inline unsigned digamma_large_lim(const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const std::integral_constant*) { return 20; } -inline unsigned digamma_large_lim(const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const std::integral_constant*) { return 20; } -inline unsigned digamma_large_lim(const void*) +BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const void*) { return 10; } // // Implementations of the asymptotic expansion come next, @@ -114,10 +116,10 @@ inline T digamma_imp_large(T x, const std::integral_constant*) // 17-digit precision for x >= 10: // template -inline T digamma_imp_large(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline T digamma_imp_large(T x, const std::integral_constant*) { BOOST_MATH_STD_USING // ADL of std functions. - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { 0.083333333333333333333333333333333333333333333333333, -0.0083333333333333333333333333333333333333333333333333, 0.003968253968253968253968253968253968253968253968254, @@ -138,10 +140,10 @@ inline T digamma_imp_large(T x, const std::integral_constant*) // 9-digit precision for x >= 10: // template -inline T digamma_imp_large(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline T digamma_imp_large(T x, const std::integral_constant*) { BOOST_MATH_STD_USING // ADL of std functions. - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { 0.083333333333333333333333333333333333333333333333333f, -0.0083333333333333333333333333333333333333333333333333f, 0.003968253968253968253968253968253968253968253968254f @@ -310,7 +312,7 @@ T digamma_imp_1_2(T x, const std::integral_constant*) // 18-digit precision: // template -T digamma_imp_1_2(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED T digamma_imp_1_2(T x, const std::integral_constant*) { // // Now the approximation, we use the form: @@ -325,13 +327,13 @@ T digamma_imp_1_2(T x, const std::integral_constant*) // At double precision, max error found: 2.452e-17 // // LCOV_EXCL_START - static const float Y = 0.99558162689208984F; + constexpr float Y = 0.99558162689208984F; - static const T root1 = T(1569415565) / 1073741824uL; - static const T root2 = (T(381566830) / 1073741824uL) / 1073741824uL; - static const T root3 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.9016312093258695918615325266959189453125e-19); + constexpr T root1 = T(1569415565) / 1073741824uL; + constexpr T root2 = (T(381566830) / 1073741824uL) / 1073741824uL; + constexpr T root3 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.9016312093258695918615325266959189453125e-19); - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 0.25479851061131551), BOOST_MATH_BIG_CONSTANT(T, 53, -0.32555031186804491), BOOST_MATH_BIG_CONSTANT(T, 53, -0.65031853770896507), @@ -339,7 +341,7 @@ T digamma_imp_1_2(T x, const std::integral_constant*) BOOST_MATH_BIG_CONSTANT(T, 53, -0.045251321448739056), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0020713321167745952) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 2.0767117023730469), BOOST_MATH_BIG_CONSTANT(T, 53, 1.4606242909763515), @@ -361,7 +363,7 @@ T digamma_imp_1_2(T x, const std::integral_constant*) // 9-digit precision: // template -inline T digamma_imp_1_2(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline T digamma_imp_1_2(T x, const std::integral_constant*) { // // Now the approximation, we use the form: @@ -376,16 +378,16 @@ inline T digamma_imp_1_2(T x, const std::integral_constant*) // At float precision, max error found: 2.008725e-008 // // LCOV_EXCL_START - static const float Y = 0.99558162689208984f; - static const T root = 1532632.0f / 1048576; - static const T root_minor = static_cast(0.3700660185912626595423257213284682051735604e-6L); - static const T P[] = { + constexpr float Y = 0.99558162689208984f; + constexpr T root = 1532632.0f / 1048576; + constexpr T root_minor = static_cast(0.3700660185912626595423257213284682051735604e-6L); + constexpr T P[] = { 0.25479851023250261e0f, -0.44981331915268368e0f, -0.43916936919946835e0f, -0.61041765350579073e-1f }; - static const T Q[] = { + constexpr T Q[] = { 0.1e1f, 0.15890202430554952e1f, 0.65341249856146947e0f, @@ -401,7 +403,7 @@ inline T digamma_imp_1_2(T x, const std::integral_constant*) } template -T digamma_imp(T x, const Tag* t, const Policy& pol) +BOOST_MATH_GPU_ENABLED T digamma_imp(T x, const Tag* t, const Policy& pol) { // // This handles reflection of negative arguments, and all our @@ -567,7 +569,7 @@ T digamma_imp(T x, const std::integral_constant* t, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type digamma(T x, const Policy&) { typedef typename tools::promote_args::type result_type; @@ -592,7 +594,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type digamma(T x) { return digamma(x, policies::policy<>()); diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp index fe61a420e..e481692f1 100644 --- a/include/boost/math/special_functions/math_fwd.hpp +++ b/include/boost/math/special_functions/math_fwd.hpp @@ -533,10 +533,10 @@ namespace boost // digamma: template - tools::promote_args_t digamma(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t digamma(T x); template - tools::promote_args_t digamma(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t digamma(T x, const Policy&); // trigamma: template @@ -1455,7 +1455,7 @@ namespace boost inline boost::math::tools::promote_args_t gamma_q_inva(T1 a, T2 q){ return boost::math::gamma_q_inva(a, q, Policy()); }\ \ template \ - inline boost::math::tools::promote_args_t digamma(T x){ return boost::math::digamma(x, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t digamma(T x){ return boost::math::digamma(x, Policy()); }\ \ template \ inline boost::math::tools::promote_args_t trigamma(T x){ return boost::math::trigamma(x, Policy()); }\ From 8c97086236907d3893ba9ec99c2176606a4444a7 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 10:21:47 -0400 Subject: [PATCH 07/37] Add digamma CUDA testing --- test/cuda_jamfile | 2 + test/test_digamma_double.cu | 100 ++++++++++++++++++++++++++++++++++++ test/test_digamma_float.cu | 100 ++++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 test/test_digamma_double.cu create mode 100644 test/test_digamma_float.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 52228ab55..42b827cc2 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -39,6 +39,8 @@ run test_cbrt_double.cu ; run test_cbrt_float.cu ; run test_changesign_double.cu ; run test_changesign_float.cu ; +run test_digamma_double.cu ; +run test_digamma_float.cu ; run test_expm1_double.cu ; run test_expm1_float.cu ; run test_log1p_double.cu ; diff --git a/test/test_digamma_double.cu b/test/test_digamma_double.cu new file mode 100644 index 000000000..c88fe153c --- /dev/null +++ b/test/test_digamma_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::digamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::digamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_digamma_float.cu b/test/test_digamma_float.cu new file mode 100644 index 000000000..ea1b1c68e --- /dev/null +++ b/test/test_digamma_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::digamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::digamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 9fb4677b1899828f6178b720b256aaf3008f415d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 10:28:52 -0400 Subject: [PATCH 08/37] Add sycl test of digamma --- test/sycl_jamfile | 5 ++-- test/test_digamma_simple.cpp | 50 ++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 test/test_digamma_simple.cpp diff --git a/test/sycl_jamfile b/test/sycl_jamfile index 1b0da5ca6..3354a776f 100644 --- a/test/sycl_jamfile +++ b/test/sycl_jamfile @@ -18,5 +18,6 @@ run test_beta_simple.cpp ; run test_cbrt.cpp ; run test_sign.cpp ; run test_round.cpp ; -run test_expm1_simple.cpp; -run test_log1p_simple.cpp; +run test_expm1_simple.cpp ; +run test_log1p_simple.cpp ; +run test_digamma_simple.cpp ; diff --git a/test/test_digamma_simple.cpp b/test/test_digamma_simple.cpp new file mode 100644 index 000000000..bbe003a01 --- /dev/null +++ b/test/test_digamma_simple.cpp @@ -0,0 +1,50 @@ +// (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include "math_unit_test.hpp" + +template +void test_spots(T, const char* t) +{ + std::cout << "Testing basic sanity checks for type " << t << std::endl; + // + // Basic sanity checks, tolerance is 3 epsilon: + // + T tolerance = 3; + // + // Special tolerance (200eps) for when we're very near the root, + // and T has more than 64-bits in it's mantissa: + // + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(0.125)), static_cast(-8.3884926632958548678027429230863430000514460424495L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(0.5)), static_cast(-1.9635100260214234794409763329987555671931596046604L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(1)), static_cast(-0.57721566490153286060651209008240243104215933593992L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(1.5)), static_cast(0.036489973978576520559023667001244432806840395339566L), tolerance * 40); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(1.5) - static_cast(1)/32), static_cast(0.00686541147073577672813890866512415766586241385896200579891429L), tolerance * 200); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(2)), static_cast(0.42278433509846713939348790991759756895784066406008L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(8)), static_cast(2.0156414779556099965363450527747404261006978069172L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(12)), static_cast(2.4426616799758120167383652547949424463027180089374L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(22)), static_cast(3.0681430398611966699248760264450329818421699570581L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(50)), static_cast(3.9019896734278921969539597028823666609284424880275L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(500)), static_cast(6.2136077650889917423827750552855712637776544784569L), tolerance); + // + // negative values: + // + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(-0.125)), static_cast(7.1959829284523046176757814502538535827603450463013L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(-10.125)), static_cast(9.9480538258660761287008034071425343357982429855241L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(-10.875)), static_cast(-5.1527360383841562620205965901515879492020193154231L), tolerance); + CHECK_ULP_CLOSE(::boost::math::digamma(static_cast(-1.5)), static_cast(0.70315664064524318722569033366791109947350706200623L), tolerance); +} + +int main() +{ + test_spots(0.0F, "float"); + test_spots(0.0, "double"); + + return boost::math::test::report_errors(); +} + + From 6c1e9d7ce31d309e8d4b41258eeb49722015531a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:11:18 -0400 Subject: [PATCH 09/37] Begin adding our own definition of numeric_limits --- include/boost/math/tools/numeric_limits.hpp | 117 ++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 include/boost/math/tools/numeric_limits.hpp diff --git a/include/boost/math/tools/numeric_limits.hpp b/include/boost/math/tools/numeric_limits.hpp new file mode 100644 index 000000000..0ca02812a --- /dev/null +++ b/include/boost/math/tools/numeric_limits.hpp @@ -0,0 +1,117 @@ +// Copyright (c) 2024 Matt Borland +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Regular use of std::numeric_limits functions can not be used on +// GPU platforms like CUDA since they are missing the __device__ marker +// and libcu++ does not provide something analogous. +// Rather than using giant if else blocks make our own version of numeric limits + +#include +#include +#include +#include + +namespace boost { +namespace math { + +template +struct numeric_limits : public std::numeric_limits {}; + +#ifdef BOOST_MATH_HAS_GPU_SUPPORT + +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (min) () { return FLT_MIN; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (max) () { return FLT_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float lowest () { return -FLT_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float epsilon () { return FLT_EPSILON; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float round_error () { return 0.5F; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float infinity () { return static_cast(INFINITY); } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float quiet_NaN () { return static_cast(NAN); } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float signaling_NaN () + { + #ifdef FLT_SNAN + return FLT_SNAN; + #else + return static_cast(NAN); + #endif + } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float denorm_min () { return FLT_TRUE_MIN; } +}; + +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (min) () { return DBL_MIN; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (max) () { return DBL_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double lowest () { return -DBL_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double epsilon () { return DBL_EPSILON; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double round_error () { return 0.5; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double infinity () { return static_cast(INFINITY); } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double quiet_NaN () { return static_cast(NAN); } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double signaling_NaN () + { + #ifdef DBL_SNAN + return DBL_SNAN; + #else + return static_cast(NAN); + #endif + } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double denorm_min () { return DBL_TRUE_MIN; } +}; + +#endif + +} // namespace math +} // namespace boost From 1629e75c2c9d47d3131d87fe64ed86e652e8a69d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:11:43 -0400 Subject: [PATCH 10/37] Make pow GPU compatible --- .../boost/math/special_functions/math_fwd.hpp | 6 +++--- include/boost/math/special_functions/pow.hpp | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp index e481692f1..2886621bb 100644 --- a/include/boost/math/special_functions/math_fwd.hpp +++ b/include/boost/math/special_functions/math_fwd.hpp @@ -1091,10 +1091,10 @@ namespace boost // pow: template - BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t pow(T base, const Policy& policy); + BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t pow(T base, const Policy& policy); template - BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t pow(T base); + BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t pow(T base); // next: template @@ -1625,7 +1625,7 @@ template \ BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\ \ template \ - inline boost::math::tools::promote_args_t pow(T v){ return boost::math::pow(v, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t pow(T v){ return boost::math::pow(v, Policy()); }\ \ template T nextafter(const T& a, const T& b){ return static_cast(boost::math::nextafter(a, b, Policy())); }\ template T float_next(const T& a){ return static_cast(boost::math::float_next(a, Policy())); }\ diff --git a/include/boost/math/special_functions/pow.hpp b/include/boost/math/special_functions/pow.hpp index 9c6488997..a056f6daf 100644 --- a/include/boost/math/special_functions/pow.hpp +++ b/include/boost/math/special_functions/pow.hpp @@ -2,6 +2,7 @@ // Computes a power with exponent known at compile-time // (C) Copyright Bruno Lalande 2008. +// (C) Copyright Matt Borland 2024. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) @@ -12,7 +13,7 @@ #ifndef BOOST_MATH_POW_HPP #define BOOST_MATH_POW_HPP - +#include #include #include #include @@ -34,7 +35,7 @@ template struct positive_power { template - static BOOST_MATH_CXX14_CONSTEXPR T result(T base) + BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base) { T power = positive_power::result(base); return power * power; @@ -45,7 +46,7 @@ template struct positive_power { template - static BOOST_MATH_CXX14_CONSTEXPR T result(T base) + BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base) { T power = positive_power::result(base); return base * power * power; @@ -56,7 +57,7 @@ template <> struct positive_power<1, 1> { template - static BOOST_MATH_CXX14_CONSTEXPR T result(T base){ return base; } + BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base){ return base; } }; @@ -64,7 +65,7 @@ template struct power_if_positive { template - static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy&) + BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy&) { return positive_power::result(base); } }; @@ -72,7 +73,7 @@ template struct power_if_positive { template - static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy) + BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy) { if (base == 0) { @@ -91,7 +92,7 @@ template <> struct power_if_positive<0, true> { template - static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy) + BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy) { if (base == 0) { @@ -120,14 +121,14 @@ struct select_power_if_positive template -BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args::type pow(T base, const Policy& policy) +BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args::type pow(T base, const Policy& policy) { using result_type = typename tools::promote_args::type; return detail::select_power_if_positive::type::result(static_cast(base), policy); } template -BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args::type pow(T base) +BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args::type pow(T base) { return pow(base, policies::policy<>()); } #ifdef _MSC_VER From c887a74a7207c08388a941fb3b0ab87b246024ed Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:12:06 -0400 Subject: [PATCH 11/37] Make sin_pi GPU compatible --- include/boost/math/special_functions/math_fwd.hpp | 6 +++--- include/boost/math/special_functions/sin_pi.hpp | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp index 2886621bb..f2af616ab 100644 --- a/include/boost/math/special_functions/math_fwd.hpp +++ b/include/boost/math/special_functions/math_fwd.hpp @@ -858,10 +858,10 @@ namespace boost const Policy&); template - tools::promote_args_t sin_pi(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t sin_pi(T x, const Policy&); template - tools::promote_args_t sin_pi(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t sin_pi(T x); template tools::promote_args_t cos_pi(T x, const Policy&); @@ -1572,7 +1572,7 @@ template \ { boost::math::cyl_neumann_zero(v, start_index, number_of_zeros, out_it, Policy()); }\ \ template \ - inline boost::math::tools::promote_args_t sin_pi(T x){ return boost::math::sin_pi(x, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t sin_pi(T x){ return boost::math::sin_pi(x, Policy()); }\ \ template \ inline boost::math::tools::promote_args_t cos_pi(T x){ return boost::math::cos_pi(x, Policy()); }\ diff --git a/include/boost/math/special_functions/sin_pi.hpp b/include/boost/math/special_functions/sin_pi.hpp index 5b8eb6fcf..d0739fabd 100644 --- a/include/boost/math/special_functions/sin_pi.hpp +++ b/include/boost/math/special_functions/sin_pi.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2007 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -12,7 +13,9 @@ #include #include +#include #include +#include #include #include #include @@ -21,7 +24,7 @@ namespace boost{ namespace math{ namespace detail{ template -inline T sin_pi_imp(T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T sin_pi_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names if(x < 0) @@ -39,7 +42,7 @@ inline T sin_pi_imp(T x, const Policy& pol) invert = false; T rem = floor(x); - if(abs(floor(rem/2)*2 - rem) > std::numeric_limits::epsilon()) + if(abs(floor(rem/2)*2 - rem) > boost::math::numeric_limits::epsilon()) { invert = !invert; } @@ -56,7 +59,7 @@ inline T sin_pi_imp(T x, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type sin_pi(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type sin_pi(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; From 5f4ab0d05253e177b7d08fe5c0e0c1e514e567c3 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:12:23 -0400 Subject: [PATCH 12/37] Make trigamma GPU compatible --- .../boost/math/special_functions/math_fwd.hpp | 6 +-- .../boost/math/special_functions/trigamma.hpp | 52 ++++++++++--------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp index f2af616ab..6c8b64507 100644 --- a/include/boost/math/special_functions/math_fwd.hpp +++ b/include/boost/math/special_functions/math_fwd.hpp @@ -540,10 +540,10 @@ namespace boost // trigamma: template - tools::promote_args_t trigamma(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t trigamma(T x); template - tools::promote_args_t trigamma(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t trigamma(T x, const Policy&); // polygamma: template @@ -1458,7 +1458,7 @@ namespace boost BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t digamma(T x){ return boost::math::digamma(x, Policy()); }\ \ template \ - inline boost::math::tools::promote_args_t trigamma(T x){ return boost::math::trigamma(x, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t trigamma(T x){ return boost::math::trigamma(x, Policy()); }\ \ template \ inline boost::math::tools::promote_args_t polygamma(int n, T x){ return boost::math::polygamma(n, x, Policy()); }\ diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp index f74b43db1..bca334df9 100644 --- a/include/boost/math/special_functions/trigamma.hpp +++ b/include/boost/math/special_functions/trigamma.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,6 +11,7 @@ #pragma once #endif +#include #include #include #include @@ -37,11 +39,11 @@ template T polygamma_imp(const int n, T x, const Policy &pol); template -T trigamma_prec(T x, const std::integral_constant*, const Policy&) +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant*, const Policy&) { // Max error in interpolated form: 3.736e-017 - static const T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469); - static const T P_1_2[] = { + constexpr T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469); + BOOST_MATH_STATIC const T P_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 53, -1.1093280605946045), BOOST_MATH_BIG_CONSTANT(T, 53, -3.8310674472619321), BOOST_MATH_BIG_CONSTANT(T, 53, -3.3703848401898283), @@ -49,7 +51,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 53, 1.6638069578676164), BOOST_MATH_BIG_CONSTANT(T, 53, 0.64468386819102836), }; - static const T Q_1_2[] = { + BOOST_MATH_STATIC const T Q_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 3.4535389668541151), BOOST_MATH_BIG_CONSTANT(T, 53, 4.5208926987851437), @@ -58,7 +60,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 53, -0.20314516859987728e-6), }; // Max error in interpolated form: 1.159e-017 - static const T P_2_4[] = { + BOOST_MATH_STATIC const T P_2_4[] = { BOOST_MATH_BIG_CONSTANT(T, 53, -0.13803835004508849e-7), BOOST_MATH_BIG_CONSTANT(T, 53, 0.50000049158540261), BOOST_MATH_BIG_CONSTANT(T, 53, 1.6077979838469348), @@ -66,7 +68,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 53, 2.0534873203680393), BOOST_MATH_BIG_CONSTANT(T, 53, 0.74566981111565923), }; - static const T Q_2_4[] = { + BOOST_MATH_STATIC const T Q_2_4[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 2.8822787662376169), BOOST_MATH_BIG_CONSTANT(T, 53, 4.1681660554090917), @@ -77,7 +79,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) // Maximum Deviation Found: 6.896e-018 // Expected Error Term : -6.895e-018 // Maximum Relative Change in Control Points : 8.497e-004 - static const T P_4_inf[] = { + BOOST_MATH_STATIC const T P_4_inf[] = { static_cast(0.68947581948701249e-17L), static_cast(0.49999999999998975L), static_cast(1.0177274392923795L), @@ -86,7 +88,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) static_cast(1.5897035272532764L), static_cast(0.40154388356961734L), }; - static const T Q_4_inf[] = { + BOOST_MATH_STATIC const T Q_4_inf[] = { static_cast(1.0L), static_cast(1.7021215452463932L), static_cast(4.4290431747556469L), @@ -110,11 +112,11 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) } template -T trigamma_prec(T x, const std::integral_constant*, const Policy&) +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant*, const Policy&) { // Max error in interpolated form: 1.178e-020 - static const T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875); - static const T P_1_2[] = { + constexpr T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875); + BOOST_MATH_STATIC const T P_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -1.10932535608960258341), BOOST_MATH_BIG_CONSTANT(T, 64, -4.18793841543017129052), BOOST_MATH_BIG_CONSTANT(T, 64, -4.63865531898487734531), @@ -123,7 +125,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 1.21172611429185622377), BOOST_MATH_BIG_CONSTANT(T, 64, 0.259635673503366427284), }; - static const T Q_1_2[] = { + BOOST_MATH_STATIC const T Q_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 3.77521119359546982995), BOOST_MATH_BIG_CONSTANT(T, 64, 5.664338024578956321), @@ -133,7 +135,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 0.629642219810618032207e-8), }; // Max error in interpolated form: 3.912e-020 - static const T P_2_8[] = { + BOOST_MATH_STATIC const T P_2_8[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.387540035162952880976e-11), BOOST_MATH_BIG_CONSTANT(T, 64, 0.500000000276430504), BOOST_MATH_BIG_CONSTANT(T, 64, 3.21926880986360957306), @@ -143,7 +145,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 13.4346512182925923978), BOOST_MATH_BIG_CONSTANT(T, 64, 3.98656291026448279118), }; - static const T Q_2_8[] = { + BOOST_MATH_STATIC const T Q_2_8[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 6.10520430478613667724), BOOST_MATH_BIG_CONSTANT(T, 64, 18.475001060603645512), @@ -156,7 +158,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) // Maximum Deviation Found: 2.635e-020 // Expected Error Term : 2.635e-020 // Maximum Relative Change in Control Points : 1.791e-003 - static const T P_8_inf[] = { + BOOST_MATH_STATIC const T P_8_inf[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.263527875092466899848e-19), BOOST_MATH_BIG_CONSTANT(T, 64, 0.500000000000000058145), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0730121433777364138677), @@ -164,7 +166,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 0.0517092358874932620529), BOOST_MATH_BIG_CONSTANT(T, 64, 1.07995383547483921121), }; - static const T Q_8_inf[] = { + BOOST_MATH_STATIC const T Q_8_inf[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, -0.187309046577818095504), BOOST_MATH_BIG_CONSTANT(T, 64, 3.95255391645238842975), @@ -357,7 +359,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) } template -T trigamma_imp(T x, const Tag* t, const Policy& pol) +BOOST_MATH_GPU_ENABLED T trigamma_imp(T x, const Tag* t, const Policy& pol) { // // This handles reflection of negative arguments, and all our @@ -402,22 +404,24 @@ struct trigamma_initializer { struct init { - init() + BOOST_MATH_GPU_ENABLED init() { typedef typename policies::precision::type precision_type; do_init(std::integral_constant()); } - void do_init(const std::true_type&) + BOOST_MATH_GPU_ENABLED void do_init(const std::true_type&) { boost::math::trigamma(T(2.5), Policy()); } - void do_init(const std::false_type&){} - void force_instantiate()const{} + BOOST_MATH_GPU_ENABLED void do_init(const std::false_type&){} + BOOST_MATH_GPU_ENABLED void force_instantiate()const{} }; static const init initializer; - static void force_instantiate() + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -427,7 +431,7 @@ const typename trigamma_initializer::init trigamma_initializer -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type trigamma(T x, const Policy&) { typedef typename tools::promote_args::type result_type; @@ -455,7 +459,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type trigamma(T x) { return trigamma(x, policies::policy<>()); From 525b18c55f5a90164b0ce41eeb643dee1d0d5bc1 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:12:35 -0400 Subject: [PATCH 13/37] Add trigamma CUDA testing --- test/cuda_jamfile | 2 + test/test_trigamma_double.cu | 100 +++++++++++++++++++++++++++++++++++ test/test_trigamma_float.cu | 100 +++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 test/test_trigamma_double.cu create mode 100644 test/test_trigamma_float.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 42b827cc2..490be5772 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -49,5 +49,7 @@ run test_modf_double.cu ; run test_modf_float.cu ; run test_round_double.cu ; run test_round_float.cu ; +run test_trigamma_double.cu ; +run test_trigamma_float.cu ; run test_trunc_double.cu ; run test_trunc_float.cu ; diff --git a/test/test_trigamma_double.cu b/test/test_trigamma_double.cu new file mode 100644 index 000000000..6780e3e92 --- /dev/null +++ b/test/test_trigamma_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trigamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::trigamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_trigamma_float.cu b/test/test_trigamma_float.cu new file mode 100644 index 000000000..a407a0eb1 --- /dev/null +++ b/test/test_trigamma_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trigamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::trigamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From db7fb655a5b735d28372d2ea633f7436e6ef7214 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:18:20 -0400 Subject: [PATCH 14/37] Rename dispatching function --- include/boost/math/special_functions/trigamma.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp index bca334df9..c4c289515 100644 --- a/include/boost/math/special_functions/trigamma.hpp +++ b/include/boost/math/special_functions/trigamma.hpp @@ -359,7 +359,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) } template -BOOST_MATH_GPU_ENABLED T trigamma_imp(T x, const Tag* t, const Policy& pol) +BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Tag* t, const Policy& pol) { // // This handles reflection of negative arguments, and all our @@ -453,7 +453,7 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type // Force initialization of constants: detail::trigamma_initializer::force_instantiate(); - return policies::checked_narrowing_cast(detail::trigamma_imp( + return policies::checked_narrowing_cast(detail::trigamma_dispatch( static_cast(x), static_cast(nullptr), forwarding_policy()), "boost::math::trigamma<%1%>(%1%)"); } From c049fabe60f97b579ce19c1d6bb9bcdb6aa4a95a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:23:04 -0400 Subject: [PATCH 15/37] Eliminate recursion in sin_pi for SYCL --- include/boost/math/special_functions/sin_pi.hpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/include/boost/math/special_functions/sin_pi.hpp b/include/boost/math/special_functions/sin_pi.hpp index d0739fabd..3c558ab7b 100644 --- a/include/boost/math/special_functions/sin_pi.hpp +++ b/include/boost/math/special_functions/sin_pi.hpp @@ -27,8 +27,6 @@ template BOOST_MATH_GPU_ENABLED inline T sin_pi_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names - if(x < 0) - return -sin_pi_imp(T(-x), pol); // sin of pi*x: if(x < T(0.5)) return sin(constants::pi() * x); @@ -56,6 +54,19 @@ BOOST_MATH_GPU_ENABLED inline T sin_pi_imp(T x, const Policy& pol) return invert ? T(-rem) : rem; } +template +BOOST_MATH_FORCEINLINE BOOST_MATH_GPU_ENABLED T sin_pi_dispatch(T x, const Policy& pol) +{ + if (x < 0) + { + return -sin_pi_imp(-x, pol); + } + else + { + return sin_pi_imp(x, pol); + } +} + } // namespace detail template @@ -72,7 +83,7 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type sin_pi(T x, // We want to ignore overflows since the result is in [-1,1] and the // check slows the code down considerably. policies::overflow_error >::type forwarding_policy; - return policies::checked_narrowing_cast(boost::math::detail::sin_pi_imp(x, forwarding_policy()), "sin_pi"); + return policies::checked_narrowing_cast(boost::math::detail::sin_pi_dispatch(x, forwarding_policy()), "sin_pi"); } template From cfc1ad0351405abb99530d5b8a825fb4356618be Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:25:55 -0400 Subject: [PATCH 16/37] Remove recursion from trigamma --- include/boost/math/special_functions/trigamma.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp index c4c289515..73dbc998b 100644 --- a/include/boost/math/special_functions/trigamma.hpp +++ b/include/boost/math/special_functions/trigamma.hpp @@ -375,13 +375,20 @@ BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Tag* t, const Policy& pol) { // Reflect: T z = 1 - x; + + if(z < 1) + { + result = 1 / (z * z); + z += 1; + } + // Argument reduction for tan: if(floor(x) == x) { return policies::raise_pole_error("boost::math::trigamma<%1%>(%1%)", nullptr, (1-x), pol); } T s = fabs(x) < fabs(z) ? boost::math::sin_pi(x, pol) : boost::math::sin_pi(z, pol); - return -trigamma_imp(z, t, pol) + boost::math::pow<2>(constants::pi()) / (s * s); + return result - trigamma_prec(z, t, pol) + boost::math::pow<2>(constants::pi()) / (s * s); } if(x < 1) { From 0fe35eb4b839acb0709b2d0b63904688fcff54ec Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:26:03 -0400 Subject: [PATCH 17/37] Add trigamma SYCL testing --- test/sycl_jamfile | 1 + test/test_trigamma.cpp | 4 ++++ test/test_trigamma.hpp | 6 ++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/test/sycl_jamfile b/test/sycl_jamfile index 3354a776f..d9e7c9296 100644 --- a/test/sycl_jamfile +++ b/test/sycl_jamfile @@ -21,3 +21,4 @@ run test_round.cpp ; run test_expm1_simple.cpp ; run test_log1p_simple.cpp ; run test_digamma_simple.cpp ; +run test_trigamma.cpp ; diff --git a/test/test_trigamma.cpp b/test/test_trigamma.cpp index dd89898d7..85ba8078a 100644 --- a/test/test_trigamma.cpp +++ b/test/test_trigamma.cpp @@ -1,9 +1,13 @@ // (C) Copyright John Maddock 2014. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif + #include "test_trigamma.hpp" void expected_results() diff --git a/test/test_trigamma.hpp b/test/test_trigamma.hpp index 94a129032..49b1bd550 100644 --- a/test/test_trigamma.hpp +++ b/test/test_trigamma.hpp @@ -1,4 +1,5 @@ -// Copyright John Maddock 2014 +// Copyright John Maddock 2014 +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,9 +10,10 @@ #include #define BOOST_TEST_MAIN #include +#include +#include "../include_private/boost/math/tools/test.hpp" #include #include -#include #include #include #include From 48f8734b4cfddf3bbc05e1b564af018c0a1d6cce Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:40:14 -0400 Subject: [PATCH 18/37] Add sycl testing of pow --- test/pow_test.cpp | 5 ++++- test/sycl_jamfile | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/pow_test.cpp b/test/pow_test.cpp index ce3d036ab..9180ccab5 100644 --- a/test/pow_test.cpp +++ b/test/pow_test.cpp @@ -2,6 +2,7 @@ // Tests the pow function // (C) Copyright Bruno Lalande 2008. +// (C) Copyright Matt Borland 2024. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) @@ -11,7 +12,7 @@ #include #include -#include +#include "../include_private/boost/math/tools/test.hpp" #define BOOST_TEST_MAIN #include #include @@ -37,7 +38,9 @@ void test_pow(T base) if ((base == 0) && N < 0) { + #ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW(math::pow(base), std::overflow_error); + #endif } else { diff --git a/test/sycl_jamfile b/test/sycl_jamfile index d9e7c9296..c0c67d1e5 100644 --- a/test/sycl_jamfile +++ b/test/sycl_jamfile @@ -14,6 +14,7 @@ run test_arcsine.cpp ; run test_cauchy.cpp ; # Special Functions +run pow_test.cpp ; run test_beta_simple.cpp ; run test_cbrt.cpp ; run test_sign.cpp ; From b51d31294fe50ca1c1683cad7b87da65b4fddead Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 11:56:12 -0400 Subject: [PATCH 19/37] Add CUDA sin_pi testing --- test/cuda_jamfile | 2 + test/test_sin_pi_double.cu | 100 +++++++++++++++++++++++++++++++++++++ test/test_sin_pi_float.cu | 100 +++++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 test/test_sin_pi_double.cu create mode 100644 test/test_sin_pi_float.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 490be5772..97102bcb1 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -49,6 +49,8 @@ run test_modf_double.cu ; run test_modf_float.cu ; run test_round_double.cu ; run test_round_float.cu ; +run test_sin_pi_double.cu ; +run test_sin_pi_float.cu ; run test_trigamma_double.cu ; run test_trigamma_float.cu ; run test_trunc_double.cu ; diff --git a/test/test_sin_pi_double.cu b/test/test_sin_pi_double.cu new file mode 100644 index 000000000..0783d5536 --- /dev/null +++ b/test/test_sin_pi_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sin_pi(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::sin_pi(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sin_pi_float.cu b/test/test_sin_pi_float.cu new file mode 100644 index 000000000..9a9f07580 --- /dev/null +++ b/test/test_sin_pi_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sin_pi(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::sin_pi(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From e4519a7b5191d421cef890fb51ba9ff6958330fa Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 13:15:47 -0400 Subject: [PATCH 20/37] Pass integral_constant directly instead of creating pointer --- .../boost/math/special_functions/trigamma.hpp | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp index 73dbc998b..5d10135b9 100644 --- a/include/boost/math/special_functions/trigamma.hpp +++ b/include/boost/math/special_functions/trigamma.hpp @@ -11,6 +11,7 @@ #pragma once #endif +#include #include #include #include @@ -39,7 +40,13 @@ template T polygamma_imp(const int n, T x, const Policy &pol); template -BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant*, const Policy&) +T trigamma_prec(T x, const Policy& pol, const std::integral_constant&) +{ + return polygamma_imp(1, x, pol); +} + +template +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_constant&) { // Max error in interpolated form: 3.736e-017 constexpr T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469); @@ -112,7 +119,7 @@ BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant -BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant*, const Policy&) +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_constant&) { // Max error in interpolated form: 1.178e-020 constexpr T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875); @@ -190,7 +197,7 @@ BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const std::integral_constant -T trigamma_prec(T x, const std::integral_constant*, const Policy&) +T trigamma_prec(T x, const Policy&, const std::integral_constant&) { // Max error in interpolated form: 1.916e-035 @@ -358,8 +365,8 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) return (1 + tools::evaluate_polynomial(P_16_inf, y) / tools::evaluate_polynomial(Q_16_inf, y)) / x; } -template -BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Tag* t, const Policy& pol) +template +BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Policy& pol, const Tag& tag) { // // This handles reflection of negative arguments, and all our @@ -388,21 +395,16 @@ BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Tag* t, const Policy& pol) return policies::raise_pole_error("boost::math::trigamma<%1%>(%1%)", nullptr, (1-x), pol); } T s = fabs(x) < fabs(z) ? boost::math::sin_pi(x, pol) : boost::math::sin_pi(z, pol); - return result - trigamma_prec(z, t, pol) + boost::math::pow<2>(constants::pi()) / (s * s); + return result - trigamma_prec(z, pol, tag) + boost::math::pow<2>(constants::pi()) / (s * s); } if(x < 1) { result = 1 / (x * x); x += 1; } - return result + trigamma_prec(x, t, pol); + return result + trigamma_prec(x, pol, tag); } -template -T trigamma_imp(T x, const std::integral_constant*, const Policy& pol) -{ - return polygamma_imp(1, x, pol); -} // // Initializer: ensure all our constants are initialized prior to the first call of main: // @@ -462,7 +464,8 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::trigamma_dispatch( static_cast(x), - static_cast(nullptr), forwarding_policy()), "boost::math::trigamma<%1%>(%1%)"); + forwarding_policy(), + tag_type()), "boost::math::trigamma<%1%>(%1%)"); } template From 20f17d48819ae865d68283c88da89573a0743022 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 14:29:16 -0400 Subject: [PATCH 21/37] Add missing include guards for numeric limits --- include/boost/math/tools/numeric_limits.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/boost/math/tools/numeric_limits.hpp b/include/boost/math/tools/numeric_limits.hpp index 0ca02812a..acfd476e4 100644 --- a/include/boost/math/tools/numeric_limits.hpp +++ b/include/boost/math/tools/numeric_limits.hpp @@ -8,6 +8,9 @@ // and libcu++ does not provide something analogous. // Rather than using giant if else blocks make our own version of numeric limits +#ifndef BOOST_MATH_TOOLS_NUMERIC_LIMITS_HPP +#define BOOST_MATH_TOOLS_NUMERIC_LIMITS_HPP + #include #include #include @@ -115,3 +118,5 @@ struct numeric_limits } // namespace math } // namespace boost + +#endif From 85e2e4c0f4a96cfd07731d680f5391d1ab994396 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 14:29:56 -0400 Subject: [PATCH 22/37] Make cos_pi GPU capable --- include/boost/math/special_functions/cos_pi.hpp | 12 +++++++----- include/boost/math/special_functions/math_fwd.hpp | 6 +++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/boost/math/special_functions/cos_pi.hpp b/include/boost/math/special_functions/cos_pi.hpp index e09700ec5..5a7c742b4 100644 --- a/include/boost/math/special_functions/cos_pi.hpp +++ b/include/boost/math/special_functions/cos_pi.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2007 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -12,8 +13,9 @@ #include #include -#include #include +#include +#include #include #include #include @@ -21,7 +23,7 @@ namespace boost{ namespace math{ namespace detail{ template -T cos_pi_imp(T x, const Policy&) +BOOST_MATH_GPU_ENABLED T cos_pi_imp(T x, const Policy&) { BOOST_MATH_STD_USING // ADL of std names // cos of pi*x: @@ -34,7 +36,7 @@ T cos_pi_imp(T x, const Policy&) x = -x; } T rem = floor(x); - if(abs(floor(rem/2)*2 - rem) > std::numeric_limits::epsilon()) + if(abs(floor(rem/2)*2 - rem) > boost::math::numeric_limits::epsilon()) { invert = !invert; } @@ -60,7 +62,7 @@ T cos_pi_imp(T x, const Policy&) } // namespace detail template -inline typename tools::promote_args::type cos_pi(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cos_pi(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -77,7 +79,7 @@ inline typename tools::promote_args::type cos_pi(T x, const Policy&) } template -inline typename tools::promote_args::type cos_pi(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cos_pi(T x) { return boost::math::cos_pi(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp index 6c8b64507..5e8153734 100644 --- a/include/boost/math/special_functions/math_fwd.hpp +++ b/include/boost/math/special_functions/math_fwd.hpp @@ -864,10 +864,10 @@ namespace boost BOOST_MATH_GPU_ENABLED tools::promote_args_t sin_pi(T x); template - tools::promote_args_t cos_pi(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t cos_pi(T x, const Policy&); template - tools::promote_args_t cos_pi(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t cos_pi(T x); template int fpclassify BOOST_NO_MACRO_EXPAND(T t); @@ -1575,7 +1575,7 @@ template \ BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t sin_pi(T x){ return boost::math::sin_pi(x, Policy()); }\ \ template \ - inline boost::math::tools::promote_args_t cos_pi(T x){ return boost::math::cos_pi(x, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t cos_pi(T x){ return boost::math::cos_pi(x, Policy()); }\ \ using boost::math::fpclassify;\ using boost::math::isfinite;\ From 3c598df2413707b2f7f6fa4f38e6076711059150 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 14:30:08 -0400 Subject: [PATCH 23/37] Add cos_pi CUDA testing --- test/cuda_jamfile | 2 + test/test_cos_pi_double.cu | 100 +++++++++++++++++++++++++++++++++++++ test/test_cos_pi_float.cu | 100 +++++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 test/test_cos_pi_double.cu create mode 100644 test/test_cos_pi_float.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 97102bcb1..709289389 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -39,6 +39,8 @@ run test_cbrt_double.cu ; run test_cbrt_float.cu ; run test_changesign_double.cu ; run test_changesign_float.cu ; +run test_cos_pi_double.cu ; +run test_cos_pi_float.cu ; run test_digamma_double.cu ; run test_digamma_float.cu ; run test_expm1_double.cu ; diff --git a/test/test_cos_pi_double.cu b/test/test_cos_pi_double.cu new file mode 100644 index 000000000..5a66b25ce --- /dev/null +++ b/test/test_cos_pi_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::cos_pi(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::cos_pi(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cos_pi_float.cu b/test/test_cos_pi_float.cu new file mode 100644 index 000000000..6a04d8e04 --- /dev/null +++ b/test/test_cos_pi_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::cos_pi(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::cos_pi(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 502f04fe45ce7cce102adca5a8da102c231e72fa Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 14:49:34 -0400 Subject: [PATCH 24/37] Add all the integer types to numeric limits --- include/boost/math/tools/numeric_limits.hpp | 306 +++++++++++++++++++- 1 file changed, 305 insertions(+), 1 deletion(-) diff --git a/include/boost/math/tools/numeric_limits.hpp b/include/boost/math/tools/numeric_limits.hpp index acfd476e4..c49d50bd8 100644 --- a/include/boost/math/tools/numeric_limits.hpp +++ b/include/boost/math/tools/numeric_limits.hpp @@ -114,7 +114,311 @@ struct numeric_limits BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double denorm_min () { return DBL_TRUE_MIN; } }; -#endif +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (min) () { return SHRT_MIN; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (max) () { return SHRT_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short lowest () { return SHRT_MIN; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short epsilon () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short round_error () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short infinity () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short quiet_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short signaling_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short denorm_min () { return 0; } +}; + +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (min) () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (max) () { return USHRT_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short lowest () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short epsilon () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short round_error () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short infinity () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short quiet_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short signaling_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short denorm_min () { return 0; } +}; + +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (min) () { return INT_MIN; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (max) () { return INT_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int lowest () { return INT_MIN; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int epsilon () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int round_error () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int infinity () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int quiet_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int signaling_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int denorm_min () { return 0; } +}; + +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (min) () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (max) () { return UINT_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int lowest () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int epsilon () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int round_error () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int infinity () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int quiet_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int signaling_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int denorm_min () { return 0; } +}; + +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (min) () { return LONG_MIN; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (max) () { return LONG_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long lowest () { return LONG_MIN; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long epsilon () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long round_error () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long infinity () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long quiet_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long signaling_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long denorm_min () { return 0; } +}; + +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (min) () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (max) () { return ULONG_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long lowest () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long epsilon () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long round_error () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long infinity () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long quiet_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long signaling_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long denorm_min () { return 0; } +}; + +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (min) () { return LLONG_MIN; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (max) () { return LLONG_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long lowest () { return LLONG_MIN; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long epsilon () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long round_error () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long infinity () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long quiet_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long signaling_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long denorm_min () { return 0; } +}; + +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (min) () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (max) () { return ULLONG_MAX; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long lowest () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long epsilon () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long round_error () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long infinity () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long quiet_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long signaling_NaN () { return 0; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long denorm_min () { return 0; } +}; + +#endif // BOOST_MATH_HAS_GPU_SUPPORT } // namespace math } // namespace boost From 8a5796aead8e1ee7e95bf84acc3777b3e1d9c69e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 14:51:37 -0400 Subject: [PATCH 25/37] Simplify error handling using the new numeric limits --- .../boost/math/policies/error_handling.hpp | 76 ++++--------------- 1 file changed, 15 insertions(+), 61 deletions(-) diff --git a/include/boost/math/policies/error_handling.hpp b/include/boost/math/policies/error_handling.hpp index 36ec8d9d6..f3811d919 100644 --- a/include/boost/math/policies/error_handling.hpp +++ b/include/boost/math/policies/error_handling.hpp @@ -1,6 +1,6 @@ // Copyright John Maddock 2007. // Copyright Paul A. Bristow 2007. - +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,6 +9,7 @@ #define BOOST_MATH_POLICY_ERROR_HANDLING_HPP #include +#include #include #include #include @@ -304,7 +305,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( #else raise_error(function, message ? message : "numeric overflow"); // We should never get here: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); #endif } @@ -320,7 +321,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( #else raise_error(function, message ? message : "numeric overflow", val); // We should never get here: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); #endif } @@ -332,7 +333,7 @@ BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } #ifdef BOOST_MATH_HAS_GPU_SUPPORT @@ -370,7 +371,7 @@ BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template @@ -382,7 +383,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( errno = ERANGE; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template @@ -395,7 +396,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( errno = ERANGE; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template @@ -404,7 +405,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* message, const ::boost::math::policies::overflow_error< ::boost::math::policies::user_error>&) { - return user_overflow_error(function, message, std::numeric_limits::infinity()); + return user_overflow_error(function, message, boost::math::numeric_limits::infinity()); } template @@ -418,7 +419,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( std::string sval = prec_format(val); replace_all_in_string(m, "%1%", sval.c_str()); - return user_overflow_error(function, m.c_str(), std::numeric_limits::infinity()); + return user_overflow_error(function, m.c_str(), boost::math::numeric_limits::infinity()); } template @@ -597,55 +598,8 @@ BOOST_MATH_GPU_ENABLED constexpr TargetType raise_rounding_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - static_assert(std::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); - #ifndef BOOST_MATH_HAS_GPU_SUPPORT - return val > 0 ? (std::numeric_limits::max)() : (std::numeric_limits::is_integer ? (std::numeric_limits::min)() : -(std::numeric_limits::max)()); - #else - BOOST_IF_CONSTEXPR (std::is_same::value) - { - return val > 0 ? SHRT_MAX : SHRT_MIN; - } - else BOOST_IF_CONSTEXPR (std::is_same::value) - { - return val > 0 ? INT_MAX : INT_MIN; - } - else BOOST_IF_CONSTEXPR (std::is_same::value) - { - return val > 0 ? LONG_MAX : LONG_MIN; - } - else BOOST_IF_CONSTEXPR (std::is_same::value) - { - return val > 0 ? LLONG_MAX : LLONG_MIN; - } - else BOOST_IF_CONSTEXPR (std::is_same::value) - { - return val > 0 ? USHRT_MAX : static_cast(0U); - } - else BOOST_IF_CONSTEXPR (std::is_same::value) - { - return val > 0 ? UINT_MAX : 0U; - } - else BOOST_IF_CONSTEXPR (std::is_same::value) - { - return val > 0 ? ULONG_MAX : 0UL; - } - else BOOST_IF_CONSTEXPR (std::is_same::value) - { - return val > 0 ? ULLONG_MAX : 0ULL; - } - else BOOST_IF_CONSTEXPR (std::is_same::value) - { - return val > 0 ? FLT_MAX : -FLT_MAX; - } - else BOOST_IF_CONSTEXPR (std::is_same::value) - { - return val > 0 ? DBL_MAX : -DBL_MAX; - } - else - { - return val > 0 ? static_cast(-1) : static_cast(1); - } - #endif + static_assert(boost::math::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); + return val > 0 ? (boost::math::numeric_limits::max)() : (boost::math::numeric_limits::is_integer ? (boost::math::numeric_limits::min)() : -(boost::math::numeric_limits::max)()); } template @@ -659,8 +613,8 @@ BOOST_MATH_GPU_ENABLED inline TargetType raise_rounding_error( errno = ERANGE; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - static_assert(std::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); - return val > 0 ? (std::numeric_limits::max)() : (std::numeric_limits::is_integer ? (std::numeric_limits::min)() : -(std::numeric_limits::max)()); + static_assert(boost::math::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); + return val > 0 ? (boost::math::numeric_limits::max)() : (boost::math::numeric_limits::is_integer ? (boost::math::numeric_limits::min)() : -(boost::math::numeric_limits::max)()); } template BOOST_MATH_GPU_ENABLED inline TargetType raise_rounding_error( @@ -686,7 +640,7 @@ BOOST_MATH_GPU_ENABLED inline T raise_indeterminate_result_error( #else raise_error(function, message, val); // we never get here: - return std::numeric_limits::quiet_NaN(); + return boost::math::numeric_limits::quiet_NaN(); #endif } From 0210fa39bc9d2534ddfa8980d5c891c8a9b61031 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 15:26:40 -0400 Subject: [PATCH 26/37] Add GPU support to derived accessors --- .../detail/derived_accessors.hpp | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/include/boost/math/distributions/detail/derived_accessors.hpp b/include/boost/math/distributions/detail/derived_accessors.hpp index eb76409a1..9d39f2d59 100644 --- a/include/boost/math/distributions/detail/derived_accessors.hpp +++ b/include/boost/math/distributions/detail/derived_accessors.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -29,6 +30,7 @@ #include #include +#include #ifdef _MSC_VER # pragma warning(push) @@ -39,24 +41,24 @@ namespace boost{ namespace math{ template -typename Distribution::value_type variance(const Distribution& dist); +BOOST_MATH_GPU_ENABLED typename Distribution::value_type variance(const Distribution& dist); template -inline typename Distribution::value_type standard_deviation(const Distribution& dist) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type standard_deviation(const Distribution& dist) { BOOST_MATH_STD_USING // ADL of sqrt. return sqrt(variance(dist)); } template -inline typename Distribution::value_type variance(const Distribution& dist) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type variance(const Distribution& dist) { typename Distribution::value_type result = standard_deviation(dist); return result * result; } template -inline typename Distribution::value_type hazard(const Distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type hazard(const Distribution& dist, const RealType& x) { // hazard function // http://www.itl.nist.gov/div898/handbook/eda/section3/eda362.htm#HAZ typedef typename Distribution::value_type value_type; @@ -75,7 +77,7 @@ inline typename Distribution::value_type hazard(const Distribution& dist, const } template -inline typename Distribution::value_type chf(const Distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type chf(const Distribution& dist, const RealType& x) { // cumulative hazard function. // http://www.itl.nist.gov/div898/handbook/eda/section3/eda362.htm#HAZ BOOST_MATH_STD_USING @@ -83,7 +85,7 @@ inline typename Distribution::value_type chf(const Distribution& dist, const Rea } template -inline typename Distribution::value_type coefficient_of_variation(const Distribution& dist) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type coefficient_of_variation(const Distribution& dist) { typedef typename Distribution::value_type value_type; typedef typename Distribution::policy_type policy_type; @@ -104,33 +106,33 @@ inline typename Distribution::value_type coefficient_of_variation(const Distribu // implementation with all arguments of the same type: // template -inline typename Distribution::value_type pdf(const Distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type pdf(const Distribution& dist, const RealType& x) { typedef typename Distribution::value_type value_type; return pdf(dist, static_cast(x)); } template -inline typename Distribution::value_type logpdf(const Distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logpdf(const Distribution& dist, const RealType& x) { using std::log; typedef typename Distribution::value_type value_type; return log(pdf(dist, static_cast(x))); } template -inline typename Distribution::value_type cdf(const Distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type cdf(const Distribution& dist, const RealType& x) { typedef typename Distribution::value_type value_type; return cdf(dist, static_cast(x)); } template -inline typename Distribution::value_type logcdf(const Distribution& dist, const Realtype& x) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logcdf(const Distribution& dist, const Realtype& x) { using std::log; using value_type = typename Distribution::value_type; return log(cdf(dist, static_cast(x))); } template -inline typename Distribution::value_type quantile(const Distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type quantile(const Distribution& dist, const RealType& x) { typedef typename Distribution::value_type value_type; return quantile(dist, static_cast(x)); @@ -144,14 +146,14 @@ inline typename Distribution::value_type chf(const Distribution& dist, const Rea } */ template -inline typename Distribution::value_type cdf(const complemented2_type& c) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type cdf(const complemented2_type& c) { typedef typename Distribution::value_type value_type; return cdf(complement(c.dist, static_cast(c.param))); } template -inline typename Distribution::value_type logcdf(const complemented2_type& c) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logcdf(const complemented2_type& c) { using std::log; typedef typename Distribution::value_type value_type; @@ -159,14 +161,14 @@ inline typename Distribution::value_type logcdf(const complemented2_type -inline typename Distribution::value_type quantile(const complemented2_type& c) +BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type quantile(const complemented2_type& c) { typedef typename Distribution::value_type value_type; return quantile(complement(c.dist, static_cast(c.param))); } template -inline typename Dist::value_type median(const Dist& d) +BOOST_MATH_GPU_ENABLED inline typename Dist::value_type median(const Dist& d) { // median - default definition for those distributions for which a // simple closed form is not known, // and for which a domain_error and/or NaN generating function is NOT defined. From 226e42ef09aefdef3b4ed620817a677e141fe9eb Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 15:26:54 -0400 Subject: [PATCH 27/37] Add GPU support to complement distributions --- .../boost/math/distributions/complement.hpp | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/include/boost/math/distributions/complement.hpp b/include/boost/math/distributions/complement.hpp index 5c062a7cd..c63b8a504 100644 --- a/include/boost/math/distributions/complement.hpp +++ b/include/boost/math/distributions/complement.hpp @@ -1,5 +1,6 @@ // (C) Copyright John Maddock 2006. // (C) Copyright Paul A. Bristow 2006. +// (C) Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -7,6 +8,8 @@ #ifndef BOOST_STATS_COMPLEMENT_HPP #define BOOST_STATS_COMPLEMENT_HPP +#include + // // This code really defines our own tuple type. // It would be nice to reuse boost::math::tuple @@ -19,7 +22,7 @@ namespace boost{ namespace math{ template struct complemented2_type { - complemented2_type( + BOOST_MATH_GPU_ENABLED complemented2_type( const Dist& d, const RealType& p1) : dist(d), @@ -35,7 +38,7 @@ struct complemented2_type template struct complemented3_type { - complemented3_type( + BOOST_MATH_GPU_ENABLED complemented3_type( const Dist& d, const RealType1& p1, const RealType2& p2) @@ -53,7 +56,7 @@ struct complemented3_type template struct complemented4_type { - complemented4_type( + BOOST_MATH_GPU_ENABLED complemented4_type( const Dist& d, const RealType1& p1, const RealType2& p2, @@ -74,7 +77,7 @@ struct complemented4_type template struct complemented5_type { - complemented5_type( + BOOST_MATH_GPU_ENABLED complemented5_type( const Dist& d, const RealType1& p1, const RealType2& p2, @@ -98,7 +101,7 @@ struct complemented5_type template struct complemented6_type { - complemented6_type( + BOOST_MATH_GPU_ENABLED complemented6_type( const Dist& d, const RealType1& p1, const RealType2& p2, @@ -125,7 +128,7 @@ struct complemented6_type template struct complemented7_type { - complemented7_type( + BOOST_MATH_GPU_ENABLED complemented7_type( const Dist& d, const RealType1& p1, const RealType2& p2, @@ -153,37 +156,37 @@ struct complemented7_type }; template -inline complemented2_type complement(const Dist& d, const RealType& r) +BOOST_MATH_GPU_ENABLED inline complemented2_type complement(const Dist& d, const RealType& r) { return complemented2_type(d, r); } template -inline complemented3_type complement(const Dist& d, const RealType1& r1, const RealType2& r2) +BOOST_MATH_GPU_ENABLED inline complemented3_type complement(const Dist& d, const RealType1& r1, const RealType2& r2) { return complemented3_type(d, r1, r2); } template -inline complemented4_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3) +BOOST_MATH_GPU_ENABLED inline complemented4_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3) { return complemented4_type(d, r1, r2, r3); } template -inline complemented5_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4) +BOOST_MATH_GPU_ENABLED inline complemented5_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4) { return complemented5_type(d, r1, r2, r3, r4); } template -inline complemented6_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5) +BOOST_MATH_GPU_ENABLED inline complemented6_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5) { return complemented6_type(d, r1, r2, r3, r4, r5); } template -inline complemented7_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5, const RealType6& r6) +BOOST_MATH_GPU_ENABLED inline complemented7_type complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5, const RealType6& r6) { return complemented7_type(d, r1, r2, r3, r4, r5, r6); } From fcbcf046e8333207188443d92c08f8418b7aefe7 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 15:27:09 -0400 Subject: [PATCH 28/37] Add GPU support to the bernoulli distribution --- .../boost/math/distributions/bernoulli.hpp | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/include/boost/math/distributions/bernoulli.hpp b/include/boost/math/distributions/bernoulli.hpp index cce209a6f..bb7dd22a5 100644 --- a/include/boost/math/distributions/bernoulli.hpp +++ b/include/boost/math/distributions/bernoulli.hpp @@ -2,6 +2,7 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2007. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -43,7 +44,7 @@ namespace boost { // Common error checking routines for bernoulli distribution functions: template - inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) + BOOST_MATH_GPU_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) { if(!(boost::math::isfinite)(p) || (p < 0) || (p > 1)) { @@ -55,23 +56,23 @@ namespace boost return true; } template - inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */, const std::true_type&) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */, const std::true_type&) { return check_success_fraction(function, p, result, Policy()); } template - inline bool check_dist(const char* , const RealType& , RealType* , const Policy& /* pol */, const std::false_type&) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* , const RealType& , RealType* , const Policy& /* pol */, const std::false_type&) { return true; } template - inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) { return check_dist(function, p, result, Policy(), typename policies::constructor_error_check::type()); } template - inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol) { if(check_dist(function, p, result, Policy(), typename policies::method_error_check::type()) == false) { @@ -87,7 +88,7 @@ namespace boost return true; } template - inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& /* pol */) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& /* pol */) { if((check_dist(function, p, result, Policy(), typename policies::method_error_check::type()) && detail::check_probability(function, prob, result, Policy())) == false) { @@ -105,7 +106,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - bernoulli_distribution(RealType p = 0.5) : m_p(p) + BOOST_MATH_GPU_ENABLED bernoulli_distribution(RealType p = 0.5) : m_p(p) { // Default probability = half suits 'fair' coin tossing // where probability of heads == probability of tails. RealType result; // of checks. @@ -115,7 +116,7 @@ namespace boost &result, Policy()); } // bernoulli_distribution constructor. - RealType success_fraction() const + BOOST_MATH_GPU_ENABLED RealType success_fraction() const { // Probability. return m_p; } @@ -132,21 +133,21 @@ namespace boost #endif template - inline const std::pair range(const bernoulli_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const std::pair range(const bernoulli_distribution& /* dist */) { // Range of permissible values for random variable k = {0, 1}. using boost::math::tools::max_value; return std::pair(static_cast(0), static_cast(1)); } template - inline const std::pair support(const bernoulli_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const std::pair support(const bernoulli_distribution& /* dist */) { // Range of supported values for random variable k = {0, 1}. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. return std::pair(static_cast(0), static_cast(1)); } template - inline RealType mean(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const bernoulli_distribution& dist) { // Mean of bernoulli distribution = p (n = 1). return dist.success_fraction(); } // mean @@ -159,13 +160,13 @@ namespace boost //} // median template - inline RealType variance(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const bernoulli_distribution& dist) { // Variance of bernoulli distribution =p * q. return dist.success_fraction() * (1 - dist.success_fraction()); } // variance template - RealType pdf(const bernoulli_distribution& dist, const RealType& k) + BOOST_MATH_GPU_ENABLED RealType pdf(const bernoulli_distribution& dist, const RealType& k) { // Probability Density/Mass Function. BOOST_FPU_EXCEPTION_GUARD // Error check: @@ -190,7 +191,7 @@ namespace boost } // pdf template - inline RealType cdf(const bernoulli_distribution& dist, const RealType& k) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const bernoulli_distribution& dist, const RealType& k) { // Cumulative Distribution Function Bernoulli. RealType p = dist.success_fraction(); // Error check: @@ -214,7 +215,7 @@ namespace boost } // bernoulli cdf template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function bernoulli. RealType const& k = c.param; bernoulli_distribution const& dist = c.dist; @@ -240,7 +241,7 @@ namespace boost } // bernoulli cdf complement template - inline RealType quantile(const bernoulli_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const bernoulli_distribution& dist, const RealType& p) { // Quantile or Percent Point Bernoulli function. // Return the number of expected successes k either 0 or 1. // for a given probability p. @@ -265,7 +266,7 @@ namespace boost } // quantile template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { // Quantile or Percent Point bernoulli function. // Return the number of expected successes k for a given // complement of the probability q. @@ -294,13 +295,13 @@ namespace boost } // quantile complemented. template - inline RealType mode(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const bernoulli_distribution& dist) { return static_cast((dist.success_fraction() <= 0.5) ? 0 : 1); // p = 0.5 can be 0 or 1 } template - inline RealType skewness(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const bernoulli_distribution& dist) { BOOST_MATH_STD_USING; // Aid ADL for sqrt. RealType p = dist.success_fraction(); @@ -308,7 +309,7 @@ namespace boost } template - inline RealType kurtosis_excess(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const bernoulli_distribution& dist) { RealType p = dist.success_fraction(); // Note Wolfram says this is kurtosis in text, but gamma2 is the kurtosis excess, @@ -319,7 +320,7 @@ namespace boost } template - inline RealType kurtosis(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const bernoulli_distribution& dist) { RealType p = dist.success_fraction(); return 1 / (1 - p) + 1/p -6 + 3; From 6bf19ff85918dfc97ac676810cd9f6eb7a48bb90 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 15:27:26 -0400 Subject: [PATCH 29/37] Add SYCL testing of bernoulli distribution --- test/sycl_jamfile | 1 + test/test_bernoulli.cpp | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/test/sycl_jamfile b/test/sycl_jamfile index c0c67d1e5..92580c93b 100644 --- a/test/sycl_jamfile +++ b/test/sycl_jamfile @@ -11,6 +11,7 @@ project : requirements # Distributions run test_arcsine.cpp ; +run test_bernoulli.cpp ; run test_cauchy.cpp ; # Special Functions diff --git a/test/test_bernoulli.cpp b/test/test_bernoulli.cpp index d8c663399..8513cec36 100644 --- a/test/test_bernoulli.cpp +++ b/test/test_bernoulli.cpp @@ -2,6 +2,7 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2007, 2012. +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -22,7 +23,7 @@ #include // for real_concept using ::boost::math::concepts::real_concept; -#include +#include "../include_private/boost/math/tools/test.hpp" #include // for bernoulli_distribution using boost::math::bernoulli_distribution; @@ -74,6 +75,7 @@ void test_spots(RealType) BOOST_CHECK_EQUAL(bernoulli_distribution(static_cast(0.1L)).success_fraction(), static_cast(0.1L)); BOOST_CHECK_EQUAL(bernoulli_distribution(static_cast(0.9L)).success_fraction(), static_cast(0.9L)); +#ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW( // Constructor success_fraction outside 0 to 1. bernoulli_distribution(static_cast(2)), std::domain_error); BOOST_MATH_CHECK_THROW( @@ -86,7 +88,8 @@ void test_spots(RealType) BOOST_MATH_CHECK_THROW( pdf( // pdf k neither 0 nor 1. bernoulli_distribution(static_cast(0.25L)), static_cast(2)), std::domain_error); - +#endif + BOOST_CHECK_EQUAL( pdf( // OK k (or n) bernoulli_distribution(static_cast(0.5L)), static_cast(0)), @@ -134,6 +137,7 @@ void test_spots(RealType) static_cast(5.11111111111111111111111111111111111111111111L), tolerance); +#ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW( quantile( bernoulli_distribution(static_cast(2)), // prob >1 @@ -154,6 +158,7 @@ void test_spots(RealType) bernoulli_distribution(static_cast(0.5L)), // k < 0 static_cast(2)), std::domain_error ); +#endif BOOST_CHECK_CLOSE_FRACTION( cdf( @@ -217,6 +222,7 @@ void test_spots(RealType) // Checks for 'bad' parameters. // Construction. + #ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW(bernoulli_distribution(-1), std::domain_error); // p outside 0 to 1. BOOST_MATH_CHECK_THROW(bernoulli_distribution(+2), std::domain_error); // p outside 0 to 1. @@ -269,7 +275,7 @@ void test_spots(RealType) BOOST_MATH_CHECK_THROW(quantile(w, +inf), std::domain_error); // p = + inf BOOST_MATH_CHECK_THROW(quantile(complement(w, +inf)), std::domain_error); // p = + inf } // has_infinity - + #endif } // template void test_spots(RealType) BOOST_AUTO_TEST_CASE( test_main ) @@ -302,7 +308,9 @@ BOOST_AUTO_TEST_CASE( test_main ) // (Parameter value, arbitrarily zero, only communicates the floating point type). test_spots(0.0F); // Test float. test_spots(0.0); // Test double. +#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS test_spots(0.0L); // Test long double. +#endif #if !BOOST_WORKAROUND(BOOST_BORLANDC, BOOST_TESTED_AT(0x582)) && !defined(BOOST_MATH_NO_REAL_CONCEPT_TESTS) test_spots(boost::math::concepts::real_concept(0.)); // Test real concept. #endif From 0e7ab64840283cb4b5e10fd8f2fa62f9222d1b8c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 15:45:21 -0400 Subject: [PATCH 30/37] Add bernoulli cdf CUDA testing --- test/cuda_jamfile | 2 + test/test_bernoulli_cdf_double.cu | 110 ++++++++++++++++++++++++++++++ test/test_bernoulli_cdf_float.cu | 110 ++++++++++++++++++++++++++++++ 3 files changed, 222 insertions(+) create mode 100644 test/test_bernoulli_cdf_double.cu create mode 100644 test/test_bernoulli_cdf_float.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 709289389..5741b61dc 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -23,6 +23,8 @@ run test_arcsine_pdf_double.cu ; run test_arcsine_pdf_float.cu ; run test_arcsine_quan_double.cu ; run test_arcsine_quan_float.cu ; +run test_bernoulli_cdf_double.cu ; +run test_bernoulli_cdf_float.cu ; run test_binomial.cpp ; run test_cauchy_cdf_double.cu ; run test_cauchy_cdf_float.cu ; diff --git a/test/test_bernoulli_cdf_double.cu b/test/test_bernoulli_cdf_double.cu new file mode 100644 index 000000000..e4c21ca06 --- /dev/null +++ b/test/test_bernoulli_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::bernoulli_distribution(in1[i]), static_cast(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::bernoulli_distribution(input_vector1[i]), static_cast(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_bernoulli_cdf_float.cu b/test/test_bernoulli_cdf_float.cu new file mode 100644 index 000000000..82c0eabc0 --- /dev/null +++ b/test/test_bernoulli_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::bernoulli_distribution(in1[i]), static_cast(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::bernoulli_distribution(input_vector1[i]), static_cast(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} From 044f164fb0d61eb5aa257f7fde8fcdb1dfdf5407 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 25 Jul 2024 15:47:15 -0400 Subject: [PATCH 31/37] Add bernoulli pdf CUDA testing --- test/cuda_jamfile | 2 + test/test_bernoulli_pdf_double.cu | 110 ++++++++++++++++++++++++++++++ test/test_bernoulli_pdf_float.cu | 110 ++++++++++++++++++++++++++++++ 3 files changed, 222 insertions(+) create mode 100644 test/test_bernoulli_pdf_double.cu create mode 100644 test/test_bernoulli_pdf_float.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 5741b61dc..90ecd3c16 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -25,6 +25,8 @@ run test_arcsine_quan_double.cu ; run test_arcsine_quan_float.cu ; run test_bernoulli_cdf_double.cu ; run test_bernoulli_cdf_float.cu ; +run test_bernoulli_pdf_double.cu ; +run test_bernoulli_pdf_float.cu ; run test_binomial.cpp ; run test_cauchy_cdf_double.cu ; run test_cauchy_cdf_float.cu ; diff --git a/test/test_bernoulli_pdf_double.cu b/test/test_bernoulli_pdf_double.cu new file mode 100644 index 000000000..24b33c16c --- /dev/null +++ b/test/test_bernoulli_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::bernoulli_distribution(in1[i]), static_cast(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::bernoulli_distribution(input_vector1[i]), static_cast(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_bernoulli_pdf_float.cu b/test/test_bernoulli_pdf_float.cu new file mode 100644 index 000000000..08d2ca5a0 --- /dev/null +++ b/test/test_bernoulli_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::bernoulli_distribution(in1[i]), static_cast(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::bernoulli_distribution(input_vector1[i]), static_cast(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} From 8e696df7a289e108ce28a3f67b0a06c99299f77b Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 26 Jul 2024 11:08:54 -0400 Subject: [PATCH 32/37] Add bool to numeric limits for completeness --- include/boost/math/tools/numeric_limits.hpp | 38 +++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/include/boost/math/tools/numeric_limits.hpp b/include/boost/math/tools/numeric_limits.hpp index c49d50bd8..7d3d5db26 100644 --- a/include/boost/math/tools/numeric_limits.hpp +++ b/include/boost/math/tools/numeric_limits.hpp @@ -418,6 +418,44 @@ struct numeric_limits BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long denorm_min () { return 0; } }; +template <> +struct numeric_limits +{ + BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits::is_specialized; + BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits::is_signed; + BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits::is_integer; + BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits::is_exact; + BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits::has_infinity; + BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits::has_quiet_NaN; + BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits::has_signaling_NaN; + + BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits::round_style; + BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits::is_iec559; + BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits::is_bounded; + BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits::is_modulo; + BOOST_MATH_STATIC constexpr int digits = std::numeric_limits::digits; + BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits::digits10; + BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits::max_digits10; + BOOST_MATH_STATIC constexpr int radix = std::numeric_limits::radix; + BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits::min_exponent; + BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits::min_exponent10; + BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits::max_exponent; + BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits::max_exponent10; + BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits::traps; + BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits::tinyness_before; + + // Member Functions + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (min) () { return false; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (max) () { return true; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool lowest () { return false; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool epsilon () { return false; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool round_error () { return false; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool infinity () { return false; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool quiet_NaN () { return false; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool signaling_NaN () { return false; } + BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool denorm_min () { return false; } +}; + #endif // BOOST_MATH_HAS_GPU_SUPPORT } // namespace math From f8a0f5485867c7f0646d96283015fe9218ad59e1 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 26 Jul 2024 11:54:29 -0400 Subject: [PATCH 33/37] Fix test poisson ambiguity with numeric_limits --- test/test_poisson.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_poisson.cpp b/test/test_poisson.cpp index 9b75ce162..73de4751e 100644 --- a/test/test_poisson.cpp +++ b/test/test_poisson.cpp @@ -53,12 +53,12 @@ void test_spots(RealType) // guaranteed for type RealType, eg 6 for float, 15 for double, // expressed as a percentage (so -2) for BOOST_CHECK_CLOSE, - int decdigits = numeric_limits::digits10; + int decdigits = std::numeric_limits::digits10; // May eb >15 for 80 and 128-bit FP types. if (decdigits <= 0) { // decdigits is not defined, for example real concept, // so assume precision of most test data is double (for example, MathCAD). - decdigits = numeric_limits::digits10; // == 15 for 64-bit + decdigits = std::numeric_limits::digits10; // == 15 for 64-bit } if (decdigits > 15 ) // numeric_limits::digits10) { // 15 is the accuracy of the MathCAD test data. @@ -644,7 +644,7 @@ BOOST_AUTO_TEST_CASE( test_main ) test_spots(0.0); // Test double. #endif #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS - if (numeric_limits::digits10 > numeric_limits::digits10) + if (std::numeric_limits::digits10 > std::numeric_limits::digits10) { // long double is better than double (so not MSVC where they are same). #ifdef TEST_LDOUBLE test_spots(0.0L); // Test long double. From 746f939aec102b75765ad539d73c016d14c612f5 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 26 Jul 2024 12:00:03 -0400 Subject: [PATCH 34/37] Fix header paths --- test/test_bernoulli_constants.cpp | 2 +- test/test_float_io.cpp | 2 +- test/test_owens_t.cpp | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_bernoulli_constants.cpp b/test/test_bernoulli_constants.cpp index 6d73d82a4..f5ae15a3e 100644 --- a/test/test_bernoulli_constants.cpp +++ b/test/test_bernoulli_constants.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include "table_type.hpp" #include #include #include diff --git a/test/test_float_io.cpp b/test/test_float_io.cpp index 107cc39d4..e1b5c67a1 100644 --- a/test/test_float_io.cpp +++ b/test/test_float_io.cpp @@ -301,7 +301,7 @@ void test() std::ios_base::fixed | std::ios_base::showpos}}; std::array, 40> string_data = {{ -#include "libs/math/test/string_data.ipp" +#include "string_data.ipp" }}; double num = 123456789.0; diff --git a/test/test_owens_t.cpp b/test/test_owens_t.cpp index 8c33e77f0..11389dd20 100644 --- a/test/test_owens_t.cpp +++ b/test/test_owens_t.cpp @@ -38,9 +38,9 @@ using boost::math::owens_t; #include #include -#include "libs/math/test/handle_test_result.hpp" -#include "libs/math/test/table_type.hpp" -#include "libs/math/test/functor.hpp" +#include "handle_test_result.hpp" +#include "table_type.hpp" +#include "functor.hpp" #include "boost/math/tools/test_value.hpp" #include "test_owens_t.hpp" From 2eaca5f6bd95cbcf4c53565b1d25ce53410a2f69 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 26 Jul 2024 13:36:07 -0400 Subject: [PATCH 35/37] Fix invalid constexpr variables --- .../boost/math/special_functions/digamma.hpp | 18 +++++++++--------- .../boost/math/special_functions/sin_pi.hpp | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/boost/math/special_functions/digamma.hpp b/include/boost/math/special_functions/digamma.hpp index 98c939237..bde80f7dd 100644 --- a/include/boost/math/special_functions/digamma.hpp +++ b/include/boost/math/special_functions/digamma.hpp @@ -327,11 +327,11 @@ BOOST_MATH_GPU_ENABLED T digamma_imp_1_2(T x, const std::integral_constant(0.3700660185912626595423257213284682051735604e-6L); - constexpr T P[] = { + BOOST_MATH_STATIC const float Y = 0.99558162689208984f; + BOOST_MATH_STATIC const T root = 1532632.0f / 1048576; + BOOST_MATH_STATIC const T root_minor = static_cast(0.3700660185912626595423257213284682051735604e-6L); + BOOST_MATH_STATIC const T P[] = { 0.25479851023250261e0f, -0.44981331915268368e0f, -0.43916936919946835e0f, -0.61041765350579073e-1f }; - constexpr T Q[] = { + BOOST_MATH_STATIC const T Q[] = { 0.1e1f, 0.15890202430554952e1f, 0.65341249856146947e0f, diff --git a/include/boost/math/special_functions/sin_pi.hpp b/include/boost/math/special_functions/sin_pi.hpp index 3c558ab7b..acb5db29e 100644 --- a/include/boost/math/special_functions/sin_pi.hpp +++ b/include/boost/math/special_functions/sin_pi.hpp @@ -55,15 +55,15 @@ BOOST_MATH_GPU_ENABLED inline T sin_pi_imp(T x, const Policy& pol) } template -BOOST_MATH_FORCEINLINE BOOST_MATH_GPU_ENABLED T sin_pi_dispatch(T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T sin_pi_dispatch(T x, const Policy& pol) { - if (x < 0) + if (x < T(0)) { - return -sin_pi_imp(-x, pol); + return -sin_pi_imp(T(-x), pol); } else { - return sin_pi_imp(x, pol); + return sin_pi_imp(T(x), pol); } } From 480df5f63d813fe4af160b53ebc90c9e9d076276 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 26 Jul 2024 13:46:25 -0400 Subject: [PATCH 36/37] Fix new trigamma dispatch function and illegal constexpr --- include/boost/math/special_functions/trigamma.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp index 5d10135b9..2ec6c5256 100644 --- a/include/boost/math/special_functions/trigamma.hpp +++ b/include/boost/math/special_functions/trigamma.hpp @@ -49,7 +49,7 @@ template BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_constant&) { // Max error in interpolated form: 3.736e-017 - constexpr T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469); + BOOST_MATH_STATIC const T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469); BOOST_MATH_STATIC const T P_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 53, -1.1093280605946045), BOOST_MATH_BIG_CONSTANT(T, 53, -3.8310674472619321), @@ -122,7 +122,7 @@ template BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_constant&) { // Max error in interpolated form: 1.178e-020 - constexpr T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875); + BOOST_MATH_STATIC const T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875); BOOST_MATH_STATIC const T P_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -1.10932535608960258341), BOOST_MATH_BIG_CONSTANT(T, 64, -4.18793841543017129052), @@ -395,7 +395,7 @@ BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Policy& pol, const Tag& ta return policies::raise_pole_error("boost::math::trigamma<%1%>(%1%)", nullptr, (1-x), pol); } T s = fabs(x) < fabs(z) ? boost::math::sin_pi(x, pol) : boost::math::sin_pi(z, pol); - return result - trigamma_prec(z, pol, tag) + boost::math::pow<2>(constants::pi()) / (s * s); + return result - trigamma_prec(T(z), pol, tag) + boost::math::pow<2>(constants::pi()) / (s * s); } if(x < 1) { From bdc7c259af9b1aaa40183db26c7e89b94408e61c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 26 Jul 2024 14:02:31 -0400 Subject: [PATCH 37/37] Speed up CI builds --- .github/workflows/cuda.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 53c551ee1..d57c681a4 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -70,7 +70,7 @@ jobs: - name: Build tests run: | cd ../boost-root/__build__ - cmake --build . --target tests + cmake --build . --target tests -j $(nproc) - name: Run tests run: | cd ../boost-root/__build__ @@ -136,7 +136,7 @@ jobs: - name: Build tests run: | cd ../boost-root/__build__ - cmake --build . --target tests + cmake --build . --target tests -j $(nproc) - name: Run tests run: | cd ../boost-root/__build__