diff --git a/include/boost/math/distributions/cauchy.hpp b/include/boost/math/distributions/cauchy.hpp index d914cca77..15a015838 100644 --- a/include/boost/math/distributions/cauchy.hpp +++ b/include/boost/math/distributions/cauchy.hpp @@ -13,6 +13,7 @@ #pragma warning(disable : 4127) // conditional expression is constant #endif +#include #include #include #include @@ -30,7 +31,7 @@ namespace detail { template -RealType cdf_imp(const cauchy_distribution& dist, const RealType& x, bool complement) +BOOST_MATH_GPU_ENABLED RealType cdf_imp(const cauchy_distribution& dist, const RealType& x, bool complement) { // // This calculates the cdf of the Cauchy distribution and/or its complement. @@ -54,7 +55,7 @@ RealType cdf_imp(const cauchy_distribution& dist, const RealTy // to get the result. // BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(cauchy<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(cauchy<%1%>&, %1%)"; RealType result = 0; RealType location = dist.location(); RealType scale = dist.scale(); @@ -66,6 +67,16 @@ RealType cdf_imp(const cauchy_distribution& dist, const RealTy { return result; } + #ifdef BOOST_MATH_HAS_GPU_SUPPORT + if(x > tools::max_value()) + { + return static_cast((complement) ? 0 : 1); + } + if(x < -tools::max_value()) + { + return static_cast((complement) ? 1 : 0); + } + #else if(std::numeric_limits::has_infinity && x == std::numeric_limits::infinity()) { // cdf +infinity is unity. return static_cast((complement) ? 0 : 1); @@ -74,6 +85,7 @@ RealType cdf_imp(const cauchy_distribution& dist, const RealTy { // cdf -infinity is zero. return static_cast((complement) ? 
1 : 0); } + #endif if(false == detail::check_x(function, x, &result, Policy())) { // Catches x == NaN return result; @@ -88,7 +100,7 @@ RealType cdf_imp(const cauchy_distribution& dist, const RealTy } // cdf template -RealType quantile_imp( +BOOST_MATH_GPU_ENABLED RealType quantile_imp( const cauchy_distribution& dist, const RealType& p, bool complement) @@ -101,7 +113,7 @@ RealType quantile_imp( // mid-point of the distribution. This is either added or subtracted // from the location parameter depending on whether `complement` is true. // - static const char* function = "boost::math::quantile(cauchy<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(cauchy<%1%>&, %1%)"; BOOST_MATH_STD_USING // for ADL of std functions RealType result = 0; @@ -151,20 +163,20 @@ class cauchy_distribution typedef RealType value_type; typedef Policy policy_type; - cauchy_distribution(RealType l_location = 0, RealType l_scale = 1) + BOOST_MATH_GPU_ENABLED cauchy_distribution(RealType l_location = 0, RealType l_scale = 1) : m_a(l_location), m_hg(l_scale) { - static const char* function = "boost::math::cauchy_distribution<%1%>::cauchy_distribution"; + constexpr auto function = "boost::math::cauchy_distribution<%1%>::cauchy_distribution"; RealType result; detail::check_location(function, l_location, &result, Policy()); detail::check_scale(function, l_scale, &result, Policy()); } // cauchy_distribution - RealType location()const + BOOST_MATH_GPU_ENABLED RealType location()const { return m_a; } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_hg; } @@ -184,13 +196,15 @@ cauchy_distribution(RealType,RealType)->cauchy_distribution -inline const std::pair range(const cauchy_distribution&) +BOOST_MATH_GPU_ENABLED inline const std::pair range(const cauchy_distribution&) { // Range of permissible values for random variable x. 
- if (std::numeric_limits::has_infinity) + #ifndef BOOST_MATH_HAS_GPU_SUPPORT + BOOST_MATH_IF_CONSTEXPR (std::numeric_limits::has_infinity) { return std::pair(-std::numeric_limits::infinity(), std::numeric_limits::infinity()); // - to + infinity. } else + #endif { // Can only use max_value. using boost::math::tools::max_value; return std::pair(-max_value(), max_value()); // - to + max. @@ -198,14 +212,16 @@ inline const std::pair range(const cauchy_distribution -inline const std::pair support(const cauchy_distribution& ) +BOOST_MATH_GPU_ENABLED inline const std::pair support(const cauchy_distribution& ) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - if (std::numeric_limits::has_infinity) + #ifndef BOOST_MATH_HAS_GPU_SUPPORT + BOOST_MATH_IF_CONSTEXPR (std::numeric_limits::has_infinity) { return std::pair(-std::numeric_limits::infinity(), std::numeric_limits::infinity()); // - to + infinity. } else + #endif { // Can only use max_value. using boost::math::tools::max_value; return std::pair(-tools::max_value(), max_value()); // - to + max. 
@@ -213,19 +229,19 @@ inline const std::pair support(const cauchy_distribution -inline RealType pdf(const cauchy_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType pdf(const cauchy_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(cauchy<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(cauchy<%1%>&, %1%)"; RealType result = 0; RealType location = dist.location(); RealType scale = dist.scale(); - if(false == detail::check_scale("boost::math::pdf(cauchy<%1%>&, %1%)", scale, &result, Policy())) + if(false == detail::check_scale(function, scale, &result, Policy())) { return result; } - if(false == detail::check_location("boost::math::pdf(cauchy<%1%>&, %1%)", location, &result, Policy())) + if(false == detail::check_location(function, location, &result, Policy())) { return result; } @@ -250,31 +266,31 @@ inline RealType pdf(const cauchy_distribution& dist, const Rea } // pdf template -inline RealType cdf(const cauchy_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const cauchy_distribution& dist, const RealType& x) { return detail::cdf_imp(dist, x, false); } // cdf template -inline RealType quantile(const cauchy_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const cauchy_distribution& dist, const RealType& p) { return detail::quantile_imp(dist, p, false); } // quantile template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { return detail::cdf_imp(c.dist, c.param, true); } // cdf complement template -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { return detail::quantile_imp(c.dist, c.param, true); } // quantile complement template -inline RealType mean(const 
cauchy_distribution&) +BOOST_MATH_GPU_ENABLED inline RealType mean(const cauchy_distribution&) { // There is no mean: typedef typename Policy::assert_undefined_type assert_type; static_assert(assert_type::value == 0, "assert type is undefined"); @@ -287,7 +303,7 @@ inline RealType mean(const cauchy_distribution&) } template -inline RealType variance(const cauchy_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType variance(const cauchy_distribution& /*dist*/) { // There is no variance: typedef typename Policy::assert_undefined_type assert_type; @@ -301,18 +317,19 @@ inline RealType variance(const cauchy_distribution& /*dist*/) } template -inline RealType mode(const cauchy_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const cauchy_distribution& dist) { return dist.location(); } template -inline RealType median(const cauchy_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType median(const cauchy_distribution& dist) { return dist.location(); } + template -inline RealType skewness(const cauchy_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const cauchy_distribution& /*dist*/) { // There is no skewness: typedef typename Policy::assert_undefined_type assert_type; @@ -326,7 +343,7 @@ inline RealType skewness(const cauchy_distribution& /*dist*/) } template -inline RealType kurtosis(const cauchy_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const cauchy_distribution& /*dist*/) { // There is no kurtosis: typedef typename Policy::assert_undefined_type assert_type; @@ -340,7 +357,7 @@ inline RealType kurtosis(const cauchy_distribution& /*dist*/) } template -inline RealType kurtosis_excess(const cauchy_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const cauchy_distribution& /*dist*/) { // There is no kurtosis excess: typedef typename Policy::assert_undefined_type assert_type; @@ -354,7 +371,7 @@ inline RealType kurtosis_excess(const cauchy_distribution& 
/*d } template -inline RealType entropy(const cauchy_distribution & dist) +BOOST_MATH_GPU_ENABLED inline RealType entropy(const cauchy_distribution & dist) { using std::log; return log(2*constants::two_pi()*dist.scale()); diff --git a/include/boost/math/distributions/detail/common_error_handling.hpp b/include/boost/math/distributions/detail/common_error_handling.hpp index f03f2c49b..9922396d5 100644 --- a/include/boost/math/distributions/detail/common_error_handling.hpp +++ b/include/boost/math/distributions/detail/common_error_handling.hpp @@ -1,5 +1,6 @@ // Copyright John Maddock 2006, 2007. // Copyright Paul A. Bristow 2006, 2007, 2012. +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -9,6 +10,7 @@ #ifndef BOOST_MATH_DISTRIBUTIONS_COMMON_ERROR_HANDLING_HPP #define BOOST_MATH_DISTRIBUTIONS_COMMON_ERROR_HANDLING_HPP +#include #include #include // using boost::math::isfinite; @@ -23,7 +25,7 @@ namespace boost{ namespace math{ namespace detail { template -inline bool check_probability(const char* function, RealType const& prob, RealType* result, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline bool check_probability(const char* function, RealType const& prob, RealType* result, const Policy& pol) { if((prob < 0) || (prob > 1) || !(boost::math::isfinite)(prob)) { @@ -36,7 +38,7 @@ inline bool check_probability(const char* function, RealType const& prob, RealTy } template -inline bool check_df(const char* function, RealType const& df, RealType* result, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline bool check_df(const char* function, RealType const& df, RealType* result, const Policy& pol) { // df > 0 but NOT +infinity allowed. 
if((df <= 0) || !(boost::math::isfinite)(df)) { @@ -49,7 +51,7 @@ inline bool check_df(const char* function, RealType const& df, RealType* result, } template -inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealType* result, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealType* result, const Policy& pol) { // df > 0 or +infinity are allowed. if( (df <= 0) || (boost::math::isnan)(df) ) { // is bad df <= 0 or NaN or -infinity. @@ -63,7 +65,7 @@ inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealTy template -inline bool check_scale( +BOOST_MATH_GPU_ENABLED inline bool check_scale( const char* function, RealType scale, RealType* result, @@ -80,7 +82,7 @@ inline bool check_scale( } template -inline bool check_location( +BOOST_MATH_GPU_ENABLED inline bool check_location( const char* function, RealType location, RealType* result, @@ -97,7 +99,7 @@ inline bool check_location( } template -inline bool check_x( +BOOST_MATH_GPU_ENABLED inline bool check_x( const char* function, RealType x, RealType* result, @@ -118,7 +120,7 @@ inline bool check_x( } // bool check_x template -inline bool check_x_not_NaN( +BOOST_MATH_GPU_ENABLED inline bool check_x_not_NaN( const char* function, RealType x, RealType* result, @@ -138,7 +140,7 @@ inline bool check_x_not_NaN( } // bool check_x_not_NaN template -inline bool check_x_gt0( +BOOST_MATH_GPU_ENABLED inline bool check_x_gt0( const char* function, RealType x, RealType* result, @@ -159,7 +161,7 @@ inline bool check_x_gt0( } // bool check_x_gt0 template -inline bool check_positive_x( +BOOST_MATH_GPU_ENABLED inline bool check_positive_x( const char* function, RealType x, RealType* result, @@ -179,13 +181,18 @@ inline bool check_positive_x( } template -inline bool check_non_centrality( +BOOST_MATH_GPU_ENABLED inline bool check_non_centrality( const char* function, RealType ncp, RealType* result, const Policy& pol) { + 
#ifndef BOOST_MATH_HAS_GPU_SUPPORT static const RealType upper_limit = static_cast((std::numeric_limits::max)()) - boost::math::policies::get_max_root_iterations(); + #else + constexpr RealType upper_limit = static_cast(LONG_LONG_MAX) - boost::math::policies::get_max_root_iterations(); + #endif + if((ncp < 0) || !(boost::math::isfinite)(ncp) || ncp > upper_limit) { *result = policies::raise_domain_error( @@ -197,7 +204,7 @@ inline bool check_non_centrality( } template -inline bool check_finite( +BOOST_MATH_GPU_ENABLED inline bool check_finite( const char* function, RealType x, RealType* result, diff --git a/include/boost/math/policies/error_handling.hpp b/include/boost/math/policies/error_handling.hpp index 1329dba10..36ec8d9d6 100644 --- a/include/boost/math/policies/error_handling.hpp +++ b/include/boost/math/policies/error_handling.hpp @@ -598,7 +598,54 @@ BOOST_MATH_GPU_ENABLED constexpr TargetType raise_rounding_error( // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: static_assert(std::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); + #ifndef BOOST_MATH_HAS_GPU_SUPPORT return val > 0 ? (std::numeric_limits::max)() : (std::numeric_limits::is_integer ? (std::numeric_limits::min)() : -(std::numeric_limits::max)()); + #else + BOOST_IF_CONSTEXPR (std::is_same::value) + { + return val > 0 ? SHRT_MAX : SHRT_MIN; + } + else BOOST_IF_CONSTEXPR (std::is_same::value) + { + return val > 0 ? INT_MAX : INT_MIN; + } + else BOOST_IF_CONSTEXPR (std::is_same::value) + { + return val > 0 ? LONG_MAX : LONG_MIN; + } + else BOOST_IF_CONSTEXPR (std::is_same::value) + { + return val > 0 ? LLONG_MAX : LLONG_MIN; + } + else BOOST_IF_CONSTEXPR (std::is_same::value) + { + return val > 0 ? USHRT_MAX : static_cast(0U); + } + else BOOST_IF_CONSTEXPR (std::is_same::value) + { + return val > 0 ? 
UINT_MAX : 0U; + } + else BOOST_IF_CONSTEXPR (std::is_same::value) + { + return val > 0 ? ULONG_MAX : 0UL; + } + else BOOST_IF_CONSTEXPR (std::is_same::value) + { + return val > 0 ? ULLONG_MAX : 0ULL; + } + else BOOST_IF_CONSTEXPR (std::is_same::value) + { + return val > 0 ? FLT_MAX : -FLT_MAX; + } + else BOOST_IF_CONSTEXPR (std::is_same::value) + { + return val > 0 ? DBL_MAX : -DBL_MAX; + } + else + { + return val > 0 ? static_cast(-1) : static_cast(1); + } + #endif } template diff --git a/include/boost/math/policies/policy.hpp b/include/boost/math/policies/policy.hpp index a819a4cb6..f12837a2a 100644 --- a/include/boost/math/policies/policy.hpp +++ b/include/boost/math/policies/policy.hpp @@ -45,13 +45,6 @@ namespace policies{ # ifndef BOOST_MATH_PROMOTE_DOUBLE_POLICY # define BOOST_MATH_PROMOTE_DOUBLE_POLICY false # endif -#endif - -// -// Refined support for sycl since it does not support errno -// - -#ifdef BOOST_MATH_ENABLE_SYCL # ifndef BOOST_MATH_DOMAIN_ERROR_POLICY # define BOOST_MATH_DOMAIN_ERROR_POLICY ignore_error # endif diff --git a/include/boost/math/special_functions/cbrt.hpp b/include/boost/math/special_functions/cbrt.hpp index 77cd5f0ae..2488bb5d4 100644 --- a/include/boost/math/special_functions/cbrt.hpp +++ b/include/boost/math/special_functions/cbrt.hpp @@ -10,6 +10,7 @@ #pragma once #endif +#include #include #include #include @@ -38,7 +39,7 @@ struct largest_cbrt_int_type }; template -T cbrt_imp(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED T cbrt_imp(T z, const Policy& pol) { BOOST_MATH_STD_USING // @@ -51,7 +52,7 @@ T cbrt_imp(T z, const Policy& pol) // Expected Error Term: -1.231e-006 // Maximum Relative Change in Control Points: 5.982e-004 // - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { static_cast(0.37568269008611818), static_cast(1.3304968705558024), static_cast(-1.4897101632445036), @@ -59,7 +60,7 @@ T cbrt_imp(T z, const Policy& pol) static_cast(-0.6398703759826468), static_cast(0.13584489959258635), }; - static const 
T correction[] = { + BOOST_MATH_STATIC const T correction[] = { static_cast(0.62996052494743658238360530363911), // 2^-2/3 static_cast(0.79370052598409973737585281963615), // 2^-1/3 static_cast(1), @@ -154,7 +155,7 @@ T cbrt_imp(T z, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type cbrt(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cbrt(T z, const Policy& pol) { using result_type = typename tools::promote_args::type; using value_type = typename policies::evaluation::type; @@ -162,7 +163,7 @@ inline typename tools::promote_args::type cbrt(T z, const Policy& pol) } template -inline typename tools::promote_args::type cbrt(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cbrt(T z) { return cbrt(z, policies::policy<>()); } diff --git a/include/boost/math/special_functions/detail/fp_traits.hpp b/include/boost/math/special_functions/detail/fp_traits.hpp index 2947a32a2..015ea9cd3 100644 --- a/include/boost/math/special_functions/detail/fp_traits.hpp +++ b/include/boost/math/special_functions/detail/fp_traits.hpp @@ -4,6 +4,7 @@ #define BOOST_MATH_FP_TRAITS_HPP // Copyright (c) 2006 Johan Rade +// Copyright (c) 2024 Matt Borland // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt @@ -24,6 +25,7 @@ With these techniques, the code could be simplified. 
#include #include #include +#include #include #include @@ -202,14 +204,14 @@ template<> struct fp_traits_non_native { typedef ieee_copy_all_bits_tag method; - static constexpr uint32_t sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7f800000; - static constexpr uint32_t flag = 0x00000000; - static constexpr uint32_t significand = 0x007fffff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7f800000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00000000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x007fffff; typedef uint32_t bits; - static void get_bits(float x, uint32_t& a) { std::memcpy(&a, &x, 4); } - static void set_bits(float& x, uint32_t a) { std::memcpy(&x, &a, 4); } + BOOST_MATH_GPU_ENABLED static void get_bits(float x, uint32_t& a) { std::memcpy(&a, &x, 4); } + BOOST_MATH_GPU_ENABLED static void set_bits(float& x, uint32_t a) { std::memcpy(&x, &a, 4); } }; // ieee_tag version, double (64 bits) ---------------------------------------------- @@ -250,15 +252,15 @@ template<> struct fp_traits_non_native { typedef ieee_copy_all_bits_tag method; - static constexpr uint64_t sign = static_cast(0x80000000u) << 32; - static constexpr uint64_t exponent = static_cast(0x7ff00000) << 32; - static constexpr uint64_t flag = 0; - static constexpr uint64_t significand + BOOST_MATH_STATIC constexpr uint64_t sign = static_cast(0x80000000u) << 32; + BOOST_MATH_STATIC constexpr uint64_t exponent = static_cast(0x7ff00000) << 32; + BOOST_MATH_STATIC constexpr uint64_t flag = 0; + BOOST_MATH_STATIC constexpr uint64_t significand = (static_cast(0x000fffff) << 32) + static_cast(0xffffffffu); typedef uint64_t bits; - static void get_bits(double x, uint64_t& a) { std::memcpy(&a, &x, 8); } - static void set_bits(double& x, uint64_t a) { std::memcpy(&x, &a, 8); } + BOOST_MATH_GPU_ENABLED static void get_bits(double x, uint64_t& a) { std::memcpy(&a, &x, 8); } + BOOST_MATH_GPU_ENABLED static void 
set_bits(double& x, uint64_t a) { std::memcpy(&x, &a, 8); } }; #endif @@ -330,10 +332,10 @@ struct fp_traits_non_native { typedef ieee_copy_leading_bits_tag method; - static constexpr uint32_t sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7fff0000; - static constexpr uint32_t flag = 0x00008000; - static constexpr uint32_t significand = 0x00007fff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7fff0000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00008000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x00007fff; typedef uint32_t bits; @@ -381,10 +383,10 @@ struct fp_traits_non_native { typedef ieee_copy_leading_bits_tag method; - static constexpr uint32_t sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7ff00000; - static constexpr uint32_t flag = 0x00000000; - static constexpr uint32_t significand = 0x000fffff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7ff00000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00000000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x000fffff; typedef uint32_t bits; @@ -399,7 +401,7 @@ struct fp_traits_non_native } private: - static constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; + BOOST_MATH_STATIC constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; }; @@ -419,10 +421,10 @@ struct fp_traits_non_native { typedef ieee_copy_leading_bits_tag method; - static constexpr uint32_t sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7fff0000; - static constexpr uint32_t flag = 0x00008000; - static constexpr uint32_t significand = 0x00007fff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7fff0000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00008000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x00007fff; // copy 1st, 2nd, 5th and 6th byte. 
3rd and 4th byte are padding. @@ -455,10 +457,10 @@ struct fp_traits_non_native { typedef ieee_copy_leading_bits_tag method; - static constexpr uint32_t sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7fff0000; - static constexpr uint32_t flag = 0x00000000; - static constexpr uint32_t significand = 0x0000ffff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7fff0000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00000000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x0000ffff; typedef uint32_t bits; @@ -473,7 +475,7 @@ struct fp_traits_non_native } private: - static constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; + BOOST_MATH_STATIC constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; }; #endif @@ -553,7 +555,8 @@ struct select_native && !defined(BOOST_MATH_DISABLE_STD_FPCLASSIFY)\ && !defined(__INTEL_COMPILER)\ && !defined(sun)\ - && !defined(__VXWORKS__) + && !defined(__VXWORKS__)\ + && !defined(BOOST_MATH_HAS_GPU_SUPPORT) # define BOOST_MATH_USE_STD_FPCLASSIFY #endif diff --git a/include/boost/math/special_functions/detail/round_fwd.hpp b/include/boost/math/special_functions/detail/round_fwd.hpp index c58459e36..7d69f8b9c 100644 --- a/include/boost/math/special_functions/detail/round_fwd.hpp +++ b/include/boost/math/special_functions/detail/round_fwd.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2008. +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
@@ -21,53 +22,53 @@ namespace boost { template - typename tools::promote_args::type trunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type trunc(const T& v, const Policy& pol); template - typename tools::promote_args::type trunc(const T& v); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type trunc(const T& v); template - int itrunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED int itrunc(const T& v, const Policy& pol); template - int itrunc(const T& v); + BOOST_MATH_GPU_ENABLED int itrunc(const T& v); template - long ltrunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long ltrunc(const T& v, const Policy& pol); template - long ltrunc(const T& v); + BOOST_MATH_GPU_ENABLED long ltrunc(const T& v); template - long long lltrunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long long lltrunc(const T& v, const Policy& pol); template - long long lltrunc(const T& v); + BOOST_MATH_GPU_ENABLED long long lltrunc(const T& v); template - typename tools::promote_args::type round(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type round(const T& v, const Policy& pol); template - typename tools::promote_args::type round(const T& v); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type round(const T& v); template - int iround(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED int iround(const T& v, const Policy& pol); template - int iround(const T& v); + BOOST_MATH_GPU_ENABLED int iround(const T& v); template - long lround(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long lround(const T& v, const Policy& pol); template - long lround(const T& v); + BOOST_MATH_GPU_ENABLED long lround(const T& v); template - long long llround(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long long llround(const T& v, const Policy& pol); template - long long llround(const T& v); + BOOST_MATH_GPU_ENABLED long long llround(const T& v); template 
- T modf(const T& v, T* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, T* ipart, const Policy& pol); template - T modf(const T& v, T* ipart); + BOOST_MATH_GPU_ENABLED T modf(const T& v, T* ipart); template - T modf(const T& v, int* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, int* ipart, const Policy& pol); template - T modf(const T& v, int* ipart); + BOOST_MATH_GPU_ENABLED T modf(const T& v, int* ipart); template - T modf(const T& v, long* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, long* ipart, const Policy& pol); template - T modf(const T& v, long* ipart); + BOOST_MATH_GPU_ENABLED T modf(const T& v, long* ipart); template - T modf(const T& v, long long* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, long long* ipart, const Policy& pol); template - T modf(const T& v, long long* ipart); + BOOST_MATH_GPU_ENABLED T modf(const T& v, long long* ipart); } } diff --git a/include/boost/math/special_functions/ellint_1.hpp b/include/boost/math/special_functions/ellint_1.hpp index dfc1815f7..f7fbbce40 100644 --- a/include/boost/math/special_functions/ellint_1.hpp +++ b/include/boost/math/special_functions/ellint_1.hpp @@ -1,5 +1,6 @@ // Copyright (c) 2006 Xiaogang Zhang // Copyright (c) 2006 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,6 +19,7 @@ #pragma once #endif +#include #include #include #include @@ -36,13 +38,13 @@ typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& namespace detail{ template -T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); +BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); template -T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); +BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); template -T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); +BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); template -T ellint_k_imp(T k, const Policy& pol, T one_minus_k2); +BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, T one_minus_k2); // Elliptic integral (Legendre form) of the first kind template @@ -760,7 +762,7 @@ BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_cons } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T k, const Policy& pol, const std::true_type&) +typename tools::promote_args::type ellint_1(T k, const Policy& pol, const std::true_type&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -776,7 +778,7 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T k, const } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, T2 phi, const std::false_type&) +typename tools::promote_args::type ellint_1(T1 k, T2 phi, const std::false_type&) { return boost::math::ellint_1(k, phi, policies::policy<>()); } @@ -785,14 +787,14 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, // Complete elliptic integral (Legendre form) of the first kind template 
-BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T k) +typename tools::promote_args::type ellint_1(T k) { return ellint_1(k, policies::policy<>()); } // Elliptic integral (Legendre form) of the first kind template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! +typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -800,7 +802,7 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, T2 phi) +typename tools::promote_args::type ellint_1(T1 k, T2 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_1(k, phi, tag_type()); diff --git a/include/boost/math/special_functions/ellint_2.hpp b/include/boost/math/special_functions/ellint_2.hpp index b09cdd490..5e2552cec 100644 --- a/include/boost/math/special_functions/ellint_2.hpp +++ b/include/boost/math/special_functions/ellint_2.hpp @@ -1,5 +1,6 @@ // Copyright (c) 2006 Xiaogang Zhang // Copyright (c) 2006 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -38,11 +39,11 @@ typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& namespace detail{ template -T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); +BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); template -T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); +BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); template -T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); +BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); // Elliptic integral (Legendre form) of the second kind template @@ -701,7 +702,7 @@ BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, std::integral_cons } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T k, const Policy& pol, const std::true_type&) +typename tools::promote_args::type ellint_2(T k, const Policy& pol, const std::true_type&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -714,7 +715,7 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T k, const // Elliptic integral (Legendre form) of the second kind template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T1 k, T2 phi, const std::false_type&) +typename tools::promote_args::type ellint_2(T1 k, T2 phi, const std::false_type&) { return boost::math::ellint_2(k, phi, policies::policy<>()); } @@ -723,21 +724,21 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T1 k, // Complete elliptic integral (Legendre form) of the second kind template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T k) +typename tools::promote_args::type ellint_2(T k) { return ellint_2(k, policies::policy<>()); } // Elliptic 
integral (Legendre form) of the second kind template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T1 k, T2 phi) +typename tools::promote_args::type ellint_2(T1 k, T2 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_2(k, phi, tag_type()); } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! +typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; diff --git a/include/boost/math/special_functions/expm1.hpp b/include/boost/math/special_functions/expm1.hpp index eec635603..a5af573cd 100644 --- a/include/boost/math/special_functions/expm1.hpp +++ b/include/boost/math/special_functions/expm1.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -294,18 +295,18 @@ inline typename tools::promote_args::type expm1(T x, const Policy& /* pol */) #if defined(BOOST_HAS_EXPM1) && !(defined(__osf__) && defined(__DECCXX_VER)) # ifdef BOOST_MATH_USE_C99 -inline float expm1(float x, const policies::policy<>&){ return ::expm1f(x); } +BOOST_MATH_GPU_ENABLED inline float expm1(float x, const policies::policy<>&){ return ::expm1f(x); } # ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS inline long double expm1(long double x, const policies::policy<>&){ return ::expm1l(x); } # endif # else inline float expm1(float x, const policies::policy<>&){ return static_cast(::expm1(x)); } # endif -inline double expm1(double x, const policies::policy<>&){ return ::expm1(x); } +BOOST_MATH_GPU_ENABLED inline double expm1(double x, const policies::policy<>&){ return ::expm1(x); } #endif template -inline typename tools::promote_args::type expm1(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type expm1(T x) { return expm1(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp index 6119c8e86..e2ac58e85 100644 --- a/include/boost/math/special_functions/math_fwd.hpp +++ b/include/boost/math/special_functions/math_fwd.hpp @@ -4,6 +4,7 @@ // Copyright Paul A. Bristow 2006. // Copyright John Maddock 2006. +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -26,6 +27,7 @@ #include #include #include +#include #include #include // for argument promotion. #include @@ -561,10 +563,10 @@ namespace boost // cbrt - cube root. 
template - tools::promote_args_t cbrt(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t cbrt(RT z); template - tools::promote_args_t cbrt(RT z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t cbrt(RT z, const Policy&); // log1p is log(x + 1) template @@ -582,10 +584,10 @@ namespace boost // Exp (x) minus 1 functions. template - tools::promote_args_t expm1(T); + BOOST_MATH_GPU_ENABLED tools::promote_args_t expm1(T); template - tools::promote_args_t expm1(T, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t expm1(T, const Policy&); // Power - 1 template @@ -883,16 +885,18 @@ namespace boost bool isnormal BOOST_NO_MACRO_EXPAND(T t); template - int signbit BOOST_NO_MACRO_EXPAND(T x); + BOOST_MATH_GPU_ENABLED int signbit BOOST_NO_MACRO_EXPAND(T x); template - int sign BOOST_NO_MACRO_EXPAND(const T& z); + BOOST_MATH_GPU_ENABLED int sign BOOST_NO_MACRO_EXPAND(const T& z); template - typename tools::promote_args_permissive::type copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y); + BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive::type + copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y); template - typename tools::promote_args_permissive::type changesign BOOST_NO_MACRO_EXPAND(const T& z); + BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive::type + changesign BOOST_NO_MACRO_EXPAND(const T& z); // Exponential integrals: namespace detail{ @@ -1191,13 +1195,13 @@ namespace boost #define BOOST_MATH_DETAIL_LL_FUNC(Policy)\ \ template \ - inline T modf(const T& v, long long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\ \ template \ - inline long long lltrunc(const T& v){ using boost::math::lltrunc; return lltrunc(v, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v){ using boost::math::lltrunc; return lltrunc(v, Policy()); }\ \ template 
\ - inline long long llround(const T& v){ using boost::math::llround; return llround(v, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline long long llround(const T& v){ using boost::math::llround; return llround(v, Policy()); }\ # define BOOST_MATH_DETAIL_11_FUNC(Policy)\ template \ @@ -1473,7 +1477,7 @@ namespace boost inline boost::math::tools::promote_args_t log1pmx(T x){ return boost::math::log1pmx(x, Policy()); }\ \ template \ - inline boost::math::tools::promote_args_t expm1(T x){ return boost::math::expm1(x, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t expm1(T x){ return boost::math::expm1(x, Policy()); }\ \ template \ inline boost::math::tools::promote_args_t \ @@ -1594,31 +1598,31 @@ template \ inline boost::math::tools::promote_args_t zeta(T s){ return boost::math::zeta(s, Policy()); }\ \ template \ - inline T round(const T& v){ using boost::math::round; return round(v, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline T round(const T& v){ using boost::math::round; return round(v, Policy()); }\ \ template \ - inline int iround(const T& v){ using boost::math::iround; return iround(v, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline int iround(const T& v){ using boost::math::iround; return iround(v, Policy()); }\ \ template \ - inline long lround(const T& v){ using boost::math::lround; return lround(v, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline long lround(const T& v){ using boost::math::lround; return lround(v, Policy()); }\ \ template \ - inline T trunc(const T& v){ using boost::math::trunc; return trunc(v, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline T trunc(const T& v){ using boost::math::trunc; return trunc(v, Policy()); }\ \ template \ - inline int itrunc(const T& v){ using boost::math::itrunc; return itrunc(v, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v){ using boost::math::itrunc; return itrunc(v, Policy()); }\ \ template \ - inline long ltrunc(const T& v){ using boost::math::ltrunc; return 
ltrunc(v, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v){ using boost::math::ltrunc; return ltrunc(v, Policy()); }\ \ template \ - inline T modf(const T& v, T* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\ \ template \ - inline T modf(const T& v, int* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\ \ template \ - inline T modf(const T& v, long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\ + BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\ \ template \ inline boost::math::tools::promote_args_t pow(T v){ return boost::math::pow(v, Policy()); }\ diff --git a/include/boost/math/special_functions/modf.hpp b/include/boost/math/special_functions/modf.hpp index 75e6be9f4..e08945dca 100644 --- a/include/boost/math/special_functions/modf.hpp +++ b/include/boost/math/special_functions/modf.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2007. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -17,49 +18,49 @@ namespace boost{ namespace math{ template -inline T modf(const T& v, T* ipart, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart, const Policy& pol) { *ipart = trunc(v, pol); return v - *ipart; } template -inline T modf(const T& v, T* ipart) +BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart) { return modf(v, ipart, policies::policy<>()); } template -inline T modf(const T& v, int* ipart, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart, const Policy& pol) { *ipart = itrunc(v, pol); return v - *ipart; } template -inline T modf(const T& v, int* ipart) +BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart) { return modf(v, ipart, policies::policy<>()); } template -inline T modf(const T& v, long* ipart, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart, const Policy& pol) { *ipart = ltrunc(v, pol); return v - *ipart; } template -inline T modf(const T& v, long* ipart) +BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart) { return modf(v, ipart, policies::policy<>()); } template -inline T modf(const T& v, long long* ipart, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart, const Policy& pol) { *ipart = lltrunc(v, pol); return v - *ipart; } template -inline T modf(const T& v, long long* ipart) +BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart) { return modf(v, ipart, policies::policy<>()); } diff --git a/include/boost/math/special_functions/round.hpp b/include/boost/math/special_functions/round.hpp index e74acba85..3df0145b6 100644 --- a/include/boost/math/special_functions/round.hpp +++ b/include/boost/math/special_functions/round.hpp @@ -30,7 +30,7 @@ namespace boost{ namespace math{ namespace detail{ template -inline tools::promote_args_t round(const T& v, const Policy& pol, const 
std::false_type&) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t round(const T& v, const Policy& pol, const std::false_type&) { BOOST_MATH_STD_USING using result_type = tools::promote_args_t; @@ -65,7 +65,7 @@ inline tools::promote_args_t round(const T& v, const Policy& pol, const std:: } } template -inline tools::promote_args_t round(const T& v, const Policy&, const std::true_type&) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t round(const T& v, const Policy&, const std::true_type&) { return v; } @@ -73,12 +73,12 @@ inline tools::promote_args_t round(const T& v, const Policy&, const std::true } // namespace detail template -inline tools::promote_args_t round(const T& v, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t round(const T& v, const Policy& pol) { return detail::round(v, pol, std::integral_constant::value>()); } template -inline tools::promote_args_t round(const T& v) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t round(const T& v) { return round(v, policies::policy<>()); } @@ -103,7 +103,7 @@ inline int iround(const T& v, const Policy& pol) result_type r = boost::math::round(v, pol); - #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP + #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT) if constexpr (std::is_arithmetic_v #ifdef BOOST_MATH_FLOAT128_TYPE && !std::is_same_v @@ -127,7 +127,7 @@ inline int iround(const T& v, const Policy& pol) } } #else - static const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); + BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); if (r >= max_val || r < -max_val) { @@ -138,20 +138,20 @@ inline int iround(const T& v, const Policy& pol) return static_cast(r); } template -inline int iround(const T& v) +BOOST_MATH_GPU_ENABLED inline int iround(const T& v) { return iround(v, policies::policy<>()); } template -inline long lround(const T& v, const Policy& pol) 
+BOOST_MATH_GPU_ENABLED inline long lround(const T& v, const Policy& pol) { BOOST_MATH_STD_USING using result_type = tools::promote_args_t; result_type r = boost::math::round(v, pol); - #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP + #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT) if constexpr (std::is_arithmetic_v #ifdef BOOST_MATH_FLOAT128_TYPE && !std::is_same_v @@ -175,7 +175,7 @@ inline long lround(const T& v, const Policy& pol) } } #else - static const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); + BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); if (r >= max_val || r < -max_val) { @@ -186,20 +186,20 @@ inline long lround(const T& v, const Policy& pol) return static_cast(r); } template -inline long lround(const T& v) +BOOST_MATH_GPU_ENABLED inline long lround(const T& v) { return lround(v, policies::policy<>()); } template -inline long long llround(const T& v, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline long long llround(const T& v, const Policy& pol) { BOOST_MATH_STD_USING using result_type = boost::math::tools::promote_args_t; result_type r = boost::math::round(v, pol); - #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP + #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT) if constexpr (std::is_arithmetic_v #ifdef BOOST_MATH_FLOAT128_TYPE && !std::is_same_v @@ -223,7 +223,7 @@ inline long long llround(const T& v, const Policy& pol) } } #else - static const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); + BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); if (r >= max_val || r < -max_val) { @@ -234,7 +234,7 @@ inline long long llround(const T& v, const Policy& pol) return static_cast(r); } template -inline long long llround(const T& v) +BOOST_MATH_GPU_ENABLED inline long long llround(const T& v) { return llround(v, 
policies::policy<>()); } diff --git a/include/boost/math/special_functions/sign.hpp b/include/boost/math/special_functions/sign.hpp index 8f9fc4793..cd7fdfb2a 100644 --- a/include/boost/math/special_functions/sign.hpp +++ b/include/boost/math/special_functions/sign.hpp @@ -1,6 +1,7 @@ // (C) Copyright John Maddock 2006. // (C) Copyright Johan Rade 2006. // (C) Copyright Paul A. Bristow 2011 (added changesign). +// (C) Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file @@ -35,13 +36,13 @@ namespace detail { // signed zero or NaN. template - inline int signbit_impl(T x, generic_tag const&) + BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, generic_tag const&) { return x < 0; } template - inline int signbit_impl(T x, generic_tag const&) + BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, generic_tag const&) { return x < 0; } @@ -91,13 +92,13 @@ namespace detail { // signed zero or NaN. template - inline T (changesign_impl)(T x, generic_tag const&) + BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, generic_tag const&) { return -x; } template - inline T (changesign_impl)(T x, generic_tag const&) + BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, generic_tag const&) { return -x; } @@ -124,7 +125,7 @@ namespace detail { #endif template - inline T changesign_impl(T x, ieee_copy_all_bits_tag const&) + BOOST_MATH_GPU_ENABLED inline T changesign_impl(T x, ieee_copy_all_bits_tag const&) { typedef typename fp_traits::sign_change_type traits; @@ -136,7 +137,7 @@ namespace detail { } template - inline T (changesign_impl)(T x, ieee_copy_leading_bits_tag const&) + BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, ieee_copy_leading_bits_tag const&) { typedef typename fp_traits::sign_change_type traits; @@ -150,7 +151,8 @@ namespace detail { } // namespace detail -template int (signbit)(T x) +template +BOOST_MATH_GPU_ENABLED int (signbit)(T x) { typedef typename 
detail::fp_traits::type traits; typedef typename traits::method method; @@ -160,12 +162,13 @@ template int (signbit)(T x) } template -inline int sign BOOST_NO_MACRO_EXPAND(const T& z) +BOOST_MATH_GPU_ENABLED inline int sign BOOST_NO_MACRO_EXPAND(const T& z) { return (z == 0) ? 0 : (boost::math::signbit)(z) ? -1 : 1; } -template typename tools::promote_args_permissive::type (changesign)(const T& x) +template +BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive::type (changesign)(const T& x) { //!< \brief return unchanged binary pattern of x, except for change of sign bit. typedef typename detail::fp_traits::sign_change_type traits; typedef typename traits::method method; @@ -176,7 +179,7 @@ template typename tools::promote_args_permissive::type (changesign) } template -inline typename tools::promote_args_permissive::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args_permissive::type copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y) { BOOST_MATH_STD_USING diff --git a/include/boost/math/special_functions/trunc.hpp b/include/boost/math/special_functions/trunc.hpp index a084de560..4288225f1 100644 --- a/include/boost/math/special_functions/trunc.hpp +++ b/include/boost/math/special_functions/trunc.hpp @@ -27,7 +27,7 @@ namespace boost{ namespace math{ namespace detail{ template -inline tools::promote_args_t trunc(const T& v, const Policy& pol, const std::false_type&) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t trunc(const T& v, const Policy& pol, const std::false_type&) { BOOST_MATH_STD_USING using result_type = tools::promote_args_t; @@ -39,20 +39,21 @@ inline tools::promote_args_t trunc(const T& v, const Policy& pol, const std:: } template -inline tools::promote_args_t trunc(const T& v, const Policy&, const std::true_type&) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t trunc(const T& v, const Policy&, const std::true_type&) { return v; } -} +} // Namespace detail template -inline tools::promote_args_t trunc(const T& v, 
const Policy& pol) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t trunc(const T& v, const Policy& pol) { return detail::trunc(v, pol, std::integral_constant::value>()); } + template -inline tools::promote_args_t trunc(const T& v) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t trunc(const T& v) { return trunc(v, policies::policy<>()); } @@ -70,13 +71,13 @@ inline tools::promote_args_t trunc(const T& v) // https://stackoverflow.com/questions/27442885/syntax-error-with-stdnumeric-limitsmax // template -inline int itrunc(const T& v, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v, const Policy& pol) { BOOST_MATH_STD_USING using result_type = tools::promote_args_t; result_type r = boost::math::trunc(v, pol); - #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP + #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT) if constexpr (std::is_arithmetic_v #ifdef BOOST_MATH_FLOAT128_TYPE && !std::is_same_v @@ -100,7 +101,7 @@ inline int itrunc(const T& v, const Policy& pol) } } #else - static const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); + BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); if (r >= max_val || r < -max_val) { @@ -110,20 +111,21 @@ inline int itrunc(const T& v, const Policy& pol) return static_cast(r); } + template -inline int itrunc(const T& v) +BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v) { return itrunc(v, policies::policy<>()); } template -inline long ltrunc(const T& v, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v, const Policy& pol) { BOOST_MATH_STD_USING using result_type = tools::promote_args_t; result_type r = boost::math::trunc(v, pol); - #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP + #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT) if constexpr (std::is_arithmetic_v #ifdef BOOST_MATH_FLOAT128_TYPE && !std::is_same_v @@ -147,7 +149,7 @@ inline 
long ltrunc(const T& v, const Policy& pol) } } #else - static const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); + BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); if (r >= max_val || r < -max_val) { @@ -157,20 +159,21 @@ inline long ltrunc(const T& v, const Policy& pol) return static_cast(r); } + template -inline long ltrunc(const T& v) +BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v) { return ltrunc(v, policies::policy<>()); } template -inline long long lltrunc(const T& v, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v, const Policy& pol) { BOOST_MATH_STD_USING using result_type = tools::promote_args_t; result_type r = boost::math::trunc(v, pol); - #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP + #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT) if constexpr (std::is_arithmetic_v #ifdef BOOST_MATH_FLOAT128_TYPE && !std::is_same_v @@ -194,7 +197,7 @@ inline long long lltrunc(const T& v, const Policy& pol) } } #else - static const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); + BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast(1), std::numeric_limits::digits); if (r >= max_val || r < -max_val) { @@ -204,21 +207,22 @@ inline long long lltrunc(const T& v, const Policy& pol) return static_cast(r); } + template -inline long long lltrunc(const T& v) +BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v) { return lltrunc(v, policies::policy<>()); } template -inline typename std::enable_if::value, int>::type +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, int>::type iconvert(const T& v, const Policy&) { return static_cast(v); } template -inline typename std::enable_if::value, int>::type +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, int>::type iconvert(const T& v, const Policy& pol) { using boost::math::itrunc; @@ -226,14 
+230,14 @@ inline typename std::enable_if::value, int>::type } template -inline typename std::enable_if::value, long>::type +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, long>::type lconvert(const T& v, const Policy&) { return static_cast(v); } template -inline typename std::enable_if::value, long>::type +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, long>::type lconvert(const T& v, const Policy& pol) { using boost::math::ltrunc; @@ -241,14 +245,14 @@ inline typename std::enable_if::value, long>::ty } template -inline typename std::enable_if::value, long long>::type +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, long long>::type llconvertert(const T& v, const Policy&) { return static_cast(v); } template -inline typename std::enable_if::value, long long>::type +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, long long>::type llconvertert(const T& v, const Policy& pol) { using boost::math::lltrunc; diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp index e5b339483..e7f3e54fd 100644 --- a/include/boost/math/tools/config.hpp +++ b/include/boost/math/tools/config.hpp @@ -699,6 +699,7 @@ namespace boost{ namespace math{ // spir64 does not support long double # define BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS +# define BOOST_MATH_NO_REAL_CONCEPT_TESTS # undef BOOST_MATH_FORCEINLINE # define BOOST_MATH_FORCEINLINE inline @@ -731,15 +732,15 @@ BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_min(const T& a, const T& b) { return template BOOST_MATH_GPU_ENABLED constexpr T cuda_safe_max(const T& a, const T& b) { return a > b ? 
a : b; } -#define BOOST_MATH_GPU_SAFE_SWAP(a, b) gpu_safe_swap(a, b); -#define BOOST_MATH_GPU_SAFE_MIN(a, b) gpu_safe_min(a, b); -#define BOOST_MATH_GPU_SAFE_MAX(a, b) gpu_safe_max(a, b); +#define BOOST_MATH_GPU_SAFE_SWAP(a, b) gpu_safe_swap(a, b) +#define BOOST_MATH_GPU_SAFE_MIN(a, b) gpu_safe_min(a, b) +#define BOOST_MATH_GPU_SAFE_MAX(a, b) gpu_safe_max(a, b) #else -#define BOOST_MATH_GPU_SAFE_SWAP(a, b) std::swap(a, b); -#define BOOST_MATH_GPU_SAFE_MIN(a, b) (std::min)(a, b); -#define BOOST_MATH_GPU_SAFE_MAX(a, b) (std::max)(a, b); +#define BOOST_MATH_GPU_SAFE_SWAP(a, b) std::swap(a, b) +#define BOOST_MATH_GPU_SAFE_MIN(a, b) (std::min)(a, b) +#define BOOST_MATH_GPU_SAFE_MAX(a, b) (std::max)(a, b) #endif @@ -749,13 +750,20 @@ BOOST_MATH_GPU_ENABLED constexpr T cuda_safe_max(const T& a, const T& b) { retur #if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L # define BOOST_MATH_STATIC_CONSTEXPR inline constexpr # define BOOST_MATH_STATIC static +# ifndef BOOST_MATH_HAS_GPU_SUPPORT +# define BOOST_MATH_STATIC_LOCAL_VARIABLE static +# else +# define BOOST_MATH_STATIC_LOCAL_VARIABLE +# endif #else # ifndef BOOST_MATH_HAS_GPU_SUPPORT # define BOOST_MATH_STATIC_CONSTEXPR static constexpr # define BOOST_MATH_STATIC static +# define BOOST_MATH_STATIC_LOCAL_VARIABLE static # else # define BOOST_MATH_STATIC_CONSTEXPR constexpr # define BOOST_MATH_STATIC constexpr +# define BOOST_MATH_STATIC_LOCAL_VARIABLE # endif #endif diff --git a/include_private/boost/math/tools/test.hpp b/include_private/boost/math/tools/test.hpp index 10f6143e2..8f4db18c1 100644 --- a/include_private/boost/math/tools/test.hpp +++ b/include_private/boost/math/tools/test.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -253,6 +254,7 @@ test_result test_hetero(const A& a, F1 test_func, F2 expect_func) return result; } +#ifndef BOOST_MATH_NO_EXCEPTIONS template void test_check_throw(Val, Exception) { @@ -293,6 +295,7 @@ void test_check_throw(Val v, boost::math::rounding_error const*) BOOST_CHECK((v == boost::math::tools::max_value()) || (v == -boost::math::tools::max_value())); } } +#endif } // namespace tools } // namespace math @@ -303,7 +306,7 @@ void test_check_throw(Val v, boost::math::rounding_error const*) // exception-free testing support, ideally we'd only define this in our tests, // but to keep things simple we really need it somewhere that's always included: // -#ifdef BOOST_NO_EXCEPTIONS +#ifdef BOOST_MATH_NO_EXCEPTIONS # define BOOST_MATH_CHECK_THROW(x, ExceptionType) boost::math::tools::test_check_throw(x, static_cast(nullptr)); #else # define BOOST_MATH_CHECK_THROW(x, y) BOOST_CHECK_THROW(x, y) diff --git a/test/cuda_jamfile b/test/cuda_jamfile index f4740a210..65c5d71be 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -24,8 +24,26 @@ run test_arcsine_pdf_float.cu ; run test_arcsine_quan_double.cu ; run test_arcsine_quan_float.cu ; run test_binomial.cpp ; +run test_cauchy_cdf_double.cu ; +run test_cauchy_cdf_float.cu ; +run test_cauchy_pdf_double.cu ; +run test_cauchy_pdf_float.cu ; +run test_cauchy_quan_double.cu ; +run test_cauchy_quan_float.cu ; # Special Functions # run test_beta_simple.cpp ; run test_beta_double.cu ; -run test_beta_float.cu ; \ No newline at end of file +run test_beta_float.cu ; +run test_cbrt_double.cu ; +run test_cbrt_float.cu ; +run test_changesign_double.cu ; +run test_changesign_float.cu ; +run test_expm1_double.cu ; +run test_expm1_float.cu ; +run test_modf_double.cu ; +run test_modf_float.cu ; +run test_round_double.cu ; +run test_round_float.cu ; +run test_trunc_double.cu ; +run test_trunc_float.cu ; diff --git 
a/test/handle_test_result.hpp b/test/handle_test_result.hpp index e909d6458..66bfe557b 100644 --- a/test/handle_test_result.hpp +++ b/test/handle_test_result.hpp @@ -6,8 +6,8 @@ #ifndef BOOST_MATH_HANDLE_TEST_RESULT #define BOOST_MATH_HANDLE_TEST_RESULT +#include "../include_private/boost/math/tools/test.hpp" #include -#include #include #include #include diff --git a/test/sycl_jamfile b/test/sycl_jamfile index 679bae0dc..e8bd12f8c 100644 --- a/test/sycl_jamfile +++ b/test/sycl_jamfile @@ -11,6 +11,11 @@ project : requirements # Distributions run test_arcsine.cpp ; +run test_cauchy.cpp ; # Special Functions run test_beta_simple.cpp ; +run test_cbrt.cpp ; +run test_sign.cpp ; +run test_round.cpp ; +run test_expm1_simple.cpp ; diff --git a/test/test_cauchy.cpp b/test/test_cauchy.cpp index 002690e5f..75c8c4f1e 100644 --- a/test/test_cauchy.cpp +++ b/test/test_cauchy.cpp @@ -18,17 +18,17 @@ // #define BOOST_MATH_ASSERT_UNDEFINED_POLICY false // To compile even if Cauchy mean is used. -#include #include // for real_concept #include using boost::math::cauchy_distribution; -#include "test_out_of_range.hpp" - #define BOOST_TEST_MAIN #include // Boost.Test #include + +#include "test_out_of_range.hpp" + #include using std::cout; using std::endl; @@ -38,11 +38,11 @@ void test_spots(RealType T) { // Check some bad parameters to construct the distribution, #ifndef BOOST_NO_EXCEPTIONS - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, 0), std::domain_error); // zero scale. - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, -1), std::domain_error); // negative scale (shape). + BOOST_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, 0), std::domain_error); // zero scale. + BOOST_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, -1), std::domain_error); // negative scale (shape). #else - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution(0, 0), std::domain_error); // zero scale.
- BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution(0, -1), std::domain_error); // negative scale (shape). + BOOST_CHECK_THROW(boost::math::cauchy_distribution(0, 0), std::domain_error); // zero scale. + BOOST_CHECK_THROW(boost::math::cauchy_distribution(0, -1), std::domain_error); // negative scale (shape). #endif cauchy_distribution C01; @@ -667,35 +667,35 @@ void test_spots(RealType T) // To compile even if Cauchy mean is used. // See policy reference, mathematically undefined function policies // - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // mean(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // variance(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // standard_deviation(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // kurtosis(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // kurtosis_excess(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // skewness(dist), // std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(dist, RealType(0.0)), std::overflow_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(dist, RealType(1.0)), std::overflow_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(complement(dist, RealType(0.0))), std::overflow_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(complement(dist, RealType(1.0))), std::overflow_error); @@ -705,7 +705,7 @@ void test_spots(RealType T) } // template void test_spots(RealType) -BOOST_AUTO_TEST_CASE( test_main ) +BOOST_AUTO_TEST_CASE(test_main) { BOOST_MATH_CONTROL_FP; // Check that can generate cauchy distribution using the two convenience methods: diff --git a/test/test_cauchy_cdf_double.cu b/test/test_cauchy_cdf_double.cu new file mode 100644 index 000000000..dc99cbe33 --- /dev/null +++ b/test/test_cauchy_cdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) 
+ { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_cauchy_cdf_float.cu b/test/test_cauchy_cdf_float.cu new file mode 100644 index 000000000..dc99cbe33 --- /dev/null +++ b/test/test_cauchy_cdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; /* this is the _float variant: run the kernel and the host reference in single precision */ + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) <<
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; return EXIT_FAILURE; /* an exception means the check never completed, so report failure rather than falling through to return 0 */ + } + return 0; +} diff --git a/test/test_cauchy_pdf_double.cu b/test/test_cauchy_pdf_double.cu new file mode 100644 index 000000000..7a7fe5ba6 --- /dev/null +++ b/test/test_cauchy_pdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) <<
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; return EXIT_FAILURE; /* an exception means the check never completed, so report failure rather than falling through to return 0 */ + } + return 0; +} diff --git a/test/test_cauchy_pdf_float.cu b/test/test_cauchy_pdf_float.cu new file mode 100644 index 000000000..5ec3b604b --- /dev/null +++ b/test/test_cauchy_pdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) <<
")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; return EXIT_FAILURE; /* an exception means the check never completed, so report failure rather than falling through to return 0 */ + } + return 0; +} diff --git a/test/test_cauchy_quan_double.cu b/test/test_cauchy_quan_double.cu new file mode 100644 index 000000000..21f4b4dda --- /dev/null +++ b/test/test_cauchy_quan_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!"
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; return EXIT_FAILURE; /* an exception means the check never completed, so report failure rather than falling through to return 0 */ + } + return 0; +} diff --git a/test/test_cauchy_quan_float.cu b/test/test_cauchy_quan_float.cu new file mode 100644 index 000000000..b6bed1520 --- /dev/null +++ b/test/test_cauchy_quan_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 512; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!"
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; return EXIT_FAILURE; /* an exception means the check never completed, so report failure rather than falling through to return 0 */ + } + return 0; +} diff --git a/test/test_cbrt.cpp b/test/test_cbrt.cpp index 8b36a765d..6abb9bd88 100644 --- a/test/test_cbrt.cpp +++ b/test/test_cbrt.cpp @@ -9,7 +9,10 @@ # pragma warning (disable : 4224) #endif +#ifndef SYCL_LANGUAGE_VERSION #include // include /libs/math/src/ +#endif + #include "test_cbrt.hpp" #include // Added to avoid link failure missing cbrt variants. diff --git a/test/test_cbrt.hpp b/test/test_cbrt.hpp index f606a5840..77e4aed51 100644 --- a/test/test_cbrt.hpp +++ b/test/test_cbrt.hpp @@ -1,5 +1,6 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2007, 2009 +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -93,7 +93,9 @@ void test_cbrt(T, const char* name) } BOOST_IF_CONSTEXPR(std::numeric_limits::has_quiet_NaN) { + #ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_CHECK_THROW(boost::math::cbrt(std::numeric_limits::quiet_NaN()), std::domain_error); + #endif } } diff --git a/test/test_cbrt_double.cu b/test/test_cbrt_double.cu new file mode 100644 index 000000000..cc2c32685 --- /dev/null +++ b/test/test_cbrt_double.cu @@ -0,0 +1,99 @@ + +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::cbrt(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + 
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::cbrt(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cbrt_float.cu b/test/test_cbrt_float.cu new file mode 100644 index 000000000..a4e98cce8 --- /dev/null +++ b/test/test_cbrt_float.cu @@ -0,0 +1,99 @@ + +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::cbrt(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::cbrt(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_changesign_double.cu b/test/test_changesign_double.cu new file mode 100644 index 000000000..bfb2ade1e --- /dev/null +++ b/test/test_changesign_double.cu @@ -0,0 +1,111 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::changesign(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output 
vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + switch(i % 55) + { + case 1: + h_A[i] = 0; + break; + case 2: + h_A[i] = std::numeric_limits::infinity(); + break; + case 3: + h_A[i] = -std::numeric_limits::infinity(); + break; + } + if(i % 2) /* negate odd-indexed inputs so both signs are exercised; the previous i % 1 was always 0 and never negated anything */ + h_A[i] = -h_A[i]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::changesign(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (h_C[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_changesign_float.cu b/test/test_changesign_float.cu new file mode 100644 index 000000000..d7e1764bd --- /dev/null +++ b/test/test_changesign_float.cu @@ -0,0 +1,111 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::changesign(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + switch(i % 55) + { + case 1: + h_A[i] = 0; + break; + case 2: + h_A[i] = std::numeric_limits::infinity(); + break; + case 3: + h_A[i] = -std::numeric_limits::infinity(); + break; + } + if(i % 2) /* negate odd-indexed inputs so both signs are exercised; the previous i % 1 was always 0 and never negated anything */ + h_A[i] = -h_A[i]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!"
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::changesign(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (h_C[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_expm1_double.cu b/test/test_expm1_double.cu new file mode 100644 index 000000000..cfed7d840 --- /dev/null +++ b/test/test_expm1_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::expm1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr 
output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::expm1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_expm1_float.cu b/test/test_expm1_float.cu new file mode 100644 index 000000000..3d439b887 --- /dev/null +++ b/test/test_expm1_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/expm1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: each thread computes out[i] = boost::math::expm1(in[i]) for one index i.
+ * Threads whose global index is not below numElements do nothing.
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test on random inputs in [0, 1] and compares with the host result.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector with values in [0, 1]
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel with one thread per element, rounded up to whole blocks
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Recompute every value on the host (timed) as the reference result
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::expm1(input_vector[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the reference exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_expm1_simple.cpp b/test/test_expm1_simple.cpp
new file mode 100644
index 000000000..00513ea40
--- /dev/null
+++ b/test/test_expm1_simple.cpp
@@ -0,0 +1,33 @@
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cmath>
+#include <random>
+#include <boost/math/special_functions/expm1.hpp>
+#include "math_unit_test.hpp"
+
+constexpr int N = 50000;
+
+// Compares boost::math::expm1 with std::expm1 on N values drawn from [0, 0.01) with a fixed seed.
+template <typename T>
+void test()
+{
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<T> dist(0, 0.01);
+
+    for (int n = 0; n < N; ++n)
+    {
+        const T value (dist(rng));
+        CHECK_ULP_CLOSE(std::expm1(value), boost::math::expm1(value), 10);
+    }
+}
+
+int main()
+{
+    test<float>();
+    test<double>();
+
+    return boost::math::test::report_errors();
+}
diff --git a/test/test_modf_double.cu b/test/test_modf_double.cu
new file mode 100644
index 000000000..06e65c106
--- /dev/null
+++ b/test/test_modf_double.cu
@@ -0,0 +1,105 @@
+
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
+// (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: out[i] = sum of the four boost::math::modf overloads applied to in[i]
+ * (second argument of type float_type*, int*, long* and long long*); one thread per element.
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test on random inputs in [0, 1] and compares with the host result.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vector with values in [0, 1]
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel with one thread per element, rounded up to whole blocks
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Recompute every value on the host (timed); the kernel sums four modf calls, hence 4 *
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type fract;
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::modf(h_A[i], &fract));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the reference exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
+
diff --git a/test/test_modf_float.cu b/test/test_modf_float.cu
new file mode 100644
index 000000000..06e65c106
--- /dev/null
+++ b/test/test_modf_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: out[i] = sum of the four boost::math::modf overloads applied to in[i]
+ * (second argument of type float_type*, int*, long* and long long*); one thread per element.
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test on random inputs in [0, 1] and compares with the host result.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vector with values in [0, 1]
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel with one thread per element, rounded up to whole blocks
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Recompute every value on the host (timed); the kernel sums four modf calls, hence 4 *
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type fract;
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::modf(h_A[i], &fract));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the reference exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
+
diff --git a/test/test_round.cpp b/test/test_round.cpp
index 95ff4d234..e363efd56 100644
--- a/test/test_round.cpp
+++ b/test/test_round.cpp
@@ -3,12 +3,15 @@
 // Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif #include #define BOOST_TEST_MAIN +#include +#include "../include_private/boost/math/tools/test.hpp" #include -#include #include #include #include @@ -222,6 +225,7 @@ void test_round(T, const char* name ) // // Finish off by testing the error handlers: // + #ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW(iround(static_cast(1e20)), boost::math::rounding_error); BOOST_MATH_CHECK_THROW(iround(static_cast(-1e20)), boost::math::rounding_error); BOOST_MATH_CHECK_THROW(lround(static_cast(1e20)), boost::math::rounding_error); @@ -314,6 +318,7 @@ void test_round(T, const char* name ) BOOST_MATH_CHECK_THROW(llround(static_cast((std::numeric_limits::min)()) - 1), boost::math::rounding_error); } #endif + #endif // // try non-throwing error handlers: // diff --git a/test/test_round_double.cu b/test/test_round_double.cu new file mode 100644 index 000000000..3dae4342d --- /dev/null +++ b/test/test_round_double.cu @@ -0,0 +1,98 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA kernel: out[i] = round + iround + lround + llround of in[i], one thread per element.
+ * Threads whose global index is not below numElements do nothing.
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test on random inputs in [0, 1] and compares with the host result.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vector with values in [0, 1]
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel with one thread per element, rounded up to whole blocks
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Recompute every value on the host (timed); the kernel sums four rounding calls, hence 4 *
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::round(h_A[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the reference exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
+
diff --git a/test/test_round_float.cu b/test/test_round_float.cu
new file mode 100644
index 000000000..45dd14c03
--- /dev/null
+++ b/test/test_round_float.cu
@@ -0,0 +1,97 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: out[i] = round + iround + lround + llround of in[i], one thread per element.
+ * Threads whose global index is not below numElements do nothing.
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test on random inputs in [0, 1] and compares with the host result.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vector with values in [0, 1]
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel with one thread per element, rounded up to whole blocks
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Recompute every value on the host (timed); the kernel sums four rounding calls, hence 4 *
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::round(h_A[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the reference exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
+
diff --git a/test/test_sign.cpp b/test/test_sign.cpp
index 864d2dd12..530a60d50 100644
--- a/test/test_sign.cpp
+++ b/test/test_sign.cpp
@@ -1,5 +1,6 @@
-#define BOOST_TEST_MAIN// Copyright John Maddock 2008
+// Copyright John Maddock 2008
 // (C) Copyright Paul A. Bristow 2011 (added tests for changesign)
+// Copyright Matt Borland 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt @@ -147,7 +148,9 @@ BOOST_AUTO_TEST_CASE( test_main ) test_spots(0.0, "double"); // Test double. OK at decdigits 7, tolerance = 1e07 % // long double support for the sign functions is considered "core" so we always test it // even when long double support is turned off via BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS +#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS test_spots(0.0L, "long double"); // Test long double. +#endif #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS test_spots(boost::math::concepts::real_concept(0), "real_concept"); // Test real_concept. #endif diff --git a/test/test_trunc_double.cu b/test/test_trunc_double.cu new file mode 100644 index 000000000..5a2d7b622 --- /dev/null +++ b/test/test_trunc_double.cu @@ -0,0 +1,97 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector addition of " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr h_A(numElements); + + // Allocate the 
managed output vector C + cuda_managed_ptr h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(4 * boost::math::trunc(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_trunc_float.cu b/test/test_trunc_float.cu new file mode 100644 index 000000000..d6fe4d352 --- /dev/null +++ b/test/test_trunc_float.cu @@ -0,0 +1,97 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/trunc.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA kernel: out[i] = trunc + itrunc + ltrunc + lltrunc of in[i], one thread per element.
+ * Threads whose global index is not below numElements do nothing.
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]);
+    }
+}
+
+/**
+ * Host driver: runs cuda_test on random inputs in [0, 1] and compares with the host result.
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vector with values in [0, 1]
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel with one thread per element, rounded up to whole blocks
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Recompute every value on the host (timed); the kernel sums four truncation calls, hence 4 *
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::trunc(h_A[i]));
+    double t = w.elapsed();
+    // Fail on the first element whose epsilon_difference from the reference exceeds 10
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}