Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GPU Batch 5 #6

Merged
merged 33 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
e0db581
Add GPU support to sqrt1pm1
mborland Jul 29, 2024
24c0c01
Add GPU support to continued fractions and remove recursion
mborland Jul 29, 2024
96953ea
Add GPU support to erf and erfc
mborland Jul 29, 2024
a8af41e
Add SYCL erf testing
mborland Jul 29, 2024
c91ac7c
Add sqrt1pm1 to fwd
mborland Jul 29, 2024
325720d
Add CUDA erf and erfc testing
mborland Jul 29, 2024
d1b5823
Replace integral constant pointers with references
mborland Jul 30, 2024
6a12bf0
Make erf_inv and erfc_inv GPU enabled
mborland Jul 30, 2024
0e1be52
Add erf_inv and erfc_inv CUDA testing
mborland Jul 30, 2024
3d0bd18
Replace igamma_temme_large integral constant pointer with reference
mborland Jul 30, 2024
8862896
Add GPU support to igamma_temme_large
mborland Jul 30, 2024
7cb3860
Add GPU support to lgamma_small_imp
mborland Jul 30, 2024
cc8f359
Add additional factorial overloads for GPU times
mborland Jul 30, 2024
d3fc301
Add GPU support to lgamma_small
mborland Jul 30, 2024
d02674c
Fix signbit GPU support
mborland Jul 30, 2024
b0496b1
Support GPU with root epsilon functions
mborland Jul 30, 2024
58a5bc4
Add GPU support to gamma functions
mborland Jul 30, 2024
4362b02
Add GPU markers to math_fwd for gamma functions
mborland Jul 30, 2024
443887e
Add CUDA testing for tgamma and lgamma
mborland Jul 30, 2024
fcb1c5b
Disable factorial check on GPU platform
mborland Jul 30, 2024
e2a9039
Add SYCL gamma testing
mborland Jul 30, 2024
7dcb432
Remove extra overloads
mborland Jul 30, 2024
f1f470e
Remove recursion from tgamma
mborland Jul 30, 2024
30a7ad1
Remove recursion in lgamma
mborland Jul 30, 2024
082b625
Ignore CUDA warning about GNU force inline
mborland Jul 30, 2024
6d19b67
Ignore literal range warnings
mborland Jul 30, 2024
19f166b
Ignore mapairy literal range warnings
mborland Jul 30, 2024
84a7272
Make powm1 GPU compatible
mborland Jul 30, 2024
7bd6f95
Fix GCC warnings
mborland Jul 30, 2024
c86e744
Revert if constexpr change
mborland Jul 30, 2024
4f846a9
Fix typo
mborland Jul 31, 2024
d13ff4e
Fix comparison
mborland Jul 31, 2024
43d5905
Use tag type idiom
mborland Jul 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/boost/math/special_functions/beta.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,9 @@ template <>
struct Pn_size<long double>
{
static constexpr unsigned value = 50; // ~35-50 digit accuracy
#ifndef BOOST_MATH_HAS_GPU_SUPPORT
static_assert(::boost::math::max_factorial<long double>::value >= 100, "Type does not provide for ~35-50 digits of accuracy");
#endif
};

template <class T, class Policy>
Expand Down
67 changes: 35 additions & 32 deletions include/boost/math/special_functions/detail/erf_inv.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// (C) Copyright John Maddock 2006.
// (C) Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Expand All @@ -13,6 +14,7 @@
#pragma warning(disable:4702) // Unreachable code: optimization warning
#endif

#include <boost/math/tools/config.hpp>
#include <type_traits>

namespace boost{ namespace math{
Expand All @@ -23,7 +25,7 @@ namespace detail{
// this version is for 80-bit long double's and smaller:
//
template <class T, class Policy>
T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constant<int, 64>*)
BOOST_MATH_GPU_ENABLED T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constant<int, 64>&)
{
BOOST_MATH_STD_USING // for ADL of std names.

Expand All @@ -44,8 +46,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
// Maximum Deviation Found (actual error term at infinite precision) 8.030e-21
//
// LCOV_EXCL_START
static const float Y = 0.0891314744949340820313f;
static const T P[] = {
BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.0891314744949340820313f;
BOOST_MATH_STATIC const T P[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, -0.000508781949658280665617),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.00836874819741736770379),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.0334806625409744615033),
Expand All @@ -55,7 +57,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
BOOST_MATH_BIG_CONSTANT(T, 64, 0.00822687874676915743155),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.00538772965071242932965)
};
static const T Q[] = {
BOOST_MATH_STATIC const T Q[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.970005043303290640362),
BOOST_MATH_BIG_CONSTANT(T, 64, -1.56574558234175846809),
Expand Down Expand Up @@ -87,8 +89,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
// Maximum Deviation Found (error term) 4.811e-20
//
// LCOV_EXCL_START
static const float Y = 2.249481201171875f;
static const T P[] = {
BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 2.249481201171875f;
BOOST_MATH_STATIC const T P[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, -0.202433508355938759655),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.105264680699391713268),
BOOST_MATH_BIG_CONSTANT(T, 64, 8.37050328343119927838),
Expand All @@ -99,7 +101,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
BOOST_MATH_BIG_CONSTANT(T, 64, 21.1294655448340526258),
BOOST_MATH_BIG_CONSTANT(T, 64, -3.67192254707729348546)
};
static const T Q[] = {
BOOST_MATH_STATIC const T Q[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
BOOST_MATH_BIG_CONSTANT(T, 64, 6.24264124854247537712),
BOOST_MATH_BIG_CONSTANT(T, 64, 3.9713437953343869095),
Expand Down Expand Up @@ -142,8 +144,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
{
// LCOV_EXCL_START
// Max error found: 1.089051e-20
static const float Y = 0.807220458984375f;
static const T P[] = {
BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.807220458984375f;
BOOST_MATH_STATIC const T P[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, -0.131102781679951906451),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.163794047193317060787),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.117030156341995252019),
Expand All @@ -156,7 +158,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
BOOST_MATH_BIG_CONSTANT(T, 64, 0.285225331782217055858e-7),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.681149956853776992068e-9)
};
static const T Q[] = {
BOOST_MATH_STATIC const T Q[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
BOOST_MATH_BIG_CONSTANT(T, 64, 3.46625407242567245975),
BOOST_MATH_BIG_CONSTANT(T, 64, 5.38168345707006855425),
Expand All @@ -175,8 +177,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
{
// LCOV_EXCL_START
// Max error found: 8.389174e-21
static const float Y = 0.93995571136474609375f;
static const T P[] = {
BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.93995571136474609375f;
BOOST_MATH_STATIC const T P[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, -0.0350353787183177984712),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.00222426529213447927281),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.0185573306514231072324),
Expand All @@ -187,7 +189,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
BOOST_MATH_BIG_CONSTANT(T, 64, -0.230404776911882601748e-9),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.266339227425782031962e-11)
};
static const T Q[] = {
BOOST_MATH_STATIC const T Q[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
BOOST_MATH_BIG_CONSTANT(T, 64, 1.3653349817554063097),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.762059164553623404043),
Expand All @@ -205,8 +207,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
{
// LCOV_EXCL_START
// Max error found: 1.481312e-19
static const float Y = 0.98362827301025390625f;
static const T P[] = {
BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.98362827301025390625f;
BOOST_MATH_STATIC const T P[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, -0.0167431005076633737133),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.00112951438745580278863),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.00105628862152492910091),
Expand All @@ -217,7 +219,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
BOOST_MATH_BIG_CONSTANT(T, 64, -0.281128735628831791805e-13),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.99055709973310326855e-16)
};
static const T Q[] = {
BOOST_MATH_STATIC const T Q[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.591429344886417493481),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.138151865749083321638),
Expand All @@ -235,8 +237,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
{
// LCOV_EXCL_START
// Max error found: 5.697761e-20
static const float Y = 0.99714565277099609375f;
static const T P[] = {
BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.99714565277099609375f;
BOOST_MATH_STATIC const T P[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, -0.0024978212791898131227),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.779190719229053954292e-5),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.254723037413027451751e-4),
Expand All @@ -246,7 +248,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
BOOST_MATH_BIG_CONSTANT(T, 64, 0.145596286718675035587e-11),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.116765012397184275695e-17)
};
static const T Q[] = {
BOOST_MATH_STATIC const T Q[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.207123112214422517181),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.0169410838120975906478),
Expand All @@ -264,8 +266,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
{
// LCOV_EXCL_START
// Max error found: 1.279746e-20
static const float Y = 0.99941349029541015625f;
static const T P[] = {
BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.99941349029541015625f;
BOOST_MATH_STATIC const T P[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, -0.000539042911019078575891),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.28398759004727721098e-6),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.899465114892291446442e-6),
Expand All @@ -275,7 +277,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan
BOOST_MATH_BIG_CONSTANT(T, 64, 0.135880130108924861008e-14),
BOOST_MATH_BIG_CONSTANT(T, 64, -0.348890393399948882918e-21)
};
static const T Q[] = {
BOOST_MATH_STATIC const T Q[] = {
BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.0845746234001899436914),
BOOST_MATH_BIG_CONSTANT(T, 64, 0.00282092984726264681981),
Expand Down Expand Up @@ -310,12 +312,13 @@ struct erf_roots
};

template <class T, class Policy>
T erf_inv_imp(const T& p, const T& q, const Policy& pol, const std::integral_constant<int, 0>*)
T erf_inv_imp(const T& p, const T& q, const Policy& pol, const std::integral_constant<int, 0>&)
{
//
// Generic version, get a guess that's accurate to 64-bits (10^-19)
//
T guess = erf_inv_imp(p, q, pol, static_cast<std::integral_constant<int, 64> const*>(nullptr));
using tag_type = std::integral_constant<int, 64>;
T guess = erf_inv_imp(p, q, pol, tag_type());
T result;
//
// If T has more bit's than 64 in it's mantissa then we need to iterate,
Expand Down Expand Up @@ -344,14 +347,14 @@ T erf_inv_imp(const T& p, const T& q, const Policy& pol, const std::integral_con
} // namespace detail

template <class T, class Policy>
typename tools::promote_args<T>::type erfc_inv(T z, const Policy& pol)
BOOST_MATH_GPU_ENABLED typename tools::promote_args<T>::type erfc_inv(T z, const Policy& pol)
{
typedef typename tools::promote_args<T>::type result_type;

//
// Begin by testing for domain errors, and other special cases:
//
static const char* function = "boost::math::erfc_inv<%1%>(%1%, %1%)";
constexpr auto function = "boost::math::erfc_inv<%1%>(%1%, %1%)";
if((z < 0) || (z > 2))
return policies::raise_domain_error<result_type>(function, "Argument outside range [0,2] in inverse erfc function (got p=%1%).", z, pol);
if(z == 0)
Expand Down Expand Up @@ -401,18 +404,18 @@ typename tools::promote_args<T>::type erfc_inv(T z, const Policy& pol)
// And get the result, negating where required:
//
return s * policies::checked_narrowing_cast<result_type, forwarding_policy>(
detail::erf_inv_imp(static_cast<eval_type>(p), static_cast<eval_type>(q), forwarding_policy(), static_cast<tag_type const*>(nullptr)), function);
detail::erf_inv_imp(static_cast<eval_type>(p), static_cast<eval_type>(q), forwarding_policy(), tag_type()), function);
}

template <class T, class Policy>
typename tools::promote_args<T>::type erf_inv(T z, const Policy& pol)
BOOST_MATH_GPU_ENABLED typename tools::promote_args<T>::type erf_inv(T z, const Policy& pol)
{
typedef typename tools::promote_args<T>::type result_type;

//
// Begin by testing for domain errors, and other special cases:
//
static const char* function = "boost::math::erf_inv<%1%>(%1%, %1%)";
constexpr auto function = "boost::math::erf_inv<%1%>(%1%, %1%)";
if((z < -1) || (z > 1))
return policies::raise_domain_error<result_type>(function, "Argument outside range [-1, 1] in inverse erf function (got p=%1%).", z, pol);
if(z == 1)
Expand Down Expand Up @@ -469,17 +472,17 @@ typename tools::promote_args<T>::type erf_inv(T z, const Policy& pol)
// And get the result, negating where required:
//
return s * policies::checked_narrowing_cast<result_type, forwarding_policy>(
detail::erf_inv_imp(static_cast<eval_type>(p), static_cast<eval_type>(q), forwarding_policy(), static_cast<tag_type const*>(nullptr)), function);
detail::erf_inv_imp(static_cast<eval_type>(p), static_cast<eval_type>(q), forwarding_policy(), tag_type()), function);
}

template <class T>
inline typename tools::promote_args<T>::type erfc_inv(T z)
BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type erfc_inv(T z)
{
return erfc_inv(z, policies::policy<>());
}

template <class T>
inline typename tools::promote_args<T>::type erf_inv(T z)
BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type erf_inv(T z)
{
return erf_inv(z, policies::policy<>());
}
Expand Down
Loading