// Patch summary. These lines sit before the first "diff --git" header and belong to no hunk.
//
// include/cuinterval/arithmetic/basic.cuh
//   pown(): the `using std::pow;` declaration is replaced by a local lambda named `pow`
//   with return type T. Its `if constexpr` branch calls powf(x, n); the else branch falls
//   back to std::pow through a using-declaration. The in-code comment states the motive:
//   std::pow(float, int) returns a double, and a float is wanted.
// tests/CMakeLists.txt
//   Adds tests_powf.h, tests_powf.cu and tests_powf.cpp to the `tests` executable.
// tests/tests.h
//   Adds the is_interval / is_interval_v trait. The interval branch of check_all_equal is
//   now selected with is_interval_v instead of a std::is_same_v test, and it introduces the
//   alias TV = T::value_type plus a local `inf` that is passed (negated for the lower
//   bound) to check_within_ulps.
// tests/tests_additional.h
//   Includes tests_powf.h, adds depend(inout:...) clauses on buffers[2] / buffers[3] to the
//   tests_pi_approximation / tests_horner tasks, and adds a tests_powf task that depends on
//   buffers[2] and is called with streams[2], events[2].
// tests/tests_powf.h, tests/tests_powf.cu, tests/tests_powf.cpp (new files)
//   compute_powf() copies its inputs into thrust device vectors, applies powf_fn (which
//   calls pow(x, n) after `using cu::pow;`) via thrust::transform, and copies the result
//   back into a std::vector. tests_powf() runs five intervals with exponents 0..4 and
//   compares them against hard-coded reference intervals with max_ulp_diff = 4.
//
// NOTE(review): this copy of the patch looks flattened (line breaks collapsed to spaces) and
//   angle-bracket contents appear to be missing (e.g. `template inline constexpr`,
//   `std::is_same_v)`, bare `#include`). It presumably will not apply or compile as-is;
//   restore it from the original commit before use.
// NOTE(review): compute_powf() receives `stream` and tests_powf() receives `event`, but
//   neither name is referenced in the bodies visible here -- confirm whether the thrust calls
//   are meant to run on the supplied stream.
// NOTE(review): powf_fn calls `pow`, while the library change is inside `pown`; presumably
//   cu::pow(interval, int) forwards to pown -- verify, otherwise the new test does not cover
//   the changed code path.
// NOTE(review): tests_powf.h carries both `#pragma once` and an include guard.
diff --git a/include/cuinterval/arithmetic/basic.cuh b/include/cuinterval/arithmetic/basic.cuh index d6082b2..0450f4d 100644 --- a/include/cuinterval/arithmetic/basic.cuh +++ b/include/cuinterval/arithmetic/basic.cuh @@ -837,7 +837,15 @@ inline constexpr __device__ interval log1p(interval x) template inline constexpr __device__ interval pown(interval x, std::integral auto n) { - using std::pow; + auto pow = [](T x, std::integral auto n) -> T { + // The default std::pow implementation returns a double for std::pow(float, int). We want a float. + if constexpr (std::is_same_v) { + return powf(x, n); + } else { + using std::pow; + return pow(x, n); + } + }; if (empty(x)) { return x; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a1cd127..e8494d5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -24,6 +24,9 @@ add_executable(tests tests_loop.cpp tests_operator_overloading.h tests_operator_overloading.cu + tests_powf.h + tests_powf.cu + tests_powf.cpp generated/tests_generated.cpp generated/tests_c_xsc.cpp generated/tests_libieeep1788_elem.cpp diff --git a/tests/tests.h b/tests/tests.h index eda10ee..709bdfb 100644 --- a/tests/tests.h +++ b/tests/tests.h @@ -44,6 +44,19 @@ bool check_within_ulps(T x, T y, std::size_t n, T direction) return false; } +template +struct is_interval : std::false_type +{ +}; + +template +struct is_interval> : std::true_type +{ +}; + +template +inline constexpr bool is_interval_v = is_interval::value; + template void check_all_equal(T *h_res, std::span h_ref, int max_ulps_diff, std::source_location location, Args &&...args) { @@ -66,10 +79,13 @@ void check_all_equal(T *h_res, std::span h_ref, int max_ulps_diff, std::so if (h_res[i] != h_res[i] && h_ref[i] != h_ref[i]) // both are NaN continue; - if constexpr (std::is_same_v>) { + if constexpr (is_interval_v) { + if (!empty(h_res[i]) || !empty(h_ref[i])) { - bool lb_within_ulps = check_within_ulps(h_res[i].lb, h_ref[i].lb, max_ulps_diff, 
-std::numeric_limits::infinity()); - bool ub_within_ulps = check_within_ulps(h_res[i].ub, h_ref[i].ub, max_ulps_diff, std::numeric_limits::infinity()); + using TV = T::value_type; + auto inf = std::numeric_limits::infinity(); + bool lb_within_ulps = check_within_ulps(h_res[i].lb, h_ref[i].lb, max_ulps_diff, -inf); + bool ub_within_ulps = check_within_ulps(h_res[i].ub, h_ref[i].ub, max_ulps_diff, inf); auto out = expect(eq(lb_within_ulps && ub_within_ulps, true), location) << std::hexfloat diff --git a/tests/tests_additional.h b/tests/tests_additional.h index a251c9e..8dd53a0 100644 --- a/tests/tests_additional.h +++ b/tests/tests_additional.h @@ -4,6 +4,7 @@ #include "tests_common.h" #include "tests_loop.h" #include "tests_operator_overloading.h" +#include "tests_powf.h" #include @@ -18,14 +19,16 @@ void tests_additional(cuda_buffers buffers, cuda_streams streams, cuda_events ev tests_bisect(buffers[0], streams, events); #pragma omp task depend(inout:buffers[1].host,buffers[1].device) tests_bisection(buffers[1], streams[1], events[1]); - #pragma omp task + #pragma omp task depend(inout:buffers[2].host,buffers[2].device) tests_pi_approximation(streams[2], events[2]); - #pragma omp task + #pragma omp task depend(inout:buffers[3].host,buffers[3].device) tests_horner(streams[3], events[3]); #pragma omp task depend(inout:buffers[0].host,buffers[0].device) tests_mince(buffers[0], streams[0], events[0]); #pragma omp task depend(inout:buffers[1].host,buffers[1].device) tests_operator_overloading(buffers[1], streams[1], events[1]); + #pragma omp task depend(inout:buffers[2].host,buffers[2].device) + tests_powf(streams[2], events[2]); } } } diff --git a/tests/tests_powf.cpp b/tests/tests_powf.cpp new file mode 100644 index 0000000..217cf1f --- /dev/null +++ b/tests/tests_powf.cpp @@ -0,0 +1,29 @@ +#include "tests_powf.h" +#include "tests.h" + +#include + +#include +#include +#include + +using cu::interval; + +std::vector> compute_powf(cudaStream_t stream, std::vector> xs, 
std::vector exponents); + +void tests_powf(cudaStream_t stream, cudaEvent_t event) +{ + using T = float; + using I = interval; + + using namespace boost::ut; + + constexpr int n = 5; + constexpr int max_ulp_diff = 4; + std::vector xs = { { -1.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 3.0 }, { 3.0, 4.0 } }; + std::vector exponents = { 0, 1, 2, 3, 4 }; + std::vector out = compute_powf(stream, xs, exponents); + std::array ref { I { 1.0, 1.0 }, { 0.0, 1.0 }, { 1.0, 4.0 }, { 8.0, 27.0 }, { 81.0, 256.0 } }; + + check_all_equal(out.data(), ref, max_ulp_diff, std::source_location::current(), xs.data(), exponents.data()); +} diff --git a/tests/tests_powf.cu b/tests/tests_powf.cu new file mode 100644 index 0000000..a5297cf --- /dev/null +++ b/tests/tests_powf.cu @@ -0,0 +1,39 @@ +#include + +#include +#include +#include +#include + +#include + +using cu::interval; + +struct powf_fn +{ + template + __device__ I operator()(const I &x, int n) const + { + using cu::pow; + return pow(x, n); + } +}; + +std::vector> compute_powf(cudaStream_t stream, std::vector> xs, std::vector exponents) +{ + using T = float; + using I = interval; + + thrust::host_vector h_xs = xs; + thrust::host_vector h_exponents = exponents; + + auto n = xs.size(); + thrust::device_vector d_res(n); + thrust::device_vector d_xs = h_xs; + thrust::device_vector d_exponents = h_exponents; + thrust::transform(d_xs.begin(), d_xs.end(), d_exponents.begin(), d_res.begin(), powf_fn()); + std::vector h_res(n); + thrust::copy(d_res.begin(), d_res.end(), h_res.begin()); + + return h_res; +} diff --git a/tests/tests_powf.h b/tests/tests_powf.h new file mode 100644 index 0000000..4a7dce0 --- /dev/null +++ b/tests/tests_powf.h @@ -0,0 +1,9 @@ +#pragma once +#ifndef CUDA_INTERVAL_TESTS_POWF_H +#define CUDA_INTERVAL_TESTS_POWF_H + +#include + +void tests_powf(cudaStream_t stream, cudaEvent_t event); + +#endif // CUDA_INTERVAL_TESTS_POWF_H