better float testing support and fix of float pow

neilkichler · Jan 8, 2025 · 81c4053 · 81c4053
1 parent 6946b31
commit 81c4053
Show file tree

Hide file tree

Showing 7 changed files with 113 additions and 6 deletions.
diff --git a/include/cuinterval/arithmetic/basic.cuh b/include/cuinterval/arithmetic/basic.cuh
@@ -837,7 +837,15 @@ inline constexpr __device__ interval<T> log1p(interval<T> x)
 template<typename T>
 inline constexpr __device__ interval<T> pown(interval<T> x, std::integral auto n)
 {
-    using std::pow;
+    // std::pow(float, int) yields a double, so float inputs are routed through powf to stay in
+    // single precision; any other T goes through an unqualified pow call with std::pow in scope.
+    auto pow = [](T base, std::integral auto exponent) -> T {
+        if constexpr (!std::is_same_v<T, float>) {
+            using std::pow;
+            return pow(base, exponent);
+        } else {
+            return powf(base, exponent);
+        }
+    };
 
     if (empty(x)) {
         return x;

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -24,6 +24,9 @@ add_executable(tests
     tests_loop.cpp
     tests_operator_overloading.h
     tests_operator_overloading.cu
+    tests_powf.h
+    tests_powf.cu
+    tests_powf.cpp
     generated/tests_generated.cpp
     generated/tests_c_xsc.cpp
     generated/tests_libieeep1788_elem.cpp

diff --git a/tests/tests.h b/tests/tests.h
@@ -44,6 +44,19 @@ bool check_within_ulps(T x, T y, std::size_t n, T direction)
     return false;
 }
 
+// Type trait that detects specialisations of cu::interval; false for every other type.
+template<typename T>
+struct is_interval : std::bool_constant<false>
+{
+};
+
+// Partial specialisation matching cu::interval<V> for any value type V.
+template<typename V>
+struct is_interval<cu::interval<V>> : std::bool_constant<true>
+{
+};
+
+// Shorthand for is_interval<T>::value.
+template<typename T>
+inline constexpr bool is_interval_v = is_interval<T>::value;
+
 template<typename T, int N, typename... Args>
 void check_all_equal(T *h_res, std::span<T, N> h_ref, int max_ulps_diff, std::source_location location, Args &&...args)
 {
@@ -66,10 +79,13 @@ void check_all_equal(T *h_res, std::span<T, N> h_ref, int max_ulps_diff, std::so
         if (h_res[i] != h_res[i] && h_ref[i] != h_ref[i]) // both are NaN
             continue;
 
-        if constexpr (std::is_same_v<T, cu::interval<double>>) {
+        if constexpr (is_interval_v<T>) {
+
             if (!empty(h_res[i]) || !empty(h_ref[i])) {
-                bool lb_within_ulps = check_within_ulps(h_res[i].lb, h_ref[i].lb, max_ulps_diff, -std::numeric_limits<double>::infinity());
-                bool ub_within_ulps = check_within_ulps(h_res[i].ub, h_ref[i].ub, max_ulps_diff, std::numeric_limits<double>::infinity());
+                using TV            = T::value_type;
+                auto inf            = std::numeric_limits<TV>::infinity();
+                bool lb_within_ulps = check_within_ulps(h_res[i].lb, h_ref[i].lb, max_ulps_diff, -inf);
+                bool ub_within_ulps = check_within_ulps(h_res[i].ub, h_ref[i].ub, max_ulps_diff, inf);
 
                 auto out = expect(eq(lb_within_ulps && ub_within_ulps, true), location)
                     << std::hexfloat

diff --git a/tests/tests_additional.h b/tests/tests_additional.h
@@ -4,6 +4,7 @@
 #include "tests_common.h"
 #include "tests_loop.h"
 #include "tests_operator_overloading.h"
+#include "tests_powf.h"
 
 #include <omp.h>
 
@@ -18,14 +19,16 @@ void tests_additional(cuda_buffers buffers, cuda_streams streams, cuda_events ev
             tests_bisect(buffers[0], streams, events);
             #pragma omp task depend(inout:buffers[1].host,buffers[1].device)
             tests_bisection(buffers[1], streams[1], events[1]);
-            #pragma omp task
+            #pragma omp task depend(inout:buffers[2].host,buffers[2].device)
             tests_pi_approximation(streams[2], events[2]);
-            #pragma omp task
+            #pragma omp task depend(inout:buffers[3].host,buffers[3].device)
             tests_horner(streams[3], events[3]);
             #pragma omp task depend(inout:buffers[0].host,buffers[0].device)
             tests_mince(buffers[0], streams[0], events[0]);
             #pragma omp task depend(inout:buffers[1].host,buffers[1].device)
             tests_operator_overloading(buffers[1], streams[1], events[1]);
+            #pragma omp task depend(inout:buffers[2].host,buffers[2].device)
+            tests_powf(streams[2], events[2]);
         }
     }
 }
diff --git a/tests/tests_powf.cpp b/tests/tests_powf.cpp
@@ -0,0 +1,29 @@
+#include "tests_powf.h"
+#include "tests.h"
+
+#include <cuinterval/interval.h>
+
+#include <array>
+#include <source_location>
+#include <vector>
+
+using cu::interval;
+
+std::vector<interval<float>> compute_powf(cudaStream_t stream, std::vector<interval<float>> xs, std::vector<int> exponents);
+
+// Checks integer powers of interval<float> computed by compute_powf against hand-written bounds.
+// The event argument is not used by this test.
+void tests_powf(cudaStream_t stream, cudaEvent_t event)
+{
+    using T = float;
+    using I = interval<T>;
+
+    using namespace boost::ut;
+
+    constexpr int n_cases       = 5;
+    constexpr int ulp_tolerance = 4;
+
+    // Case i raises bases[i] to powers[i]; expected[i] holds the reference enclosure.
+    std::vector<I> bases    = { { -1.0f, 0.0f }, { 0.0f, 1.0f }, { 1.0f, 2.0f }, { 2.0f, 3.0f }, { 3.0f, 4.0f } };
+    std::vector<int> powers = { 0, 1, 2, 3, 4 };
+    std::array<I, n_cases> expected { I { 1.0f, 1.0f }, I { 0.0f, 1.0f }, I { 1.0f, 4.0f }, I { 8.0f, 27.0f }, I { 81.0f, 256.0f } };
+
+    std::vector<I> computed = compute_powf(stream, bases, powers);
+
+    // The inputs are passed along as trailing arguments to check_all_equal.
+    check_all_equal<I, n_cases>(computed.data(), expected, ulp_tolerance, std::source_location::current(), bases.data(), powers.data());
+}
diff --git a/tests/tests_powf.cu b/tests/tests_powf.cu
@@ -0,0 +1,39 @@
+#include <cuinterval/cuinterval.h>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+#include <thrust/host_vector.h>
+#include <thrust/transform.h>
+
+#include <vector>
+
+using cu::interval;
+
+// Binary device functor used with thrust::transform: raises an interval to an integer power.
+struct powf_fn
+{
+    template<typename Interval>
+    __device__ auto operator()(const Interval &base, int exponent) const -> Interval
+    {
+        // Unqualified call with cu::pow brought into scope, so ADL still takes part in lookup.
+        using cu::pow;
+        return pow(base, exponent);
+    }
+};
+
+// Evaluates pow(xs[i], exponents[i]) on the GPU for every i and returns the results on the host.
+//
+// stream    - CUDA stream the element-wise transform is issued on
+// xs        - interval bases
+// exponents - integer exponents; must hold at least xs.size() elements
+//
+// The host<->device container copies still use Thrust's default path; only the transform
+// itself is ordered on `stream`.
+std::vector<interval<float>> compute_powf(cudaStream_t stream, std::vector<interval<float>> xs, std::vector<int> exponents)
+{
+    using T = float;
+    using I = interval<T>;
+
+    const auto n = xs.size();
+
+    // device_vector can be built straight from host iterators, so no host_vector staging copy is needed.
+    thrust::device_vector<I> d_xs(xs.begin(), xs.end());
+    thrust::device_vector<int> d_exponents(exponents.begin(), exponents.end());
+    thrust::device_vector<I> d_res(n);
+
+    // Honour the caller's stream instead of silently running on the default one. The (synchronous)
+    // par policy waits for the transform to finish, so d_res is complete before the copy below.
+    thrust::transform(thrust::cuda::par.on(stream), d_xs.begin(), d_xs.end(), d_exponents.begin(), d_res.begin(), powf_fn());
+
+    std::vector<I> h_res(n);
+    thrust::copy(d_res.begin(), d_res.end(), h_res.begin());
+
+    return h_res;
+}
diff --git a/tests/tests_powf.h b/tests/tests_powf.h
@@ -0,0 +1,9 @@
+#pragma once
+#ifndef CUDA_INTERVAL_TESTS_POWF_H
+#define CUDA_INTERVAL_TESTS_POWF_H
+
+#include <cuda_runtime.h>
+
+// Runs the interval<float> integer-power checks implemented in tests_powf.cpp.
+// stream is forwarded to compute_powf; event is accepted but not used by that implementation.
+void tests_powf(cudaStream_t stream, cudaEvent_t event);
+
+#endif // CUDA_INTERVAL_TESTS_POWF_H