Skip to content

Commit

Permalink
add rootn
Browse files Browse the repository at this point in the history
  • Loading branch information
neilkichler committed Feb 28, 2024
1 parent ae3d7fe commit 549545d
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 0 deletions.
30 changes: 30 additions & 0 deletions include/cuinterval/arithmetic/basic.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,36 @@ __device__ interval<T> pow_(interval<T> x, T y)
return {};
}

// Computes an interval enclosing the real n-th root of every point of x.
//
// Parameters:
//   x - input interval; an empty x is returned unchanged.
//   n - integer root index. n == 0 is rejected by the assert (when asserts are
//       enabled) and otherwise yields the empty interval.
//
// Dispatch:
//   n == 1 -> x itself
//   n == 2 -> sqrt(x)
//   n <  0 -> recip(rootn(x, -n))
//   else   -> x is intersected with the domain of the root ([0, +inf] for even
//             n, [-inf, +inf] for odd n); if nothing remains the result is
//             empty, otherwise both endpoints are mapped through the root and
//             pushed outwards by one floating-point step.
//
// NOTE(review): -n overflows when n is the minimum value of a signed type, and
// for unsigned n the n < 0 branch is never taken - confirm callers stay clear
// of those cases.
template<typename T>
__device__ interval<T> rootn(interval<T> x, std::integral auto n)
{
    assert(n && "n must be non-zero integer");

    if (empty(x)) {
        return x;
    } else if (n == 0) {
        return empty<T>();
    } else if (n == 1) {
        return x;
    } else if (n == 2) {
        return sqrt(x);
    } else if (n < 0) {
        return recip(rootn(x, -n));
    }

    bool is_odd = n % 2;
    interval<T> domain { is_odd ? intrinsic::neg_inf<T>() : static_cast<T>(0),
                         intrinsic::pos_inf<T>() };

    x = intersection(x, domain);
    if (empty(x)) {
        return empty<T>();
    }

    // Keep the exponent in T so that a float instantiation is not silently
    // promoted to double by a double literal.
    const T exponent = static_cast<T>(1) / static_cast<T>(n);

    const T lo = inf(x);
    const T hi = sup(x);

    // pow() with a negative base and a non-integer exponent does not return
    // the real root (NaN for finite bases, +inf for -inf). Negative endpoints
    // can only survive the intersection above when n is odd, where the real
    // root is the negated root of the magnitude.
    const T root_lo = lo < 0 ? -pow(-lo, exponent) : pow(lo, exponent);
    const T root_hi = hi < 0 ? -pow(-hi, exponent) : pow(hi, exponent);

    // Step each bound one representable value towards the matching end of the
    // domain to widen the enclosure.
    return { intrinsic::next_after(root_lo, domain.lb),
             intrinsic::next_after(root_hi, domain.ub) };
}

template<typename T>
__device__ interval<T> pow(interval<T> x, interval<T> y)
{
Expand Down
1 change: 1 addition & 0 deletions tests/test_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def convert_to_test(file_path):
"cospi": {"args": [I], "ret": I, "ulp_error": 3},
"pown": {"args": [I, N], "ret": I, "ulp_error": 1},
"pow": {"args": [I, I], "ret": I, "ulp_error": 1},
"rootn": {"args": [I, N], "ret": I, "ulp_error": 2},
# "cot": {"args": [I], "ret": I, "ulp_error": 4},
}

Expand Down
9 changes: 9 additions & 0 deletions tests/test_ops.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,15 @@ __global__ void test_pown(int n, interval<T> *x, int *n_pow, interval<T> *res)
}
}

// Test kernel: each thread evaluates rootn on one (interval, root index) pair.
//
//   n     - number of elements to process
//   x     - input intervals, read at indices [0, n)
//   n_pow - integer root index for each input, read at indices [0, n)
//   res   - output intervals, written at indices [0, n)
//
// Indexing is one-dimensional (x components of block and thread index only);
// threads whose global index is >= n do nothing.
template<typename T>
__global__ void test_rootn(int n, interval<T> *x, int *n_pow, interval<T> *res)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) {
        return;
    }

    res[idx] = rootn(x[idx], n_pow[idx]);
}

template<typename T>
__global__ void test_pow(int n, interval<T> *x, interval<T> *y, interval<T> *res)
{
Expand Down
38 changes: 38 additions & 0 deletions tests/tests_c_xsc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -991,6 +991,44 @@ void tests_c_xsc() {
}
};

// rootn test: runs test_rootn on three point intervals with known exact roots
// (0^(1/4) = 0, 1024^(1/10) = 2, 27^(1/3) = 3) and compares the device results
// against the references with a tolerance of max_ulp_diff.
//
// NOTE(review): this test looks machine-generated (tests/test_converter.py has
// a matching "rootn" entry) - if so, mirror the size fix below in the
// generator as well.
"cxsc.intervalstdfunc_rootn"_test = [&] {
    constexpr int n = 3;
    std::array<I, n> h_xs {{
        {0.0,0.0},
        {1024.0,1024.0},
        {27.0,27.0},
    }};

    std::array<N, n> h_ys {{
        4,
        10,
        3,
    }};

    std::array<I, n> h_res{};
    I *d_res = (I *)d_res_;
    I *d_xs = (I *)d_xs_;
    N *d_ys = (N *)d_ys_;
    // Byte counts are derived from the local arrays: the interval inputs and
    // the integer root indices have different element types, so one shared
    // byte count cannot be correct for both copies and may read past the end
    // of the smaller host array.
    const int n_xs_bytes = n * sizeof(I);
    const int n_ys_bytes = n * sizeof(N);
    int n_result_bytes = n * sizeof(I);
    std::array<I, n> h_ref {{
        {0.0,0.0},
        {2.0,2.0},
        {3.0,3.0},
    }};

    CUDA_CHECK(cudaMemcpy(d_xs, h_xs.data(), n_xs_bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_ys, h_ys.data(), n_ys_bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_res, h_res.data(), n_result_bytes, cudaMemcpyHostToDevice));
    test_rootn<<<numBlocks, blockSize>>>(n, d_xs, d_ys, d_res);
    // A kernel launch reports configuration errors only through the last-error
    // state, so check it explicitly before reading the results back.
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaMemcpy(h_res.data(), d_res, n_result_bytes, cudaMemcpyDeviceToHost));
    int max_ulp_diff = 2;
    auto failed = check_all_equal<I, n>(h_res, h_ref, max_ulp_diff);
    // Print the inputs of every failing case to make the report actionable.
    for (auto fail_id : failed) {
        printf("failed at case %zu:\n", fail_id);
        printf("x = [%a, %a]\ny = %d\n", h_xs[fail_id].lb, h_xs[fail_id].ub, h_ys[fail_id]);
    }
};

"cxsc.intervalstdfunc_sqr"_test = [&] {
constexpr int n = 3;
std::array<I, n> h_xs {{
Expand Down

0 comments on commit 549545d

Please sign in to comment.