
Commit a918a22 (1 parent: 32d7c82)

Add CUDA testing of pFq

File tree: 3 files changed, +211 −2 lines

test/cuda_jamfile

Lines changed: 3 additions & 2 deletions

@@ -9,8 +9,7 @@ project : requirements
 [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
 ;
 
-run test_2F0_double.cu ;
-run test_2F0_float.cu ;
+run test_pFq_double.cu ;
 
 # Quad
 run test_exp_sinh_quad_float.cu ;
@@ -367,6 +366,8 @@ run test_0F1_double.cu ;
 run test_0F1_float.cu ;
 run test_1F0_double.cu ;
 run test_1F0_float.cu ;
+run test_2F0_double.cu ;
+run test_2F0_float.cu ;
 
 run test_lgamma_double.cu ;
 run test_lgamma_float.cu ;
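
Note that while this commit adds both test_pFq_double.cu and test_pFq_float.cu (below), only the double variant is registered in the jamfile. If the float test is intended to run as well, a follow-up entry along these lines would presumably be needed (an assumption; it is not part of this commit):

run test_pFq_float.cu ;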

test/test_pFq_double.cu

Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@ (new file)

//  Copyright John Maddock 2016.
//  Copyright Matt Borland 2024.
//  Use, modification and distribution are subject to the
//  Boost Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA kernel device code
 */
__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        // One numerator and one denominator parameter, so this evaluates 1F1(a; b; 1)
        out[i] = boost::math::hypergeometric_pFq(std::initializer_list<float_type>({in1[i]}), std::initializer_list<float_type>({in2[i]}), static_cast<float_type>(1));
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> input_vector1(numElements);

    // Allocate the managed input vector B
    cuda_managed_ptr<float_type> input_vector2(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<float_type> output_vector(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        input_vector1[i] = rand()/(float_type)RAND_MAX;
        input_vector2[i] = rand()/(float_type)RAND_MAX;
    }

    // Launch the pFq CUDA kernel
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
    cudaDeviceSynchronize();

    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct by recomputing on the host
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::hypergeometric_pFq(std::initializer_list<float_type>({input_vector1[i]}), std::initializer_list<float_type>({input_vector2[i]}), static_cast<float_type>(1)));
    double t = w.elapsed();
    // Check the results, allowing up to 10 epsilon of device/host disagreement
    for(int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
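
In both tests the argument lists hold a single numerator parameter a and a single denominator parameter b, so the call boost::math::hypergeometric_pFq({a}, {b}, 1) evaluates the confluent hypergeometric function 1F1(a; b; 1). For reference, the series that pFq sums is

$$ {}_pF_q(a_1,\ldots,a_p;\, b_1,\ldots,b_q;\, z) \;=\; \sum_{n=0}^{\infty} \frac{(a_1)_n \cdots (a_p)_n}{(b_1)_n \cdots (b_q)_n}\, \frac{z^n}{n!}, $$

where $(x)_n = x(x+1)\cdots(x+n-1)$ is the rising factorial; here $p = q = 1$, for which the series converges for all finite $z$, and the random inputs lie in $[0, 1]$.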

test/test_pFq_float.cu

Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@ (new file)

//  Copyright John Maddock 2016.
//  Copyright Matt Borland 2024.
//  Use, modification and distribution are subject to the
//  Boost Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA kernel device code
 */
__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        // One numerator and one denominator parameter, so this evaluates 1F1(a; b; 1)
        out[i] = boost::math::hypergeometric_pFq(std::initializer_list<float_type>({in1[i]}), std::initializer_list<float_type>({in2[i]}), static_cast<float_type>(1));
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> input_vector1(numElements);

    // Allocate the managed input vector B
    cuda_managed_ptr<float_type> input_vector2(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<float_type> output_vector(numElements);

    // Initialize the input vectors
    for (int i = 0; i < numElements; ++i)
    {
        input_vector1[i] = rand()/(float_type)RAND_MAX;
        input_vector2[i] = rand()/(float_type)RAND_MAX;
    }

    // Launch the pFq CUDA kernel
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
    cudaDeviceSynchronize();

    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct by recomputing on the host
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::hypergeometric_pFq(std::initializer_list<float_type>({input_vector1[i]}), std::initializer_list<float_type>({input_vector2[i]}), static_cast<float_type>(1)));
    double t = w.elapsed();
    // Check the results, allowing up to 10 epsilon of device/host disagreement
    for(int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
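
The helper headers cuda_managed_ptr.hpp and stopwatch.hpp that both tests include are not part of this commit. The sketch below shows what they plausibly provide, judging purely from how the tests use them: an RAII wrapper over CUDA unified memory plus a std::chrono stopwatch. The member names (get, operator[], reset, elapsed) come from the usage above; the bodies are assumptions, not the actual Boost.Math test headers.

#include <cuda_runtime.h>
#include <chrono>
#include <cstddef>

// Hypothetical stand-in for cuda_managed_ptr.hpp: owns a unified-memory
// allocation visible to both host and device, which is why the tests need
// no explicit cudaMemcpy calls.
template <class T>
class cuda_managed_ptr
{
    T* data_;
public:
    explicit cuda_managed_ptr(std::size_t n) : data_(nullptr)
    {
        cudaMallocManaged(&data_, n * sizeof(T));
    }
    ~cuda_managed_ptr() { cudaFree(data_); }
    cuda_managed_ptr(const cuda_managed_ptr&) = delete;
    cuda_managed_ptr& operator=(const cuda_managed_ptr&) = delete;
    T* get() const { return data_; }  // raw pointer for kernel launches
    T& operator[](std::size_t i) { return data_[i]; }
    const T& operator[](std::size_t i) const { return data_[i]; }
};

// Hypothetical stand-in for stopwatch.hpp: elapsed() reports seconds since
// construction or the last reset().
class watch
{
    std::chrono::high_resolution_clock::time_point start_;
public:
    watch() { reset(); }
    void reset() { start_ = std::chrono::high_resolution_clock::now(); }
    double elapsed() const
    {
        return std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - start_).count();
    }
};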
