Skip to content

Commit

Permalink
Merge pull request #5 from arminms:add_namespace
Browse files (browse the repository at this point in the history)
add additional namespaces
  • Loading branch information
arminms authored Oct 12, 2023
2 parents c740fe9 + a4a9371 commit cb901dd
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 29 deletions.
34 changes: 30 additions & 4 deletions example/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,7 @@ int main(int argc, char* argv[])
std::cout << '\n' << std::endl;
}
```
## CUDA/ROCm (`rand100.cu`, `rand100_rocm.cpp`)
`CUDA` and `ROCm` sources are exactly the same in this case, so only one of them is shown here:
## CUDA (`rand100.cu`)
```c++
#include <iostream>
#include <iomanip>
Expand All @@ -170,7 +169,34 @@ int main(int argc, char* argv[])
const auto n{100};
thrust::device_vector<int> v(n);
p2rng::generate_n
p2rng::cuda::generate_n
( std::begin(v)
, n
, p2rng::bind(trng::uniform_int_dist(10, 100), pcg32(pi_seed))
);
for (size_t i = 0; i < n; ++i)
{ if (0 == i % 10)
std::cout << '\n';
std::cout << std::setw(3) << v[i];
}
std::cout << '\n' << std::endl;
}
```
## ROCm (`rand100_rocm.cpp`)
```c++
#include <iostream>
#include <iomanip>

#include <thrust/device_vector.h>
#include <p2rng/p2rng.hpp>

int main(int argc, char* argv[])
{ const unsigned long pi_seed{3141592654};
const auto n{100};
thrust::device_vector<int> v(n);

p2rng::rocm::generate_n
( std::begin(v)
, n
, p2rng::bind(trng::uniform_int_dist(10, 100), pcg32(pi_seed))
Expand Down Expand Up @@ -199,7 +225,7 @@ int main(int argc, char* argv[])
sycl::buffer<int> v{sycl::range(n)};
sycl::queue q;
p2rng::generate_n
p2rng::oneapi::generate_n
( dpl::begin(v)
, n
, p2rng::bind(trng::uniform_int_dist(10, 100), pcg32(pi_seed))
Expand Down
2 changes: 1 addition & 1 deletion example/rand100.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ int main(int argc, char* argv[])
const auto n{100};
thrust::device_vector<int> v(n);

p2rng::generate_n
p2rng::cuda::generate_n
( std::begin(v)
, n
, p2rng::bind(trng::uniform_int_dist(10, 100), pcg32(pi_seed))
Expand Down
2 changes: 1 addition & 1 deletion example/rand100_oneapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ int main(int argc, char* argv[])
sycl::buffer<int> v{sycl::range(n)};
sycl::queue q;

p2rng::generate_n
p2rng::oneapi::generate_n
( dpl::begin(v)
, n
, p2rng::bind(trng::uniform_int_dist(10, 100), pcg32(pi_seed))
Expand Down
2 changes: 1 addition & 1 deletion example/rand100_rocm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ int main(int argc, char* argv[])
const auto n{100};
thrust::device_vector<int> v(n);

p2rng::generate_n
p2rng::rocm::generate_n
( std::begin(v)
, n
, p2rng::bind(trng::uniform_int_dist(10, 100), pcg32(pi_seed))
Expand Down
34 changes: 24 additions & 10 deletions include/p2rng/algorithm/generate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,14 @@
#ifndef _P2RNG_ALGORITHM_GENERATE_HPP_
#define _P2RNG_ALGORITHM_GENERATE_HPP_

#if !(defined(SYCL_LANGUAGE_VERSION) || defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__))
# include <omp.h>
#endif
namespace p2rng {

/**
* === oneAPI ==================================================================
*/

#if defined(__INTEL_LLVM_COMPILER) && defined(SYCL_LANGUAGE_VERSION)

namespace p2rng::oneapi {

/**
* @brief Assigns @a n random numbers using SYCL device, generated by given
* function object @a g.
Expand Down Expand Up @@ -115,19 +112,25 @@ inline auto generate
, sycl::queue q = sycl::queue()
)-> sycl::event
{ auto n{std::distance(first, last)};
return p2rng::generate_n(first, n, g, q);
return p2rng::oneapi::generate_n(first, n, g, q);
}

} // end p2rng::oneapi namespace

/**
* === CUDA / ROCm =============================================================
*/

#elif defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)

namespace p2rng::cuda {

/**
* device kernel
*/

namespace kernel {

template<typename T, typename SizeT, typename GeneratorT>
__global__ void block_splitting
( T* out
Expand All @@ -141,6 +144,8 @@ __global__ void block_splitting
}
}

} // end kernel namespace

/**
* @brief Assigns @a n random numbers using GPU, generated by given function
* object @a g.
Expand Down Expand Up @@ -168,7 +173,7 @@ inline OutputIt generate_n
)
{ const Size threads_per_block{256};
Size blocks_per_grid{n / threads_per_block + 1};
block_splitting<<<blocks_per_grid, threads_per_block>>>
p2rng::cuda::kernel::block_splitting<<<blocks_per_grid, threads_per_block>>>
( thrust::raw_pointer_cast(&out[0])
, n
, g
Expand Down Expand Up @@ -201,15 +206,24 @@ inline void generate
, Generator g
)
{ auto n{std::distance(first, last)};
p2rng::generate_n(first, n, g);
p2rng::cuda::generate_n(first, n, g);
}

} // end p2rng::cuda namespace

// The CUDA and ROCm back ends share a single implementation, so `p2rng::rocm`
// is published as a namespace alias of `p2rng::cuda` instead of duplicating
// the code; every name reachable as p2rng::cuda::X is also p2rng::rocm::X.
namespace p2rng
{
    namespace rocm = ::p2rng::cuda;
}

/**
* === OpenMP ==================================================================
*/

#else

# include <omp.h>
namespace p2rng {

/**
* @brief Assigns @a n random numbers in parallel, generated by given function
* object @a g.
Expand Down Expand Up @@ -277,8 +291,8 @@ inline void generate
p2rng::generate_n(first, n, g);
}

#endif //__INTEL_LLVM_COMPILER && SYCL_LANGUAGE_VERSION

} // end p2rng namespace

#endif //__INTEL_LLVM_COMPILER && SYCL_LANGUAGE_VERSION

#endif //_P2RNG_ALGORITHM_GENERATE_HPP_
2 changes: 1 addition & 1 deletion perf/benchmarks_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ void p2rng_generate_cuda(benchmark::State& st)

for (auto _ : st)
{ cudaEventRecord(start);
p2rng::generate_n
p2rng::cuda::generate_n
( v.begin()
, n
, p2rng::bind(trng::uniform_dist<T>(10, 100), pcg32(seed_pi))
Expand Down
2 changes: 1 addition & 1 deletion perf/benchmarks_oneapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ void p2rng_generate_oneapi(benchmark::State& st)
sycl::buffer<T> v(n);

for (auto _ : st)
{ auto event = p2rng::generate_n
{ auto event = p2rng::oneapi::generate_n
( dpl::begin(v)
, n
, p2rng::bind(trng::uniform_dist<T>(10, 100), pcg32(seed_pi))
Expand Down
2 changes: 1 addition & 1 deletion perf/benchmarks_rocm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ void p2rng_generate_rocm(benchmark::State& st)

for (auto _ : st)
{ hipEventRecord(start);
p2rng::generate_n
p2rng::rocm::generate_n
( v.begin()
, n
, p2rng::bind(trng::uniform_dist<T>(10, 100), pcg32(seed_pi))
Expand Down
6 changes: 3 additions & 3 deletions test/unit_tests_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ TEMPLATE_TEST_CASE("generate_n() - CUDA", "[10K][pcg32]", float, double)

SECTION("p2rng::generate_n()")
{ thrust::device_vector<T> dvt(n);
auto itr = p2rng::generate_n
auto itr = p2rng::cuda::generate_n
( std::begin(dvt)
, n
, p2rng::bind(u, pcg32(seed_pi))
Expand Down Expand Up @@ -94,7 +94,7 @@ TEMPLATE_TEST_CASE("generate() - CUDA", "[10K][pcg32]", float, double)

SECTION("p2rng::generate()")
{ thrust::device_vector<T> dvt(n);
p2rng::generate
p2rng::cuda::generate
( std::begin(dvt)
, std::end(dvt)
, p2rng::bind(u, pcg32(seed_pi))
Expand Down Expand Up @@ -124,7 +124,7 @@ TEMPLATE_TEST_CASE("uniform_int_dist - CUDA", "[10K][pcg32][dist]", int)
);

thrust::device_vector<T> dvt(n);
auto itr = p2rng::generate_n
auto itr = p2rng::cuda::generate_n
( std::begin(dvt)
, n
, p2rng::bind(u, pcg32(seed_pi))
Expand Down
6 changes: 3 additions & 3 deletions test/unit_tests_oneapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ TEMPLATE_TEST_CASE( "generate_n() - oneAPI", "[10K][pcg32]", float, double )

SECTION("p2rng::generate()")
{ sycl::buffer<T> dvt{sycl::range(n)};
p2rng::generate_n
p2rng::oneapi::generate_n
( dpl::begin(dvt)
, n
, p2rng::bind(u, pcg32(seed_pi))
Expand Down Expand Up @@ -88,7 +88,7 @@ TEMPLATE_TEST_CASE( "generate() - oneAPI", "[10K][pcg32]", float, double )

SECTION("p2rng::generate()")
{ sycl::buffer<T> dvt{sycl::range(n)};
p2rng::generate
p2rng::oneapi::generate
( dpl::begin(dvt)
, dpl::end(dvt)
, p2rng::bind(u, pcg32(seed_pi))
Expand Down Expand Up @@ -120,7 +120,7 @@ TEMPLATE_TEST_CASE( "uniform_int_dist - oneAPI", "[10K][pcg32][dist]", int )
);

sycl::buffer<T> dvt{sycl::range(n)};
p2rng::generate_n
p2rng::oneapi::generate_n
( dpl::begin(dvt)
, n
, p2rng::bind(u, pcg32(seed_pi))
Expand Down
6 changes: 3 additions & 3 deletions test/unit_tests_rocm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ TEMPLATE_TEST_CASE("generate_n() - ROCm", "[10K][pcg32]", float, double)

SECTION("p2rng::generate_n()")
{ thrust::device_vector<T> dvt(n);
auto itr = p2rng::generate_n
auto itr = p2rng::rocm::generate_n
( std::begin(dvt)
, n
, p2rng::bind(u, pcg32(seed_pi))
Expand Down Expand Up @@ -94,7 +94,7 @@ TEMPLATE_TEST_CASE("generate() - ROCm", "[10K][pcg32]", float, double)

SECTION("p2rng::generate()")
{ thrust::device_vector<T> dvt(n);
p2rng::generate
p2rng::rocm::generate
( std::begin(dvt)
, std::end(dvt)
, p2rng::bind(u, pcg32(seed_pi))
Expand Down Expand Up @@ -124,7 +124,7 @@ TEMPLATE_TEST_CASE("uniform_int_dist - ROCm", "[10K][pcg32][dist]", int)
);

thrust::device_vector<T> dvt(n);
auto itr = p2rng::generate_n
auto itr = p2rng::rocm::generate_n
( std::begin(dvt)
, n
, p2rng::bind(u, pcg32(seed_pi))
Expand Down

0 comments on commit cb901dd

Please sign in to comment.