clang format cuda header
johnbowen42 committed Aug 23, 2023
1 parent b4c358e commit c7d1997
Showing 1 changed file with 25 additions and 25 deletions.
50 changes: 25 additions & 25 deletions src/serac/numerics/functional/domain_integral_kernels.cuh
@@ -430,8 +430,8 @@ template <mfem::Geometry::Type g, typename test, typename trial, int Q, typename
typename dsolution_type, typename dresidual_type>
__global__ void gradient_cuda_quadrature(const dsolution_type du, dresidual_type dr,
GPUArrayView<derivatives_type, 2> qf_derivatives,
- const mfem::DeviceTensor<4, const double> J, int num_elements,
- int grid_dim, int block_dim, int thread_id)
+ const mfem::DeviceTensor<4, const double> J, int num_elements, int grid_dim,
+ int block_dim, int thread_id)
{
using test_element = finite_element<g, test>;
using trial_element = finite_element<g, trial>;
@@ -522,38 +522,38 @@ void action_of_gradient_kernel(serac::detail::GPULaunchConfiguration config, con
auto du = detail::Reshape<trial>(dU.Read(), trial_ndof, num_elements);
auto dr = detail::Reshape<test>(dR.ReadWrite(), test_ndof, num_elements);

- using global_thread_x = RAJA::LoopPolicy<loop_policy
- #if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP)
- , gpu_global_thread_x_policy
- #endif
- RAJA::RangeSegment range (0, num_elements);
+ using global_thread_x = RAJA::LoopPolicy < loop_policy
+ #if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP)
+ ,
+ gpu_global_thread_x_policy
+ #endif
+ RAJA::RangeSegment range(0, num_elements);

cudaDeviceSynchronize();
serac::accelerator::displayLastCUDAMessage();

// call gradient_cuda
if constexpr (policy == serac::detail::ThreadParallelizationStrategy::THREAD_PER_QUADRATURE_POINT) {
int blocks_quadrature_element = (num_elements * rule.size() + config.blocksize - 1) / config.blocksize;
- RAJA::launch<launch_policy>(RAJA::ExecPlace::DEVICE,
- RAJA::LaunchParams(RAJA::Teams(blocks_quadrature_element),
- RAJA::Threads(config.blocksize)),
- [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
- RAJA::loop<global_thread_x>(ctx, range, [&](int t_idx) {
- gradient_cuda_quadrature<g, test, trial, Q, derivatives_type>
- (du, dr, qf_derivatives, J, num_elements, blocks_quadrature_element, config.blocksize, t_idx);
- });
- });
+ RAJA::launch<launch_policy>(
+ RAJA::ExecPlace::DEVICE,
+ RAJA::LaunchParams(RAJA::Teams(blocks_quadrature_element), RAJA::Threads(config.blocksize)),
+ [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
+ RAJA::loop<global_thread_x>(ctx, range, [&](int t_idx) {
+ gradient_cuda_quadrature<g, test, trial, Q, derivatives_type>(
+ du, dr, qf_derivatives, J, num_elements, blocks_quadrature_element, config.blocksize, t_idx);
+ });
+ });
} else if constexpr (policy == serac::detail::ThreadParallelizationStrategy::THREAD_PER_ELEMENT) {
int blocks_element = (num_elements + config.blocksize - 1) / config.blocksize;
- RAJA::launch<launch_policy>(RAJA::ExecPlace::DEVICE,
- RAJA::LaunchParams(RAJA::Teams(blocks_element),
- RAJA::Threads(config.blocksize)),
- [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
- RAJA::loop<global_thread_x>(ctx, range, [&](int t_idx) {
- gradient_cuda_element<g, test, trial, Q, derivatives_type>
- (du, dr, qf_derivatives, J, num_elements, blocks_element, config.blocksize, t_idx);
- });
- });
+ RAJA::launch<launch_policy>(
+ RAJA::ExecPlace::DEVICE, RAJA::LaunchParams(RAJA::Teams(blocks_element), RAJA::Threads(config.blocksize)),
+ [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
+ RAJA::loop<global_thread_x>(ctx, range, [&](int t_idx) {
+ gradient_cuda_element<g, test, trial, Q, derivatives_type>(du, dr, qf_derivatives, J, num_elements,
+ blocks_element, config.blocksize, t_idx);
+ });
+ });
}

cudaDeviceSynchronize();
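For reference, the launch code reformatted above pairs a ceiling-division block count with RAJA's launch/loop API. Below is a minimal, self-contained sketch of that pattern, assuming the RAJA CUDA backend; the policy aliases in the sketch are stand-ins for serac's own loop_policy, gpu_global_thread_x_policy, and launch_policy, whose definitions are not part of this diff.

// Minimal sketch (not serac's code) of the grid sizing + RAJA::launch pattern above.
// The policy aliases below are assumptions standing in for serac's loop_policy,
// gpu_global_thread_x_policy, and launch_policy, which are defined elsewhere.
#include "RAJA/RAJA.hpp"

using sketch_launch_policy = RAJA::LaunchPolicy<RAJA::seq_launch_t
#if defined(RAJA_ENABLE_CUDA)
                                                ,
                                                RAJA::cuda_launch_t<false>
#endif
                                                >;

using sketch_global_thread_x = RAJA::LoopPolicy<RAJA::loop_exec
#if defined(RAJA_ENABLE_CUDA)
                                                ,
                                                RAJA::cuda_global_thread_x
#endif
                                                >;

void launch_one_thread_per_item(int num_items, int blocksize)
{
  // Ceiling division: enough blocks of `blocksize` threads to cover every item.
  // Example: num_items = 8000, blocksize = 256 -> (8000 + 255) / 256 = 32 blocks.
  int blocks = (num_items + blocksize - 1) / blocksize;

  RAJA::launch<sketch_launch_policy>(
      RAJA::ExecPlace::DEVICE, RAJA::LaunchParams(RAJA::Teams(blocks), RAJA::Threads(blocksize)),
      [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
        // One global thread per entry of the range; threads past the end of the
        // range (from rounding the block count up) do no work.
        RAJA::loop<sketch_global_thread_x>(ctx, RAJA::RangeSegment(0, num_items), [&](int i) {
          (void)i;  // the kernel body (e.g. gradient_cuda_quadrature) would run here
        });
      });
}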

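The two branches in the diff differ only in how much work each thread owns: THREAD_PER_QUADRATURE_POINT sizes the grid for num_elements * rule.size() threads, while THREAD_PER_ELEMENT sizes it for num_elements threads. A common way for a per-quadrature-point kernel to map a flat global thread index back to an (element, quadrature point) pair is plain integer division and remainder; the snippet below illustrates only that mapping and is not taken from serac's gradient_cuda_quadrature.

// Hypothetical helper: map a flat thread index to (element, quadrature point),
// assuming element-major ordering with num_qpts points per element. serac's actual
// indexing inside gradient_cuda_quadrature is not shown in this diff.
#include <cstdio>

struct WorkItem {
  int element;  // which finite element this thread works on
  int q;        // which quadrature point within that element
};

inline WorkItem decompose(int global_thread_id, int num_qpts)
{
  return WorkItem{global_thread_id / num_qpts, global_thread_id % num_qpts};
}

int main()
{
  // With rule.size() == 3 quadrature points per element, global thread 7 owns
  // quadrature point 1 of element 2 (threads beyond num_elements * 3 do nothing).
  WorkItem w = decompose(7, /*num_qpts=*/3);
  std::printf("element %d, qpt %d\n", w.element, w.q);
  return 0;
}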