
Commit b8def5b: Format files
Parent: addf8fe

Co-authored-by: Pratik Nayak <[email protected]>
ginkgo-bot and pratikvn committed Oct 29, 2023
Showing 10 changed files with 155 additions and 153 deletions.
29 changes: 12 additions & 17 deletions common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc
@@ -47,15 +47,10 @@ __device__ __forceinline__ void scale(
 }

 template <typename ValueType, typename Mapping>
-__global__ __launch_bounds__(
-    default_block_size,
-    sm_oversubscription) void scale_kernel(const gko::batch::multi_vector::
-                                               uniform_batch<const ValueType>
-                                                   alpha,
-                                           const gko::batch::multi_vector::
-                                               uniform_batch<ValueType>
-                                                   x,
-                                           Mapping map)
+__global__
+__launch_bounds__(default_block_size, sm_oversubscription) void scale_kernel(
+    const gko::batch::multi_vector::uniform_batch<const ValueType> alpha,
+    const gko::batch::multi_vector::uniform_batch<ValueType> x, Mapping map)
 {
     for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items;
          batch_id += gridDim.x) {
@@ -176,11 +171,11 @@ __device__ __forceinline__ void compute_gen_dot_product(

 template <typename ValueType, typename Mapping>
 __global__
-__launch_bounds__(default_block_size, sm_oversubscription) void compute_gen_dot_product_kernel(
-    const gko::batch::multi_vector::uniform_batch<const ValueType> x,
-    const gko::batch::multi_vector::uniform_batch<const ValueType> y,
-    const gko::batch::multi_vector::uniform_batch<ValueType> result,
-    Mapping map)
+__launch_bounds__(default_block_size, sm_oversubscription) void compute_gen_dot_product_kernel(
+    const gko::batch::multi_vector::uniform_batch<const ValueType> x,
+    const gko::batch::multi_vector::uniform_batch<const ValueType> y,
+    const gko::batch::multi_vector::uniform_batch<ValueType> result,
+    Mapping map)
 {
     for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items;
          batch_id += gridDim.x) {
@@ -319,9 +314,9 @@ __device__ __forceinline__ void copy(

 template <typename ValueType>
 __global__
-__launch_bounds__(default_block_size, sm_oversubscription) void copy_kernel(
-    const gko::batch::multi_vector::uniform_batch<const ValueType> src,
-    const gko::batch::multi_vector::uniform_batch<ValueType> dst)
+__launch_bounds__(default_block_size, sm_oversubscription) void copy_kernel(
+    const gko::batch::multi_vector::uniform_batch<const ValueType> src,
+    const gko::batch::multi_vector::uniform_batch<ValueType> dst)
 {
     for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_items;
          batch_id += gridDim.x) {
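Note: the CUDA/HIP kernels in this file all share one launch pattern. `__launch_bounds__(default_block_size, sm_oversubscription)` tells the compiler the maximum block size and a desired number of resident blocks per SM, and the body walks the batch items with a grid-stride loop so any grid size covers all items. A minimal sketch of that pattern, with a hypothetical simplified batch view standing in for `gko::batch::multi_vector::uniform_batch`:

```cuda
constexpr int default_block_size = 256;
constexpr int sm_oversubscription = 4;

// Hypothetical, simplified stand-in for the uniform_batch view: one column
// per batch item, items stored contiguously.
struct batch_view {
    double* values;
    int num_batch_items;
    int num_rows;
};

__global__ __launch_bounds__(default_block_size, sm_oversubscription)
void scale_kernel_sketch(double alpha, batch_view x)
{
    // Grid-stride loop over batch items: each block handles items
    // blockIdx.x, blockIdx.x + gridDim.x, ... so any grid size is valid.
    for (int batch_id = blockIdx.x; batch_id < x.num_batch_items;
         batch_id += gridDim.x) {
        double* item = x.values + batch_id * x.num_rows;
        // Threads of the block stride over the rows of one batch item.
        for (int row = threadIdx.x; row < x.num_rows; row += blockDim.x) {
            item[row] *= alpha;
        }
    }
}
```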
3 changes: 1 addition & 2 deletions cuda/solver/batch_bicgstab_kernels.cu
@@ -75,9 +75,8 @@ constexpr int sm_oversubscription = 4;
 namespace batch_bicgstab {


-#include "common/cuda_hip/components/uninitialized_array.hpp.inc"
-
 #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc"
+#include "common/cuda_hip/components/uninitialized_array.hpp.inc"
 #include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc"
 #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc"
 #include "common/cuda_hip/solver/batch_bicgstab_kernels.hpp.inc"
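Note: this reordering is safe because the `.hpp.inc` fragments are plain textual includes; they carry no include guards or namespace of their own and are pasted into whatever namespace is open at the include site. A sketch of the pattern, using a hypothetical fragment file:

```cpp
// kernels_fragment.hpp.inc (hypothetical fragment): no include guard, no
// namespace of its own; meant to be pasted verbatim into an open namespace.
//
//     template <typename T>
//     void axpy(int n, T alpha, const T* x, T* y)
//     {
//         for (int i = 0; i < n; ++i) y[i] += alpha * x[i];
//     }

// backend.cpp: each backend opens its own namespace and textually includes
// the shared fragment, so one kernel body serves multiple builds.
namespace cuda_backend {
#include "kernels_fragment.hpp.inc"  // hypothetical path
}
namespace hip_backend {
#include "kernels_fragment.hpp.inc"  // hypothetical path
}
// Reordering such includes is only safe when the fragments do not depend on
// each other, which is what the alphabetical sort above relies on.
```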
125 changes: 66 additions & 59 deletions dpcpp/base/batch_multi_vector_kernels.dp.cpp
@@ -210,37 +210,41 @@ void compute_dot(std::shared_ptr<const DefaultExecutor> exec,
     if (x->get_common_size()[1] == 1) {
         exec->get_queue()->submit([&](sycl::handler& cgh) {
             cgh.parallel_for(
-                sycl_nd_range(grid, block), [=
-            ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
-                                              max_subgroup_size)]] {
-                    auto group = item_ct1.get_group();
-                    auto group_id = group.get_group_linear_id();
-                    const auto x_b = batch::extract_batch_item(x_ub, group_id);
-                    const auto y_b = batch::extract_batch_item(y_ub, group_id);
-                    const auto res_b =
-                        batch::extract_batch_item(res_ub, group_id);
-                    single_rhs_compute_dot_sg(x_b.num_rows, x_b.values,
-                                              y_b.values, res_b.values[0],
-                                              item_ct1);
-                });
+                sycl_nd_range(grid, block),
+                [=](sycl::nd_item<3> item_ct1)
+                    [[sycl::reqd_sub_group_size(max_subgroup_size)]] {
+                        auto group = item_ct1.get_group();
+                        auto group_id = group.get_group_linear_id();
+                        const auto x_b =
+                            batch::extract_batch_item(x_ub, group_id);
+                        const auto y_b =
+                            batch::extract_batch_item(y_ub, group_id);
+                        const auto res_b =
+                            batch::extract_batch_item(res_ub, group_id);
+                        single_rhs_compute_dot_sg(x_b.num_rows, x_b.values,
+                                                  y_b.values, res_b.values[0],
+                                                  item_ct1);
+                    });
         });
     } else {
         // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group
         exec->get_queue()->submit([&](sycl::handler& cgh) {
             cgh.parallel_for(
-                sycl_nd_range(grid, block), [=
-            ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
-                                              max_subgroup_size)]] {
-                    auto group = item_ct1.get_group();
-                    auto group_id = group.get_group_linear_id();
-                    const auto x_b = batch::extract_batch_item(x_ub, group_id);
-                    const auto y_b = batch::extract_batch_item(y_ub, group_id);
-                    const auto res_b =
-                        batch::extract_batch_item(res_ub, group_id);
-                    compute_gen_dot_product_kernel(
-                        x_b, y_b, res_b, item_ct1,
-                        [](auto val) { return val; });
-                });
+                sycl_nd_range(grid, block),
+                [=](sycl::nd_item<3> item_ct1)
+                    [[sycl::reqd_sub_group_size(max_subgroup_size)]] {
+                        auto group = item_ct1.get_group();
+                        auto group_id = group.get_group_linear_id();
+                        const auto x_b =
+                            batch::extract_batch_item(x_ub, group_id);
+                        const auto y_b =
+                            batch::extract_batch_item(y_ub, group_id);
+                        const auto res_b =
+                            batch::extract_batch_item(res_ub, group_id);
+                        compute_gen_dot_product_kernel(
+                            x_b, y_b, res_b, item_ct1,
+                            [](auto val) { return val; });
+                    });
         });
     }
 }
@@ -274,18 +278,19 @@ void compute_conj_dot(std::shared_ptr<const DefaultExecutor> exec,

     exec->get_queue()->submit([&](sycl::handler& cgh) {
         cgh.parallel_for(
-            sycl_nd_range(grid, block), [=
-        ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
-                                          max_subgroup_size)]] {
-                auto group = item_ct1.get_group();
-                auto group_id = group.get_group_linear_id();
-                const auto x_b = batch::extract_batch_item(x_ub, group_id);
-                const auto y_b = batch::extract_batch_item(y_ub, group_id);
-                const auto res_b = batch::extract_batch_item(res_ub, group_id);
-                compute_gen_dot_product_kernel(
-                    x_b, y_b, res_b, item_ct1,
-                    [](auto val) { return conj(val); });
-            });
+            sycl_nd_range(grid, block),
+            [=](sycl::nd_item<3> item_ct1)
+                [[sycl::reqd_sub_group_size(max_subgroup_size)]] {
+                    auto group = item_ct1.get_group();
+                    auto group_id = group.get_group_linear_id();
+                    const auto x_b = batch::extract_batch_item(x_ub, group_id);
+                    const auto y_b = batch::extract_batch_item(y_ub, group_id);
+                    const auto res_b =
+                        batch::extract_batch_item(res_ub, group_id);
+                    compute_gen_dot_product_kernel(
+                        x_b, y_b, res_b, item_ct1,
+                        [](auto val) { return conj(val); });
+                });
     });
 }

@@ -317,31 +322,33 @@ void compute_norm2(std::shared_ptr<const DefaultExecutor> exec,
     if (x->get_common_size()[1] == 1) {
         exec->get_queue()->submit([&](sycl::handler& cgh) {
             cgh.parallel_for(
-                sycl_nd_range(grid, block), [=
-            ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
-                                              max_subgroup_size)]] {
-                    auto group = item_ct1.get_group();
-                    auto group_id = group.get_group_linear_id();
-                    const auto x_b = batch::extract_batch_item(x_ub, group_id);
-                    const auto res_b =
-                        batch::extract_batch_item(res_ub, group_id);
-                    single_rhs_compute_norm2_sg(x_b.num_rows, x_b.values,
-                                                res_b.values[0], item_ct1);
-                });
+                sycl_nd_range(grid, block),
+                [=](sycl::nd_item<3> item_ct1)
+                    [[sycl::reqd_sub_group_size(max_subgroup_size)]] {
+                        auto group = item_ct1.get_group();
+                        auto group_id = group.get_group_linear_id();
+                        const auto x_b =
+                            batch::extract_batch_item(x_ub, group_id);
+                        const auto res_b =
+                            batch::extract_batch_item(res_ub, group_id);
+                        single_rhs_compute_norm2_sg(x_b.num_rows, x_b.values,
+                                                    res_b.values[0], item_ct1);
+                    });
         });
     } else {
         exec->get_queue()->submit([&](sycl::handler& cgh) {
             cgh.parallel_for(
-                sycl_nd_range(grid, block), [=
-            ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
-                                              max_subgroup_size)]] {
-                    auto group = item_ct1.get_group();
-                    auto group_id = group.get_group_linear_id();
-                    const auto x_b = batch::extract_batch_item(x_ub, group_id);
-                    const auto res_b =
-                        batch::extract_batch_item(res_ub, group_id);
-                    compute_norm2_kernel(x_b, res_b, item_ct1);
-                });
+                sycl_nd_range(grid, block),
+                [=](sycl::nd_item<3> item_ct1)
+                    [[sycl::reqd_sub_group_size(max_subgroup_size)]] {
+                        auto group = item_ct1.get_group();
+                        auto group_id = group.get_group_linear_id();
+                        const auto x_b =
+                            batch::extract_batch_item(x_ub, group_id);
+                        const auto res_b =
+                            batch::extract_batch_item(res_ub, group_id);
+                        compute_norm2_kernel(x_b, res_b, item_ct1);
+                    });
         });
     }
 }
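Note: the DPC++ variants express the same per-batch-item pattern as the CUDA kernels, with one work-group per batch item, `get_group_linear_id()` as the batch index, and a fixed sub-group size requested on the lambda. The TODO above suggests replacing the hand-rolled sub-group reduction with `sycl::reduce_over_group`; a self-contained sketch of that variant follows (sizes and names are illustrative, not Ginkgo's API):

```cpp
#include <sycl/sycl.hpp>

// Illustrative sketch: one work-group per batch item, dot product via
// sycl::reduce_over_group. num_rows doubles as the work-group size here,
// which assumes it does not exceed the device's work-group limit.
int main()
{
    constexpr int num_batch_items = 8;
    constexpr int num_rows = 64;

    sycl::queue q;
    double* x = sycl::malloc_shared<double>(num_batch_items * num_rows, q);
    double* y = sycl::malloc_shared<double>(num_batch_items * num_rows, q);
    double* res = sycl::malloc_shared<double>(num_batch_items, q);
    for (int i = 0; i < num_batch_items * num_rows; ++i) {
        x[i] = 1.0;
        y[i] = 2.0;
    }

    q.submit([&](sycl::handler& cgh) {
         cgh.parallel_for(
             sycl::nd_range<1>(sycl::range<1>(num_batch_items * num_rows),
                               sycl::range<1>(num_rows)),
             [=](sycl::nd_item<1> item) {
                 auto group = item.get_group();
                 // The group id is the batch id, exactly as in the kernels
                 // above.
                 auto batch_id = group.get_group_linear_id();
                 const double* x_b = x + batch_id * num_rows;
                 const double* y_b = y + batch_id * num_rows;
                 const double partial = x_b[item.get_local_id(0)] *
                                        y_b[item.get_local_id(0)];
                 // Group-wide reduction instead of a hand-rolled sub-group
                 // reduction, per the TODO.
                 const double dot = sycl::reduce_over_group(
                     group, partial, sycl::plus<double>{});
                 if (group.leader()) {
                     res[batch_id] = dot;
                 }
             });
     }).wait();

    // Every res[b] is now 2.0 * num_rows.
    sycl::free(x, q);
    sycl::free(y, q);
    sycl::free(res, q);
}
```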
56 changes: 29 additions & 27 deletions dpcpp/matrix/batch_dense_kernels.dp.cpp
@@ -100,17 +100,18 @@ void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
     // Launch a kernel that has nbatches blocks, each block has max group size
     exec->get_queue()->submit([&](sycl::handler& cgh) {
         cgh.parallel_for(
-            sycl_nd_range(grid, block), [=
-        ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
-                                          config::warp_size)]] {
-                auto group = item_ct1.get_group();
-                auto group_id = group.get_group_linear_id();
-                const auto mat_b =
-                    batch::matrix::extract_batch_item(mat_ub, group_id);
-                const auto b_b = batch::extract_batch_item(b_ub, group_id);
-                const auto x_b = batch::extract_batch_item(x_ub, group_id);
-                simple_apply_kernel(mat_b, b_b.values, x_b.values, item_ct1);
-            });
+            sycl_nd_range(grid, block),
+            [=](sycl::nd_item<3> item_ct1)
+                [[sycl::reqd_sub_group_size(config::warp_size)]] {
+                    auto group = item_ct1.get_group();
+                    auto group_id = group.get_group_linear_id();
+                    const auto mat_b =
+                        batch::matrix::extract_batch_item(mat_ub, group_id);
+                    const auto b_b = batch::extract_batch_item(b_ub, group_id);
+                    const auto x_b = batch::extract_batch_item(x_ub, group_id);
+                    simple_apply_kernel(mat_b, b_b.values, x_b.values,
+                                        item_ct1);
+                });
     });
 }

@@ -147,22 +148,23 @@ void advanced_apply(std::shared_ptr<const DefaultExecutor> exec,
     // Launch a kernel that has nbatches blocks, each block has max group size
     exec->get_queue()->submit([&](sycl::handler& cgh) {
         cgh.parallel_for(
-            sycl_nd_range(grid, block), [=
-        ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
-                                          config::warp_size)]] {
-                auto group = item_ct1.get_group();
-                auto group_id = group.get_group_linear_id();
-                const auto mat_b =
-                    batch::matrix::extract_batch_item(mat_ub, group_id);
-                const auto b_b = batch::extract_batch_item(b_ub, group_id);
-                const auto x_b = batch::extract_batch_item(x_ub, group_id);
-                const auto alpha_b =
-                    batch::extract_batch_item(alpha_ub, group_id);
-                const auto beta_b =
-                    batch::extract_batch_item(beta_ub, group_id);
-                advanced_apply_kernel(alpha_b.values[0], mat_b, b_b.values,
-                                      beta_b.values[0], x_b.values, item_ct1);
-            });
+            sycl_nd_range(grid, block),
+            [=](sycl::nd_item<3> item_ct1)
+                [[sycl::reqd_sub_group_size(config::warp_size)]] {
+                    auto group = item_ct1.get_group();
+                    auto group_id = group.get_group_linear_id();
+                    const auto mat_b =
+                        batch::matrix::extract_batch_item(mat_ub, group_id);
+                    const auto b_b = batch::extract_batch_item(b_ub, group_id);
+                    const auto x_b = batch::extract_batch_item(x_ub, group_id);
+                    const auto alpha_b =
+                        batch::extract_batch_item(alpha_ub, group_id);
+                    const auto beta_b =
+                        batch::extract_batch_item(beta_ub, group_id);
+                    advanced_apply_kernel(alpha_b.values[0], mat_b, b_b.values,
+                                          beta_b.values[0], x_b.values,
+                                          item_ct1);
+                });
     });
 }

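Note: for reference, `advanced_apply_kernel` computes x = alpha * A * b + beta * x for each dense batch item, with alpha and beta read from the first entry of their own batch items. A host-side scalar sketch of that update (row-major storage and the parameter names are assumptions, not Ginkgo's interface):

```cpp
// Host-side reference for one dense batch item: x = alpha * A * b + beta * x.
void advanced_apply_reference(int num_rows, int num_cols, double alpha,
                              const double* A, const double* b, double beta,
                              double* x)
{
    for (int row = 0; row < num_rows; ++row) {
        // Accumulate one entry of A * b.
        double acc = 0.0;
        for (int col = 0; col < num_cols; ++col) {
            acc += A[row * num_cols + col] * b[col];
        }
        x[row] = alpha * acc + beta * x[row];
    }
}
```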
56 changes: 29 additions & 27 deletions dpcpp/matrix/batch_ell_kernels.dp.cpp
@@ -97,17 +97,18 @@ void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
     // Launch a kernel that has nbatches blocks, each block has max group size
     exec->get_queue()->submit([&](sycl::handler& cgh) {
         cgh.parallel_for(
-            sycl_nd_range(grid, block), [=
-        ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
-                                          config::warp_size)]] {
-                auto group = item_ct1.get_group();
-                auto group_id = group.get_group_linear_id();
-                const auto mat_b =
-                    batch::matrix::extract_batch_item(mat_ub, group_id);
-                const auto b_b = batch::extract_batch_item(b_ub, group_id);
-                const auto x_b = batch::extract_batch_item(x_ub, group_id);
-                simple_apply_kernel(mat_b, b_b.values, x_b.values, item_ct1);
-            });
+            sycl_nd_range(grid, block),
+            [=](sycl::nd_item<3> item_ct1)
+                [[sycl::reqd_sub_group_size(config::warp_size)]] {
+                    auto group = item_ct1.get_group();
+                    auto group_id = group.get_group_linear_id();
+                    const auto mat_b =
+                        batch::matrix::extract_batch_item(mat_ub, group_id);
+                    const auto b_b = batch::extract_batch_item(b_ub, group_id);
+                    const auto x_b = batch::extract_batch_item(x_ub, group_id);
+                    simple_apply_kernel(mat_b, b_b.values, x_b.values,
+                                        item_ct1);
+                });
     });
 }

@@ -145,22 +146,23 @@ void advanced_apply(std::shared_ptr<const DefaultExecutor> exec,
     // Launch a kernel that has nbatches blocks, each block has max group size
     exec->get_queue()->submit([&](sycl::handler& cgh) {
         cgh.parallel_for(
-            sycl_nd_range(grid, block), [=
-        ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
-                                          config::warp_size)]] {
-                auto group = item_ct1.get_group();
-                auto group_id = group.get_group_linear_id();
-                const auto mat_b =
-                    batch::matrix::extract_batch_item(mat_ub, group_id);
-                const auto b_b = batch::extract_batch_item(b_ub, group_id);
-                const auto x_b = batch::extract_batch_item(x_ub, group_id);
-                const auto alpha_b =
-                    batch::extract_batch_item(alpha_ub, group_id);
-                const auto beta_b =
-                    batch::extract_batch_item(beta_ub, group_id);
-                advanced_apply_kernel(alpha_b.values[0], mat_b, b_b.values,
-                                      beta_b.values[0], x_b.values, item_ct1);
-            });
+            sycl_nd_range(grid, block),
+            [=](sycl::nd_item<3> item_ct1)
+                [[sycl::reqd_sub_group_size(config::warp_size)]] {
+                    auto group = item_ct1.get_group();
+                    auto group_id = group.get_group_linear_id();
+                    const auto mat_b =
+                        batch::matrix::extract_batch_item(mat_ub, group_id);
+                    const auto b_b = batch::extract_batch_item(b_ub, group_id);
+                    const auto x_b = batch::extract_batch_item(x_ub, group_id);
+                    const auto alpha_b =
+                        batch::extract_batch_item(alpha_ub, group_id);
+                    const auto beta_b =
+                        batch::extract_batch_item(beta_ub, group_id);
+                    advanced_apply_kernel(alpha_b.values[0], mat_b, b_b.values,
+                                          beta_b.values[0], x_b.values,
+                                          item_ct1);
+                });
     });
 }

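Note: the ELL variant performs the same apply, but reads the matrix from ELL storage, where every row owns a fixed number of (column index, value) slots and short rows are padded. A host-side sketch for one batch item (the column-major slot layout and -1 as the padding index are assumptions for illustration):

```cpp
// Host-side reference for one ELL batch item: x = A * b.
void ell_simple_apply_reference(int num_rows, int max_nnz_per_row,
                                const int* col_idxs, const double* values,
                                const double* b, double* x)
{
    for (int row = 0; row < num_rows; ++row) {
        double acc = 0.0;
        for (int k = 0; k < max_nnz_per_row; ++k) {
            const int idx = k * num_rows + row;  // slot k of this row
            const int col = col_idxs[idx];
            if (col >= 0) {  // skip padding slots
                acc += values[idx] * b[col];
            }
        }
        x[row] = acc;
    }
}
```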
8 changes: 4 additions & 4 deletions dpcpp/preconditioner/batch_identity.hpp.inc
@@ -42,10 +42,10 @@ public:

     static int dynamic_work_size(int, int) { return 0; }

-    void generate(size_type batch_id,
-                  const gko::batch::matrix::ell::batch_item<const ValueType,
-                                                            gko::int32>&,
-                  ValueType* const, sycl::nd_item<3> item_ct1)
+    void generate(
+        size_type batch_id,
+        const gko::batch::matrix::ell::batch_item<const ValueType, gko::int32>&,
+        ValueType* const, sycl::nd_item<3> item_ct1)
     {}

     void generate(size_type batch_id,
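Note: the `generate` overloads of the identity preconditioner are intentionally empty; `dynamic_work_size` returning 0 means no scratch memory is ever requested, and applying the preconditioner reduces to a copy. A compact sketch of that contract (illustrative names, not the actual Ginkgo interface):

```cpp
// Illustrative contract of an identity preconditioner: generation does
// nothing and application is a plain copy.
struct identity_precond_sketch {
    // No scratch memory is needed, so the solver allocates none.
    static int dynamic_work_size(int /*num_rows*/, int /*nnz*/) { return 0; }

    // Nothing to factorize or store, regardless of the matrix format.
    void generate() {}

    // z = r: the "preconditioned" residual is the residual itself.
    void apply(int num_rows, const double* r, double* z) const
    {
        for (int i = 0; i < num_rows; ++i) {
            z[i] = r[i];
        }
    }
};
```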
7 changes: 3 additions & 4 deletions dpcpp/solver/batch_bicgstab_kernels.dp.cpp
@@ -117,10 +117,9 @@ class KernelCaller {
                 slm_values(sycl::range<1>(shared_size), cgh);

             cgh.parallel_for(
-                sycl_nd_range(grid, block), [=
-            ](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(
-                simd_len)]] [
-            [intel::kernel_args_restrict]] {
+                sycl_nd_range(grid, block),
+                [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(
+                    simd_len)]] [[intel::kernel_args_restrict]] {
                     auto batch_id = item_ct1.get_group_linear_id();
                     const auto mat_global_entry =
                         gko::batch::matrix::extract_batch_item(mat, batch_id);
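Note: the `KernelCaller` launch differs from the earlier ones in two ways: it requests work-group-local scratch memory (`slm_values`) through an accessor created inside the command group, and it uses the Intel attribute spellings `[[intel::reqd_sub_group_size]]` and `[[intel::kernel_args_restrict]]`. A self-contained sketch of that launch shape (sizes and names are illustrative assumptions):

```cpp
#include <sycl/sycl.hpp>

constexpr int simd_len = 32;  // illustrative sub-group size

// Sketch of the launch shape: per-work-group scratch memory via a local
// accessor, Intel attribute spellings on the kernel lambda.
void launch_sketch(sycl::queue& q, int num_batch_items, int group_size,
                   int shared_size)
{
    q.submit([&](sycl::handler& cgh) {
        // One block of `shared_size` doubles of local (shared) memory is
        // allocated per work-group.
        sycl::local_accessor<double, 1> slm_values(
            sycl::range<1>(shared_size), cgh);

        cgh.parallel_for(
            sycl::nd_range<1>(sycl::range<1>(num_batch_items * group_size),
                              sycl::range<1>(group_size)),
            [=](sycl::nd_item<1> item_ct1)
                [[intel::reqd_sub_group_size(simd_len)]]
                [[intel::kernel_args_restrict]] {
                    // One work-group per batch item, as in the solver.
                    auto batch_id = item_ct1.get_group_linear_id();
                    double* slm = &slm_values[0];
                    // ... the solver iteration for batch item `batch_id`
                    // would run here, using `slm` as its workspace ...
                    (void)batch_id;
                    (void)slm;
                });
    });
}
```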