Skip to content

Commit

Permalink
[SYCL] Fix use of memcpy in group_load and marray builtins (#16501)
Browse files Browse the repository at this point in the history
The implementation of group_load and marray builtins currently use
std::memcpy, which currently may fail to compile on device when the user
sets -D_FORTIFY_SOURCE=2. This commit fixes this by using
sycl::detail::memcpy_no_adl instead.

This solution should be replaced by a devicelib implementation of
`__memcpy_chk` when device-side abort/assertions work as intended.

---------

Signed-off-by: Larsen, Steffen <[email protected]>
  • Loading branch information
steffenlarsen authored Jan 8, 2025
1 parent df00dcb commit b75c7af
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 4 deletions.
3 changes: 2 additions & 1 deletion sycl/include/sycl/detail/builtins/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ auto builtin_marray_impl(FuncTy F, const Ts &...x) {
else
return F(to_vec2(x, I * 2)...);
}();
std::memcpy(&Res[I * 2], &PartialRes, sizeof(decltype(PartialRes)));
sycl::detail::memcpy_no_adl(&Res[I * 2], &PartialRes,
sizeof(decltype(PartialRes)));
}
if (N % 2)
Res[N - 1] = F(x[N - 1]...);
Expand Down
3 changes: 2 additions & 1 deletion sycl/include/sycl/detail/memcpy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ namespace detail {
// sycl::detail namespace, like in the following code:
// sycl::vec<int , 1> a, b;
// memcpy(&a, &b, sizeof(sycl::vec<int , 1>));
inline void memcpy_no_adl(void *Dst, const void *Src, size_t Size) {
template <typename T1, typename T2>
inline void memcpy_no_adl(T1 *Dst, const T2 *Src, size_t Size) {
#ifdef __SYCL_DEVICE_ONLY__
__builtin_memcpy(Dst, Src, Size);
#else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,11 @@ group_load(Group g, InputIteratorT in_ptr,

if constexpr (std::is_same_v<std::remove_const_t<value_type>, OutputT>) {
static_assert(sizeof(load) == out.size_bytes());
std::memcpy(out.begin(), &load, out.size_bytes());
sycl::detail::memcpy_no_adl(out.begin(), &load, out.size_bytes());
} else {
std::remove_const_t<value_type> values[ElementsPerWorkItem];
static_assert(sizeof(load) == sizeof(values));
std::memcpy(values, &load, sizeof(values));
sycl::detail::memcpy_no_adl(values, &load, sizeof(values));

// Note: can't `memcpy` directly into `out` because that might bypass
// an implicit conversion required by the specification.
Expand Down
49 changes: 49 additions & 0 deletions sycl/test-e2e/Regression/group_load_fortified.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// REQUIRES: aspect-usm_device_allocations
// RUN: %{build} -D_FORTIFY_SOURCE=2 -o %t.out
// RUN: %{run} %t.out

// Checks that group_load runs even when the source code is fortified. This
// failed at one point due to the use of std::memcpy in the implementation,
// which would hold an assert in device code when fortified, which would fail
// to JIT compile.

#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/group_load_store.hpp>
#include <sycl/sub_group.hpp>
#include <sycl/usm.hpp>

namespace syclexp = sycl::ext::oneapi::experimental;

int main(void) {
sycl::queue Q;

constexpr std::size_t N = 256;
constexpr std::uint32_t LWS = 64;
constexpr std::uint32_t VecSize = 4;
constexpr std::size_t NGroups = (N + VecSize * LWS - 1) / (VecSize * LWS);

int *Ptr = sycl::malloc_device<int>(N, Q);

Q.submit([&](sycl::handler &CGH) {
CGH.parallel_for(
sycl::nd_range<1>{sycl::range<1>{NGroups * LWS}, sycl::range<1>{LWS}},
[=](sycl::nd_item<1> It) {
const std::size_t GID = It.get_global_id();
const sycl::sub_group &SG = It.get_sub_group();

constexpr auto Striped = syclexp::properties{
syclexp::data_placement_striped, syclexp::full_group};

auto MPtr = sycl::address_space_cast<
sycl::access::address_space::global_space,
sycl::access::decorated::yes>(Ptr);

sycl::vec<int, VecSize> X{};
syclexp::group_load(SG, MPtr, X, Striped);
});
}).wait();

sycl::free(Ptr, Q);

return 0;
}

0 comments on commit b75c7af

Please sign in to comment.