-
Notifications
You must be signed in to change notification settings - Fork 752
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYCL] Fix use of memcpy in group_load and marray builtins (#16501)
The implementations of group_load and the marray builtins currently use std::memcpy, which may fail to compile for the device when the user sets -D_FORTIFY_SOURCE=2. This commit fixes this by using sycl::detail::memcpy_no_adl instead. This solution should be replaced by a devicelib implementation of `__memcpy_chk` once device-side abort/assertions work as intended. --------- Signed-off-by: Larsen, Steffen <[email protected]>
- Loading branch information
1 parent
df00dcb
commit b75c7af
Showing
4 changed files
with
55 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
// REQUIRES: aspect-usm_device_allocations | ||
// RUN: %{build} -D_FORTIFY_SOURCE=2 -o %t.out | ||
// RUN: %{run} %t.out | ||
|
||
// Checks that group_load runs even when the source code is fortified. This | ||
// failed at one point because the implementation used std::memcpy, which, | ||
// when fortified, would contain an assert in device code and fail to JIT | ||
// compile. | ||
|
||
#include <sycl/detail/core.hpp> | ||
#include <sycl/ext/oneapi/experimental/group_load_store.hpp> | ||
#include <sycl/sub_group.hpp> | ||
#include <sycl/usm.hpp> | ||
|
||
namespace syclexp = sycl::ext::oneapi::experimental; | ||
|
||
int main(void) { | ||
sycl::queue Q; | ||
|
||
constexpr std::size_t N = 256; | ||
constexpr std::uint32_t LWS = 64; | ||
constexpr std::uint32_t VecSize = 4; | ||
constexpr std::size_t NGroups = (N + VecSize * LWS - 1) / (VecSize * LWS); | ||
|
||
int *Ptr = sycl::malloc_device<int>(N, Q); | ||
|
||
Q.submit([&](sycl::handler &CGH) { | ||
CGH.parallel_for( | ||
sycl::nd_range<1>{sycl::range<1>{NGroups * LWS}, sycl::range<1>{LWS}}, | ||
[=](sycl::nd_item<1> It) { | ||
const std::size_t GID = It.get_global_id(); | ||
const sycl::sub_group &SG = It.get_sub_group(); | ||
|
||
constexpr auto Striped = syclexp::properties{ | ||
syclexp::data_placement_striped, syclexp::full_group}; | ||
|
||
auto MPtr = sycl::address_space_cast< | ||
sycl::access::address_space::global_space, | ||
sycl::access::decorated::yes>(Ptr); | ||
|
||
sycl::vec<int, VecSize> X{}; | ||
syclexp::group_load(SG, MPtr, X, Striped); | ||
}); | ||
}).wait(); | ||
|
||
sycl::free(Ptr, Q); | ||
|
||
return 0; | ||
} |