From cdcf1a6bd644ea78bd676c66c791108030ca74d2 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Wed, 26 Feb 2025 23:45:11 +0000 Subject: [PATCH 1/7] [SYCL][E2E] Disable tests that sporadically fail on BMG (#17201) I reproduced the assert tests locking up the whole machine and causing GPU resets, so just disable them all for now. This is probably one of the major causes of the instability. For some reason `letter_example.cpp` is causing GPU resets on BMG, which is another good candidate as for the runner instability, so just move all XFAILing InlineAsm tests to UNSUPPORTED so they don't run at all on failing platforms. --------- Signed-off-by: Sarnie, Nick --- sycl/test-e2e/Assert/lit.local.cfg | 3 +++ sycl/test-e2e/InlineAsm/asm_multiple_instructions.cpp | 4 ++-- sycl/test-e2e/InlineAsm/letter_example.cpp | 4 ++-- sycl/test-e2e/InlineAsm/malloc_shared_32.cpp | 4 ++-- 4 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 sycl/test-e2e/Assert/lit.local.cfg diff --git a/sycl/test-e2e/Assert/lit.local.cfg b/sycl/test-e2e/Assert/lit.local.cfg new file mode 100644 index 0000000000000..aa56a61fcb306 --- /dev/null +++ b/sycl/test-e2e/Assert/lit.local.cfg @@ -0,0 +1,3 @@ +# https://github.com/intel/llvm/issues/17203 +config.unsupported_features += ['arch-intel_gpu_bmg_g21'] + diff --git a/sycl/test-e2e/InlineAsm/asm_multiple_instructions.cpp b/sycl/test-e2e/InlineAsm/asm_multiple_instructions.cpp index e862af40b7767..76092d3fae653 100644 --- a/sycl/test-e2e/InlineAsm/asm_multiple_instructions.cpp +++ b/sycl/test-e2e/InlineAsm/asm_multiple_instructions.cpp @@ -5,8 +5,8 @@ // RUN: %{run} %t.out // The test is failing when writing directly to output buffer. // If temporary variable is used (see TO_PASS mode) the test succeeded. -// XFAIL: gpu && run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16412 +// UNSUPPORTED: gpu && run-mode +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16412 #include "include/asmhelper.h" #include #include diff --git a/sycl/test-e2e/InlineAsm/letter_example.cpp b/sycl/test-e2e/InlineAsm/letter_example.cpp index f6d3df75871c4..f483715e30dde 100644 --- a/sycl/test-e2e/InlineAsm/letter_example.cpp +++ b/sycl/test-e2e/InlineAsm/letter_example.cpp @@ -1,6 +1,6 @@ // REQUIRES: sg-16,aspect-usm_shared_allocations -// XFAIL: arch-intel_gpu_bmg_g21 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16921 +// UNSUPPORTED: arch-intel_gpu_bmg_g21 +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16921 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp b/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp index 56b1cc95749b9..39995de1c2806 100644 --- a/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp +++ b/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp @@ -1,6 +1,6 @@ // REQUIRES: sg-32,aspect-usm_shared_allocations -// XFAIL: arch-intel_gpu_bmg_g21 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16921 +// UNSUPPORTED: arch-intel_gpu_bmg_g21 +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16921 // RUN: %{build} -o %t.out // RUN: %{run} %t.out From a3d81c70f7148bd1ebbafda529e862c35c0295bc Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 27 Feb 2025 08:16:56 +0100 Subject: [PATCH 2/7] [SYCL][NFC] Remove redundant argument in common_ctor_checks (#17186) This commit removes the redundant State argument from kernel_bundle_impl::common_ctor_checks, as it always corresponds 1:1 with the MState member variable. Signed-off-by: Larsen, Steffen --- sycl/source/detail/kernel_bundle_impl.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 8bad9bb34ee4a..e4cf15a2e69a4 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -58,7 +58,7 @@ class kernel_bundle_impl { using SpecConstMapT = std::map>; - void common_ctor_checks(bundle_state State) { + void common_ctor_checks() const { const bool AllDevicesInTheContext = checkAllDevicesAreInContext(MDevices, MContext); if (MDevices.empty() || !AllDevicesInTheContext) @@ -67,12 +67,12 @@ class kernel_bundle_impl { "Not all devices are associated with the context or " "vector of devices is empty"); - if (bundle_state::input == State && + if (bundle_state::input == MState && !checkAllDevicesHaveAspect(MDevices, aspect::online_compiler)) throw sycl::exception(make_error_code(errc::invalid), "Not all devices have aspect::online_compiler"); - if (bundle_state::object == State && + if (bundle_state::object == MState && !checkAllDevicesHaveAspect(MDevices, aspect::online_linker)) throw sycl::exception(make_error_code(errc::invalid), "Not all devices have aspect::online_linker"); @@ -82,7 +82,7 @@ class kernel_bundle_impl { kernel_bundle_impl(context Ctx, std::vector Devs, bundle_state State) : MContext(std::move(Ctx)), MDevices(std::move(Devs)), MState(State) { - common_ctor_checks(State); + common_ctor_checks(); MDeviceImages = detail::ProgramManager::getInstance().getSYCLDeviceImages( MContext, MDevices, State); @@ -267,7 +267,7 @@ class kernel_bundle_impl { bundle_state State) : MContext(std::move(Ctx)), MDevices(std::move(Devs)), MState(State) { - common_ctor_checks(State); + common_ctor_checks(); MDeviceImages = detail::ProgramManager::getInstance().getSYCLDeviceImages( MContext, MDevices, KernelIDs, State); @@ -278,7 +278,7 @@ class kernel_bundle_impl { const DevImgSelectorImpl &Selector, bundle_state State) : MContext(std::move(Ctx)), MDevices(std::move(Devs)), MState(State) { - common_ctor_checks(State); + common_ctor_checks(); MDeviceImages = detail::ProgramManager::getInstance().getSYCLDeviceImages( MContext, MDevices, Selector, State); From 47404a4a569c323077023fb91f5b7f86b61317c8 Mon Sep 17 00:00:00 2001 From: Udit Kumar Agarwal Date: Wed, 26 Feb 2025 23:18:40 -0800 Subject: [PATCH 3/7] [SYCL] Use C-arrays as storage in `sycl::vec` (#17025) https://github.com/intel/llvm/pull/14130 caused `sycl::vec` to use `std::array` as its underlying storage. However, operations on `std::array` may emit debug-mode-only functions, on which the device compiler may fail. This PR replaces `std::array` with C-style arrays as storage in `sycl::vec`. --- .../detail/type_traits/vec_marray_traits.hpp | 10 +++ sycl/include/sycl/detail/vector_convert.hpp | 5 +- sycl/include/sycl/vector.hpp | 12 ++- .../vector/vector_bf16_builtins_preview.cpp | 10 +-- .../vector/vector_convert_bfloat_preview.cpp | 8 +- .../vector/vector_math_ops_preview.cpp | 84 +++++++++---------- sycl/test/regression/vec_array_windows.cpp | 1 + 7 files changed, 77 insertions(+), 53 deletions(-) diff --git a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp index 6ce39bf6a072a..365bf3e9f27a5 100644 --- a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp +++ b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp @@ -8,6 +8,16 @@ #pragma once +#define __SYCL_USE_NEW_VEC_IMPL 1 + +#ifndef __SYCL_USE_NEW_VEC_IMPL +#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) +#define __SYCL_USE_NEW_VEC_IMPL 1 +#else +#define __SYCL_USE_NEW_VEC_IMPL 0 +#endif +#endif + #include #include diff --git a/sycl/include/sycl/detail/vector_convert.hpp b/sycl/include/sycl/detail/vector_convert.hpp index 3af216e495509..6729dfd151100 100644 --- a/sycl/include/sycl/detail/vector_convert.hpp +++ b/sycl/include/sycl/detail/vector_convert.hpp @@ -57,6 +57,9 @@ #include // for is_sigeninteger, is_s... #include // for errc +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +#include +#endif #include // bfloat16 #include @@ -930,7 +933,7 @@ vec vec::convert() const { auto val = detail::convertImpl(bit_cast(*this)); - Result.m_Data = sycl::bit_cast(val); + sycl::detail::memcpy_no_adl(&Result.m_Data, &val, sizeof(Result)); } else #endif // __SYCL_DEVICE_ONLY__ { diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index 160bbaf429f64..015f345740ea4 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -144,7 +144,16 @@ template class vec_base { static constexpr size_t AdjustedNum = (NumElements == 3) ? 4 : NumElements; // This represent type of underlying value. There should be only one field // in the class, so vec should be equal to float16 in memory. +#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) && \ + defined(__SYCL_USE_NEW_VEC_IMPL) + using DataType = DataT[AdjustedNum]; +#else using DataType = std::array; + // Assuming that std::array has the same size as the underlying array. + // C++ standard does not guarantee that, but it is true for most popular + // implementations. + static_assert(sizeof(DataType) == sizeof(DataT[AdjustedNum])); +#endif protected: // fields @@ -287,7 +296,8 @@ class __SYCL_EBO vec typename vector_t_ = vector_t, typename = typename std::enable_if_t>> constexpr vec(vector_t_ openclVector) { - this->m_Data = sycl::bit_castm_Data)>(openclVector); + sycl::detail::memcpy_no_adl(&this->m_Data, &openclVector, + sizeof(openclVector)); } /* @SYCL2020 diff --git a/sycl/test/check_device_code/vector/vector_bf16_builtins_preview.cpp b/sycl/test/check_device_code/vector/vector_bf16_builtins_preview.cpp index 5c27c66fda0ad..7ce718a5f5868 100644 --- a/sycl/test/check_device_code/vector/vector_bf16_builtins_preview.cpp +++ b/sycl/test/check_device_code/vector/vector_bf16_builtins_preview.cpp @@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.71") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.71") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.71") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.70") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -123,7 +123,7 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.149") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.188") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.184") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.342") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.342") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.342") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.342") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.420") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.412") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 diff --git a/sycl/test/check_device_code/vector/vector_convert_bfloat_preview.cpp b/sycl/test/check_device_code/vector/vector_convert_bfloat_preview.cpp index 436852b841577..24d09d4760f53 100644 --- a/sycl/test/check_device_code/vector/vector_convert_bfloat_preview.cpp +++ b/sycl/test/check_device_code/vector/vector_convert_bfloat_preview.cpp @@ -63,7 +63,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.71") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.70") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] @@ -90,7 +90,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.110") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]] @@ -182,7 +182,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.149") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.146") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]] @@ -195,7 +195,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.229") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.224") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] diff --git a/sycl/test/check_device_code/vector/vector_math_ops_preview.cpp b/sycl/test/check_device_code/vector/vector_math_ops_preview.cpp index 708d553220964..b6adf26170ab7 100644 --- a/sycl/test/check_device_code/vector/vector_math_ops_preview.cpp +++ b/sycl/test/check_device_code/vector/vector_math_ops_preview.cpp @@ -46,7 +46,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.74") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.74") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.74") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.73") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) @@ -60,7 +60,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.115") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.115") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.115") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.113") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) @@ -75,7 +75,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.126") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.126") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.126") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.123") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) @@ -90,7 +90,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.167") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.167") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.167") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_used_aspects [[META60:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.163") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_used_aspects [[META60:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) @@ -104,10 +104,10 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.208") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.208") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.208") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META72:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.203") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META72:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.208", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.203", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) @@ -121,17 +121,17 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILPLINS0_3EXT6ONEAPI8BFLOAT16EEENST9ENABLE_IFIX24IS_OP_AVAILABLE_FOR_TYPEIST4PLUSIVET_EENS0_3VECIS5_LI3EEEE4TYPEERKSB_SF__EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I12_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I12_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META80:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META83:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I12_I_I]]) #[[ATTR8]], !noalias [[META83]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META83:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I12_I_I]]) #[[ATTR8]], !noalias [[META83]] // CHECK-NEXT: [[ADD_I_I_I_I:%.*]] = fadd float [[CALL_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I]] // CHECK-NEXT: store float [[ADD_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA86:![0-9]+]], !noalias [[META83]] // CHECK-NEXT: [[CALL_I_I3_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR8]], !noalias [[META83]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META80]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I14_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I_I_I14_I_I]], align 2, !tbaa [[TBAA88:![0-9]+]], !noalias [[META79]] +// CHECK-NEXT: [[ARRAYIDX_I14_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I14_I_I]], align 2, !tbaa [[TBAA88:![0-9]+]], !noalias [[META79]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt4plusIvET_EENS0_3vecIS5_Li3EEEE4typeERKSB_SF_.exit: @@ -148,7 +148,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.247") align 64 initializes((0, 64)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.247") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.247") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META92:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.241") align 64 initializes((0, 64)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META92:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) @@ -164,7 +164,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func noundef range(i8 -1, 1) <3 x i8> @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( -// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.289") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.289") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META100:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.282") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.282") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META100:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 // CHECK-NEXT: [[LOADVECN_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 @@ -178,7 +178,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.298") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.337") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.337") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.290") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) @@ -194,7 +194,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.380") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.419") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.419") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META112:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.370") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META112:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) @@ -210,9 +210,9 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.462") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.501") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.501") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META120:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.450") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META120:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.462", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.450", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META121:![0-9]+]]) @@ -225,14 +225,14 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 4 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILGTINS0_3EXT6ONEAPI8BFLOAT16EEENST9ENABLE_IFIX24IS_OP_AVAILABLE_FOR_TYPEIST7GREATERIVET_EENS0_3VECISLI4EEEE4TYPEERKNSA_IS5_LI4EEESG__EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I14_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8]], !noalias [[META127]] -// CHECK-NEXT: [[CALL_I_I2_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I14_I_I]]) #[[ATTR8]], !noalias [[META127]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I14_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8]], !noalias [[META127]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I14_I_I]]) #[[ATTR8]], !noalias [[META127]] // CHECK-NEXT: [[CMP_I_I_I_I_I:%.*]] = fcmp ogt float [[CALL_I_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I_I]] // CHECK-NEXT: [[CONV6_I_I:%.*]] = sext i1 [[CMP_I_I_I_I_I]] to i16 -// CHECK-NEXT: [[ARRAYIDX_I_I_I16_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CONV6_I_I]], ptr [[ARRAYIDX_I_I_I16_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META127]] +// CHECK-NEXT: [[ARRAYIDX_I16_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CONV6_I_I]], ptr [[ARRAYIDX_I16_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META127]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP128:![0-9]+]] // CHECK: _ZN4sycl3_V16detailgtINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt7greaterIvET_EENS0_3vecIsLi4EEEE4typeERKNSA_IS5_Li4EEESG_.exit: @@ -249,7 +249,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.539") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.539") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META129:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.526") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.526") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META129:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) @@ -264,7 +264,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.579") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.579") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.565") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.565") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) @@ -277,7 +277,7 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.618") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.618") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META145:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.604") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.604") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META145:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META146:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META149:![0-9]+]]) @@ -289,7 +289,7 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.628") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.126") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META153:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.613") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META153:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META157:![0-9]+]]) @@ -302,7 +302,7 @@ SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.668") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.707") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META164:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.652") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.690") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META164:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) @@ -315,7 +315,7 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.419") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.419") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META175:![0-9]+]] !sycl_used_aspects [[META60]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.408") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META175:![0-9]+]] !sycl_used_aspects [[META60]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META176:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META179:![0-9]+]]) @@ -327,9 +327,9 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.748") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.208") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META183:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.730") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META183:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.748", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.730", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META184:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META187:![0-9]+]]) @@ -341,12 +341,12 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8]], !noalias [[META190]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8]], !noalias [[META190]] // CHECK-NEXT: [[TOBOOL_I_I_I:%.*]] = fcmp oeq float [[CALL_I_I_I_I_I]], 0.000000e+00 // CHECK-NEXT: [[CONV2_I_I:%.*]] = sext i1 [[TOBOOL_I_I_I]] to i16 -// CHECK-NEXT: [[ARRAYIDX_I_I_I9_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CONV2_I_I]], ptr [[ARRAYIDX_I_I_I9_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META190]] +// CHECK-NEXT: [[ARRAYIDX_I9_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CONV2_I_I]], ptr [[ARRAYIDX_I9_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META190]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP191:![0-9]+]] // CHECK: _ZN4sycl3_V16detailntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: @@ -358,10 +358,10 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.786") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.786") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META192:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.768") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.768") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META192:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.786", align 32 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.768", align 32 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193:![0-9]+]] // CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[RES_I_I]], i8 0, i64 32, i1 false), !noalias [[META196:![0-9]+]] @@ -372,15 +372,15 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 16 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META199:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8]], !noalias [[META202:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8]], !noalias [[META202:![0-9]+]] // CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg float [[CALL_I_I_I_I]] // CHECK-NEXT: store float [[FNEG_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA86]], !noalias [[META202]] // CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR8]], !noalias [[META202]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META199]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I7_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CALL_I_I_I_I_I_I]], ptr [[ARRAYIDX_I_I_I7_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META196]] +// CHECK-NEXT: [[ARRAYIDX_I7_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CALL_I_I_I_I_I_I]], ptr [[ARRAYIDX_I7_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META196]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: diff --git a/sycl/test/regression/vec_array_windows.cpp b/sycl/test/regression/vec_array_windows.cpp index d9754bc2e4e93..f96edbf0153a6 100644 --- a/sycl/test/regression/vec_array_windows.cpp +++ b/sycl/test/regression/vec_array_windows.cpp @@ -6,6 +6,7 @@ // REQUIRES: windows // RUN: %clangxx -fsycl -D_DEBUG %s -fsycl-device-only -Xclang -verify %s -Xclang -verify-ignore-unexpected=note,warning +// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes -D_DEBUG -fsycl-device-only %s %} #include From 346b64eeaeb4a40a5838ba3ea4feba1ff398e54e Mon Sep 17 00:00:00 2001 From: Maosu Zhao Date: Thu, 27 Feb 2025 15:29:46 +0800 Subject: [PATCH 4/7] [DevTSAN] Support thread sanitizer for device offloading in driver (#17155) This PR is going to support thread sanitizer for device offloading (driver part) User can enable tsan by appending following flags to build command: 1.'-fsanitize=thread' enable tsan for both host/device side 2.'-Xarch_host -fsanitize=thread' enable tsan only for host side 3.'-Xarch_device -fsanitize=thread' enable tsan only for device side --- clang/lib/Driver/SanitizerArgs.cpp | 12 ++++++ clang/lib/Driver/ToolChains/SYCL.cpp | 23 +++++++++--- .../Driver/Inputs/SYCL/lib/libsycl-tsan.bc | 0 .../test/Driver/sycl-device-lib-old-model.cpp | 37 +++++++++++++++++++ clang/test/Driver/sycl-device-lib.cpp | 36 ++++++++++++++++++ .../test/Driver/sycl-device-sanitizer-win.cpp | 10 +++++ clang/test/Driver/sycl-device-sanitizer.cpp | 14 +++++++ 7 files changed, 126 insertions(+), 6 deletions(-) create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-tsan.bc diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index f00de41e74ffd..81f968e95aa4b 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -1275,6 +1275,16 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-msan-eager-checks=1"); + } else if (Sanitizers.has(SanitizerKind::Thread)) { + CmdArgs.push_back("-fsanitize=thread"); + // The tsan function entry/exit builtins are used to record stack + // position, we don't need them in device offloading. + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-tsan-instrument-func-entry-exit=0"); + // In device offloading, user can't call memory instrinsics explicitly, so + // we can safely skip them. + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-tsan-instrument-memintrinsics=0"); } #else // _WIN32 std::string SanitizeArg; @@ -1282,6 +1292,8 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, SanitizeArg = "-fsanitize=address"; else if (Sanitizers.has(SanitizerKind::Memory)) SanitizeArg = "-fsanitize=memory"; + else if (Sanitizers.has(SanitizerKind::Thread)) + SanitizeArg = "-fsanitize=thread"; if (!SanitizeArg.empty()) TC.getDriver().Diag(diag::warn_drv_unsupported_option_for_target) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 025a91c75d34a..c12ad1d525928 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -618,6 +618,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, // For DG2, we just use libsycl-msan as placeholder. {"libsycl-msan", "internal"}, {"libsycl-msan-pvc", "internal"}}; + const SYCLDeviceLibsList SYCLDeviceTsanLibs = {{"libsycl-tsan", "internal"}}; #endif const SYCLDeviceLibsList SYCLNativeCpuDeviceLibs = { @@ -758,8 +759,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, SyclFEEQArgVals.end()); ArgVals.insert(ArgVals.end(), ArchDeviceVals.begin(), ArchDeviceVals.end()); - // Driver will report error if address sanitizer and memory sanitizer are - // both enabled, so we only need to check first one here. + // Driver will report error if more than one of address sanitizer, memory + // sanitizer or thread sanitizer is enabled, so we only need to check first + // one here. for (const std::string &Arg : ArgVals) { if (Arg.find("-fsanitize=address") != std::string::npos) { SanitizeVal = "address"; @@ -769,6 +771,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, SanitizeVal = "memory"; break; } + if (Arg.find("-fsanitize=thread") != std::string::npos) { + SanitizeVal = "thread"; + break; + } } } @@ -776,6 +782,8 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, addSingleLibrary(SYCLDeviceAsanLibs[sanitizer_lib_idx]); else if (SanitizeVal == "memory") addSingleLibrary(SYCLDeviceMsanLibs[sanitizer_lib_idx]); + else if (SanitizeVal == "thread") + addLibraries(SYCLDeviceTsanLibs); #endif if (isNativeCPU) @@ -898,6 +906,7 @@ static llvm::SmallVector SYCLDeviceLibList{ "msan", "msan-pvc", "msan-cpu", + "tsan", #endif "imf", "imf-fp64", @@ -1739,11 +1748,12 @@ SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, if (SupportedByNativeCPU(*this, Opt)) continue; // All sanitizer options are not currently supported, except - // AddressSanitizer and MemorySanitizer + // AddressSanitizer and MemorySanitizer and ThreadSanitizer if (A->getOption().getID() == options::OPT_fsanitize_EQ && A->getValues().size() == 1) { std::string SanitizeVal = A->getValue(); - if (SanitizeVal == "address" || SanitizeVal == "memory") + if (SanitizeVal == "address" || SanitizeVal == "memory" || + SanitizeVal == "thread") continue; } D.Diag(clang::diag::warn_drv_unsupported_option_for_target) @@ -1784,7 +1794,8 @@ SYCLToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, if (Opt.getID() == options::OPT_fsanitize_EQ && A->getValues().size() == 1) { std::string SanitizeVal = A->getValue(); - if (SanitizeVal == "address" || SanitizeVal == "memory") { + if (SanitizeVal == "address" || SanitizeVal == "memory" || + SanitizeVal == "thread") { if (IsNewDAL) DAL->append(A); continue; @@ -2202,5 +2213,5 @@ void SYCLToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args, } SanitizerMask SYCLToolChain::getSupportedSanitizers() const { - return SanitizerKind::Address | SanitizerKind::Memory; + return SanitizerKind::Address | SanitizerKind::Memory | SanitizerKind::Thread; } diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-tsan.bc b/clang/test/Driver/Inputs/SYCL/lib/libsycl-tsan.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/sycl-device-lib-old-model.cpp b/clang/test/Driver/sycl-device-lib-old-model.cpp index 21c611c3e46fe..fdabe3b6c5838 100644 --- a/clang/test/Driver/sycl-device-lib-old-model.cpp +++ b/clang/test/Driver/sycl-device-lib-old-model.cpp @@ -378,3 +378,40 @@ // RUN: -Xarch_device -fsanitize=memory -### 2>&1 | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_MSAN_CPU // SYCL_DEVICE_LIB_MSAN_CPU: llvm-link{{.*}} "-only-needed" "{{.*}}libsycl-crt.bc" // SYCL_DEVICE_LIB_MSAN_CPU-SAME: "{{.*}}libsycl-msan-cpu.bc" + +/// ########################################################################### +/// test behavior of libsycl-tsan.bc linking when -fsanitize=thread is available +// RUN: %clangxx -fsycl --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL -fsanitize=thread -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_TSAN +// RUN: %clangxx -fsycl --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xsycl-target-frontend -fsanitize=thread -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_TSAN +// RUN: %clangxx -fsycl --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xsycl-target-frontend=spir64 -fsanitize=thread -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_TSAN +// RUN: %clangxx -fsycl --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=thread -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_TSAN +// RUN: %clangxx -fsycl --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xarch_device "-fsanitize=thread -DUSE_SYCL_DEVICE_TSAN" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_TSAN +// RUN: %clangxx -fsycl --no-offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xarch_device "-fsanitize=thread -DUSE_SYCL_DEVICE_TSAN" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_TSAN_MACRO +// SYCL_DEVICE_LIB_TSAN: llvm-link{{.*}} "{{.*}}libsycl-crt.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-complex.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-complex-fp64.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-cmath.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-cmath-fp64.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-imf.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-imf-fp64.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-imf-bf16.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-fallback-cassert.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-fallback-cstring.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-fallback-complex.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-fallback-complex-fp64.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-fallback-cmath.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-fallback-cmath-fp64.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-fallback-imf.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-fallback-imf-fp64.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-fallback-imf-bf16.bc" +// SYCL_DEVICE_LIB_TSAN-SAME: "{{.*}}libsycl-tsan.bc" +// SYCL_DEVICE_TSAN_MACRO: "-cc1" +// SYCL_DEVICE_TSAN_MACRO-SAME: "USE_SYCL_DEVICE_TSAN" +// SYCL_DEVICE_TSAN_MACRO: llvm-link{{.*}} "-only-needed" +// SYCL_DEVICE_TSAN_MACRO-SAME: "{{.*}}libsycl-tsan.bc" diff --git a/clang/test/Driver/sycl-device-lib.cpp b/clang/test/Driver/sycl-device-lib.cpp index a80cdfbd847d7..2fbf7411bae3d 100644 --- a/clang/test/Driver/sycl-device-lib.cpp +++ b/clang/test/Driver/sycl-device-lib.cpp @@ -365,3 +365,39 @@ // SYCL_DEVICE_LIB_MSAN_CPU: clang-linker-wrapper{{.*}} "-sycl-device-libraries // SYCL_DEVICE_LIB_MSAN_CPU-SAME: {{.*}}libsycl-msan-cpu.new.o +/// ########################################################################### +/// test behavior of libsycl-tsan.o linking when -fsanitize=thread is available +// RUN: %clangxx -fsycl --offload-new-driver %s --sysroot=%S/Inputs/SYCL -fsanitize=thread -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_TSAN +// RUN: %clangxx -fsycl --offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xsycl-target-frontend -fsanitize=thread -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_TSAN +// RUN: %clangxx -fsycl --offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xsycl-target-frontend=spir64 -fsanitize=thread -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_TSAN +// RUN: %clangxx -fsycl --offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xarch_device -fsanitize=thread -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_TSAN +// RUN: %clangxx -fsycl --offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xarch_device "-fsanitize=thread -DUSE_SYCL_DEVICE_TSAN" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_TSAN +// RUN: %clangxx -fsycl --offload-new-driver %s --sysroot=%S/Inputs/SYCL -Xarch_device "-fsanitize=thread -DUSE_SYCL_DEVICE_TSAN" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_TSAN_MACRO +// SYCL_DEVICE_LIB_TSAN: clang-linker-wrapper{{.*}} "-sycl-device-libraries +// SYCL_DEVICE_LIB_TSAN: {{.*}}libsycl-crt.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-complex. +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-complex-fp64. +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-cmath.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-cmath-fp64.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-imf.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-imf-fp64.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-imf-bf16.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-fallback-cassert.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-fallback-cstring.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-fallback-complex.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-fallback-complex-fp64.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-fallback-cmath.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-fallback-cmath-fp64.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-fallback-imf.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-fallback-imf-fp64.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-fallback-imf-bf16.new.o +// SYCL_DEVICE_LIB_TSAN-SAME: {{.*}}libsycl-tsan.new.o +// SYCL_DEVICE_TSAN_MACRO: "-cc1" +// SYCL_DEVICE_TSAN_MACRO-SAME: "USE_SYCL_DEVICE_TSAN" +// SYCL_DEVICE_TSAN_MACRO: libsycl-tsan.new.o diff --git a/clang/test/Driver/sycl-device-sanitizer-win.cpp b/clang/test/Driver/sycl-device-sanitizer-win.cpp index 563d36e381df8..ecb4ed8496632 100644 --- a/clang/test/Driver/sycl-device-sanitizer-win.cpp +++ b/clang/test/Driver/sycl-device-sanitizer-win.cpp @@ -22,3 +22,13 @@ // RUN: | FileCheck --check-prefix=SYCL-MSAN %s // SYCL-MSAN: ignoring '-fsanitize=memory' option as it is not currently supported for target 'spir64-unknown-unknown' + +/// ########################################################################### + +// We need to add "not" here since "error: unsupported option '-fsanitize=thread' for target 'x86_64-pc-windows-msvc'" +// RUN: not %clangxx -fsycl -fsanitize=thread -c %s -### 2>&1 \ +// RUN: | FileCheck --check-prefix=SYCL-TSAN %s +// RUN: %clangxx -fsycl -Xarch_device -fsanitize=thread -c %s -### 2>&1 \ +// RUN: | FileCheck --check-prefix=SYCL-TSAN %s + +// SYCL-TSAN: ignoring '-fsanitize=thread' option as it is not currently supported for target 'spir64-unknown-unknown' diff --git a/clang/test/Driver/sycl-device-sanitizer.cpp b/clang/test/Driver/sycl-device-sanitizer.cpp index b25bb3c524955..7045469cae652 100644 --- a/clang/test/Driver/sycl-device-sanitizer.cpp +++ b/clang/test/Driver/sycl-device-sanitizer.cpp @@ -52,3 +52,17 @@ // RUN: | FileCheck --check-prefix=SYCL-MSAN-XARCH-DEVICE %s // SYCL-MSAN-XARCH-DEVICE: clang{{.*}} "-fsycl-is-device" // SYCL-MSAN-XARCH-DEVICE-SAME: -fsanitize=memory + +/// ########################################################################### + +// RUN: %clangxx -fsycl -fsanitize=thread -c %s -### 2>&1 \ +// RUN: | FileCheck --check-prefix=SYCL-TSAN %s +// SYCL-TSAN: clang{{.*}} "-fsycl-is-device" +// SYCL-TSAN-SAME: -fsanitize=thread +// SYCL-TSAN-SAME: "-mllvm" "-tsan-instrument-func-entry-exit=0" +// SYCL-TSAN-SAME: "-mllvm" "-tsan-instrument-memintrinsics=0" + +// RUN: %clangxx -fsycl -Xarch_device -fsanitize=thread -c %s -### 2>&1 \ +// RUN: | FileCheck --check-prefix=SYCL-TSAN-XARCH-DEVICE %s +// SYCL-TSAN-XARCH-DEVICE: clang{{.*}} "-fsycl-is-device" +// SYCL-TSAN-XARCH-DEVICE-SAME: -fsanitize=thread From 95ee1e3e92a354ce9cb625b1b29320ccf3934e36 Mon Sep 17 00:00:00 2001 From: David Garcia Orozco Date: Thu, 27 Feb 2025 02:00:21 -0700 Subject: [PATCH 5/7] [SYCL][E2E] Use run expansions in kernel compiler tests (#17174) --- .../KernelCompiler/kernel_compiler_opencl.cpp | 12 ++++++------ sycl/test-e2e/KernelCompiler/sycl_and_cache.cpp | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp index 036bb9c86f286..c6a6092f6b5b3 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp @@ -17,14 +17,14 @@ // -- Test again, with caching. // DEFINE: %{cache_vars} = env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=5 SYCL_CACHE_DIR=%t/cache_dir -// RUN: rm -rf %t/cache_dir -// RUN: %{cache_vars} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-WRITTEN-TO-CACHE -// RUN: %{cache_vars} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-READ-FROM-CACHE +// RUN: %{run-aux} rm -rf %t/cache_dir +// RUN: %{cache_vars} %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-WRITTEN-TO-CACHE +// RUN: %{cache_vars} %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-READ-FROM-CACHE // -- Add leak check. -// RUN: rm -rf %t/cache_dir -// RUN: %{l0_leak_check} %{cache_vars} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-WRITTEN-TO-CACHE -// RUN: %{l0_leak_check} %{cache_vars} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-READ-FROM-CACHE +// RUN: %{run-aux} rm -rf %t/cache_dir +// RUN: %{l0_leak_check} %{cache_vars} %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-WRITTEN-TO-CACHE +// RUN: %{l0_leak_check} %{cache_vars} %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-READ-FROM-CACHE // CHECK-WRITTEN-TO-CACHE: [Persistent Cache]: enabled // CHECK-WRITTEN-TO-CACHE-NOT: [kernel_compiler Persistent Cache]: using cached binary diff --git a/sycl/test-e2e/KernelCompiler/sycl_and_cache.cpp b/sycl/test-e2e/KernelCompiler/sycl_and_cache.cpp index c75b9c0352ed9..b5927c80d41b2 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_and_cache.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_and_cache.cpp @@ -18,9 +18,9 @@ // -- Run with caching. // DEFINE: %{cache_vars} = %{l0_leak_check} env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=5 SYCL_CACHE_DIR=%t/cache_dir -// RUN: rm -rf %t/cache_dir -// RUN: %{cache_vars} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-WRITTEN-TO-CACHE -// RUN: %{cache_vars} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-READ-FROM-CACHE +// RUN: %{run-aux} rm -rf %t/cache_dir +// RUN: %{cache_vars} %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-WRITTEN-TO-CACHE +// RUN: %{cache_vars} %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-READ-FROM-CACHE // CHECK-WRITTEN-TO-CACHE: [Persistent Cache]: enabled // CHECK-WRITTEN-TO-CACHE-NOT: [kernel_compiler Persistent Cache]: using cached binary From 42ff9c292e66522e045266f93e754c6996afa33c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A9sz=C3=A1ros=20Gergely?= Date: Thu, 27 Feb 2025 10:01:10 +0100 Subject: [PATCH 6/7] [SYCL][LIBCLC] Use half builtins for generic math functions (#17163) `__builtin_()` without a suffix is equivalent to the C math function `()`, these functions take and return `double` values. Instead of using these use the `f16` suffixed builtins which take and return `half` values. On CPU targets these will be usually lowered to the single precision library calls (with promotion/truncation). On other targets (for example GPUs) the half builtins might be lowered directly to hardware instructions. Some of the builtins are not available for half precision, in these cases use the `f` suffixed builtins (which take and return `float`s). --- libclc/libspirv/lib/generic/math/acosh.cl | 2 +- libclc/libspirv/lib/generic/math/asinh.cl | 2 +- libclc/libspirv/lib/generic/math/atan.cl | 2 +- libclc/libspirv/lib/generic/math/atan2.cl | 2 +- libclc/libspirv/lib/generic/math/cbrt.cl | 2 +- libclc/libspirv/lib/generic/math/clc_exp10.cl | 2 +- libclc/libspirv/lib/generic/math/clc_fmod.cl | 2 +- libclc/libspirv/lib/generic/math/clc_hypot.cl | 2 +- libclc/libspirv/lib/generic/math/clc_remainder.cl | 2 +- libclc/libspirv/lib/generic/math/cos.cl | 2 +- libclc/libspirv/lib/generic/math/cosh.cl | 2 +- libclc/libspirv/lib/generic/math/erf.cl | 2 +- libclc/libspirv/lib/generic/math/erfc.cl | 2 +- libclc/libspirv/lib/generic/math/exp.cl | 2 +- libclc/libspirv/lib/generic/math/exp2.cl | 2 +- libclc/libspirv/lib/generic/math/expm1.cl | 2 +- libclc/libspirv/lib/generic/math/fdim.cl | 2 +- libclc/libspirv/lib/generic/math/frexp.cl | 6 +++--- libclc/libspirv/lib/generic/math/log.cl | 2 +- libclc/libspirv/lib/generic/math/log10.cl | 2 +- libclc/libspirv/lib/generic/math/log1p.cl | 2 +- libclc/libspirv/lib/generic/math/log2.cl | 2 +- libclc/libspirv/lib/generic/math/logb.cl | 2 +- libclc/libspirv/lib/generic/math/sin.cl | 2 +- 24 files changed, 26 insertions(+), 26 deletions(-) diff --git a/libclc/libspirv/lib/generic/math/acosh.cl b/libclc/libspirv/lib/generic/math/acosh.cl index 592cd4ee155bc..22c61d95a0d2b 100644 --- a/libclc/libspirv/lib/generic/math/acosh.cl +++ b/libclc/libspirv/lib/generic/math/acosh.cl @@ -118,6 +118,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_acosh, double) #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_acosh, __builtin_acosh, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_acosh, __builtin_acoshf, half) #endif diff --git a/libclc/libspirv/lib/generic/math/asinh.cl b/libclc/libspirv/lib/generic/math/asinh.cl index 8887c51765572..f5cea25a6eb38 100644 --- a/libclc/libspirv/lib/generic/math/asinh.cl +++ b/libclc/libspirv/lib/generic/math/asinh.cl @@ -366,6 +366,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_asinh, double) #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_asinh, __builtin_asinh, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_asinh, __builtin_asinhf, half) #endif diff --git a/libclc/libspirv/lib/generic/math/atan.cl b/libclc/libspirv/lib/generic/math/atan.cl index 4ed0361bacf68..dc5c895aab1e3 100644 --- a/libclc/libspirv/lib/generic/math/atan.cl +++ b/libclc/libspirv/lib/generic/math/atan.cl @@ -178,6 +178,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_atan, double); #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_atan, __builtin_atan, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_atan, __builtin_atanf16, half) #endif diff --git a/libclc/libspirv/lib/generic/math/atan2.cl b/libclc/libspirv/lib/generic/math/atan2.cl index 702f84bf1f961..ecdde758f9ced 100644 --- a/libclc/libspirv/lib/generic/math/atan2.cl +++ b/libclc/libspirv/lib/generic/math/atan2.cl @@ -250,6 +250,6 @@ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_atan2, double, #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_BINARY_BUILTIN(half, __spirv_ocl_atan2, __builtin_atan2, half, half) +_CLC_DEFINE_BINARY_BUILTIN(half, __spirv_ocl_atan2, __builtin_atan2f16, half, half) #endif diff --git a/libclc/libspirv/lib/generic/math/cbrt.cl b/libclc/libspirv/lib/generic/math/cbrt.cl index 931f95b9cc553..795ffe2c6cd97 100644 --- a/libclc/libspirv/lib/generic/math/cbrt.cl +++ b/libclc/libspirv/lib/generic/math/cbrt.cl @@ -149,6 +149,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_cbrt, double) #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_cbrt, __builtin_cbrt, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_cbrt, __builtin_cbrtf, half) #endif diff --git a/libclc/libspirv/lib/generic/math/clc_exp10.cl b/libclc/libspirv/lib/generic/math/clc_exp10.cl index 0e1d6d1e019f4..bd3a248623fcc 100644 --- a/libclc/libspirv/lib/generic/math/clc_exp10.cl +++ b/libclc/libspirv/lib/generic/math/clc_exp10.cl @@ -165,6 +165,6 @@ _CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_exp10, double) #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __clc_exp10, __builtin_exp10, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __clc_exp10, __builtin_exp10f16, half) #endif diff --git a/libclc/libspirv/lib/generic/math/clc_fmod.cl b/libclc/libspirv/lib/generic/math/clc_fmod.cl index b5083574ad49e..a365ac2d5dc6e 100644 --- a/libclc/libspirv/lib/generic/math/clc_fmod.cl +++ b/libclc/libspirv/lib/generic/math/clc_fmod.cl @@ -171,6 +171,6 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_fmod, double, #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_BINARY_BUILTIN(half, __clc_fmod, __builtin_fmod, half, half) +_CLC_DEFINE_BINARY_BUILTIN(half, __clc_fmod, __builtin_fmodf16, half, half) #endif diff --git a/libclc/libspirv/lib/generic/math/clc_hypot.cl b/libclc/libspirv/lib/generic/math/clc_hypot.cl index c045005c3ed76..aeabcc0583dbf 100644 --- a/libclc/libspirv/lib/generic/math/clc_hypot.cl +++ b/libclc/libspirv/lib/generic/math/clc_hypot.cl @@ -95,6 +95,6 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_hypot, double, #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_BINARY_BUILTIN(half, __clc_hypot, __builtin_hypot, half, half) +_CLC_DEFINE_BINARY_BUILTIN(half, __clc_hypot, __builtin_hypotf, half, half) #endif diff --git a/libclc/libspirv/lib/generic/math/clc_remainder.cl b/libclc/libspirv/lib/generic/math/clc_remainder.cl index a95f641688c9b..f4fa670c47959 100644 --- a/libclc/libspirv/lib/generic/math/clc_remainder.cl +++ b/libclc/libspirv/lib/generic/math/clc_remainder.cl @@ -207,7 +207,7 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_remainder, double, #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_BINARY_BUILTIN(half, __clc_remainder, __builtin_remainder, half, +_CLC_DEFINE_BINARY_BUILTIN(half, __clc_remainder, __builtin_remainderf, half, half) #endif diff --git a/libclc/libspirv/lib/generic/math/cos.cl b/libclc/libspirv/lib/generic/math/cos.cl index 9c56171cb9f53..1950ddc1fc888 100644 --- a/libclc/libspirv/lib/generic/math/cos.cl +++ b/libclc/libspirv/lib/generic/math/cos.cl @@ -67,6 +67,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_cos, double); #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_cos, __builtin_cos, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_cos, __builtin_cosf16, half) #endif diff --git a/libclc/libspirv/lib/generic/math/cosh.cl b/libclc/libspirv/lib/generic/math/cosh.cl index 66b8277e6af45..c7ecb7fee667d 100644 --- a/libclc/libspirv/lib/generic/math/cosh.cl +++ b/libclc/libspirv/lib/generic/math/cosh.cl @@ -214,6 +214,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_cosh, double) #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_cosh, __builtin_cosh, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_cosh, __builtin_coshf16, half) #endif diff --git a/libclc/libspirv/lib/generic/math/erf.cl b/libclc/libspirv/lib/generic/math/erf.cl index 629b484693877..fe80ead1b8182 100644 --- a/libclc/libspirv/lib/generic/math/erf.cl +++ b/libclc/libspirv/lib/generic/math/erf.cl @@ -545,6 +545,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_erf, double); #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_erf, __builtin_erf, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_erf, __builtin_erff, half) #endif diff --git a/libclc/libspirv/lib/generic/math/erfc.cl b/libclc/libspirv/lib/generic/math/erfc.cl index f91e115f85468..b7497c77c29e0 100644 --- a/libclc/libspirv/lib/generic/math/erfc.cl +++ b/libclc/libspirv/lib/generic/math/erfc.cl @@ -554,6 +554,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_erfc, double); #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_erfc, __builtin_erfc, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_erfc, __builtin_erfcf, half) #endif diff --git a/libclc/libspirv/lib/generic/math/exp.cl b/libclc/libspirv/lib/generic/math/exp.cl index 1d88ffe164805..e01cf8b33dbd6 100644 --- a/libclc/libspirv/lib/generic/math/exp.cl +++ b/libclc/libspirv/lib/generic/math/exp.cl @@ -80,6 +80,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_exp, double) #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_exp, __builtin_exp, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_exp, __builtin_expf16, half) #endif diff --git a/libclc/libspirv/lib/generic/math/exp2.cl b/libclc/libspirv/lib/generic/math/exp2.cl index b92d36bee73b1..08a95a9af698b 100644 --- a/libclc/libspirv/lib/generic/math/exp2.cl +++ b/libclc/libspirv/lib/generic/math/exp2.cl @@ -75,6 +75,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_exp2, double) #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_exp2, __builtin_exp2, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_exp2, __builtin_exp2f16, half) #endif diff --git a/libclc/libspirv/lib/generic/math/expm1.cl b/libclc/libspirv/lib/generic/math/expm1.cl index 08324144a2e5e..793594d62e106 100644 --- a/libclc/libspirv/lib/generic/math/expm1.cl +++ b/libclc/libspirv/lib/generic/math/expm1.cl @@ -154,6 +154,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_expm1, double) #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_expm1, __builtin_expm1, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_expm1, __builtin_expm1f, half) #endif diff --git a/libclc/libspirv/lib/generic/math/fdim.cl b/libclc/libspirv/lib/generic/math/fdim.cl index 39b92c50af67d..7ec08ad3aab97 100644 --- a/libclc/libspirv/lib/generic/math/fdim.cl +++ b/libclc/libspirv/lib/generic/math/fdim.cl @@ -19,6 +19,6 @@ #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_BINARY_BUILTIN(half, __spirv_ocl_fdim, __builtin_fdim, half, half) +_CLC_DEFINE_BINARY_BUILTIN(half, __spirv_ocl_fdim, __builtin_fdimf, half, half) #endif diff --git a/libclc/libspirv/lib/generic/math/frexp.cl b/libclc/libspirv/lib/generic/math/frexp.cl index 2ec46be49d7b2..6eaad26b45fa1 100644 --- a/libclc/libspirv/lib/generic/math/frexp.cl +++ b/libclc/libspirv/lib/generic/math/frexp.cl @@ -42,11 +42,11 @@ return BUILTIN(x, y); \ } -_CLC_DEFINE_NO_VEC(half, __spirv_ocl_frexp, __builtin_frexp, half, global int *) +_CLC_DEFINE_NO_VEC(half, __spirv_ocl_frexp, __builtin_frexpf16, half, global int *) _CLC_V_V_VP_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, __spirv_ocl_frexp, half, global, int) -_CLC_DEFINE_NO_VEC(half, __spirv_ocl_frexp, __builtin_frexp, half, local int *) +_CLC_DEFINE_NO_VEC(half, __spirv_ocl_frexp, __builtin_frexpf16, half, local int *) _CLC_V_V_VP_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, __spirv_ocl_frexp, half, local, int) -_CLC_DEFINE_NO_VEC(half, __spirv_ocl_frexp, __builtin_frexp, half, int *) +_CLC_DEFINE_NO_VEC(half, __spirv_ocl_frexp, __builtin_frexpf16, half, int *) _CLC_V_V_VP_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, __spirv_ocl_frexp, half, , int) #endif diff --git a/libclc/libspirv/lib/generic/math/log.cl b/libclc/libspirv/lib/generic/math/log.cl index baa32785c82a9..a08681a8bbcd8 100644 --- a/libclc/libspirv/lib/generic/math/log.cl +++ b/libclc/libspirv/lib/generic/math/log.cl @@ -37,6 +37,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_log, double); #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log, __builtin_log, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log, __builtin_logf16, half) #endif diff --git a/libclc/libspirv/lib/generic/math/log10.cl b/libclc/libspirv/lib/generic/math/log10.cl index 3b99a7e921f39..a8dde145b7ab6 100644 --- a/libclc/libspirv/lib/generic/math/log10.cl +++ b/libclc/libspirv/lib/generic/math/log10.cl @@ -29,6 +29,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_log10, double); #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log10, __builtin_log10, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log10, __builtin_log10f16, half) #endif diff --git a/libclc/libspirv/lib/generic/math/log1p.cl b/libclc/libspirv/lib/generic/math/log1p.cl index e26f88126305c..df161a94b473d 100644 --- a/libclc/libspirv/lib/generic/math/log1p.cl +++ b/libclc/libspirv/lib/generic/math/log1p.cl @@ -171,6 +171,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_log1p, double); #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log1p, __builtin_log1p, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log1p, __builtin_log1pf, half) #endif diff --git a/libclc/libspirv/lib/generic/math/log2.cl b/libclc/libspirv/lib/generic/math/log2.cl index 4090d713a70d9..6e810c5a0b436 100644 --- a/libclc/libspirv/lib/generic/math/log2.cl +++ b/libclc/libspirv/lib/generic/math/log2.cl @@ -29,6 +29,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_log2, double); #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log2, __builtin_log2, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log2, __builtin_log2f16, half) #endif diff --git a/libclc/libspirv/lib/generic/math/logb.cl b/libclc/libspirv/lib/generic/math/logb.cl index 06365c4f69b47..9ca1c9b1cd549 100644 --- a/libclc/libspirv/lib/generic/math/logb.cl +++ b/libclc/libspirv/lib/generic/math/logb.cl @@ -43,6 +43,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_logb, double) #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_logb, __builtin_logb, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_logb, __builtin_logbf, half) #endif diff --git a/libclc/libspirv/lib/generic/math/sin.cl b/libclc/libspirv/lib/generic/math/sin.cl index 6a44c7bc8803e..fb2f3fd2e428b 100644 --- a/libclc/libspirv/lib/generic/math/sin.cl +++ b/libclc/libspirv/lib/generic/math/sin.cl @@ -69,6 +69,6 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_sin, double); #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_sin, __builtin_sin, half) +_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_sin, __builtin_sinf16, half) #endif From 1e0c0214d8e1067b8049dfb957a7740f7f17ffe9 Mon Sep 17 00:00:00 2001 From: Kseniya Tikhomirova Date: Thu, 27 Feb 2025 10:34:32 +0100 Subject: [PATCH 7/7] [SYCL] Extend image cleanup for __sycl_unregister_lib (#17091) fixes https://github.com/intel/llvm/issues/16031 --------- Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/kernel_program_cache.hpp | 174 ++++---- .../program_manager/program_manager.cpp | 105 +++-- .../program_manager/program_manager.hpp | 8 +- sycl/test-e2e/SharedLib/use_with_dlopen.cpp | 7 +- .../use_with_dlopen_verify_cache.cpp | 7 +- sycl/unittests/helpers/MockDeviceImage.hpp | 39 +- sycl/unittests/program_manager/CMakeLists.txt | 1 + sycl/unittests/program_manager/Cleanup.cpp | 401 ++++++++++++++++++ .../arg_mask/EliminatedArgMask.cpp | 6 +- 9 files changed, 610 insertions(+), 138 deletions(-) create mode 100644 sycl/unittests/program_manager/Cleanup.cpp diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 668fff9ba1feb..d1832da1c59f6 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -294,6 +294,11 @@ class KernelProgramCache { MProgramEvictionList.pop_front(); } } + + void erase(const ProgramCacheKeyT &CacheKey) { + MProgramEvictionList.remove(CacheKey); + MProgramToEvictionListMap.erase(CacheKey); + } }; ~KernelProgramCache() = default; @@ -427,11 +432,10 @@ class KernelProgramCache { template void saveKernel(KeyT &&CacheKey, ValT &&CacheVal) { - + ur_program_handle_t Program = std::get<3>(CacheVal); if (SYCLConfig:: isProgramCacheEvictionEnabled()) { - ur_program_handle_t Program = std::get<3>(CacheVal); // Save kernel in fast cache only if the corresponding program is also // in the cache. auto LockedCache = acquireCachedPrograms(); @@ -439,19 +443,89 @@ class KernelProgramCache { if (ProgCache.ProgramSizeMap.find(Program) == ProgCache.ProgramSizeMap.end()) return; - - // Save reference between the program and the fast cache key. - std::unique_lock Lock(MKernelFastCacheMutex); - MProgramToKernelFastCacheKeyMap[Program].emplace_back(CacheKey); } - + // Save reference between the program and the fast cache key. std::unique_lock Lock(MKernelFastCacheMutex); + MProgramToKernelFastCacheKeyMap[Program].emplace_back(CacheKey); + // if no insertion took place, thus some other thread has already inserted // smth in the cache traceKernel("Kernel inserted.", CacheKey.second, true); MKernelFastCache.emplace(CacheKey, CacheVal); } + // Expects locked program cache + size_t removeProgramByKey(const ProgramCacheKeyT &CacheKey, + ProgramCache &ProgCache) { + auto It = ProgCache.Cache.find(CacheKey); + + if (It != ProgCache.Cache.end()) { + // We are about to remove this program now. + // (1) Remove it from KernelPerProgram cache. + // (2) Remove corresponding entries from KernelFastCache. + // (3) Remove it from ProgramCache KeyMap. + // (4) Remove it from the ProgramCache. + // (5) Remove it from ProgramSizeMap. + // (6) Update the cache size. + + // Remove entry from the KernelsPerProgram cache. + ur_program_handle_t NativePrg = It->second->Val; + { + auto LockedCacheKP = acquireKernelsPerProgramCache(); + // List kernels that are to be removed from the cache, if tracing is + // enabled. + if (SYCLConfig::isTraceInMemCache()) { + for (const auto &Kernel : LockedCacheKP.get()[NativePrg]) + traceKernel("Kernel evicted.", Kernel.first); + } + LockedCacheKP.get().erase(NativePrg); + } + + { + // Remove corresponding entries from KernelFastCache. + std::unique_lock Lock(MKernelFastCacheMutex); + if (auto FastCacheKeyItr = + MProgramToKernelFastCacheKeyMap.find(NativePrg); + FastCacheKeyItr != MProgramToKernelFastCacheKeyMap.end()) { + for (const auto &FastCacheKey : FastCacheKeyItr->second) { + MKernelFastCache.erase(FastCacheKey); + traceKernel("Kernel evicted.", FastCacheKey.second, true); + } + MProgramToKernelFastCacheKeyMap.erase(FastCacheKeyItr); + } + } + + // Remove entry from ProgramCache KeyMap. + CommonProgramKeyT CommonKey = + std::make_pair(CacheKey.first.second, CacheKey.second); + // Since KeyMap is a multi-map, we need to iterate over all entries + // with this CommonKey and remove those that match the CacheKey. + auto KeyMapItrRange = ProgCache.KeyMap.equal_range(CommonKey); + for (auto KeyMapItr = KeyMapItrRange.first; + KeyMapItr != KeyMapItrRange.second; ++KeyMapItr) { + if (KeyMapItr->second == CacheKey) { + ProgCache.KeyMap.erase(KeyMapItr); + break; + } + } + + // Get size of the program. + size_t ProgramSize = MCachedPrograms.ProgramSizeMap[It->second->Val]; + // Evict program from the cache. + ProgCache.Cache.erase(It); + // Remove program size from the cache size. + MCachedPrograms.ProgramCacheSizeInBytes -= ProgramSize; + MCachedPrograms.ProgramSizeMap.erase(NativePrg); + + traceProgram("Program evicted.", CacheKey); + } else + // This should never happen. + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Program not found in the cache."); + + return MCachedPrograms.ProgramCacheSizeInBytes; + } + // Evict programs from cache to free up space. void evictPrograms(size_t DesiredCacheSize, size_t CurrentCacheSize) { @@ -471,73 +545,7 @@ class KernelProgramCache { ProgramCacheKeyT CacheKey = ProgramEvictionList.front(); auto LockedCache = acquireCachedPrograms(); auto &ProgCache = LockedCache.get(); - auto It = ProgCache.Cache.find(CacheKey); - - if (It != ProgCache.Cache.end()) { - // We are about to remove this program now. - // (1) Remove it from KernelPerProgram cache. - // (2) Remove corresponding entries from KernelFastCache. - // (3) Remove it from ProgramCache KeyMap. - // (4) Remove it from the ProgramCache. - // (5) Remove it from ProgramSizeMap. - // (6) Update the cache size. - - // Remove entry from the KernelsPerProgram cache. - ur_program_handle_t NativePrg = It->second->Val; - { - auto LockedCacheKP = acquireKernelsPerProgramCache(); - // List kernels that are to be removed from the cache, if tracing is - // enabled. - if (SYCLConfig::isTraceInMemCache()) { - for (const auto &Kernel : LockedCacheKP.get()[NativePrg]) - traceKernel("Kernel evicted.", Kernel.first); - } - LockedCacheKP.get().erase(NativePrg); - } - - { - // Remove corresponding entries from KernelFastCache. - std::unique_lock Lock(MKernelFastCacheMutex); - if (auto FastCacheKeyItr = - MProgramToKernelFastCacheKeyMap.find(NativePrg); - FastCacheKeyItr != MProgramToKernelFastCacheKeyMap.end()) { - for (const auto &FastCacheKey : FastCacheKeyItr->second) { - MKernelFastCache.erase(FastCacheKey); - traceKernel("Kernel evicted.", FastCacheKey.second, true); - } - MProgramToKernelFastCacheKeyMap.erase(FastCacheKeyItr); - } - } - - // Remove entry from ProgramCache KeyMap. - CommonProgramKeyT CommonKey = - std::make_pair(CacheKey.first.second, CacheKey.second); - // Since KeyMap is a multi-map, we need to iterate over all entries - // with this CommonKey and remove those that match the CacheKey. - auto KeyMapItrRange = LockedCache.get().KeyMap.equal_range(CommonKey); - for (auto KeyMapItr = KeyMapItrRange.first; - KeyMapItr != KeyMapItrRange.second; ++KeyMapItr) { - if (KeyMapItr->second == CacheKey) { - LockedCache.get().KeyMap.erase(KeyMapItr); - break; - } - } - - // Get size of the program. - size_t ProgramSize = MCachedPrograms.ProgramSizeMap[It->second->Val]; - // Evict program from the cache. - ProgCache.Cache.erase(It); - // Remove program size from the cache size. - MCachedPrograms.ProgramCacheSizeInBytes -= ProgramSize; - MCachedPrograms.ProgramSizeMap.erase(NativePrg); - - traceProgram("Program evicted.", CacheKey); - } else - // This should never happen. - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Program not found in the cache."); - - CurrCacheSize = MCachedPrograms.ProgramCacheSizeInBytes; + CurrCacheSize = removeProgramByKey(CacheKey, ProgCache); // Remove the program from the eviction list. MEvictionList.popFront(); } @@ -724,6 +732,24 @@ class KernelProgramCache { } } + void removeAllRelatedEntries(uint32_t ImageId) { + auto LockedCache = acquireCachedPrograms(); + auto &ProgCache = LockedCache.get(); + + auto It = std::find_if( + ProgCache.KeyMap.begin(), ProgCache.KeyMap.end(), + [&ImageId](const auto &Entry) { return ImageId == Entry.first.first; }); + if (It == ProgCache.KeyMap.end()) + return; + + auto Key = It->second; + removeProgramByKey(Key, ProgCache); + { + auto LockedEvictionList = acquireEvictionList(); + LockedEvictionList.get().erase(Key); + } + } + private: std::mutex MProgramCacheMutex; std::mutex MKernelsPerProgramCacheMutex; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 21ee135074ef0..f2e98c8b68219 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -246,7 +246,7 @@ ProgramManager::createURProgram(const RTDeviceBinaryImage &Img, { std::lock_guard Lock(MNativeProgramsMutex); // associate the UR program with the image it was created for - NativePrograms.insert({Res, &Img}); + NativePrograms.insert({Res, {Ctx, &Img}}); } Ctx->addDeviceGlobalInitializer(Res, Devices, &Img); @@ -928,7 +928,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( // removal of map entries with same handle (obviously invalid entries). std::ignore = NativePrograms.erase(BuiltProgram.get()); for (const RTDeviceBinaryImage *Img : ImgWithDeps) { - NativePrograms.insert({BuiltProgram.get(), Img}); + NativePrograms.insert({BuiltProgram.get(), {ContextImpl, Img}}); } } @@ -1994,25 +1994,23 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { } } } - m_DeviceImages.insert(std::move(Img)); + m_DeviceImages.insert({RawImg, std::move(Img)}); } } void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) { sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]); + auto DevImgIt = m_DeviceImages.find(RawImg); + if (DevImgIt == m_DeviceImages.end()) + continue; const sycl_offload_entry EntriesB = RawImg->EntriesBegin; const sycl_offload_entry EntriesE = RawImg->EntriesEnd; // Treat the image as empty one if (EntriesB == EntriesE) continue; - // Retrieve RTDeviceBinaryImage by looking up the first offload entry - kernel_id FirstKernelID = getSYCLKernelID(RawImg->EntriesBegin->name); - auto RTDBImages = getRawDeviceImages({FirstKernelID}); - assert(RTDBImages.size() == 1); - - RTDeviceBinaryImage *Img = *RTDBImages.begin(); + RTDeviceBinaryImage *Img = DevImgIt->second.get(); // Drop the kernel argument mask map m_EliminatedKernelArgMasks.erase(Img); @@ -2036,10 +2034,15 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { continue; } - auto It = m_KernelName2KernelIDs.find(EntriesIt->name); - assert(It != m_KernelName2KernelIDs.end()); - m_KernelName2KernelIDs.erase(It); - m_KernelIDs2BinImage.erase(It->second); + // remove everything associated with this KernelName + m_KernelUsesAssert.erase(EntriesIt->name); + m_KernelImplicitLocalArgPos.erase(EntriesIt->name); + + if (auto It = m_KernelName2KernelIDs.find(EntriesIt->name); + It != m_KernelName2KernelIDs.end()) { + m_KernelName2KernelIDs.erase(It); + m_KernelIDs2BinImage.erase(It->second); + } } // Drop reverse mapping @@ -2051,13 +2054,50 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { m_ExportedSymbolImages.erase(ESProp->Name); } - // TODO: Handle other runtime info that was set up by `addImages` - assert(Img->getVirtualFunctions().empty()); - assert(Img->getAssertUsed().empty()); - assert(!Img->getProperty("sanUsed")); - assert(Img->getImplicitLocalArg().empty()); - assert(Img->getDeviceGlobals().empty()); - assert(Img->getHostPipes().empty()); + for (const sycl_device_binary_property &VFProp : + Img->getVirtualFunctions()) { + std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); + for (const auto &SetName : detail::split_string(StrValue, ',')) + m_VFSet2BinImage.erase(SetName); + } + + { + std::lock_guard DeviceGlobalsGuard(m_DeviceGlobalsMutex); + auto DeviceGlobals = Img->getDeviceGlobals(); + for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) { + if (auto DevGlobalIt = m_DeviceGlobals.find(DeviceGlobal->Name); + DevGlobalIt != m_DeviceGlobals.end()) { + auto findDevGlobalByValue = std::find_if( + m_Ptr2DeviceGlobal.begin(), m_Ptr2DeviceGlobal.end(), + [&DevGlobalIt](const std::pair &Entry) { + return Entry.second == DevGlobalIt->second.get(); + }); + if (findDevGlobalByValue != m_Ptr2DeviceGlobal.end()) + m_Ptr2DeviceGlobal.erase(findDevGlobalByValue); + m_DeviceGlobals.erase(DevGlobalIt); + } + } + } + + { + std::lock_guard HostPipesGuard(m_HostPipesMutex); + auto HostPipes = Img->getHostPipes(); + for (const sycl_device_binary_property &HostPipe : HostPipes) { + if (auto HostPipesIt = m_HostPipes.find(HostPipe->Name); + HostPipesIt != m_HostPipes.end()) { + auto findHostPipesByValue = std::find_if( + m_Ptr2HostPipe.begin(), m_Ptr2HostPipe.end(), + [&HostPipesIt]( + const std::pair &Entry) { + return Entry.second == HostPipesIt->second.get(); + }); + if (findHostPipesByValue != m_Ptr2HostPipe.end()) + m_Ptr2HostPipe.erase(findHostPipesByValue); + m_HostPipes.erase(HostPipesIt); + } + } + } // Purge references to the image in native programs map { @@ -2067,17 +2107,17 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // entry without calling UR release for (auto It = NativePrograms.begin(); It != NativePrograms.end();) { auto CurIt = It++; - if (CurIt->second == Img) + if (CurIt->second.second == Img) { + if (auto ContextImpl = CurIt->second.first.lock()) { + ContextImpl->getKernelProgramCache().removeAllRelatedEntries( + Img->getImageID()); + } NativePrograms.erase(CurIt); + } } } - // Finally, destroy the image by erasing the associated unique ptr - auto It = - std::find_if(m_DeviceImages.begin(), m_DeviceImages.end(), - [Img](const auto &UPtr) { return UPtr.get() == Img; }); - assert(It != m_DeviceImages.end()); - m_DeviceImages.erase(It); + m_DeviceImages.erase(DevImgIt); } } @@ -2135,7 +2175,7 @@ ProgramManager::getEliminatedKernelArgMask(ur_program_handle_t NativePrg, std::lock_guard Lock(MNativeProgramsMutex); auto Range = NativePrograms.equal_range(NativePrg); for (auto ImgIt = Range.first; ImgIt != Range.second; ++ImgIt) { - auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second); + auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second.second); if (MapIt == m_EliminatedKernelArgMasks.end()) continue; auto ArgMaskMapIt = MapIt->second.find(KernelName); @@ -2824,7 +2864,8 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, std::ignore = NativePrograms.erase(LinkedProg); for (const device_image_plain &Img : ImgWithDeps) { NativePrograms.insert( - {LinkedProg, getSyclObjImpl(Img)->get_bin_image_ref()}); + {LinkedProg, + {ContextImpl, getSyclObjImpl(Img)->get_bin_image_ref()}}); } } @@ -3594,6 +3635,8 @@ extern "C" void __sycl_register_lib(sycl_device_binaries desc) { // Executed as a part of current module's (.exe, .dll) static initialization extern "C" void __sycl_unregister_lib(sycl_device_binaries desc) { - (void)desc; - // TODO implement the function + // Partial cleanup is not necessary at shutdown + if (!sycl::detail::GlobalHandler::instance().isOkToDefer()) + return; + sycl::detail::ProgramManager::getInstance().removeImages(desc); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index ecb3359d93dcc..96f2ef63003dc 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -376,6 +376,7 @@ class ProgramManager { collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device &Dev); +protected: /// The three maps below are used during kernel resolution. Any kernel is /// identified by its name. using RTDeviceBinaryImageUPtr = std::unique_ptr; @@ -425,7 +426,8 @@ class ProgramManager { /// Keeps all device images we are refering to during program lifetime. Used /// for proper cleanup. - std::unordered_set m_DeviceImages; + std::unordered_map + m_DeviceImages; /// Maps names of built-in kernels to their unique kernel IDs. /// Access must be guarded by the m_BuiltInKernelIDsMutex mutex. @@ -452,7 +454,9 @@ class ProgramManager { // the underlying program disposed of), so the map can't be used in any way // other than binary image lookup with known live UrProgram as the key. // NOTE: access is synchronized via the MNativeProgramsMutex - std::unordered_multimap + std::unordered_multimap< + ur_program_handle_t, + std::pair, const RTDeviceBinaryImage *>> NativePrograms; /// Protects NativePrograms that can be changed by class' methods. diff --git a/sycl/test-e2e/SharedLib/use_with_dlopen.cpp b/sycl/test-e2e/SharedLib/use_with_dlopen.cpp index d042e8d711ebb..4d4910efa78e3 100644 --- a/sycl/test-e2e/SharedLib/use_with_dlopen.cpp +++ b/sycl/test-e2e/SharedLib/use_with_dlopen.cpp @@ -13,11 +13,8 @@ // RUN: %{compile} -o %t3.out -DRUN_MIDDLE_AFTER // RUN: %{run} %t3.out -// This causes SEG. FAULT. -// Enable the lines below when the issue is fixed: -// https://github.com/intel/llvm/issues/16031 -// %{compile} -o %t4.out -DRUN_LAST -// %{run} %t4.out +// RUN: %{compile} -o %t4.out -DRUN_LAST +// RUN: %{run} %t4.out #include diff --git a/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp b/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp index ef90a0a3c9651..bab8130bbab68 100644 --- a/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp +++ b/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp @@ -13,11 +13,8 @@ // RUN: %{compile} -o %t3.out -DRUN_MIDDLE_AFTER // RUN: env SYCL_UR_TRACE=2 %{run} %t3.out 2>&1 | FileCheck %s --check-prefixes=CHECK-MIDDLE-AFTER,CHECK --implicit-check-not=piProgramBuild -// clang-format off -// This causes SEG. FAULT. -// Enable the lines below when the issue is fixed - https://github.com/intel/llvm/issues/16031 -// %{compile} -DRUN_LAST -// env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-LAST,CHECK --implicit-check-not=piProgramBuild +// RUN: %{compile} -o %t4.out -DRUN_LAST +// RUN: env SYCL_UR_TRACE=2 %{run} %t4.out 2>&1 | FileCheck %s --check-prefixes=CHECK-LAST,CHECK --implicit-check-not=piProgramBuild // clang-format on #include diff --git a/sycl/unittests/helpers/MockDeviceImage.hpp b/sycl/unittests/helpers/MockDeviceImage.hpp index fea80d6b08c3e..9c8fdef642893 100644 --- a/sycl/unittests/helpers/MockDeviceImage.hpp +++ b/sycl/unittests/helpers/MockDeviceImage.hpp @@ -247,7 +247,24 @@ class MockDeviceImage { MDeviceTargetSpec(DeviceTargetSpec), MCompileOptions(CompileOptions), MLinkOptions(LinkOptions), MManifest(std::move(Manifest)), MBinary(std::move(Binary)), MOffloadEntries(std::move(OffloadEntries)), - MPropertySet(std::move(PropertySet)) {} + MPropertySet(std::move(PropertySet)) { + MNativeHandle = { + MVersion, + MKind, + MFormat, + MDeviceTargetSpec.c_str(), + MCompileOptions.c_str(), + MLinkOptions.c_str(), + MManifest.empty() ? nullptr : &*MManifest.cbegin(), + MManifest.empty() ? nullptr : &*MManifest.crbegin() + 1, + &*MBinary.begin(), + (&*MBinary.begin()) + MBinary.size(), + MOffloadEntries.begin(), + MOffloadEntries.end(), + MPropertySet.begin(), + MPropertySet.end(), + }; + } public: /// Constructs an arbitrary device image. @@ -299,24 +316,7 @@ class MockDeviceImage { MockDeviceImage(std::vector &&OffloadEntries) : MockDeviceImage(std::move(OffloadEntries), {}) {} - sycl_device_binary_struct convertToNativeType() { - return sycl_device_binary_struct{ - MVersion, - MKind, - MFormat, - MDeviceTargetSpec.c_str(), - MCompileOptions.c_str(), - MLinkOptions.c_str(), - MManifest.empty() ? nullptr : &*MManifest.cbegin(), - MManifest.empty() ? nullptr : &*MManifest.crbegin() + 1, - &*MBinary.begin(), - (&*MBinary.begin()) + MBinary.size(), - MOffloadEntries.begin(), - MOffloadEntries.end(), - MPropertySet.begin(), - MPropertySet.end(), - }; - } + sycl_device_binary_struct convertToNativeType() { return MNativeHandle; } const unsigned char *getBinaryPtr() { return &*MBinary.begin(); } private: @@ -330,6 +330,7 @@ class MockDeviceImage { std::vector MBinary; internal::LifetimeExtender MOffloadEntries; MockPropertySet MPropertySet; + sycl_device_binary_struct MNativeHandle; }; /// Convenience wrapper around sycl_device_binaries_struct, that manages mock diff --git a/sycl/unittests/program_manager/CMakeLists.txt b/sycl/unittests/program_manager/CMakeLists.txt index d919de3dd6748..2b9577b67d9a3 100644 --- a/sycl/unittests/program_manager/CMakeLists.txt +++ b/sycl/unittests/program_manager/CMakeLists.txt @@ -5,6 +5,7 @@ add_sycl_unittest(ProgramManagerTests OBJECT itt_annotations.cpp SubDevices.cpp passing_link_and_compile_options.cpp + Cleanup.cpp ) add_subdirectory(arg_mask) diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp new file mode 100644 index 0000000000000..6e8a9bd596eb8 --- /dev/null +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -0,0 +1,401 @@ +#include + +#include +#include +#include +#include +#include +#include + +#include + +class ProgramManagerExposed : public sycl::detail::ProgramManager { +public: + std::unordered_multimap & + getKernelID2BinImage() { + return m_KernelIDs2BinImage; + } + + std::unordered_map &getKernelName2KernelID() { + return m_KernelName2KernelIDs; + } + + std::unordered_map>> & + getBinImage2KernelId() { + return m_BinImg2KernelIDs; + } + + std::unordered_multimap & + getServiceKernels() { + return m_ServiceKernels; + } + + std::unordered_multimap & + getExportedSymbolImages() { + return m_ExportedSymbolImages; + } + + std::unordered_map> & + getDeviceImages() { + return m_DeviceImages; + } + + std::unordered_map> & + getVFSet2BinImage() { + return m_VFSet2BinImage; + } + + std::unordered_multimap< + ur_program_handle_t, + std::pair, + const sycl::detail::RTDeviceBinaryImage *>> & + getNativePrograms() { + return NativePrograms; + } + + std::unordered_map< + const sycl::detail::RTDeviceBinaryImage *, + std::unordered_map> & + getEliminatedKernelArgMask() { + return m_EliminatedKernelArgMasks; + } + + std::set &getKernelUsesAssert() { return m_KernelUsesAssert; } + + std::unordered_map &getKernelImplicitLocalArgPos() { + return m_KernelImplicitLocalArgPos; + } + + std::unordered_map> & + getHostPipes() { + return m_HostPipes; + } + + std::unordered_map & + getPtrToHostPipe() { + return m_Ptr2HostPipe; + } + + std::unordered_map> & + getDeviceGlobals() { + return m_DeviceGlobals; + } + + std::unordered_map & + getPtrToDeviceGlobal() { + return m_Ptr2DeviceGlobal; + } +}; + +namespace { +std::vector +createPropertySet(const std::vector &Symbols) { + std::vector Props; + for (const std::string &Symbol : Symbols) { + std::vector Storage(sizeof(uint32_t)); + uint32_t Val = 1; + auto *DataPtr = reinterpret_cast(&Val); + std::uninitialized_copy(DataPtr, DataPtr + sizeof(uint32_t), + Storage.data()); + + sycl::unittest::MockProperty Prop(Symbol, Storage, + SYCL_PROPERTY_TYPE_UINT32); + + Props.push_back(Prop); + } + return Props; +} + +std::vector +createVFPropertySet(const std::string &VFSets) { + std::vector Props; + uint64_t PropSize = VFSets.size(); + std::vector Storage(/* bytes for size */ 8 + PropSize + + /* null terminator */ 1); + auto *SizePtr = reinterpret_cast(&PropSize); + std::uninitialized_copy(SizePtr, SizePtr + sizeof(uint64_t), Storage.data()); + std::uninitialized_copy(VFSets.data(), VFSets.data() + PropSize, + Storage.data() + /* bytes for size */ 8); + Storage.back() = '\0'; + const std::string PropName = "uses-virtual-functions-set"; + sycl::unittest::MockProperty Prop(PropName, Storage, + SYCL_PROPERTY_TYPE_BYTE_ARRAY); + + Props.push_back(Prop); + return Props; +} + +std::string generateRefName(const std::string &ImageId, + const std::string &FeatureName) { + return FeatureName + "_" + ImageId; +} + +sycl::ext::oneapi::experimental::device_global DeviceGlobalA; +sycl::ext::oneapi::experimental::device_global DeviceGlobalB; +sycl::ext::oneapi::experimental::device_global DeviceGlobalC; + +class PipeIDA; +class PipeIDB; +class PipeIDC; +using PipeA = sycl::ext::intel::experimental::pipe; +using PipeB = sycl::ext::intel::experimental::pipe; +using PipeC = sycl::ext::intel::experimental::pipe; + +sycl::unittest::MockDeviceImage generateImage(const std::string &ImageId) { + sycl::unittest::MockPropertySet PropSet; + + std::initializer_list KernelNames{ + generateRefName(ImageId, "Kernel"), + generateRefName(ImageId, "__sycl_service_kernel__")}; + const std::vector ExportedSymbols{ + generateRefName(ImageId, "Exported")}; + const std::vector ImportedSymbols{ + generateRefName(ImageId, "Imported")}; + const std::vector ImplicitLocalArg{KernelNames.begin()[0]}; + const std::string &VirtualFunctions{generateRefName(ImageId, "VF")}; + std::vector KernelEAM{0b0000001}; + sycl::unittest::MockProperty EAMKernelPOI = + sycl::unittest::makeKernelParamOptInfo(KernelNames.begin()[0], 1, + KernelEAM); + std::vector ImgKPOI{std::move(EAMKernelPOI)}; + + PropSet.insert(__SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS, + createPropertySet(ExportedSymbols)); + + PropSet.insert(__SYCL_PROPERTY_SET_SYCL_IMPORTED_SYMBOLS, + createPropertySet(ImportedSymbols)); + + PropSet.insert(__SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS, + createVFPropertySet(VirtualFunctions)); + setKernelUsesAssert(std::vector{KernelNames.begin()[0]}, + PropSet); + + PropSet.insert(__SYCL_PROPERTY_SET_SYCL_IMPLICIT_LOCAL_ARG, + createPropertySet(ImplicitLocalArg)); + PropSet.insert(__SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO, std::move(ImgKPOI)); + + PropSet.insert( + __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS, + std::vector{ + sycl::unittest::makeDeviceGlobalInfo( + generateRefName(ImageId, "DeviceGlobal"), sizeof(int), 0)}); + + PropSet.insert(__SYCL_PROPERTY_SET_SYCL_HOST_PIPES, + std::vector{ + sycl::unittest::makeHostPipeInfo( + generateRefName(ImageId, "HostPipe"), sizeof(int))}); + std::vector Bin{0}; + + std::vector Entries = + sycl::unittest::makeEmptyKernels(KernelNames); + + sycl::unittest::MockDeviceImage Img{SYCL_DEVICE_BINARY_TYPE_NATIVE, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN, + "", // Compile options + "", // Link options + std::move(Bin), + std::move(Entries), + std::move(PropSet)}; + + return Img; +} + +sycl::unittest::MockDeviceImage +generateImageKernelOnly(const std::string &ImageId) { + sycl::unittest::MockPropertySet PropSet; + + std::initializer_list KernelNames{ + generateRefName(ImageId, "Kernel")}; + std::vector Bin{0}; + + std::vector Entries = + sycl::unittest::makeEmptyKernels(KernelNames); + + sycl::unittest::MockDeviceImage Img{SYCL_DEVICE_BINARY_TYPE_NATIVE, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN, + "", // Compile options + "", // Link options + std::move(Bin), + std::move(Entries), + std::move(PropSet)}; + + return Img; +} + +static std::array ImagesToKeep = { + generateImage("A"), generateImage("B")}; +static std::array ImagesToRemove = { + generateImage("C")}; + +static std::array ImagesToKeepKernelOnly = { + generateImageKernelOnly("A"), generateImageKernelOnly("B")}; +static std::array ImagesToRemoveKernelOnly = + {generateImageKernelOnly("C")}; + +template +void convertAndAddImages( + ProgramManagerExposed &PM, + std::array Images, + sycl_device_binary_struct *NativeImages, + sycl_device_binaries_struct &AllBinaries) { + constexpr auto ImageSize = Images.size(); + for (size_t Idx = 0; Idx < ImageSize; ++Idx) + NativeImages[Idx] = Images[Idx].convertToNativeType(); + + AllBinaries = sycl_device_binaries_struct{ + SYCL_DEVICE_BINARIES_VERSION, ImageSize, NativeImages, nullptr, nullptr, + }; + + PM.addImages(&AllBinaries); +} + +void checkAllInvolvedContainers(ProgramManagerExposed &PM, size_t ExpectedCount, + const std::string &Comment) { + EXPECT_EQ(PM.getKernelID2BinImage().size(), ExpectedCount) << Comment; + { + EXPECT_EQ(PM.getKernelName2KernelID().size(), ExpectedCount) << Comment; + EXPECT_TRUE( + PM.getKernelName2KernelID().count(generateRefName("A", "Kernel")) > 0) + << Comment; + EXPECT_TRUE( + PM.getKernelName2KernelID().count(generateRefName("B", "Kernel")) > 0) + << Comment; + } + EXPECT_EQ(PM.getBinImage2KernelId().size(), ExpectedCount) << Comment; + { + EXPECT_EQ(PM.getServiceKernels().size(), ExpectedCount) << Comment; + EXPECT_TRUE(PM.getServiceKernels().count( + generateRefName("A", "__sycl_service_kernel__")) > 0) + << Comment; + EXPECT_TRUE(PM.getServiceKernels().count( + generateRefName("B", "__sycl_service_kernel__")) > 0) + << Comment; + } + { + EXPECT_EQ(PM.getExportedSymbolImages().size(), ExpectedCount) << Comment; + EXPECT_TRUE(PM.getExportedSymbolImages().count( + generateRefName("A", "Exported")) > 0) + << Comment; + EXPECT_TRUE(PM.getExportedSymbolImages().count( + generateRefName("B", "Exported")) > 0) + << Comment; + } + EXPECT_EQ(PM.getDeviceImages().size(), ExpectedCount) << Comment; + { + EXPECT_EQ(PM.getVFSet2BinImage().size(), ExpectedCount) << Comment; + EXPECT_TRUE(PM.getVFSet2BinImage().count(generateRefName("A", "VF")) > 0) + << Comment; + EXPECT_TRUE(PM.getVFSet2BinImage().count(generateRefName("B", "VF")) > 0) + << Comment; + } + + EXPECT_EQ(PM.getEliminatedKernelArgMask().size(), ExpectedCount) << Comment; + { + EXPECT_EQ(PM.getKernelUsesAssert().size(), ExpectedCount) << Comment; + EXPECT_TRUE(PM.getKernelUsesAssert().count(generateRefName("A", "Kernel")) > + 0) + << Comment; + EXPECT_TRUE(PM.getKernelUsesAssert().count(generateRefName("B", "Kernel")) > + 0) + << Comment; + } + EXPECT_EQ(PM.getKernelImplicitLocalArgPos().size(), ExpectedCount) << Comment; + + { + EXPECT_EQ(PM.getDeviceGlobals().size(), ExpectedCount) << Comment; + EXPECT_TRUE( + PM.getDeviceGlobals().count(generateRefName("A", "DeviceGlobal")) > 0) + << Comment; + EXPECT_TRUE( + PM.getDeviceGlobals().count(generateRefName("B", "DeviceGlobal")) > 0) + << Comment; + } + EXPECT_EQ(PM.getPtrToDeviceGlobal().size(), ExpectedCount) << Comment; + + { + EXPECT_EQ(PM.getHostPipes().size(), ExpectedCount) << Comment; + EXPECT_TRUE(PM.getHostPipes().count(generateRefName("A", "HostPipe")) > 0) + << Comment; + EXPECT_TRUE(PM.getHostPipes().count(generateRefName("B", "HostPipe")) > 0) + << Comment; + } + EXPECT_EQ(PM.getPtrToHostPipe().size(), ExpectedCount) << Comment; +} + +TEST(ImageRemoval, BaseContainers) { + ProgramManagerExposed PM; + + sycl_device_binary_struct NativeImages[ImagesToKeep.size()]; + sycl_device_binaries_struct AllBinaries; + convertAndAddImages(PM, ImagesToKeep, NativeImages, AllBinaries); + + sycl_device_binary_struct NativeImagesForRemoval[ImagesToRemove.size()]; + sycl_device_binaries_struct TestBinaries; + convertAndAddImages(PM, ImagesToRemove, NativeImagesForRemoval, TestBinaries); + + PM.addOrInitDeviceGlobalEntry(&DeviceGlobalA, + generateRefName("A", "DeviceGlobal").c_str()); + PM.addOrInitDeviceGlobalEntry(&DeviceGlobalB, + generateRefName("B", "DeviceGlobal").c_str()); + PM.addOrInitDeviceGlobalEntry(&DeviceGlobalC, + generateRefName("C", "DeviceGlobal").c_str()); + PM.addOrInitHostPipeEntry(PipeA::get_host_ptr(), + generateRefName("A", "HostPipe").c_str()); + PM.addOrInitHostPipeEntry(PipeB::get_host_ptr(), + generateRefName("B", "HostPipe").c_str()); + PM.addOrInitHostPipeEntry(PipeC::get_host_ptr(), + generateRefName("C", "HostPipe").c_str()); + + checkAllInvolvedContainers(PM, ImagesToRemove.size() + ImagesToKeep.size(), + "Check failed before removal"); + + PM.removeImages(&TestBinaries); + + checkAllInvolvedContainers(PM, ImagesToKeep.size(), + "Check failed after removal"); +} + +TEST(ImageRemoval, NativePrograms) { + ProgramManagerExposed PM; + + sycl_device_binary_struct NativeImages[ImagesToKeepKernelOnly.size()]; + sycl_device_binaries_struct AllBinaries; + convertAndAddImages(PM, ImagesToKeepKernelOnly, NativeImages, AllBinaries); + + sycl_device_binary_struct + NativeImagesForRemoval[ImagesToRemoveKernelOnly.size()]; + sycl_device_binaries_struct TestBinaries; + convertAndAddImages(PM, ImagesToRemoveKernelOnly, NativeImagesForRemoval, + TestBinaries); + + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + const sycl::device Dev = Plt.get_devices()[0]; + sycl::queue Queue{Dev}; + auto Ctx = Queue.get_context(); + auto ProgramA = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), + sycl::detail::getSyclObjImpl(Dev), + generateRefName("A", "Kernel")); + auto ProgramB = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), + sycl::detail::getSyclObjImpl(Dev), + generateRefName("B", "Kernel")); + std::ignore = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), + sycl::detail::getSyclObjImpl(Dev), + generateRefName("C", "Kernel")); + + EXPECT_EQ(PM.getNativePrograms().size(), + ImagesToRemoveKernelOnly.size() + ImagesToKeepKernelOnly.size()); + + PM.removeImages(&TestBinaries); + + EXPECT_EQ(PM.getNativePrograms().size(), ImagesToKeepKernelOnly.size()); + EXPECT_TRUE(PM.getNativePrograms().count(ProgramA) > 0); + EXPECT_TRUE(PM.getNativePrograms().count(ProgramB) > 0); +} +} // anonymous namespace diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 74e5dc5fecf42..80c7ddf07c1b2 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -250,8 +250,10 @@ inline ur_result_t customProgramRetain(void *pParams) { class ProgramManagerTest { public: - static std::unordered_multimap & + static std::unordered_multimap< + ur_program_handle_t, + std::pair, + const sycl::detail::RTDeviceBinaryImage *>> & getNativePrograms() { return sycl::detail::ProgramManager::getInstance().NativePrograms; }