diff --git a/.github/workflows/sycl-post-commit.yml b/.github/workflows/sycl-post-commit.yml index 200a6a7e2129d..8ec35ab596538 100644 --- a/.github/workflows/sycl-post-commit.yml +++ b/.github/workflows/sycl-post-commit.yml @@ -52,6 +52,11 @@ jobs: runner: '["Linux", "arc"]' extra_lit_opts: --param matrix-xmx8=True reset_intel_gpu: true + - name: Intel Battlemage Graphics with Level Zero + runner: '["Linux", "bmg"]' + target_devices: level_zero:gpu + # The new Xe kernel driver used by BMG doesn't support resetting. + reset_intel_gpu: false - name: AMD/HIP runner: '["Linux", "amdgpu"]' image_options: -u 1001 --device=/dev/dri --device=/dev/kfd diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 735ecb55fc80c..5fe6f551e02f7 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -2362,14 +2362,6 @@ Expected> linkAndWrapDeviceFiles( HasNonSYCLOffloadKinds = true; } - // Write any remaining device inputs to an output file. - SmallVector InputFiles; - for (const OffloadFile &File : Input) { - auto FileNameOrErr = writeOffloadFile(File); - if (!FileNameOrErr) - return FileNameOrErr.takeError(); - InputFiles.emplace_back(*FileNameOrErr); - } if (HasSYCLOffloadKind) { SmallVector InputFiles; // Write device inputs to an output file for the linker. diff --git a/devops/dependencies-igc-dev.json b/devops/dependencies-igc-dev.json index 37b9f160ec3f2..7160a02db6f71 100644 --- a/devops/dependencies-igc-dev.json +++ b/devops/dependencies-igc-dev.json @@ -1,10 +1,10 @@ { "linux": { "igc_dev": { - "github_tag": "igc-dev-61b96b3", - "version": "61b96b3", - "updated_at": "2025-01-15T17:43:30Z", - "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2435370337/zip", + "github_tag": "igc-dev-4cc8dff", + "version": "4cc8dff", + "updated_at": "2025-02-10T10:27:30Z", + "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2564401848/zip", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" } } diff --git a/devops/scripts/install_build_tools.sh b/devops/scripts/install_build_tools.sh index a878f2807fd0e..6c47fce4bdd06 100755 --- a/devops/scripts/install_build_tools.sh +++ b/devops/scripts/install_build_tools.sh @@ -10,7 +10,6 @@ apt update && apt install -yqq \ python3-psutil \ python-is-python3 \ python3-pip \ - zstd \ ocl-icd-opencl-dev \ vim \ libffi-dev \ @@ -21,6 +20,7 @@ apt update && apt install -yqq \ zstd \ zip \ unzip \ + pigz \ jq \ curl \ libhwloc-dev \ diff --git a/devops/scripts/update_drivers.py b/devops/scripts/update_drivers.py index c96d7dffed2d8..41ea70aeaf089 100644 --- a/devops/scripts/update_drivers.py +++ b/devops/scripts/update_drivers.py @@ -38,7 +38,7 @@ def uplift_linux_igfx_driver(config, platform_tag, igc_dev_only): config[platform_tag]["igc_dev"]["version"] = igcdevver config[platform_tag]["igc_dev"]["updated_at"] = igc_dev["updated_at"] config[platform_tag]["igc_dev"]["url"] = get_artifacts_download_url( - "intel/intel-graphics-compiler", "IGC_Ubuntu22.04_llvm14_clang-" + igcdevver + "intel/intel-graphics-compiler", "IGC_Ubuntu24.04_llvm14_clang-" + igcdevver ) return config diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp index e628ebb1ae260..cdb8b04b11da6 100644 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ b/sycl/include/sycl/detail/vector_arith.hpp @@ -59,11 +59,39 @@ struct UnaryPlus { } }; -struct VecOperators { +// Tag to map/templatize the mixin for prefix/postfix inc/dec operators. +struct IncDec {}; + +template struct IncDecImpl { + using element_type = typename from_incomplete::element_type; + using vec_t = simplify_if_swizzle_t>; + +public: + friend SelfOperandTy &operator++(SelfOperandTy &x) { + x += element_type{1}; + return x; + } + friend SelfOperandTy &operator--(SelfOperandTy &x) { + x -= element_type{1}; + return x; + } + friend auto operator++(SelfOperandTy &x, int) { + vec_t tmp{x}; + x += element_type{1}; + return tmp; + } + friend auto operator--(SelfOperandTy &x, int) { + vec_t tmp{x}; + x -= element_type{1}; + return tmp; + } +}; + +template struct VecOperators { + static_assert(is_vec_v); + template static constexpr auto apply(const ArgTys &...Args) { - using Self = nth_type_t<0, ArgTys...>; - static_assert(is_vec_v); static_assert(((std::is_same_v && ...))); using element_type = typename Self::element_type; @@ -163,6 +191,41 @@ struct VecOperators { res[i] = Op(Args[i]...); return res; } + + // Uglier than possible due to + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282. + template struct OpMixin; + + template + struct OpMixin>> + : public IncDecImpl {}; + +#define __SYCL_VEC_UOP_MIXIN(OP, OPERATOR) \ + template \ + struct OpMixin>> { \ + friend auto operator OPERATOR(const Self &v) { return apply(v); } \ + }; + + __SYCL_VEC_UOP_MIXIN(std::negate, -) + __SYCL_VEC_UOP_MIXIN(std::logical_not, !) + __SYCL_VEC_UOP_MIXIN(UnaryPlus, +) + + template + struct OpMixin>>> { + template ::element_type> + friend std::enable_if_t, Self> operator~(const Self &v) { + return apply>(v); + } + }; + +#undef __SYCL_VEC_UOP_MIXIN + + template + struct __SYCL_EBO CombineImpl : public OpMixin... {}; + + struct Combined + : public CombineImpl, std::logical_not, + std::bit_not, UnaryPlus, IncDec> {}; }; // Macros to populate binary operation on sycl::vec. @@ -174,7 +237,7 @@ struct VecOperators { template \ friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ const vec_t & Rhs) { \ - return VecOperators::apply(Lhs, Rhs); \ + return VecOperators::template apply(Lhs, Rhs); \ } \ \ template \ @@ -200,65 +263,11 @@ struct VecOperators { return Lhs; \ } -/**************************************************************** - * vec_arith_common - * / | \ - * / | \ - * vec_arith vec_arith ... vec_arith - * \ | / - * \ | / - * sycl::vec - * - * vec_arith_common is the base class for vec_arith. It contains - * the common math operators of sycl::vec for all types. - * vec_arith is the derived class that contains the math operators - * specialized for certain types. sycl::vec inherits from vec_arith. - * *************************************************************/ -template class vec_arith_common; -template struct vec_helper; - template -class vec_arith : public vec_arith_common { +class vec_arith : public VecOperators>::Combined { protected: using vec_t = vec; using ocl_t = detail::fixed_width_signed; - template using vec_data = vec_helper; - - // operator!. - friend vec operator!(const vec_t &Rhs) { - return VecOperators::apply>(Rhs); - } - - // operator +. - friend vec_t operator+(const vec_t &Lhs) { - return VecOperators::apply(Lhs); - } - - // operator -. - friend vec_t operator-(const vec_t &Lhs) { - return VecOperators::apply>(Lhs); - } - -// Unary operations on sycl::vec -// FIXME: Don't allow Unary operators on vec after -// https://github.com/KhronosGroup/SYCL-CTS/issues/896 gets fixed. -#ifdef __SYCL_UOP -#error "Undefine __SYCL_UOP macro" -#endif -#define __SYCL_UOP(UOP, OPASSIGN) \ - friend vec_t &operator UOP(vec_t & Rhs) { \ - Rhs OPASSIGN DataT{1}; \ - return Rhs; \ - } \ - friend vec_t operator UOP(vec_t &Lhs, int) { \ - vec_t Ret(Lhs); \ - Lhs OPASSIGN DataT{1}; \ - return Ret; \ - } - - __SYCL_UOP(++, +=) - __SYCL_UOP(--, -=) -#undef __SYCL_UOP // The logical operations on scalar types results in 0/1, while for vec<>, // logical operations should result in 0 and -1 (similar to OpenCL vectors). @@ -272,7 +281,7 @@ class vec_arith : public vec_arith_common { template \ friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ const vec_t & Lhs, const vec_t & Rhs) { \ - return VecOperators::apply(Lhs, Rhs); \ + return VecOperators::template apply(Lhs, Rhs); \ } \ \ template \ @@ -325,13 +334,13 @@ class vec_arith : public vec_arith_common { #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) template class vec_arith - : public vec_arith_common { + : public VecOperators>::template OpMixin< + std::bit_not> { protected: // NumElements can never be zero. Still using the redundant check to avoid // incomplete type errors. using DataT = typename std::conditional_t; using vec_t = vec; - template using vec_data = vec_helper; // Special <<, >> operators for std::byte. // std::byte is not an arithmetic type and it only supports the following @@ -376,25 +385,6 @@ class vec_arith }; #endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) -template class vec_arith_common { -protected: - using vec_t = vec; - - static constexpr bool IsBfloat16 = - std::is_same_v; - - // operator~() available only when: dataT != float && dataT != double - // && dataT != half - template - friend std::enable_if_t, vec_t> - operator~(const vec_t &Rhs) { - return VecOperators::apply>(Rhs); - } - - // friends - template friend class __SYCL_EBO vec; -}; - #undef __SYCL_BINOP } // namespace detail diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index 67d00bd9ea7de..1b626e8fc341e 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -435,7 +435,6 @@ class __SYCL_EBO vec template friend class __SYCL_EBO vec; // To allow arithmetic operators access private members of vec. template friend class detail::vec_arith; - template friend class detail::vec_arith_common; }; ///////////////////////// class sycl::vec ///////////////////////// diff --git a/sycl/test-e2e/AddressSanitizer/lit.local.cfg b/sycl/test-e2e/AddressSanitizer/lit.local.cfg index dd59d9aec6b5c..29bd23b43efe6 100644 --- a/sycl/test-e2e/AddressSanitizer/lit.local.cfg +++ b/sycl/test-e2e/AddressSanitizer/lit.local.cfg @@ -19,3 +19,6 @@ config.unsupported_features += ['gpu-intel-gen12'] # CMPLRLLVM-64052 config.unsupported_features += ['spirv-backend'] + +# https://github.com/intel/llvm/issues/16920 +config.unsupported_features += ['arch-intel_gpu_bmg_g21'] diff --git a/sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp b/sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp index 2697855754f74..bac155b16b3c4 100644 --- a/sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp +++ b/sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -O3 -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} +// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} // RUN: %{run} %t.out // NOTE: Tests fetch_add for acquire and release memory ordering. diff --git a/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp b/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp index c29f3a3005a0b..24bb5ccb19214 100644 --- a/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp +++ b/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp @@ -1,5 +1,7 @@ -// RUN: %{build} -O3 -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} +// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} // RUN: %{run} %t.out +// UNSUPPORTED: arch-intel_gpu_bmg_g21 +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16924 #include "atomic_memory_order.h" #include diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp index 08f0d943a2dd0..d157c68dcaca2 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp @@ -5,11 +5,11 @@ // + below sm_80 always uses generic impls // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} -// RUN: %clangxx -fsycl %{sycl_target_opts} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags} +// RUN: %clangxx -fsycl %{sycl_target_opts} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags} // RUN: %{run} %t.out // Test "new" (ABI breaking) for all platforms ( sm_80/native if CUDA ) -// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %} +// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} // Flaky timeout on CPU. Enable when fixed. diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp index 719bf4709ae4c..f233d198535ca 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp @@ -7,8 +7,8 @@ // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} // If CUDA, test "new" again for sm_75/generic -// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %} -// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %} +// RUN: %if target-nvidia %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %} +// RUN: %if target-nvidia %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %} #include "bfloat16_builtins.hpp" diff --git a/sycl/test-e2e/BFloat16/bfloat16_type.cpp b/sycl/test-e2e/BFloat16/bfloat16_type.cpp index 7e7972f949522..20ac1d311ed9c 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_type.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_type.cpp @@ -1,4 +1,4 @@ -// RUN: %if any-device-is-cuda %{ %{build} -DUSE_CUDA_SM80=1 -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 -o %t.cuda.out %} +// RUN: %if target-nvidia %{ %{build} -DUSE_CUDA_SM80=1 -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 -o %t.cuda.out %} // RUN: %if cuda %{ %{run} %t.cuda.out %} // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp index 4932284647b0d..3865b6b3ed0cd 100644 --- a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp +++ b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp @@ -1,4 +1,4 @@ -// REQUIRES: accelerator +// REQUIRES: target-spir, accelerator // https://github.com/intel/llvm/issues/14308 // Temporarily re-enabled for testing purposes. // If you are facing issues with this test, please leave a comment in: diff --git a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp index 2ed8d6265aa5f..85d19c3707662 100644 --- a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp +++ b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp @@ -1,4 +1,4 @@ -// REQUIRES: accelerator +// REQUIRES: target-spir, accelerator // RUN: %{build} -o %t.out // RUN: %{run} %t.out //==-------- fpga_pipes_legacy_ns.cpp - SYCL FPGA pipes test ---------------==// diff --git a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp index 060de2e07adb1..6404b6f092e09 100644 --- a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp +++ b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: accelerator +// REQUIRES: target-spir, accelerator // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Basic/interop/interop_all_backends.cpp b/sycl/test-e2e/Basic/interop/interop_all_backends.cpp index 902ab1f1d09b9..c4dec5e6f670c 100644 --- a/sycl/test-e2e/Basic/interop/interop_all_backends.cpp +++ b/sycl/test-e2e/Basic/interop/interop_all_backends.cpp @@ -1,8 +1,8 @@ -// XFAIL: any-device-is-cuda +// XFAIL: target-nvidia // XFAIL-TRACKER: https://github.com/intel/llvm/issues/16070 -// RUN: %if any-device-is-opencl %{ %{build} -o %t-opencl.out %} -// RUN: %if any-device-is-cuda %{ %{build} -isystem %sycl_include -DBUILD_FOR_CUDA -o %t-cuda.out %} -// RUN: %if any-device-is-hip %{ %{build} -DBUILD_FOR_HIP -o %t-hip.out %} +// RUN: %if target-spir %{ %{build} -o %t-opencl.out %} +// RUN: %if target-nvidia %{ %{build} -isystem %sycl_include -DBUILD_FOR_CUDA -o %t-cuda.out %} +// RUN: %if target-amd %{ %{build} -DBUILD_FOR_HIP -o %t-hip.out %} #include #include diff --git a/sycl/test-e2e/Basic/multisource_spv_obj.cpp b/sycl/test-e2e/Basic/multisource_spv_obj.cpp index 415c07e67f013..29d7f4559e2c5 100644 --- a/sycl/test-e2e/Basic/multisource_spv_obj.cpp +++ b/sycl/test-e2e/Basic/multisource_spv_obj.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // // Separate kernel sources and host code sources // RUN: %{build} -fsycl-device-obj=spirv -c -o %t.kernel.o -DINIT_KERNEL -DCALC_KERNEL diff --git a/sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp b/sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp index d61f1f69234a3..340d48f34fb32 100644 --- a/sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp +++ b/sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp @@ -1,4 +1,4 @@ -// REQUIRES: accelerator +// REQUIRES: target-spir, accelerator // RUN: %{build} -o %t.out // RUN: %{run} %t.out | FileCheck %s diff --git a/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp b/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp index e639260be5714..54c392410175f 100644 --- a/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp +++ b/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp @@ -1,6 +1,6 @@ // Tests whether or not cluster launch was successful, with the correct ranges // that were passed via enqueue functions extension -// REQUIRES: aspect-ext_oneapi_cuda_cluster_group +// REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group // RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp b/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp index 01db70b11464a..5654af717b461 100644 --- a/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp +++ b/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp @@ -1,6 +1,6 @@ // Tests whether or not cluster launch was successful, with the correct ranges // that were passed via parallel for overload -// REQUIRES: aspect-ext_oneapi_cuda_cluster_group +// REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group // RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp index 0460defa72104..e15d361a5ca3e 100644 --- a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp +++ b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp @@ -1,6 +1,6 @@ // Checks whether or not event Dependencies are honored by // urEnqueueKernelLaunchCustomExp -// REQUIRES: aspect-ext_oneapi_cuda_cluster_group +// REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group // RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Config/config.cpp b/sycl/test-e2e/Config/config.cpp index 5f82d3fa28111..d4741661e8e5a 100644 --- a/sycl/test-e2e/Config/config.cpp +++ b/sycl/test-e2e/Config/config.cpp @@ -8,13 +8,13 @@ // RUN: %{build} %debug_option %O0 -o %t.out // RUN: echo SYCL_PRINT_EXECUTION_GRAPH=always > %t.cfg // RUN: %{run-unfiltered-devices} env SYCL_CONFIG_FILE_NAME=%t.cfg %t.out -// RUN: %if run-mode %{cat *.dot > /dev/null%} -// RUN: %if run-mode %{rm *.dot%} +// RUN: %{run-aux} cat *.dot > /dev/null +// RUN: %{run-aux} rm *.dot // RUN: %{run-unfiltered-devices} env SYCL_PRINT_EXECUTION_GRAPH=always %t.out -// RUN: %if run-mode %{cat *.dot > /dev/null%} -// RUN: %if run-mode %{rm *.dot%} +// RUN: %{run-aux} cat *.dot > /dev/null +// RUN: %{run-aux} rm *.dot // RUN: %{run-unfiltered-devices} %t.out -// RUN: %if run-mode %{not cat *.dot > /dev/null%} +// RUN: %{run-aux} not cat *.dot > /dev/null #include diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp index b7c93167b7fb7..1125deb7b8a19 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp @@ -1,6 +1,6 @@ // Test -fsycl-allow-device-image-dependencies with dynamic libraries. -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. // DEFINE: %{dynamic_lib_options} = -fsycl %fPIC %shared_lib -fsycl-allow-device-image-dependencies -I %S/Inputs %if windows %{-DMAKE_DLL %} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp index fda36e3af7b6b..a0aabc1cfcc8b 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp @@ -5,10 +5,10 @@ // RUN: %{run} %t.out // The name mangling for free function kernels currently does not work with PTX. -// UNSUPPORTED: cuda +// UNSUPPORTED: target-nvidia // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. -// XFAIL: hip +// XFAIL: target-amd // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15742 // XFAIL: spirv-backend && run-mode diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp index e188871acc7cd..55b536babdce0 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp @@ -1,5 +1,5 @@ // REQUIRES: aspect-fp64 -// UNSUPPORTED: hip || cuda +// UNSUPPORTED: target-amd || target-nvidia // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp index ea1875dafe52b..b83b73736c496 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp @@ -1,6 +1,6 @@ // Test -fsycl-allow-device-image-dependencies with objects. -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. // RUN: %clangxx --offload-new-driver -fsycl %S/Inputs/a.cpp -I %S/Inputs -c -o %t_a.o diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp index e8de04cf82eb5..38f14f01a41a2 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp @@ -1,7 +1,7 @@ // Test -fsycl-allow-device-image-dependencies with a single dynamic library on // Windows and Linux. -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. // RUN: %clangxx --offload-new-driver -fsycl %fPIC %shared_lib -fsycl-allow-device-image-dependencies -I %S/Inputs \ diff --git a/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp b/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp index 1bdaf3b1d6270..05d176ced16c8 100644 --- a/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp @@ -1,6 +1,6 @@ // Test -fsycl-allow-device-image-dependencies with dynamic libraries. -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // DEFINE: %{dynamic_lib_options} = -fsycl %fPIC %shared_lib -fsycl-allow-device-image-dependencies -I %S/Inputs %if windows %{-DMAKE_DLL %} // DEFINE: %{dynamic_lib_suffix} = %if windows %{dll%} %else %{so%} diff --git a/sycl/test-e2e/DeviceImageDependencies/math_device_lib.cpp b/sycl/test-e2e/DeviceImageDependencies/math_device_lib.cpp index 440935e31e3eb..a8d32703bd16a 100644 --- a/sycl/test-e2e/DeviceImageDependencies/math_device_lib.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/math_device_lib.cpp @@ -1,5 +1,5 @@ // REQUIRES: aspect-fp64 -// UNSUPPORTED: hip || cuda +// UNSUPPORTED: target-amd || target-nvidia // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} diff --git a/sycl/test-e2e/DeviceImageDependencies/objects.cpp b/sycl/test-e2e/DeviceImageDependencies/objects.cpp index eea085dc9b905..8d2daf9228c2d 100644 --- a/sycl/test-e2e/DeviceImageDependencies/objects.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/objects.cpp @@ -1,6 +1,6 @@ // Test -fsycl-allow-device-image-dependencies with objects. -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // RUN: %clangxx -fsycl %S/Inputs/a.cpp -I %S/Inputs -c -o %t_a.o // RUN: %clangxx -fsycl %S/Inputs/b.cpp -I %S/Inputs -c -o %t_b.o diff --git a/sycl/test-e2e/DeviceImageDependencies/singleDynamicLibrary.cpp b/sycl/test-e2e/DeviceImageDependencies/singleDynamicLibrary.cpp index f0a65b6f1f056..9d72eab7ee8e3 100644 --- a/sycl/test-e2e/DeviceImageDependencies/singleDynamicLibrary.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/singleDynamicLibrary.cpp @@ -1,7 +1,7 @@ // Test -fsycl-allow-device-image-dependencies with a single dynamic library on Windows // and Linux. -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // RUN: %clangxx -fsycl %fPIC %shared_lib -fsycl-allow-device-image-dependencies -I %S/Inputs \ // RUN: %S/Inputs/a.cpp \ diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 97a92bc2b3885..86fdb3e7c87ab 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -7,7 +7,7 @@ // RUN: %if !gpu %{ %{run} %t2.out %} // // // Check that --fast-math works with cmath funcs for CUDA -// RUN: %if any-device-is-cuda %{ %{build} -Wno-nan-infinity-disabled -fno-builtin %{mathflags} -o %t3.out -ffast-math -DSYCL_E2E_FASTMATH %} +// RUN: %if target-nvidia %{ %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s -Wno-nan-infinity-disabled -fno-builtin %{mathflags} -o %t3.out -ffast-math -DSYCL_E2E_FASTMATH %} // RUN: %if cuda %{ %{run} %t3.out %} #include "math_utils.hpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortK.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortK.cpp index 91d3449d1e74f..4e6f7944ad88e 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortK.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortK.cpp @@ -10,7 +10,7 @@ // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir 2914 ZTSZZN11BitonicSort5SolveEPjS0_jENKUlRN4sycl3_V17handlerEE0_clES4_E5Merge.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.30872"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 2914 ZTSZZN11BitonicSort5SolveEPjS0_jENKUlRN4sycl3_V17handlerEE0_clES4_E5Merge.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.30872" #include "../BitonicSortK.cpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortKv2.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortKv2.cpp index cbb609681a85a..8fb30097d50df 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortKv2.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortKv2.cpp @@ -10,7 +10,7 @@ // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir 2969 ZTSZZN11BitonicSort5SolveEPjS0_jENKUlRN4sycl3_V17handlerEE0_clES4_E5Merge.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.30872"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 2969 ZTSZZN11BitonicSort5SolveEPjS0_jENKUlRN4sycl3_V17handlerEE0_clES4_E5Merge.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.30872" #include "../BitonicSortKv2.cpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp index 42d906545207c..ee49ef1039c14 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp @@ -10,7 +10,7 @@ // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir 1699 ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E14Stencil_kernel.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.29138"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 1699 ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E14Stencil_kernel.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.29138" #include "../Stencil.cpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/invoke_simd_smoke.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/invoke_simd_smoke.cpp index 8c148091356fa..e9fed184270ed 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/invoke_simd_smoke.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/invoke_simd_smoke.cpp @@ -12,7 +12,7 @@ // RUN: mkdir -p %t.dir && %{build} -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.dir/exec.out // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir 149 _simd16_entry_0001.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.29735"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 149 _simd16_entry_0001.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.29735" #include "../../InvokeSimd/invoke_simd_smoke.cpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/matrix_transpose.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/matrix_transpose.cpp index 3b5db73fb611c..9a09df6d1d0df 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/matrix_transpose.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/matrix_transpose.cpp @@ -10,7 +10,7 @@ // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir %if igc-dev %{ 1059 %} %else %{ 1116 %} ZTSZZ7runTestjjjRdS_ENKUlRN4sycl3_V17handlerEE_clES3_E3K16.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.30872"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir %if igc-dev %{ 1059 %} %else %{ 1116 %} ZTSZZ7runTestjjjRdS_ENKUlRN4sycl3_V17handlerEE_clES3_E3K16.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.30872" #include "../matrix_transpose.cpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp index e306889c9c4b6..2af6f830b8f3f 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp @@ -10,7 +10,7 @@ // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir 1699 ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E14Stencil_kernel.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.29138"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 1699 ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E14Stencil_kernel.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.29138" #include "../stencil2.cpp" diff --git a/sycl/test-e2e/ESIMD/accessor_local.cpp b/sycl/test-e2e/ESIMD/accessor_local.cpp index 21382f8ee401c..e566ade99b754 100644 --- a/sycl/test-e2e/ESIMD/accessor_local.cpp +++ b/sycl/test-e2e/ESIMD/accessor_local.cpp @@ -1,6 +1,4 @@ // REQUIRES-INTEL-DRIVER: lin: 27202, win: 101.4677 -// XFAIL: igc-dev -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16388 // RUN: %{build} -o %t.out // RUN: %{run} %t.out // This test verifies usage of local_accessor methods operator[] diff --git a/sycl/test-e2e/ESIMD/local_accessor_block_load_store.cpp b/sycl/test-e2e/ESIMD/local_accessor_block_load_store.cpp index ce8b4d30d0928..9d7a79c8fe2ad 100644 --- a/sycl/test-e2e/ESIMD/local_accessor_block_load_store.cpp +++ b/sycl/test-e2e/ESIMD/local_accessor_block_load_store.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// // REQUIRES-INTEL-DRIVER: lin: 27202, win: 101.4677 -// XFAIL: igc-dev -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16388 // RUN: %{build} -o %t.out // RUN: %{run} %t.out // This test verifies usage of block_load/block_store for local_accessor. diff --git a/sycl/test-e2e/ESIMD/local_accessor_copy_to_from.cpp b/sycl/test-e2e/ESIMD/local_accessor_copy_to_from.cpp index c63411b286b32..664e7709a7e81 100644 --- a/sycl/test-e2e/ESIMD/local_accessor_copy_to_from.cpp +++ b/sycl/test-e2e/ESIMD/local_accessor_copy_to_from.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// // REQUIRES-INTEL-DRIVER: lin: 27202, win: 101.4677 -// XFAIL: igc-dev -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16388 // RUN: %{build} -o %t.out // RUN: %{run} %t.out // diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_local_accessor_block_load_store.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_local_accessor_block_load_store.cpp index 790b7dc2a92da..fd4a013ee200b 100644 --- a/sycl/test-e2e/ESIMD/lsc/lsc_local_accessor_block_load_store.cpp +++ b/sycl/test-e2e/ESIMD/lsc/lsc_local_accessor_block_load_store.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// // REQUIRES: arch-intel_gpu_pvc || gpu-intel-dg2 // REQUIRES-INTEL-DRIVER: lin: 26690, win: 101.4576 -// XFAIL: igc-dev -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16388 // RUN: %{build} -o %t.out // RUN: %{run} %t.out // diff --git a/sycl/test-e2e/ESIMD/preemption.cpp b/sycl/test-e2e/ESIMD/preemption.cpp index 30c11306c08ca..0e2e11632c3e0 100644 --- a/sycl/test-e2e/ESIMD/preemption.cpp +++ b/sycl/test-e2e/ESIMD/preemption.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: gpu-intel-dg2 || arch-intel_gpu_pvc // RUN: %{build} -o %t.out // RUN: env IGC_DumpToCustomDir=%t.dump IGC_ShaderDumpEnable=1 %{run} %t.out -// RUN: %if run-mode %{grep enablePreemption %t.dump/*.asm%} +// RUN: %{run-aux} grep enablePreemption %t.dump/*.asm // The test expects to see "enablePreemption" switch in the compilation // switches. It fails if does not find it. diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp index 3adfc92ccf256..ae6c374c97535 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp @@ -1,4 +1,8 @@ // REQUIRES: sg-8 +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: subgroup size requirement implicitly make nvptx/amdgcn +// not supported + // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp index ab7ff36dd7c19..f9988030bcb61 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp @@ -1,4 +1,8 @@ // REQUIRES: sg-8 +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: subgroup size requirement implicitly make nvptx/amdgcn +// not supported + // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp index 0b415f878e85f..1a6e81829511b 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp @@ -1,4 +1,8 @@ // REQUIRES: sg-8 +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: subgroup size requirement implicitly make nvptx/amdgcn +// not supported + // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp index b2347d9b6de6e..be162c09f7e4e 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp @@ -1,4 +1,8 @@ // REQUIRES: sg-8 +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: subgroup size requirement implicitly make nvptx/amdgcn +// not supported + // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp index 257b5a4e4457f..c1fdeb58acec7 100644 --- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp +++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp @@ -5,7 +5,7 @@ // TODO: Currently using the -Wno-deprecated-declarations flag due to issue // https://github.com/intel/llvm/issues/16451. Rewrite testRootGroup() amd // remove the flag once the issue is resolved. -// RUN: %{build} -I . -o %t.out -Wno-deprecated-declarations %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} +// RUN: %{build} -I . -o %t.out -Wno-deprecated-declarations %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} // RUN: %{run} %t.out // Disabled temporarily while investigation into the failure is ongoing. diff --git a/sycl/test-e2e/InlineAsm/letter_example.cpp b/sycl/test-e2e/InlineAsm/letter_example.cpp index 780e33d77e803..f6d3df75871c4 100644 --- a/sycl/test-e2e/InlineAsm/letter_example.cpp +++ b/sycl/test-e2e/InlineAsm/letter_example.cpp @@ -1,4 +1,6 @@ // REQUIRES: sg-16,aspect-usm_shared_allocations +// XFAIL: arch-intel_gpu_bmg_g21 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16921 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp b/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp index db96f418927dd..56b1cc95749b9 100644 --- a/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp +++ b/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp @@ -1,4 +1,6 @@ // REQUIRES: sg-32,aspect-usm_shared_allocations +// XFAIL: arch-intel_gpu_bmg_g21 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16921 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/KernelAndProgram/build-log.cpp b/sycl/test-e2e/KernelAndProgram/build-log.cpp index dc6c7bc97cd91..ae9903163f626 100644 --- a/sycl/test-e2e/KernelAndProgram/build-log.cpp +++ b/sycl/test-e2e/KernelAndProgram/build-log.cpp @@ -1,5 +1,5 @@ // for CUDA and HIP the failure happens at compile time, not during runtime -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // TODO: rewrite this into a unit-test // RUN: %{build} -DGPU -o %t_gpu.out diff --git a/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp b/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp index 7ebf391a631b5..41a7ba7af3cd7 100644 --- a/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp +++ b/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp @@ -1,5 +1,5 @@ // for CUDA and HIP the failure happens at compile time, not during runtime -// UNSUPPORTED: cuda || hip || ze_debug +// UNSUPPORTED: target-nvidia || target-amd || ze_debug // RUN: %{build} -DSYCL_DISABLE_FALLBACK_ASSERT=1 -o %t.out // RUN: %{build} -DSYCL_DISABLE_FALLBACK_ASSERT=1 -DGPU -o %t_gpu.out diff --git a/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp b/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp index c37ce90530192..2f233aaa731a7 100644 --- a/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp +++ b/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp @@ -1,4 +1,4 @@ -// REQUIRES: level_zero +// REQUIRES: target-spir, level_zero // RUN: %{build} -Xsycl-target-linker=spir64 -foo -o %t.out // RUN: %{run} %t.out //==--- level-zero-link-flags.cpp - Error handling for link flags --==// diff --git a/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp b/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp index 496f3bb168cc1..7d02f30cafed7 100644 --- a/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp +++ b/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp @@ -6,12 +6,12 @@ // or SYCL_CACHE_TRACE is set to 0. // RUN: env SYCL_CACHE_IN_MEM=0 %{run} %t.out 2> %t.trace1 -// RUN: %if run-mode %{ FileCheck --allow-empty --input-file=%t.trace1 --implicit-check-not "In-Memory Cache" %s %} +// RUN: %{run-aux} FileCheck --allow-empty --input-file=%t.trace1 --implicit-check-not "In-Memory Cache" %s // RUN: env SYCL_CACHE_TRACE=0 %{run} %t.out 2> %t.trace2 -// RUN: %if run-mode %{ FileCheck --allow-empty --input-file=%t.trace2 --implicit-check-not "In-Memory Cache" %s %} +// RUN: %{run-aux} FileCheck --allow-empty --input-file=%t.trace2 --implicit-check-not "In-Memory Cache" %s // RUN: env SYCL_CACHE_TRACE=2 %{run} %t.out 2> %t.trace3 -// RUN: %if run-mode %{ FileCheck %s --input-file=%t.trace3 --check-prefix=CHECK-CACHE-TRACE %} +// RUN: %{run-aux} FileCheck %s --input-file=%t.trace3 --check-prefix=CHECK-CACHE-TRACE #include diff --git a/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp b/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp index 047f5bfbb970b..dc1bd16b06ce1 100644 --- a/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp +++ b/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp @@ -1,5 +1,5 @@ // for CUDA and HIP the failure happens at compile time, not during runtime -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_cache_eviction.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_cache_eviction.cpp index 2340a6d96c06e..d113b389c3b91 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_cache_eviction.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_cache_eviction.cpp @@ -18,7 +18,7 @@ // -- Test again, with caching. // DEFINE: %{cache_vars} = env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=7 SYCL_CACHE_DIR=%t/cache_dir SYCL_CACHE_MAX_SIZE=30000 -// RUN: %if run-mode %{rm -rf %t/cache_dir%} +// RUN: %{run-aux} rm -rf %t/cache_dir // RUN: %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefix=CHECK // CHECK: [Persistent Cache]: enabled diff --git a/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp b/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp index e1d3a76c70b4d..d63cbbc17a778 100644 --- a/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp +++ b/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp @@ -1,6 +1,6 @@ // Test that llvm.bitreverse is lowered correctly by llvm-spirv. -// UNSUPPORTED: hip || cuda +// UNSUPPORTED: target-amd || target-nvidia // XFAIL: spirv-backend // XFAIL-TRACKER: https://github.com/intel/llvm/issues/16318, CMPLRLLVM-62187 diff --git a/sycl/test-e2e/LLVMIntrinsicLowering/sub_byte_bitreverse.cpp b/sycl/test-e2e/LLVMIntrinsicLowering/sub_byte_bitreverse.cpp index 976d30b6be49c..70ac1760bf9de 100644 --- a/sycl/test-e2e/LLVMIntrinsicLowering/sub_byte_bitreverse.cpp +++ b/sycl/test-e2e/LLVMIntrinsicLowering/sub_byte_bitreverse.cpp @@ -1,7 +1,7 @@ // Test that llvm.bitreverse is lowered correctly by llvm-spirv for 2/4-bit // types. -// UNSUPPORTED: hip || cuda +// UNSUPPORTED: target-amd || target-nvidia // XFAIL: gpu // XFAIL-TRACKER: https://github.com/intel/intel-graphics-compiler/issues/330 diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp index 7f44294e1123d..9eaa4f0044dad 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp @@ -20,6 +20,8 @@ // test. // XFAIL: gpu-intel-dg2 && run-mode // XFAIL-TRACKER: GSD-10510 +// XFAIL: arch-intel_gpu_bmg_g21 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16922 #include "common.hpp" #include "joint_matrix_bf16_fill_k_cache_impl.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp index a290a8cb00a6f..cbcd97d5c6c9b 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp @@ -20,6 +20,8 @@ // test. // XFAIL: gpu-intel-dg2 && run-mode // XFAIL-TRACKER: GSD-10510 +// XFAIL: arch-intel_gpu_bmg_g21 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16922 #include "common.hpp" #include "joint_matrix_bf16_fill_k_cache_impl.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_out_bounds.cpp b/sycl/test-e2e/Matrix/joint_matrix_out_bounds.cpp index 4ca96773f4d8d..cfb475617b099 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_out_bounds.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_out_bounds.cpp @@ -10,7 +10,8 @@ // other triples // REQUIRES: aspect-ext_intel_matrix - +// XFAIL: arch-intel_gpu_bmg_g21 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16922 // UNSUPPORTED: gpu-intel-dg2, cpu // UNSUPPORTED-INTENDED: Checked load/stores are not supported by DG2 and CPU HW diff --git a/sycl/test-e2e/NewOffloadDriver/lit.local.cfg b/sycl/test-e2e/NewOffloadDriver/lit.local.cfg index 1e48e3bd2ad52..892e3cddea99e 100644 --- a/sycl/test-e2e/NewOffloadDriver/lit.local.cfg +++ b/sycl/test-e2e/NewOffloadDriver/lit.local.cfg @@ -3,5 +3,5 @@ import platform config.unsupported_features += ['accelerator'] config.substitutions.append( - ("%{embed-ir}", "%if any-device-is-hip || any-device-is-cuda %{ -fsycl-embed-ir %}") + ("%{embed-ir}", "%if target-amd || target-nvidia %{ -fsycl-embed-ir %}") ) diff --git a/sycl/test-e2e/Reduction/reduction_range_1d_dw.cpp b/sycl/test-e2e/Reduction/reduction_range_1d_dw.cpp index 4906b91350259..9e2f59c9ae3e8 100644 --- a/sycl/test-e2e/Reduction/reduction_range_1d_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_1d_dw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -DENABLE_64_BIT=false -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -DENABLE_64_BIT=false -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_1d_dw_64bit.cpp b/sycl/test-e2e/Reduction/reduction_range_1d_dw_64bit.cpp index fc1f83d558be7..e1f2245485f1a 100644 --- a/sycl/test-e2e/Reduction/reduction_range_1d_dw_64bit.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_1d_dw_64bit.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -DENABLE_64_BIT=true -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -DENABLE_64_BIT=true -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_1d_reducer_skip.cpp b/sycl/test-e2e/Reduction/reduction_range_1d_reducer_skip.cpp index bb8c56d1a8c2d..3f74339a6e640 100644 --- a/sycl/test-e2e/Reduction/reduction_range_1d_reducer_skip.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_1d_reducer_skip.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_1d_rw.cpp b/sycl/test-e2e/Reduction/reduction_range_1d_rw.cpp index c8eb129a06ad4..816611ea3be44 100644 --- a/sycl/test-e2e/Reduction/reduction_range_1d_rw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_1d_rw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_2d_dw.cpp b/sycl/test-e2e/Reduction/reduction_range_2d_dw.cpp index e59061f6b88a2..bede263e59ad5 100644 --- a/sycl/test-e2e/Reduction/reduction_range_2d_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_2d_dw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_2d_dw_reducer_skip.cpp b/sycl/test-e2e/Reduction/reduction_range_2d_dw_reducer_skip.cpp index 4a2c7fd24ac00..c58010ab666d8 100644 --- a/sycl/test-e2e/Reduction/reduction_range_2d_dw_reducer_skip.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_2d_dw_reducer_skip.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_2d_rw.cpp b/sycl/test-e2e/Reduction/reduction_range_2d_rw.cpp index c1854ad044146..6fe6ae756a87a 100644 --- a/sycl/test-e2e/Reduction/reduction_range_2d_rw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_2d_rw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_3d_dw.cpp b/sycl/test-e2e/Reduction/reduction_range_3d_dw.cpp index 735adb82df87d..a832c6ab30046 100644 --- a/sycl/test-e2e/Reduction/reduction_range_3d_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_3d_dw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_3d_rw.cpp b/sycl/test-e2e/Reduction/reduction_range_3d_rw.cpp index 015fa167647db..dfc2366c3cb7e 100644 --- a/sycl/test-e2e/Reduction/reduction_range_3d_rw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_3d_rw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_3d_rw_reducer_skip.cpp b/sycl/test-e2e/Reduction/reduction_range_3d_rw_reducer_skip.cpp index 85960235d2e3c..31ad10d456011 100644 --- a/sycl/test-e2e/Reduction/reduction_range_3d_rw_reducer_skip.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_3d_rw_reducer_skip.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp b/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp index 9c9f366f6a588..add12df70e305 100644 --- a/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Regression/acos.cpp b/sycl/test-e2e/Regression/acos.cpp index 59cb130be0756..42018de69e26f 100644 --- a/sycl/test-e2e/Regression/acos.cpp +++ b/sycl/test-e2e/Regression/acos.cpp @@ -1,5 +1,5 @@ // REQUIRES: aspect-fp64 -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/SPVDumpUse/basic.cpp b/sycl/test-e2e/SPVDumpUse/basic.cpp index 1d009acaa0c7d..f4cf391066992 100644 --- a/sycl/test-e2e/SPVDumpUse/basic.cpp +++ b/sycl/test-e2e/SPVDumpUse/basic.cpp @@ -1,4 +1,4 @@ -// REQUIRES: opencl || level_zero +// REQUIRES: target-spir // // SYCL_USE_KERNEL_SPV assumes no dead arguments elimination, need to produce // SPV under the same conditions. diff --git a/sycl/test-e2e/SPVDumpUse/kernel_bundle.cpp b/sycl/test-e2e/SPVDumpUse/kernel_bundle.cpp index ea9402ef301f2..87caa04102df6 100644 --- a/sycl/test-e2e/SPVDumpUse/kernel_bundle.cpp +++ b/sycl/test-e2e/SPVDumpUse/kernel_bundle.cpp @@ -1,4 +1,4 @@ -// REQUIRES: opencl || level_zero +// REQUIRES: target-spir // // SYCL_USE_KERNEL_SPV assumes no dead arguments elimination, need to produce // SPV under the same conditions. diff --git a/sycl/test-e2e/SubGroup/attributes.cpp b/sycl/test-e2e/SubGroup/attributes.cpp index 118349321b4b0..e2f97292a1a23 100644 --- a/sycl/test-e2e/SubGroup/attributes.cpp +++ b/sycl/test-e2e/SubGroup/attributes.cpp @@ -1,6 +1,6 @@ // TODO: Despite using a supported required subgroup size compile_sub_group_size // reports as 0 on cuda and hip -// XFAIL: cuda || hip +// XFAIL: target-nvidia || target-amd // XFAIL-TRACKER: https://github.com/intel/llvm/issues/14357 // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out diff --git a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp index b81405f316e7c..1feb4c4b01163 100644 --- a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp @@ -1,5 +1,5 @@ // REQUIRES: cuda || hip || level_zero -// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_61 %} -o %t.out +// RUN: %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_61 %} -o %t.out // RUN: %{run} %t.out #include diff --git a/sycl/test-e2e/VirtualFunctions/lit.local.cfg b/sycl/test-e2e/VirtualFunctions/lit.local.cfg index 3626f5dc6d960..e514c02f4d433 100644 --- a/sycl/test-e2e/VirtualFunctions/lit.local.cfg +++ b/sycl/test-e2e/VirtualFunctions/lit.local.cfg @@ -4,4 +4,4 @@ import os # paths like "../../../helper.hpp" in them, so let's just register a # substitution to add directory with helper headers into include search path config.substitutions.append(("%helper-includes", "-I {}".format(os.path.dirname(os.path.abspath(__file__))))) -config.required_features += ['aspect-ext_oneapi_virtual_functions'] +config.required_features += ['aspect-ext_oneapi_virtual_functions', 'target-spir'] diff --git a/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp b/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp index 48db619d94081..9383fbe7a1724 100644 --- a/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp +++ b/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp @@ -1,7 +1,7 @@ // REQUIRES: aspect-usm_shared_allocations // // On CPU it segfaults within the kernel that performs virtual function call. -// XFAIL: cpu +// XFAIL: cpu && opencl && linux // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15080 // UNSUPPORTED: gpu // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15068 diff --git a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp index 1aeb63636cc71..e00fd26271372 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp @@ -2,7 +2,7 @@ // REQUIRES: windows // DEFINE: %{link-flags}=%if cl_options %{ /clang:-ld3d12 /clang:-ldxgi /clang:-ldxguid %} %else %{ -ld3d12 -ldxgi -ldxguid %} -// RUN: %{build} %{link-flags} -o %t.out %if any-device-is-level_zero %{ -DDISABLE_UNORM_TESTS %} +// RUN: %{build} %{link-flags} -o %t.out %if target-spir %{ -DDISABLE_UNORM_TESTS %} // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out #pragma clang diagnostic ignored "-Waddress-of-temporary" diff --git a/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp b/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp index beef3c7fcf09c..fa797958c1aa6 100644 --- a/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp @@ -1,5 +1,6 @@ // REQUIRES: aspect-ext_oneapi_bindless_images - +// UNSUPPORTED: arch-intel_gpu_bmg_g21 +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16923 // UNSUPPORTED: hip // UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp index 46335e2b1d850..6cd2eec98ca4e 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp @@ -1,7 +1,7 @@ // REQUIRES: aspect-ext_oneapi_external_memory_import || (windows && level_zero && aspect-ext_oneapi_bindless_images) // REQUIRES: vulkan -// RUN: %{build} %link-vulkan -o %t.out %if any-device-is-level_zero %{ -Wno-ignored-attributes -DENABLE_LINEAR_TILING -DTEST_L0_SUPPORTED_VK_FORMAT %} +// RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes -DENABLE_LINEAR_TILING -DTEST_L0_SUPPORTED_VK_FORMAT %} // RUN: %{run} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out // Uncomment to print additional test information diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp index dccb3a2828aec..8dd0b4f3f8243 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -1,7 +1,7 @@ // REQUIRES: aspect-ext_oneapi_external_memory_import || (windows && level_zero && aspect-ext_oneapi_bindless_images) // REQUIRES: vulkan -// RUN: %{build} %link-vulkan -o %t.out %if any-device-is-level_zero %{ -Wno-ignored-attributes -DTEST_L0_SUPPORTED_VK_FORMAT %} +// RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes -DTEST_L0_SUPPORTED_VK_FORMAT %} // RUN: %{run} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out // Uncomment to print additional test information diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py index 849d94e6d1a20..c8f5dbd4e9815 100644 --- a/sycl/test-e2e/format.py +++ b/sycl/test-e2e/format.py @@ -306,7 +306,7 @@ def get_extra_env(sycl_devices): # Filter commands based on testing mode is_run_line = any( i in directive.command - for i in ["%{run}", "%{run-unfiltered-devices}", "%if run-mode"] + for i in ["%{run}", "%{run-unfiltered-devices}", "%{run-aux}"] ) ignore_line_filtering = ( diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index 5010362b366df..55688b5a29928 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -78,6 +78,9 @@ else: lit_config.error("Invalid argument for test-mode") +# Dummy substitution to indicate line should be a run line +config.substitutions.append(("%{run-aux}", "")) + # Cleanup environment variables which may affect tests possibly_dangerous_env_vars = [ "COMPILER_PATH", diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp index 123f5a2c73dbd..f43c60d8a3589 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp @@ -32,7 +32,7 @@ // UNSUPPORTED: hip || (windows && level_zero) -// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} -o %t.out +// RUN: %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} -o %t.out // RUN: %{run} %t.out #include diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp index 609652a58b17d..b3c48d0867b9f 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp @@ -32,7 +32,7 @@ // UNSUPPORTED: hip -// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} -o %t.out +// RUN: %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} -o %t.out // RUN: %{run} %t.out #include diff --git a/sycl/test-e2e/syclcompat/math/math_byte_dot_product.cpp b/sycl/test-e2e/syclcompat/math/math_byte_dot_product.cpp index 41421ee2b9d85..a53408157ecf6 100644 --- a/sycl/test-e2e/syclcompat/math/math_byte_dot_product.cpp +++ b/sycl/test-e2e/syclcompat/math/math_byte_dot_product.cpp @@ -29,7 +29,7 @@ // // ===---------------------------------------------------------------------===// -// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_61 %} -o %t.out +// RUN: %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_61 %} -o %t.out // RUN: %{run} %t.out #include diff --git a/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp b/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp index a8c916b5fe3c4..e73ac212fb5fd 100644 --- a/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp +++ b/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp @@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.6") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.14") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -87,7 +87,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I_I:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I_I]], align 8, !tbaa [[TBAA14]], !noalias [[META28]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META28]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META28]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META28]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META28]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META28]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31:![0-9]+]] @@ -97,11 +97,11 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I7_I:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I7_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 8, !tbaa [[TBAA14]], !noalias [[META31]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I5_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I6_I]]) #[[ATTR5]], !noalias [[META31]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META31]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META31]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META31]] -// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I_I]], <4 x float> poison, <3 x i32> -// CHECK-NEXT: [[EXTRACTVEC_I_I4_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I8_I]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I_I]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I4_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I8_I]], <4 x float> poison, <3 x i32> // CHECK-NEXT: [[CALL2_I_I:%.*]] = call spir_func noundef <3 x float> @_Z16__spirv_ocl_fmaxDv3_fS_(<3 x float> noundef [[EXTRACTVEC_I_I_I_I]], <3 x float> noundef [[EXTRACTVEC_I_I4_I_I]]) #[[ATTR6]] // CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37:![0-9]+]] @@ -111,11 +111,11 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I17_I:%.*]] = shufflevector <3 x float> [[CALL2_I_I]], <3 x float> poison, <4 x i32> // CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I17_I]], ptr [[VEC_ADDR_I_I_I_I12_I]], align 16, !tbaa [[TBAA14]], !noalias [[META37]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I15_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I16_I]]) #[[ATTR5]], !noalias [[META37]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I18_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I13_I]], align 2, !noalias [[META37]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I18_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I13_I]], align 2, !noalias [[META37]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I13_I]]), !noalias [[META37]] -// CHECK-NEXT: [[EXTRACTVEC4_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC4_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]] +// CHECK-NEXT: [[EXTRACTVEC_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMax(vec a, vec b) { @@ -123,7 +123,7 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.18") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.24") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.34") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.44") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.46") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.46") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.82") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.82") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.46") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.46") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.82") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.82") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.58") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.102") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 diff --git a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp index 11fe56b0b54c3..7074116fa1b96 100644 --- a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp +++ b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp @@ -63,7 +63,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.6") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.14") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] @@ -90,7 +90,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.24") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]] @@ -103,7 +103,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z18TestFtoBFDeviceRNERN4sycl3_V13vecIfLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META28:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META28:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -128,7 +128,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z17TestFtoBFDeviceRZRN4sycl3_V13vecIfLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META32:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META32:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] @@ -155,7 +155,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestInttoBFDeviceRZRN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META38]] @@ -182,7 +182,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.18") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.34") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]] @@ -195,7 +195,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.56") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] diff --git a/sycl/test/check_device_code/vector/vector_math_ops.cpp b/sycl/test/check_device_code/vector/vector_math_ops.cpp index 2f24e0c1aa2a5..819337fea1d17 100644 --- a/sycl/test/check_device_code/vector/vector_math_ops.cpp +++ b/sycl/test/check_device_code/vector/vector_math_ops.cpp @@ -32,13 +32,13 @@ using namespace sycl; SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIfLi3EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.1") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META21:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META21:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META28:![0-9]+]] -// CHECK-NEXT: [[LOADVEC4_I6_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META28]] -// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVEC4_I_I_I]], [[LOADVEC4_I6_I_I]] +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META28:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META28]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> // CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META28]] // CHECK-NEXT: ret void @@ -46,7 +46,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.9") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.9") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.9") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.17") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) @@ -60,7 +60,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.17") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.29") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.29") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.29") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) @@ -75,7 +75,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.36") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.36") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.36") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) @@ -90,13 +90,13 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.33") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.33") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.33") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_used_aspects [[META60:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.48") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_used_aspects [[META60:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META68:![0-9]+]] -// CHECK-NEXT: [[LOADVEC4_I6_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META68]] -// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVEC4_I_I_I]], [[LOADVEC4_I6_I_I]] +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META68:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META68]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> // CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META69:![0-9]+]] // CHECK-NEXT: ret void @@ -104,10 +104,10 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.41") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META72:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.60") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.60") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.60") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META72:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.41", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.60", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) @@ -148,7 +148,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.47") align 64 initializes((0, 64)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.47") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.47") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META92:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.70") align 64 initializes((0, 64)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.70") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.70") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META92:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) @@ -164,11 +164,11 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func noundef range(i8 -1, 1) <3 x i8> @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( -// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.56") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.56") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META100:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.83") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.83") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META100:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 -// CHECK-NEXT: [[LOADVEC4_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 -// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i8> [[LOADVEC4_I_I]], [[LOADVEC4_I_I2]] +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 +// CHECK-NEXT: [[LOADVECN_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i8> [[LOADVECN_I_I]], [[LOADVECN_I_I2]] // CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP0]], <4 x i1> poison, <3 x i32> // CHECK-NEXT: [[SEXT:%.*]] = sext <3 x i1> [[CMP]] to <3 x i8> // CHECK-NEXT: ret <3 x i8> [[SEXT]] @@ -178,7 +178,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.62") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.68") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.68") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.88") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.98") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.98") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) @@ -194,7 +194,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.78") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META112:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.112") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.122") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.122") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META112:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) @@ -210,9 +210,9 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.94") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.100") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.100") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META120:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.136") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.146") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.146") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META120:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.94", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.136", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META121:![0-9]+]]) @@ -249,12 +249,12 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.105") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.105") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META129:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.155") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.155") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META129:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META136:![0-9]+]] -// CHECK-NEXT: [[EXTRACTVEC_I_I_I:%.*]] = shufflevector <4 x i32> [[LOADVEC4_I_I_I]], <4 x i32> poison, <3 x i32> +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META136:![0-9]+]] +// CHECK-NEXT: [[EXTRACTVEC_I_I_I:%.*]] = shufflevector <4 x i32> [[LOADVECN_I_I_I]], <4 x i32> poison, <3 x i32> // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I_I]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <3 x i1> [[CMP_I_I_I_I]] to <3 x i32> // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <3 x i32> [[SEXT_I_I_I_I]], <3 x i32> poison, <4 x i32> @@ -264,7 +264,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.112") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.112") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.166") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.166") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) @@ -277,7 +277,7 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.118") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.118") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META145:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.176") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.176") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META145:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META146:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META149:![0-9]+]]) @@ -289,7 +289,7 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.125") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META153:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.182") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.36") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META153:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META157:![0-9]+]]) @@ -302,7 +302,7 @@ SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.132") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.138") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META164:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.193") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.203") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META164:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) @@ -315,7 +315,7 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.84") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META175:![0-9]+]] !sycl_used_aspects [[META60]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.122") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.122") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META175:![0-9]+]] !sycl_used_aspects [[META60]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META176:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META179:![0-9]+]]) @@ -327,9 +327,9 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META183:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.215") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.60") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META183:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.146", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.215", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META184:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META187:![0-9]+]]) @@ -358,10 +358,10 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.151") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.151") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META192:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.224") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.224") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META192:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.151", align 32 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.224", align 32 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193:![0-9]+]] // CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[RES_I_I]], i8 0, i64 32, i1 false), !noalias [[META196:![0-9]+]]