Skip to content

Commit

Permalink
Merge from 'sycl' to 'sycl-web' (9 commits)
Browse files Browse the repository at this point in the history
  • Loading branch information
iclsrc committed Feb 11, 2025
2 parents 2fe0875 + 36ce10e commit 90094f5
Show file tree
Hide file tree
Showing 93 changed files with 256 additions and 244 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/sycl-post-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ jobs:
runner: '["Linux", "arc"]'
extra_lit_opts: --param matrix-xmx8=True
reset_intel_gpu: true
- name: Intel Battlemage Graphics with Level Zero
runner: '["Linux", "bmg"]'
target_devices: level_zero:gpu
# The new Xe kernel driver used by BMG doesn't support resetting.
reset_intel_gpu: false
- name: AMD/HIP
runner: '["Linux", "amdgpu"]'
image_options: -u 1001 --device=/dev/dri --device=/dev/kfd
Expand Down
8 changes: 0 additions & 8 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2362,14 +2362,6 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
HasNonSYCLOffloadKinds = true;
}

// Write any remaining device inputs to an output file.
SmallVector<StringRef> InputFiles;
for (const OffloadFile &File : Input) {
auto FileNameOrErr = writeOffloadFile(File);
if (!FileNameOrErr)
return FileNameOrErr.takeError();
InputFiles.emplace_back(*FileNameOrErr);
}
if (HasSYCLOffloadKind) {
SmallVector<StringRef> InputFiles;
// Write device inputs to an output file for the linker.
Expand Down
8 changes: 4 additions & 4 deletions devops/dependencies-igc-dev.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"linux": {
"igc_dev": {
"github_tag": "igc-dev-61b96b3",
"version": "61b96b3",
"updated_at": "2025-01-15T17:43:30Z",
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2435370337/zip",
"github_tag": "igc-dev-4cc8dff",
"version": "4cc8dff",
"updated_at": "2025-02-10T10:27:30Z",
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2564401848/zip",
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
}
}
Expand Down
2 changes: 1 addition & 1 deletion devops/scripts/install_build_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ apt update && apt install -yqq \
python3-psutil \
python-is-python3 \
python3-pip \
zstd \
ocl-icd-opencl-dev \
vim \
libffi-dev \
Expand All @@ -21,6 +20,7 @@ apt update && apt install -yqq \
zstd \
zip \
unzip \
pigz \
jq \
curl \
libhwloc-dev \
Expand Down
2 changes: 1 addition & 1 deletion devops/scripts/update_drivers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def uplift_linux_igfx_driver(config, platform_tag, igc_dev_only):
config[platform_tag]["igc_dev"]["version"] = igcdevver
config[platform_tag]["igc_dev"]["updated_at"] = igc_dev["updated_at"]
config[platform_tag]["igc_dev"]["url"] = get_artifacts_download_url(
"intel/intel-graphics-compiler", "IGC_Ubuntu22.04_llvm14_clang-" + igcdevver
"intel/intel-graphics-compiler", "IGC_Ubuntu24.04_llvm14_clang-" + igcdevver
)
return config

Expand Down
152 changes: 71 additions & 81 deletions sycl/include/sycl/detail/vector_arith.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,39 @@ struct UnaryPlus {
}
};

struct VecOperators {
// Tag to map/templatize the mixin for prefix/postfix inc/dec operators.
struct IncDec {};

// Hidden-friend mixin that injects prefix/postfix ++ and -- for
// SelfOperandTy (a sycl::vec or, presumably, a swizzle view over one —
// confirm against instantiation sites). All four operators are expressed
// through the operand's own += / -= with element_type{1}.
template <typename SelfOperandTy> struct IncDecImpl {
using element_type = typename from_incomplete<SelfOperandTy>::element_type;
// Postfix operators must return a by-value snapshot; for (possibly const)
// swizzle operands that snapshot is the simplified plain vec type.
using vec_t = simplify_if_swizzle_t<std::remove_const_t<SelfOperandTy>>;

public:
// Prefix ++: add one (via +=) and return the operand itself.
friend SelfOperandTy &operator++(SelfOperandTy &x) {
x += element_type{1};
return x;
}
// Prefix --: subtract one (via -=) and return the operand itself.
friend SelfOperandTy &operator--(SelfOperandTy &x) {
x -= element_type{1};
return x;
}
// Postfix ++: copy the old value into vec_t, increment, return the copy.
friend auto operator++(SelfOperandTy &x, int) {
vec_t tmp{x};
x += element_type{1};
return tmp;
}
// Postfix --: copy the old value into vec_t, decrement, return the copy.
friend auto operator--(SelfOperandTy &x, int) {
vec_t tmp{x};
x -= element_type{1};
return tmp;
}
};

template <typename Self> struct VecOperators {
static_assert(is_vec_v<Self>);

template <typename OpTy, typename... ArgTys>
static constexpr auto apply(const ArgTys &...Args) {
using Self = nth_type_t<0, ArgTys...>;
static_assert(is_vec_v<Self>);
static_assert(((std::is_same_v<Self, ArgTys> && ...)));

using element_type = typename Self::element_type;
Expand Down Expand Up @@ -163,6 +191,41 @@ struct VecOperators {
res[i] = Op(Args[i]...);
return res;
}

// Uglier than possible due to
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282.
template <typename Op, typename = void> struct OpMixin;

template <typename Op>
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, IncDec>>>
: public IncDecImpl<Self> {};

#define __SYCL_VEC_UOP_MIXIN(OP, OPERATOR) \
template <typename Op> \
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, OP>>> { \
friend auto operator OPERATOR(const Self &v) { return apply<OP>(v); } \
};

__SYCL_VEC_UOP_MIXIN(std::negate<void>, -)
__SYCL_VEC_UOP_MIXIN(std::logical_not<void>, !)
__SYCL_VEC_UOP_MIXIN(UnaryPlus, +)

template <typename Op>
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, std::bit_not<void>>>> {
template <typename T = typename from_incomplete<Self>::element_type>
friend std::enable_if_t<!is_vgenfloat_v<T>, Self> operator~(const Self &v) {
return apply<std::bit_not<void>>(v);
}
};

#undef __SYCL_VEC_UOP_MIXIN

template <typename... Op>
struct __SYCL_EBO CombineImpl : public OpMixin<Op>... {};

struct Combined
: public CombineImpl<std::negate<void>, std::logical_not<void>,
std::bit_not<void>, UnaryPlus, IncDec> {};
};

// Macros to populate binary operation on sycl::vec.
Expand All @@ -174,7 +237,7 @@ struct VecOperators {
template <typename T = DataT> \
friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \
const vec_t & Rhs) { \
return VecOperators::apply<FUNCTOR>(Lhs, Rhs); \
return VecOperators<vec_t>::template apply<FUNCTOR>(Lhs, Rhs); \
} \
\
template <typename T = DataT> \
Expand All @@ -200,65 +263,11 @@ struct VecOperators {
return Lhs; \
}

/****************************************************************
* vec_arith_common
* / | \
* / | \
* vec_arith<int> vec_arith<float> ... vec_arith<byte>
* \ | /
* \ | /
* sycl::vec<T>
*
* vec_arith_common is the base class for vec_arith. It contains
* the common math operators of sycl::vec for all types.
* vec_arith is the derived class that contains the math operators
* specialized for certain types. sycl::vec inherits from vec_arith.
* *************************************************************/
template <typename DataT, int NumElements> class vec_arith_common;
template <typename DataT> struct vec_helper;

template <typename DataT, int NumElements>
class vec_arith : public vec_arith_common<DataT, NumElements> {
class vec_arith : public VecOperators<vec<DataT, NumElements>>::Combined {
protected:
using vec_t = vec<DataT, NumElements>;
using ocl_t = detail::fixed_width_signed<sizeof(DataT)>;
template <typename T> using vec_data = vec_helper<T>;

// operator!.
friend vec<ocl_t, NumElements> operator!(const vec_t &Rhs) {
return VecOperators::apply<std::logical_not<void>>(Rhs);
}

// operator +.
friend vec_t operator+(const vec_t &Lhs) {
return VecOperators::apply<UnaryPlus>(Lhs);
}

// operator -.
friend vec_t operator-(const vec_t &Lhs) {
return VecOperators::apply<std::negate<void>>(Lhs);
}

// Unary operations on sycl::vec
// FIXME: Don't allow Unary operators on vec<bool> after
// https://github.com/KhronosGroup/SYCL-CTS/issues/896 gets fixed.
#ifdef __SYCL_UOP
#error "Undefine __SYCL_UOP macro"
#endif
#define __SYCL_UOP(UOP, OPASSIGN) \
friend vec_t &operator UOP(vec_t & Rhs) { \
Rhs OPASSIGN DataT{1}; \
return Rhs; \
} \
friend vec_t operator UOP(vec_t &Lhs, int) { \
vec_t Ret(Lhs); \
Lhs OPASSIGN DataT{1}; \
return Ret; \
}

__SYCL_UOP(++, +=)
__SYCL_UOP(--, -=)
#undef __SYCL_UOP

// The logical operations on scalar types results in 0/1, while for vec<>,
// logical operations should result in 0 and -1 (similar to OpenCL vectors).
Expand All @@ -272,7 +281,7 @@ class vec_arith : public vec_arith_common<DataT, NumElements> {
template <typename T = DataT> \
friend std::enable_if_t<(COND), vec<ocl_t, NumElements>> operator RELLOGOP( \
const vec_t & Lhs, const vec_t & Rhs) { \
return VecOperators::apply<FUNCTOR>(Lhs, Rhs); \
return VecOperators<vec_t>::template apply<FUNCTOR>(Lhs, Rhs); \
} \
\
template <typename T = DataT> \
Expand Down Expand Up @@ -325,13 +334,13 @@ class vec_arith : public vec_arith_common<DataT, NumElements> {
#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
template <int NumElements>
class vec_arith<std::byte, NumElements>
: public vec_arith_common<std::byte, NumElements> {
: public VecOperators<vec<std::byte, NumElements>>::template OpMixin<
std::bit_not<void>> {
protected:
// NumElements can never be zero. Still using the redundant check to avoid
// incomplete type errors.
using DataT = typename std::conditional_t<NumElements == 0, int, std::byte>;
using vec_t = vec<DataT, NumElements>;
template <typename T> using vec_data = vec_helper<T>;

// Special <<, >> operators for std::byte.
// std::byte is not an arithmetic type and it only supports the following
Expand Down Expand Up @@ -376,25 +385,6 @@ class vec_arith<std::byte, NumElements>
};
#endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)

template <typename DataT, int NumElements> class vec_arith_common {
protected:
using vec_t = vec<DataT, NumElements>;

static constexpr bool IsBfloat16 =
std::is_same_v<DataT, sycl::ext::oneapi::bfloat16>;

// operator~() available only when: dataT != float && dataT != double
// && dataT != half
template <typename T = DataT>
friend std::enable_if_t<!detail::is_vgenfloat_v<T>, vec_t>
operator~(const vec_t &Rhs) {
return VecOperators::apply<std::bit_not<void>>(Rhs);
}

// friends
template <typename T1, int T2> friend class __SYCL_EBO vec;
};

#undef __SYCL_BINOP

} // namespace detail
Expand Down
1 change: 0 additions & 1 deletion sycl/include/sycl/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,6 @@ class __SYCL_EBO vec
template <typename T1, int T2> friend class __SYCL_EBO vec;
// To allow arithmetic operators access private members of vec.
template <typename T1, int T2> friend class detail::vec_arith;
template <typename T1, int T2> friend class detail::vec_arith_common;
};
///////////////////////// class sycl::vec /////////////////////////

Expand Down
3 changes: 3 additions & 0 deletions sycl/test-e2e/AddressSanitizer/lit.local.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@ config.unsupported_features += ['gpu-intel-gen12']

# CMPLRLLVM-64052
config.unsupported_features += ['spirv-backend']

# https://github.com/intel/llvm/issues/16920
config.unsupported_features += ['arch-intel_gpu_bmg_g21']
2 changes: 1 addition & 1 deletion sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -O3 -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{run} %t.out

// NOTE: Tests fetch_add for acquire and release memory ordering.
Expand Down
4 changes: 3 additions & 1 deletion sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// RUN: %{build} -O3 -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{run} %t.out
// UNSUPPORTED: arch-intel_gpu_bmg_g21
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16924

#include "atomic_memory_order.h"
#include <cmath>
Expand Down
4 changes: 2 additions & 2 deletions sycl/test-e2e/BFloat16/bfloat16_builtins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
// + below sm_80 always uses generic impls

// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}
// RUN: %clangxx -fsycl %{sycl_target_opts} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags}
// RUN: %clangxx -fsycl %{sycl_target_opts} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags}
// RUN: %{run} %t.out

// Test "new" (ABI breaking) for all platforms ( sm_80/native if CUDA )
// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %}
// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %}
// RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %}

// Flaky timeout on CPU. Enable when fixed.
Expand Down
4 changes: 2 additions & 2 deletions sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}

// If CUDA, test "new" again for sm_75/generic
// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %}
// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %}
// RUN: %if target-nvidia %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %}
// RUN: %if target-nvidia %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %}

#include "bfloat16_builtins.hpp"

Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/BFloat16/bfloat16_type.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %if any-device-is-cuda %{ %{build} -DUSE_CUDA_SM80=1 -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 -o %t.cuda.out %}
// RUN: %if target-nvidia %{ %{build} -DUSE_CUDA_SM80=1 -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 -o %t.cuda.out %}
// RUN: %if cuda %{ %{run} %t.cuda.out %}
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: accelerator
// REQUIRES: target-spir, accelerator
// https://github.com/intel/llvm/issues/14308
// Temporarily re-enabled for testing purposes.
// If you are facing issues with this test, please leave a comment in:
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: accelerator
// REQUIRES: target-spir, accelerator
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
//==-------- fpga_pipes_legacy_ns.cpp - SYCL FPGA pipes test ---------------==//
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: accelerator
// REQUIRES: target-spir, accelerator
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

Expand Down
8 changes: 4 additions & 4 deletions sycl/test-e2e/Basic/interop/interop_all_backends.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// XFAIL: any-device-is-cuda
// XFAIL: target-nvidia
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16070
// RUN: %if any-device-is-opencl %{ %{build} -o %t-opencl.out %}
// RUN: %if any-device-is-cuda %{ %{build} -isystem %sycl_include -DBUILD_FOR_CUDA -o %t-cuda.out %}
// RUN: %if any-device-is-hip %{ %{build} -DBUILD_FOR_HIP -o %t-hip.out %}
// RUN: %if target-spir %{ %{build} -o %t-opencl.out %}
// RUN: %if target-nvidia %{ %{build} -isystem %sycl_include -DBUILD_FOR_CUDA -o %t-cuda.out %}
// RUN: %if target-amd %{ %{build} -DBUILD_FOR_HIP -o %t-hip.out %}

#include <sycl/backend.hpp>
#include <sycl/detail/core.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/multisource_spv_obj.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: cuda || hip
// UNSUPPORTED: target-nvidia || target-amd
//
// Separate kernel sources and host code sources
// RUN: %{build} -fsycl-device-obj=spirv -c -o %t.kernel.o -DINIT_KERNEL -DCALC_KERNEL
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: accelerator
// REQUIRES: target-spir, accelerator

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Tests whether or not cluster launch was successful, with the correct ranges
// that were passed via enqueue functions extension
// REQUIRES: aspect-ext_oneapi_cuda_cluster_group
// REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group
// RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out
// RUN: %{run} %t.out

Expand Down
Loading

0 comments on commit 90094f5

Please sign in to comment.