Skip to content

Commit

Permalink
Merge from 'sycl' to 'sycl-web' (9 commits)
Browse files Browse the repository at this point in the history
  • Loading branch information
iclsrc committed Feb 11, 2025
2 parents 2fe0875 + 36ce10e commit 90094f5
Show file tree
Hide file tree
Showing 93 changed files with 256 additions and 244 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/sycl-post-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ jobs:
runner: '["Linux", "arc"]'
extra_lit_opts: --param matrix-xmx8=True
reset_intel_gpu: true
- name: Intel Battlemage Graphics with Level Zero
runner: '["Linux", "bmg"]'
target_devices: level_zero:gpu
# The new Xe kernel driver used by BMG doesn't support resetting.
reset_intel_gpu: false
- name: AMD/HIP
runner: '["Linux", "amdgpu"]'
image_options: -u 1001 --device=/dev/dri --device=/dev/kfd
Expand Down
8 changes: 0 additions & 8 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2362,14 +2362,6 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
HasNonSYCLOffloadKinds = true;
}

// Write any remaining device inputs to an output file.
SmallVector<StringRef> InputFiles;
for (const OffloadFile &File : Input) {
auto FileNameOrErr = writeOffloadFile(File);
if (!FileNameOrErr)
return FileNameOrErr.takeError();
InputFiles.emplace_back(*FileNameOrErr);
}
if (HasSYCLOffloadKind) {
SmallVector<StringRef> InputFiles;
// Write device inputs to an output file for the linker.
Expand Down
8 changes: 4 additions & 4 deletions devops/dependencies-igc-dev.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"linux": {
"igc_dev": {
"github_tag": "igc-dev-61b96b3",
"version": "61b96b3",
"updated_at": "2025-01-15T17:43:30Z",
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2435370337/zip",
"github_tag": "igc-dev-4cc8dff",
"version": "4cc8dff",
"updated_at": "2025-02-10T10:27:30Z",
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2564401848/zip",
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
}
}
Expand Down
2 changes: 1 addition & 1 deletion devops/scripts/install_build_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ apt update && apt install -yqq \
python3-psutil \
python-is-python3 \
python3-pip \
zstd \
ocl-icd-opencl-dev \
vim \
libffi-dev \
Expand All @@ -21,6 +20,7 @@ apt update && apt install -yqq \
zstd \
zip \
unzip \
pigz \
jq \
curl \
libhwloc-dev \
Expand Down
2 changes: 1 addition & 1 deletion devops/scripts/update_drivers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def uplift_linux_igfx_driver(config, platform_tag, igc_dev_only):
config[platform_tag]["igc_dev"]["version"] = igcdevver
config[platform_tag]["igc_dev"]["updated_at"] = igc_dev["updated_at"]
config[platform_tag]["igc_dev"]["url"] = get_artifacts_download_url(
"intel/intel-graphics-compiler", "IGC_Ubuntu22.04_llvm14_clang-" + igcdevver
"intel/intel-graphics-compiler", "IGC_Ubuntu24.04_llvm14_clang-" + igcdevver
)
return config

Expand Down
152 changes: 71 additions & 81 deletions sycl/include/sycl/detail/vector_arith.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,39 @@ struct UnaryPlus {
}
};

struct VecOperators {
// Tag to map/templatize the mixin for prefix/postfix inc/dec operators.
struct IncDec {};

// Hidden-friend mixin that injects prefix/postfix ++ and -- for
// SelfOperandTy (a sycl::vec or, presumably, a swizzle view over one —
// confirm against instantiation sites). All four operators are expressed
// through the operand's own += / -= with element_type{1}.
template <typename SelfOperandTy> struct IncDecImpl {
using element_type = typename from_incomplete<SelfOperandTy>::element_type;
// Postfix operators must return a by-value snapshot; for (possibly const)
// swizzle operands that snapshot is the simplified plain vec type.
using vec_t = simplify_if_swizzle_t<std::remove_const_t<SelfOperandTy>>;

public:
// Prefix ++: add one (via +=) and return the operand itself.
friend SelfOperandTy &operator++(SelfOperandTy &x) {
x += element_type{1};
return x;
}
// Prefix --: subtract one (via -=) and return the operand itself.
friend SelfOperandTy &operator--(SelfOperandTy &x) {
x -= element_type{1};
return x;
}
// Postfix ++: copy the old value into vec_t, increment, return the copy.
friend auto operator++(SelfOperandTy &x, int) {
vec_t tmp{x};
x += element_type{1};
return tmp;
}
// Postfix --: copy the old value into vec_t, decrement, return the copy.
friend auto operator--(SelfOperandTy &x, int) {
vec_t tmp{x};
x -= element_type{1};
return tmp;
}
};

template <typename Self> struct VecOperators {
static_assert(is_vec_v<Self>);

template <typename OpTy, typename... ArgTys>
static constexpr auto apply(const ArgTys &...Args) {
using Self = nth_type_t<0, ArgTys...>;
static_assert(is_vec_v<Self>);
static_assert(((std::is_same_v<Self, ArgTys> && ...)));

using element_type = typename Self::element_type;
Expand Down Expand Up @@ -163,6 +191,41 @@ struct VecOperators {
res[i] = Op(Args[i]...);
return res;
}

// Uglier than possible due to
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282.
template <typename Op, typename = void> struct OpMixin;

template <typename Op>
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, IncDec>>>
: public IncDecImpl<Self> {};

#define __SYCL_VEC_UOP_MIXIN(OP, OPERATOR) \
template <typename Op> \
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, OP>>> { \
friend auto operator OPERATOR(const Self &v) { return apply<OP>(v); } \
};

__SYCL_VEC_UOP_MIXIN(std::negate<void>, -)
__SYCL_VEC_UOP_MIXIN(std::logical_not<void>, !)
__SYCL_VEC_UOP_MIXIN(UnaryPlus, +)

template <typename Op>
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, std::bit_not<void>>>> {
template <typename T = typename from_incomplete<Self>::element_type>
friend std::enable_if_t<!is_vgenfloat_v<T>, Self> operator~(const Self &v) {
return apply<std::bit_not<void>>(v);
}
};

#undef __SYCL_VEC_UOP_MIXIN

template <typename... Op>
struct __SYCL_EBO CombineImpl : public OpMixin<Op>... {};

struct Combined
: public CombineImpl<std::negate<void>, std::logical_not<void>,
std::bit_not<void>, UnaryPlus, IncDec> {};
};

// Macros to populate binary operation on sycl::vec.
Expand All @@ -174,7 +237,7 @@ struct VecOperators {
template <typename T = DataT> \
friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \
const vec_t & Rhs) { \
return VecOperators::apply<FUNCTOR>(Lhs, Rhs); \
return VecOperators<vec_t>::template apply<FUNCTOR>(Lhs, Rhs); \
} \
\
template <typename T = DataT> \
Expand All @@ -200,65 +263,11 @@ struct VecOperators {
return Lhs; \
}

/****************************************************************
* vec_arith_common
* / | \
* / | \
* vec_arith<int> vec_arith<float> ... vec_arith<byte>
* \ | /
* \ | /
* sycl::vec<T>
*
* vec_arith_common is the base class for vec_arith. It contains
* the common math operators of sycl::vec for all types.
* vec_arith is the derived class that contains the math operators
* specialized for certain types. sycl::vec inherits from vec_arith.
* *************************************************************/
template <typename DataT, int NumElements> class vec_arith_common;
template <typename DataT> struct vec_helper;

template <typename DataT, int NumElements>
class vec_arith : public vec_arith_common<DataT, NumElements> {
class vec_arith : public VecOperators<vec<DataT, NumElements>>::Combined {
protected:
using vec_t = vec<DataT, NumElements>;
using ocl_t = detail::fixed_width_signed<sizeof(DataT)>;
template <typename T> using vec_data = vec_helper<T>;

// operator!.
friend vec<ocl_t, NumElements> operator!(const vec_t &Rhs) {
return VecOperators::apply<std::logical_not<void>>(Rhs);
}

// operator +.
friend vec_t operator+(const vec_t &Lhs) {
return VecOperators::apply<UnaryPlus>(Lhs);
}

// operator -.
friend vec_t operator-(const vec_t &Lhs) {
return VecOperators::apply<std::negate<void>>(Lhs);
}

// Unary operations on sycl::vec
// FIXME: Don't allow Unary operators on vec<bool> after
// https://github.com/KhronosGroup/SYCL-CTS/issues/896 gets fixed.
#ifdef __SYCL_UOP
#error "Undefine __SYCL_UOP macro"
#endif
#define __SYCL_UOP(UOP, OPASSIGN) \
friend vec_t &operator UOP(vec_t & Rhs) { \
Rhs OPASSIGN DataT{1}; \
return Rhs; \
} \
friend vec_t operator UOP(vec_t &Lhs, int) { \
vec_t Ret(Lhs); \
Lhs OPASSIGN DataT{1}; \
return Ret; \
}

__SYCL_UOP(++, +=)
__SYCL_UOP(--, -=)
#undef __SYCL_UOP

// The logical operations on scalar types results in 0/1, while for vec<>,
// logical operations should result in 0 and -1 (similar to OpenCL vectors).
Expand All @@ -272,7 +281,7 @@ class vec_arith : public vec_arith_common<DataT, NumElements> {
template <typename T = DataT> \
friend std::enable_if_t<(COND), vec<ocl_t, NumElements>> operator RELLOGOP( \
const vec_t & Lhs, const vec_t & Rhs) { \
return VecOperators::apply<FUNCTOR>(Lhs, Rhs); \
return VecOperators<vec_t>::template apply<FUNCTOR>(Lhs, Rhs); \
} \
\
template <typename T = DataT> \
Expand Down Expand Up @@ -325,13 +334,13 @@ class vec_arith : public vec_arith_common<DataT, NumElements> {
#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
template <int NumElements>
class vec_arith<std::byte, NumElements>
: public vec_arith_common<std::byte, NumElements> {
: public VecOperators<vec<std::byte, NumElements>>::template OpMixin<
std::bit_not<void>> {
protected:
// NumElements can never be zero. Still using the redundant check to avoid
// incomplete type errors.
using DataT = typename std::conditional_t<NumElements == 0, int, std::byte>;
using vec_t = vec<DataT, NumElements>;
template <typename T> using vec_data = vec_helper<T>;

// Special <<, >> operators for std::byte.
// std::byte is not an arithmetic type and it only supports the following
Expand Down Expand Up @@ -376,25 +385,6 @@ class vec_arith<std::byte, NumElements>
};
#endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)

template <typename DataT, int NumElements> class vec_arith_common {
protected:
using vec_t = vec<DataT, NumElements>;

static constexpr bool IsBfloat16 =
std::is_same_v<DataT, sycl::ext::oneapi::bfloat16>;

// operator~() available only when: dataT != float && dataT != double
// && dataT != half
template <typename T = DataT>
friend std::enable_if_t<!detail::is_vgenfloat_v<T>, vec_t>
operator~(const vec_t &Rhs) {
return VecOperators::apply<std::bit_not<void>>(Rhs);
}

// friends
template <typename T1, int T2> friend class __SYCL_EBO vec;
};

#undef __SYCL_BINOP

} // namespace detail
Expand Down
1 change: 0 additions & 1 deletion sycl/include/sycl/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,6 @@ class __SYCL_EBO vec
template <typename T1, int T2> friend class __SYCL_EBO vec;
// To allow arithmetic operators access private members of vec.
template <typename T1, int T2> friend class detail::vec_arith;
template <typename T1, int T2> friend class detail::vec_arith_common;
};
///////////////////////// class sycl::vec /////////////////////////

Expand Down
3 changes: 3 additions & 0 deletions sycl/test-e2e/AddressSanitizer/lit.local.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@ config.unsupported_features += ['gpu-intel-gen12']

# CMPLRLLVM-64052
config.unsupported_features += ['spirv-backend']

# https://github.com/intel/llvm/issues/16920
config.unsupported_features += ['arch-intel_gpu_bmg_g21']
2 changes: 1 addition & 1 deletion sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} -O3 -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{run} %t.out

// NOTE: Tests fetch_add for acquire and release memory ordering.
Expand Down
4 changes: 3 additions & 1 deletion sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// RUN: %{build} -O3 -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
// RUN: %{run} %t.out
// UNSUPPORTED: arch-intel_gpu_bmg_g21
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16924

#include "atomic_memory_order.h"
#include <cmath>
Expand Down
4 changes: 2 additions & 2 deletions sycl/test-e2e/BFloat16/bfloat16_builtins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
// + below sm_80 always uses generic impls

// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}
// RUN: %clangxx -fsycl %{sycl_target_opts} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags}
// RUN: %clangxx -fsycl %{sycl_target_opts} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags}
// RUN: %{run} %t.out

// Test "new" (ABI breaking) for all platforms ( sm_80/native if CUDA )
// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %}
// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %}
// RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %}

// Flaky timeout on CPU. Enable when fixed.
Expand Down
4 changes: 2 additions & 2 deletions sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}

// If CUDA, test "new" again for sm_75/generic
// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %}
// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %}
// RUN: %if target-nvidia %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %}
// RUN: %if target-nvidia %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %}

#include "bfloat16_builtins.hpp"

Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/BFloat16/bfloat16_type.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %if any-device-is-cuda %{ %{build} -DUSE_CUDA_SM80=1 -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 -o %t.cuda.out %}
// RUN: %if target-nvidia %{ %{build} -DUSE_CUDA_SM80=1 -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 -o %t.cuda.out %}
// RUN: %if cuda %{ %{run} %t.cuda.out %}
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: accelerator
// REQUIRES: target-spir, accelerator
// https://github.com/intel/llvm/issues/14308
// Temporarily re-enabled for testing purposes.
// If you are facing issues with this test, please leave a comment in:
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: accelerator
// REQUIRES: target-spir, accelerator
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
//==-------- fpga_pipes_legacy_ns.cpp - SYCL FPGA pipes test ---------------==//
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: accelerator
// REQUIRES: target-spir, accelerator
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

Expand Down
8 changes: 4 additions & 4 deletions sycl/test-e2e/Basic/interop/interop_all_backends.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// XFAIL: any-device-is-cuda
// XFAIL: target-nvidia
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16070
// RUN: %if any-device-is-opencl %{ %{build} -o %t-opencl.out %}
// RUN: %if any-device-is-cuda %{ %{build} -isystem %sycl_include -DBUILD_FOR_CUDA -o %t-cuda.out %}
// RUN: %if any-device-is-hip %{ %{build} -DBUILD_FOR_HIP -o %t-hip.out %}
// RUN: %if target-spir %{ %{build} -o %t-opencl.out %}
// RUN: %if target-nvidia %{ %{build} -isystem %sycl_include -DBUILD_FOR_CUDA -o %t-cuda.out %}
// RUN: %if target-amd %{ %{build} -DBUILD_FOR_HIP -o %t-hip.out %}

#include <sycl/backend.hpp>
#include <sycl/detail/core.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/multisource_spv_obj.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: cuda || hip
// UNSUPPORTED: target-nvidia || target-amd
//
// Separate kernel sources and host code sources
// RUN: %{build} -fsycl-device-obj=spirv -c -o %t.kernel.o -DINIT_KERNEL -DCALC_KERNEL
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: accelerator
// REQUIRES: target-spir, accelerator

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Tests whether or not cluster launch was successful, with the correct ranges
// that were passed via enqueue functions extension
// REQUIRES: aspect-ext_oneapi_cuda_cluster_group
// REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group
// RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out
// RUN: %{run} %t.out

Expand Down
Loading

0 comments on commit 90094f5

Please sign in to comment.