Skip to content

Commit 36ce10e

Browse files
[NFCI][SYCL] Split vec's unary ops into individual mixins (#16946)
We don't want constraints on non-template hidden friend operators to be implemented by templatizing them. That would be trivial with C++20 concepts, but we're limited to C++17. The class hierarchy proposed for the `vec_arith` helper was intended to achieve that, but the implementation was only partial. We want to fix that while implementing the proposed specification changes to vec/swizzle, so we set up the infrastructure to do so now, to minimize the amount of "preview-breaking-changes" customizations later. The idea is to split each operator into an individual unconstrained mixin, so that the constraints can be implemented on top of it via the `detail::ApplyIf` helper. This particular PR only implements such a change for unary operators. The rest will be implemented in a subsequent change to ease the review process.
1 parent 50f0552 commit 36ce10e

File tree

5 files changed

+124
-135
lines changed

5 files changed

+124
-135
lines changed

sycl/include/sycl/detail/vector_arith.hpp

+71-81
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,39 @@ struct UnaryPlus {
5959
}
6060
};
6161

62-
struct VecOperators {
62+
// Tag to map/templatize the mixin for prefix/postfix inc/dec operators.
63+
struct IncDec {};
64+
65+
template <typename SelfOperandTy> struct IncDecImpl {
66+
using element_type = typename from_incomplete<SelfOperandTy>::element_type;
67+
using vec_t = simplify_if_swizzle_t<std::remove_const_t<SelfOperandTy>>;
68+
69+
public:
70+
friend SelfOperandTy &operator++(SelfOperandTy &x) {
71+
x += element_type{1};
72+
return x;
73+
}
74+
friend SelfOperandTy &operator--(SelfOperandTy &x) {
75+
x -= element_type{1};
76+
return x;
77+
}
78+
friend auto operator++(SelfOperandTy &x, int) {
79+
vec_t tmp{x};
80+
x += element_type{1};
81+
return tmp;
82+
}
83+
friend auto operator--(SelfOperandTy &x, int) {
84+
vec_t tmp{x};
85+
x -= element_type{1};
86+
return tmp;
87+
}
88+
};
89+
90+
template <typename Self> struct VecOperators {
91+
static_assert(is_vec_v<Self>);
92+
6393
template <typename OpTy, typename... ArgTys>
6494
static constexpr auto apply(const ArgTys &...Args) {
65-
using Self = nth_type_t<0, ArgTys...>;
66-
static_assert(is_vec_v<Self>);
6795
static_assert(((std::is_same_v<Self, ArgTys> && ...)));
6896

6997
using element_type = typename Self::element_type;
@@ -163,6 +191,41 @@ struct VecOperators {
163191
res[i] = Op(Args[i]...);
164192
return res;
165193
}
194+
195+
// Uglier than possible due to
196+
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282.
197+
template <typename Op, typename = void> struct OpMixin;
198+
199+
template <typename Op>
200+
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, IncDec>>>
201+
: public IncDecImpl<Self> {};
202+
203+
#define __SYCL_VEC_UOP_MIXIN(OP, OPERATOR) \
204+
template <typename Op> \
205+
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, OP>>> { \
206+
friend auto operator OPERATOR(const Self &v) { return apply<OP>(v); } \
207+
};
208+
209+
__SYCL_VEC_UOP_MIXIN(std::negate<void>, -)
210+
__SYCL_VEC_UOP_MIXIN(std::logical_not<void>, !)
211+
__SYCL_VEC_UOP_MIXIN(UnaryPlus, +)
212+
213+
template <typename Op>
214+
struct OpMixin<Op, std::enable_if_t<std::is_same_v<Op, std::bit_not<void>>>> {
215+
template <typename T = typename from_incomplete<Self>::element_type>
216+
friend std::enable_if_t<!is_vgenfloat_v<T>, Self> operator~(const Self &v) {
217+
return apply<std::bit_not<void>>(v);
218+
}
219+
};
220+
221+
#undef __SYCL_VEC_UOP_MIXIN
222+
223+
template <typename... Op>
224+
struct __SYCL_EBO CombineImpl : public OpMixin<Op>... {};
225+
226+
struct Combined
227+
: public CombineImpl<std::negate<void>, std::logical_not<void>,
228+
std::bit_not<void>, UnaryPlus, IncDec> {};
166229
};
167230

168231
// Macros to populate binary operation on sycl::vec.
@@ -174,7 +237,7 @@ struct VecOperators {
174237
template <typename T = DataT> \
175238
friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \
176239
const vec_t & Rhs) { \
177-
return VecOperators::apply<FUNCTOR>(Lhs, Rhs); \
240+
return VecOperators<vec_t>::template apply<FUNCTOR>(Lhs, Rhs); \
178241
} \
179242
\
180243
template <typename T = DataT> \
@@ -200,65 +263,11 @@ struct VecOperators {
200263
return Lhs; \
201264
}
202265

203-
/****************************************************************
204-
* vec_arith_common
205-
* / | \
206-
* / | \
207-
* vec_arith<int> vec_arith<float> ... vec_arith<byte>
208-
* \ | /
209-
* \ | /
210-
* sycl::vec<T>
211-
*
212-
* vec_arith_common is the base class for vec_arith. It contains
213-
* the common math operators of sycl::vec for all types.
214-
* vec_arith is the derived class that contains the math operators
215-
* specialized for certain types. sycl::vec inherits from vec_arith.
216-
* *************************************************************/
217-
template <typename DataT, int NumElements> class vec_arith_common;
218-
template <typename DataT> struct vec_helper;
219-
220266
template <typename DataT, int NumElements>
221-
class vec_arith : public vec_arith_common<DataT, NumElements> {
267+
class vec_arith : public VecOperators<vec<DataT, NumElements>>::Combined {
222268
protected:
223269
using vec_t = vec<DataT, NumElements>;
224270
using ocl_t = detail::fixed_width_signed<sizeof(DataT)>;
225-
template <typename T> using vec_data = vec_helper<T>;
226-
227-
// operator!.
228-
friend vec<ocl_t, NumElements> operator!(const vec_t &Rhs) {
229-
return VecOperators::apply<std::logical_not<void>>(Rhs);
230-
}
231-
232-
// operator +.
233-
friend vec_t operator+(const vec_t &Lhs) {
234-
return VecOperators::apply<UnaryPlus>(Lhs);
235-
}
236-
237-
// operator -.
238-
friend vec_t operator-(const vec_t &Lhs) {
239-
return VecOperators::apply<std::negate<void>>(Lhs);
240-
}
241-
242-
// Unary operations on sycl::vec
243-
// FIXME: Don't allow Unary operators on vec<bool> after
244-
// https://github.com/KhronosGroup/SYCL-CTS/issues/896 gets fixed.
245-
#ifdef __SYCL_UOP
246-
#error "Undefine __SYCL_UOP macro"
247-
#endif
248-
#define __SYCL_UOP(UOP, OPASSIGN) \
249-
friend vec_t &operator UOP(vec_t & Rhs) { \
250-
Rhs OPASSIGN DataT{1}; \
251-
return Rhs; \
252-
} \
253-
friend vec_t operator UOP(vec_t &Lhs, int) { \
254-
vec_t Ret(Lhs); \
255-
Lhs OPASSIGN DataT{1}; \
256-
return Ret; \
257-
}
258-
259-
__SYCL_UOP(++, +=)
260-
__SYCL_UOP(--, -=)
261-
#undef __SYCL_UOP
262271

263272
// The logical operations on scalar types results in 0/1, while for vec<>,
264273
// logical operations should result in 0 and -1 (similar to OpenCL vectors).
@@ -272,7 +281,7 @@ class vec_arith : public vec_arith_common<DataT, NumElements> {
272281
template <typename T = DataT> \
273282
friend std::enable_if_t<(COND), vec<ocl_t, NumElements>> operator RELLOGOP( \
274283
const vec_t & Lhs, const vec_t & Rhs) { \
275-
return VecOperators::apply<FUNCTOR>(Lhs, Rhs); \
284+
return VecOperators<vec_t>::template apply<FUNCTOR>(Lhs, Rhs); \
276285
} \
277286
\
278287
template <typename T = DataT> \
@@ -325,13 +334,13 @@ class vec_arith : public vec_arith_common<DataT, NumElements> {
325334
#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
326335
template <int NumElements>
327336
class vec_arith<std::byte, NumElements>
328-
: public vec_arith_common<std::byte, NumElements> {
337+
: public VecOperators<vec<std::byte, NumElements>>::template OpMixin<
338+
std::bit_not<void>> {
329339
protected:
330340
// NumElements can never be zero. Still using the redundant check to avoid
331341
// incomplete type errors.
332342
using DataT = typename std::conditional_t<NumElements == 0, int, std::byte>;
333343
using vec_t = vec<DataT, NumElements>;
334-
template <typename T> using vec_data = vec_helper<T>;
335344

336345
// Special <<, >> operators for std::byte.
337346
// std::byte is not an arithmetic type and it only supports the following
@@ -376,25 +385,6 @@ class vec_arith<std::byte, NumElements>
376385
};
377386
#endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
378387

379-
template <typename DataT, int NumElements> class vec_arith_common {
380-
protected:
381-
using vec_t = vec<DataT, NumElements>;
382-
383-
static constexpr bool IsBfloat16 =
384-
std::is_same_v<DataT, sycl::ext::oneapi::bfloat16>;
385-
386-
// operator~() available only when: dataT != float && dataT != double
387-
// && dataT != half
388-
template <typename T = DataT>
389-
friend std::enable_if_t<!detail::is_vgenfloat_v<T>, vec_t>
390-
operator~(const vec_t &Rhs) {
391-
return VecOperators::apply<std::bit_not<void>>(Rhs);
392-
}
393-
394-
// friends
395-
template <typename T1, int T2> friend class __SYCL_EBO vec;
396-
};
397-
398388
#undef __SYCL_BINOP
399389

400390
} // namespace detail

sycl/include/sycl/vector.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,6 @@ class __SYCL_EBO vec
435435
template <typename T1, int T2> friend class __SYCL_EBO vec;
436436
// To allow arithmetic operators access private members of vec.
437437
template <typename T1, int T2> friend class detail::vec_arith;
438-
template <typename T1, int T2> friend class detail::vec_arith_common;
439438
};
440439
///////////////////////// class sycl::vec /////////////////////////
441440

sycl/test/check_device_code/vector/vector_bf16_builtins.cpp

+12-12
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
6969
}
7070

7171
// CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_(
72-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.6") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
72+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.14") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
7373
// CHECK-NEXT: entry:
7474
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16
7575
// CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
@@ -87,7 +87,7 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
8787
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I_I:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8888
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I_I]], align 8, !tbaa [[TBAA14]], !noalias [[META28]]
8989
// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META28]]
90-
// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META28]]
90+
// CHECK-NEXT: [[LOADVECN_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META28]]
9191
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META28]]
9292
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META28]]
9393
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31:![0-9]+]]
@@ -97,11 +97,11 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
9797
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I7_I:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9898
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I7_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 8, !tbaa [[TBAA14]], !noalias [[META31]]
9999
// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I5_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I6_I]]) #[[ATTR5]], !noalias [[META31]]
100-
// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META31]]
100+
// CHECK-NEXT: [[LOADVECN_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META31]]
101101
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31]]
102102
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META31]]
103-
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
104-
// CHECK-NEXT: [[EXTRACTVEC_I_I4_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I8_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
103+
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
104+
// CHECK-NEXT: [[EXTRACTVEC_I_I4_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I8_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
105105
// CHECK-NEXT: [[CALL2_I_I:%.*]] = call spir_func noundef <3 x float> @_Z16__spirv_ocl_fmaxDv3_fS_(<3 x float> noundef [[EXTRACTVEC_I_I_I_I]], <3 x float> noundef [[EXTRACTVEC_I_I4_I_I]]) #[[ATTR6]]
106106
// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]])
107107
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37:![0-9]+]]
@@ -111,19 +111,19 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
111111
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I17_I:%.*]] = shufflevector <3 x float> [[CALL2_I_I]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
112112
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I17_I]], ptr [[VEC_ADDR_I_I_I_I12_I]], align 16, !tbaa [[TBAA14]], !noalias [[META37]]
113113
// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I15_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I16_I]]) #[[ATTR5]], !noalias [[META37]]
114-
// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I18_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I13_I]], align 2, !noalias [[META37]]
114+
// CHECK-NEXT: [[LOADVECN_I_I_I_I_I18_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I13_I]], align 2, !noalias [[META37]]
115115
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37]]
116116
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I13_I]]), !noalias [[META37]]
117-
// CHECK-NEXT: [[EXTRACTVEC4_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
118-
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC4_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]]
117+
// CHECK-NEXT: [[EXTRACTVEC_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
118+
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]]
119119
// CHECK-NEXT: ret void
120120
//
121121
SYCL_EXTERNAL auto TestFMax(vec<bfloat16, 3> a, vec<bfloat16, 3> b) {
122122
return experimental::fmax(a, b);
123123
}
124124

125125
// CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE(
126-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.18") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.24") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
126+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.34") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.44") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
127127
// CHECK-NEXT: entry:
128128
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8
129129
// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4
@@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec<bfloat16, 4> a) {
149149
}
150150

151151
// CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
152-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.46") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.46") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
152+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.82") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.82") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
153153
// CHECK-NEXT: entry:
154154
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
155155
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
@@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec<bfloat16, 8> a) {
185185
}
186186

187187
// CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
188-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.46") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.46") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
188+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.82") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.82") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
189189
// CHECK-NEXT: entry:
190190
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
191191
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
@@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec<bfloat16, 8> a) {
221221
}
222222

223223
// CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_(
224-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.58") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
224+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.102") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
225225
// CHECK-NEXT: entry:
226226
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64
227227
// CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2

0 commit comments

Comments
 (0)