@@ -121,7 +121,8 @@ __ESIMD_API SurfaceIndex get_surface_index(AccessorTy acc) {
121
121
// / any element's memory location can be disabled via the input vector of
122
122
// / predicates (mask).
123
123
// / @tparam Tx Element type; its size must be 4 bytes or less.
124
- // / @tparam N Number of elements to read; can be \c 8, \c 16 or \c 32.
124
+ // / @tparam N Number of elements to read; can be \c 1, \c 2, \c 4, \c 8, \c 16
125
+ // / or \c 32.
125
126
// / @param p The base address.
126
127
// / @param offsets The vector of 32-bit offsets in bytes. For each lane \c i,
127
128
// / ((byte*)p + offsets[i]) must be element size aligned.
@@ -130,7 +131,7 @@ __ESIMD_API SurfaceIndex get_surface_index(AccessorTy acc) {
130
131
// / undefined.
131
132
// /
132
133
template <typename Tx, int N, class T = detail::__raw_t <Tx>>
133
- __ESIMD_API std::enable_if_t <N == 8 || N == 16 || N == 32 , simd<Tx, N>>
134
+ __ESIMD_API std::enable_if_t <detail::isPowerOf2(N, 32 ) , simd<Tx, N>>
134
135
gather (const Tx *p, simd<uint32_t , N> offsets, simd_mask<N> mask = 1 ) {
135
136
simd<uint64_t , N> offsets_i = convert<uint64_t >(offsets);
136
137
simd<uint64_t , N> addrs (reinterpret_cast <uint64_t >(p));
@@ -154,15 +155,16 @@ gather(const Tx *p, simd<uint32_t, N> offsets, simd_mask<N> mask = 1) {
154
155
// / value of the corresponding element in the input offset vector. Access to
155
156
// / any element's memory location can be disabled via the input mask.
156
157
// / @tparam Tx Element type; its size must be 4 bytes or less.
157
- // / @tparam N Number of elements to write; can be \c 8, \c 16 or \c 32.
158
+ // / @tparam N Number of elements to write; can be \c 1, \c 2, \c 4, \c 8, \c 16
159
+ // / or \c 32.
158
160
// / @param p The base address.
159
161
// / @param offsets A vector of 32-bit offsets in bytes. For each lane \c i,
160
162
// / ((byte*)p + offsets[i]) must be element size aligned.
161
163
// / @param vals The vector to scatter.
162
164
// / @param mask The access mask, defaults to all 1s.
163
165
// /
164
166
template <typename Tx, int N, class T = detail::__raw_t <Tx>>
165
- __ESIMD_API std::enable_if_t <N == 8 || N == 16 || N == 32 >
167
+ __ESIMD_API std::enable_if_t <detail::isPowerOf2(N, 32 ) >
166
168
scatter (Tx *p, simd<uint32_t , N> offsets, simd<Tx, N> vals,
167
169
simd_mask<N> mask = 1 ) {
168
170
simd<uint64_t , N> offsets_i = convert<uint64_t >(offsets);
0 commit comments