Skip to content

Commit

Permalink
Refactored simd reductions; removed signed shift left tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ldh4 committed Sep 17, 2024
1 parent 63d191e commit 4f3871c
Show file tree
Hide file tree
Showing 5 changed files with 222 additions and 75 deletions.
121 changes: 73 additions & 48 deletions simd/src/Kokkos_SIMD_Common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ namespace Kokkos {

namespace Experimental {

// Forward declaration of the scalar ABI tag, so that `simd_abi::scalar` can be
// named by the enable_if constraints that appear later in this listing.
namespace simd_abi {
class scalar;
}

// Forward declaration of the simd class template (element type T, ABI tag
// Abi); the operators in this listing are written in terms of it.
template <class T, class Abi>
class simd;

Expand Down Expand Up @@ -134,7 +138,7 @@ template <class T>

template <class T, class U, class Abi,
std::enable_if_t<std::is_arithmetic_v<U>, bool> = false>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator+(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION auto operator+(
Experimental::simd<T, Abi> const& lhs, U rhs) {
using result_member = decltype(lhs[0] + rhs);
return Experimental::simd<result_member, Abi>(lhs) +
Expand All @@ -143,16 +147,18 @@ template <class T, class U, class Abi,

// Adds an arithmetic scalar (left operand) to a simd value (right operand).
// Only participates in overload resolution when U is an arithmetic type.
// The result's element type is the type of `lhs + rhs[0]`; both operands are
// used to construct a simd<result_member, Abi> before the simd addition.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together - confirm which one is live.
template <class T, class U, class Abi,
std::enable_if_t<std::is_arithmetic_v<U>, bool> = false>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator+(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION auto operator+(
U lhs, Experimental::simd<T, Abi> const& rhs) {
using result_member = decltype(lhs + rhs[0]);
return Experimental::simd<result_member, Abi>(lhs) +
Experimental::simd<result_member, Abi>(rhs);
}

// Compound addition for simd: assigns `lhs + rhs` back into lhs and returns
// lhs by reference. rhs is forwarded unchanged into operator+.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class U, class Abi>
KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi>& operator+=(simd<T, Abi>& lhs,
U&& rhs) {
template <
class T, class U, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi>& operator+=(
simd<T, Abi>& lhs, U&& rhs) {
lhs = lhs + std::forward<U>(rhs);
return lhs;
}
Expand All @@ -166,7 +172,7 @@ KOKKOS_FORCEINLINE_FUNCTION where_expression<M, T>& operator+=(

template <class T, class U, class Abi,
std::enable_if_t<std::is_arithmetic_v<U>, bool> = false>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator-(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION auto operator-(
Experimental::simd<T, Abi> const& lhs, U rhs) {
using result_member = decltype(lhs[0] - rhs);
return Experimental::simd<result_member, Abi>(lhs) -
Expand All @@ -175,30 +181,32 @@ template <class T, class U, class Abi,

// Subtracts a simd value (right operand) from an arithmetic scalar (left
// operand). Only participates in overload resolution when U is arithmetic.
// The result's element type is the type of `lhs - rhs[0]`; both operands are
// used to construct a simd<result_member, Abi> before the simd subtraction.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together - confirm which one is live.
template <class T, class U, class Abi,
std::enable_if_t<std::is_arithmetic_v<U>, bool> = false>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator-(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION auto operator-(
U lhs, Experimental::simd<T, Abi> const& rhs) {
using result_member = decltype(lhs - rhs[0]);
return Experimental::simd<result_member, Abi>(lhs) -
Experimental::simd<result_member, Abi>(rhs);
}

// Compound subtraction for simd: assigns `lhs - rhs` back into lhs and
// returns lhs by reference. rhs is forwarded unchanged into operator-.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class U, class Abi>
KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi>& operator-=(simd<T, Abi>& lhs,
U&& rhs) {
template <
class T, class U, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi>& operator-=(
simd<T, Abi>& lhs, U&& rhs) {
lhs = lhs - std::forward<U>(rhs);
return lhs;
}

// Compound subtraction for a where_expression: computes
// `lhs.value() - rhs` and assigns the result to the where_expression, which is
// then returned by reference.
// NOTE(review): presumably the assignment only affects the lanes selected by
// the expression's mask - where_expression is not defined in this view; confirm.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together.
template <class M, class T, class U>
KOKKOS_FORCEINLINE_FUNCTION where_expression<M, T>& operator-=(
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION where_expression<M, T>& operator-=(
where_expression<M, T>& lhs, U&& rhs) {
lhs = lhs.value() - std::forward<U>(rhs);
return lhs;
}

template <class T, class U, class Abi,
std::enable_if_t<std::is_arithmetic_v<U>, bool> = false>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator*(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION auto operator*(
Experimental::simd<T, Abi> const& lhs, U rhs) {
using result_member = decltype(lhs[0] * rhs);
return Experimental::simd<result_member, Abi>(lhs) *
Expand All @@ -207,30 +215,32 @@ template <class T, class U, class Abi,

// Multiplies an arithmetic scalar (left operand) by a simd value (right
// operand). Only participates in overload resolution when U is arithmetic.
// The result's element type is the type of `lhs * rhs[0]`; both operands are
// used to construct a simd<result_member, Abi> before the simd multiplication.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together - confirm which one is live.
template <class T, class U, class Abi,
std::enable_if_t<std::is_arithmetic_v<U>, bool> = false>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator*(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION auto operator*(
U lhs, Experimental::simd<T, Abi> const& rhs) {
using result_member = decltype(lhs * rhs[0]);
return Experimental::simd<result_member, Abi>(lhs) *
Experimental::simd<result_member, Abi>(rhs);
}

// Compound multiplication for simd: assigns `lhs * rhs` back into lhs and
// returns lhs by reference. rhs is forwarded unchanged into operator*.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class U, class Abi>
KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi>& operator*=(simd<T, Abi>& lhs,
U&& rhs) {
template <
class T, class U, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi>& operator*=(
simd<T, Abi>& lhs, U&& rhs) {
lhs = lhs * std::forward<U>(rhs);
return lhs;
}

// Compound multiplication for a where_expression: computes
// `lhs.value() * rhs` and assigns the result to the where_expression, which is
// then returned by reference.
// NOTE(review): presumably the assignment only affects the lanes selected by
// the expression's mask - where_expression is not defined in this view; confirm.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together.
template <class M, class T, class U>
KOKKOS_FORCEINLINE_FUNCTION where_expression<M, T>& operator*=(
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION where_expression<M, T>& operator*=(
where_expression<M, T>& lhs, U&& rhs) {
lhs = lhs.value() * std::forward<U>(rhs);
return lhs;
}

template <class T, class Abi,
std::enable_if_t<std::is_integral_v<T>, bool> = false>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator/(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION auto operator/(
Experimental::simd<T, Abi> const& lhs,
Experimental::simd<T, Abi> const& rhs) {
return Experimental::simd<T, Abi>(
Expand All @@ -239,7 +249,7 @@ template <class T, class Abi,

template <class T, class U, class Abi,
std::enable_if_t<std::is_arithmetic_v<U>, bool> = false>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator/(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION auto operator/(
Experimental::simd<T, Abi> const& lhs, U rhs) {
using result_member = decltype(lhs[0] / rhs);
return Experimental::simd<result_member, Abi>(lhs) /
Expand All @@ -248,37 +258,43 @@ template <class T, class U, class Abi,

// Divides an arithmetic scalar (left operand) by a simd value (right
// operand). Only participates in overload resolution when U is arithmetic.
// The result's element type is the type of `lhs / rhs[0]`; both operands are
// used to construct a simd<result_member, Abi> before the simd division.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together - confirm which one is live.
template <class T, class U, class Abi,
std::enable_if_t<std::is_arithmetic_v<U>, bool> = false>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator/(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION auto operator/(
U lhs, Experimental::simd<T, Abi> const& rhs) {
using result_member = decltype(lhs / rhs[0]);
return Experimental::simd<result_member, Abi>(lhs) /
Experimental::simd<result_member, Abi>(rhs);
}

// Compound division for simd: assigns `lhs / rhs` back into lhs and returns
// lhs by reference. rhs is forwarded unchanged into operator/.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class U, class Abi>
KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi>& operator/=(simd<T, Abi>& lhs,
U&& rhs) {
template <
class T, class U, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi>& operator/=(
simd<T, Abi>& lhs, U&& rhs) {
lhs = lhs / std::forward<U>(rhs);
return lhs;
}

// Compound division for a where_expression: computes
// `lhs.value() / rhs` and assigns the result to the where_expression, which is
// then returned by reference.
// NOTE(review): presumably the assignment only affects the lanes selected by
// the expression's mask - where_expression is not defined in this view; confirm.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together.
template <class M, class T, class U>
KOKKOS_FORCEINLINE_FUNCTION where_expression<M, T>& operator/=(
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION where_expression<M, T>& operator/=(
where_expression<M, T>& lhs, U&& rhs) {
lhs = lhs.value() / std::forward<U>(rhs);
return lhs;
}

// Compound right shift for simd: assigns `lhs >> rhs` back into lhs and
// returns lhs by reference. rhs is forwarded unchanged into operator>>.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class U, class Abi>
KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi>& operator>>=(simd<T, Abi>& lhs,
U&& rhs) {
template <
class T, class U, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi>& operator>>=(
simd<T, Abi>& lhs, U&& rhs) {
lhs = lhs >> std::forward<U>(rhs);
return lhs;
}

// Compound left shift for simd: assigns `lhs << rhs` back into lhs and
// returns lhs by reference. rhs is forwarded unchanged into operator<<.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class U, class Abi>
KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi>& operator<<=(simd<T, Abi>& lhs,
U&& rhs) {
template <
class T, class U, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi>& operator<<=(
simd<T, Abi>& lhs, U&& rhs) {
lhs = lhs << std::forward<U>(rhs);
return lhs;
}
Expand Down Expand Up @@ -332,83 +348,92 @@ template <typename T>
return Kokkos::round(x);
}

// fallback implementations of simd reductions:

// common implementations of host only simd reductions:
// Unmasked reduction of `x` with `binary_op` (std::plus<> when none is given).
// Wraps x in `where(true, x)` and forwards to the reduce overload that takes
// that where-result.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together - confirm which one is live.
template <class T, class Abi, class BinaryOperation = std::plus<>>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce(
const simd<T, Abi>& x, BinaryOperation binary_op = {}) {
auto v = where(true, x);
return reduce(v, binary_op);
}

// Masked reduction of `x` with an explicit identity element.
// Returns `identity_element` when none_of(mask) holds; otherwise builds
// `where(mask, x)` and forwards it, together with `binary_op`, to the reduce
// overload that takes that where-result.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together - confirm which one is live.
template <class T, class Abi, class BinaryOperation>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce(
const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
T identity_element, BinaryOperation binary_op) {
// Nothing selected: the caller-supplied identity is the result.
if (none_of(mask)) return identity_element;
auto v = where(mask, x);
return reduce(v, binary_op);
}

// Masked sum reduction of `x` (std::plus<> is also the default operation).
// Delegates to the identity-taking reduce overload with T(0) as the identity.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
template <
class T, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce(
const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
std::plus<> binary_op = {}) noexcept {
return reduce(x, mask, T(0), binary_op);
}

// Masked product reduction of `x`.
// Delegates to the identity-taking reduce overload, which returns the identity
// unchanged when none_of(mask) holds.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
template <
class T, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce(
const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
std::multiplies<> binary_op) noexcept {
// The identity of multiplication is 1. Passing 0 (the additive identity)
// made an empty mask yield 0 instead of the neutral product.
return reduce(x, mask, T(1), binary_op);
}

// Masked bitwise-AND reduction of `x`.
// Delegates to the identity-taking reduce overload, which returns the identity
// unchanged when none_of(mask) holds.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
template <
class T, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce(
const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
std::bit_and<> binary_op) noexcept {
// The identity of bitwise AND is "all bits set", not 0. The outer T(...)
// undoes integer promotion for narrow T, and keeps the argument of type T so
// it agrees with the T deduced from `x` (a bare `0` is an int).
return reduce(x, mask, T(~T(0)), binary_op);
}

// Masked bitwise-OR reduction of `x`.
// Delegates to the identity-taking reduce overload with 0 (no bits set) as
// the identity; that overload returns it unchanged when none_of(mask) holds.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
template <
class T, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce(
const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
std::bit_or<> binary_op) noexcept {
// Spell the identity as T(0), matching the std::plus<> overload: a bare `0`
// is an int and conflicts with the T deduced from `x` whenever T is not int.
return reduce(x, mask, T(0), binary_op);
}

// Masked bitwise-XOR reduction of `x`.
// Delegates to the identity-taking reduce overload with 0 (no bits set) as
// the identity; that overload returns it unchanged when none_of(mask) holds.
// NOTE(review): two headers are present - an unconstrained one and one that is
// disabled when Abi is simd_abi::scalar. This looks like the pre- and
// post-commit versions shown together - confirm which one is live.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
template <
class T, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce(
const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
std::bit_xor<> binary_op) noexcept {
// Spell the identity as T(0), matching the std::plus<> overload: a bare `0`
// is an int and conflicts with the T deduced from `x` whenever T is not int.
return reduce(x, mask, T(0), binary_op);
}

// Unmasked minimum reduction of `x`.
// Wraps x in `where(true, x)` and forwards to the reduce_min overload that
// takes that where-result.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together - confirm which one is live.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_min(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce_min(
const simd<T, Abi>& x) noexcept {
auto v = where(true, x);
return reduce_min(v);
}

// Masked minimum reduction of `x`.
// Builds `where(mask, x)` and forwards it to the reduce_min overload that
// takes that where-result. Unlike the masked reduce overloads, there is no
// none_of(mask) early return here.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together - confirm which one is live.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_min(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce_min(
const simd<T, Abi>& x,
const typename simd<T, Abi>::mask_type& mask) noexcept {
auto v = where(mask, x);
return reduce_min(v);
}

// Unmasked maximum reduction of `x`.
// Wraps x in `where(true, x)` and forwards to the reduce_max overload that
// takes that where-result.
// NOTE(review): two adjacent signature lines are present that differ only in
// the inlining macro; this looks like the removed and the added line of the
// commit shown together - confirm which one is live.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_max(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce_max(
const simd<T, Abi>& x) noexcept {
auto v = where(true, x);
return reduce_max(v);
}

template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_max(
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr T reduce_max(
const simd<T, Abi>& x,
const typename simd<T, Abi>::mask_type& mask) noexcept {
auto v = where(mask, x);
Expand Down
20 changes: 16 additions & 4 deletions simd/src/Kokkos_SIMD_Common_Math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ namespace Kokkos {

namespace Experimental {

// Forward declaration of the scalar ABI tag, so that `simd_abi::scalar` can be
// named by the enable_if constraints on the reductions later in this listing.
namespace simd_abi {
class scalar;
}

// Forward declaration of the simd class template (element type T, ABI tag
// Abi); the reductions in this listing are written in terms of it.
template <class T, class Abi>
class simd;

Expand Down Expand Up @@ -58,7 +62,9 @@ hmax(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
}
#endif

template <typename T, typename Abi>
template <
typename T, typename Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
reduce_min(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
auto const& v = x.impl_get_value();
Expand All @@ -70,7 +76,9 @@ reduce_min(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
return result;
}

template <class T, class Abi>
template <
class T, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
reduce_max(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
auto const& v = x.impl_get_value();
Expand All @@ -82,7 +90,9 @@ reduce_max(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
return result;
}

template <class T, class Abi, class BinaryOperation = std::plus<>>
template <
class T, class Abi, class BinaryOperation = std::plus<>,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
reduce(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x,
BinaryOperation op = {}) {
Expand All @@ -95,7 +105,9 @@ reduce(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x,
return result;
}

template <class T, class Abi>
template <
class T, class Abi,
std::enable_if_t<!std::is_same_v<Abi, simd_abi::scalar>, bool> = false>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
reduce(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x, T,
std::plus<>) {
Expand Down
Loading

0 comments on commit 4f3871c

Please sign in to comment.