Skip to content

Commit

Permalink
Expanded simd reduction operation
Browse files Browse the repository at this point in the history
Only fallback impls
  • Loading branch information
ldh4 committed Dec 20, 2023
1 parent 317e84a commit 1e1202d
Show file tree
Hide file tree
Showing 4 changed files with 265 additions and 88 deletions.
83 changes: 83 additions & 0 deletions simd/src/Kokkos_SIMD_Common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,89 @@ template <typename T>
return Kokkos::round(x);
}

// fallback implementations of simd reductions:

/// Fallback reduction over a whole simd value.
///
/// Builds a where-expression from `where(true, x)` and hands it, together
/// with `binary_op`, to the where-expression overload of `reduce`.
///
/// @param x          simd value to reduce.
/// @param binary_op  combining operation; defaults to `std::plus<>`.
/// @return whatever the where-expression overload of `reduce` yields.
template <class T, class Abi, class BinaryOperation = std::plus<>>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
    const simd<T, Abi>& x, BinaryOperation binary_op = {}) {
  return reduce(where(true, x), binary_op);
}

/// Fallback masked reduction with an explicit identity element.
///
/// @param x                 simd value to reduce.
/// @param mask              lane selection mask.
/// @param identity_element  value returned when `none_of(mask)` holds.
/// @param binary_op         combining operation.
/// @return `identity_element` if no lane is selected; otherwise the result of
///         the where-expression overload of `reduce` applied to
///         `where(mask, x)`.
template <class T, class Abi, class BinaryOperation>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
    const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
    T identity_element, BinaryOperation binary_op) {
  return none_of(mask) ? identity_element
                       : reduce(where(mask, x), binary_op);
}

/// Fallback masked sum reduction.
///
/// Forwards to the generic masked `reduce` using `T(0)` as the identity
/// element, so an all-false mask yields zero.
///
/// @param x          simd value to reduce.
/// @param mask       lane selection mask.
/// @param binary_op  `std::plus<>` tag/operation (defaulted).
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
    const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
    std::plus<> binary_op = {}) noexcept {
  const T additive_identity = T(0);
  return reduce(x, mask, additive_identity, binary_op);
}

/// Fallback masked product reduction.
///
/// Forwards to the generic masked `reduce` with the multiplicative identity
/// `T(1)`. The identity is what the generic overload returns when
/// `none_of(mask)` holds, so it must be 1 (not 0) for an empty product.
///
/// @param x          simd value to reduce.
/// @param mask       lane selection mask.
/// @param binary_op  `std::multiplies<>` operation.
/// @return product of the selected lanes, or `T(1)` if no lane is selected.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
    const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
    std::multiplies<> binary_op) noexcept {
  return reduce(x, mask, T(1), binary_op);
}

/// Fallback masked bitwise-AND reduction.
///
/// Forwards to the generic masked `reduce` with the bit_and identity, i.e. a
/// value of type `T` with every bit set. The identity is what the generic
/// overload returns when `none_of(mask)` holds.
///
/// The identity is spelled as a `T` (not an `int` literal) so that `T` is
/// deduced consistently from both `x` and the identity argument of the
/// generic overload; the cast also undoes integral promotion of `~T(0)` for
/// types narrower than `int`.
///
/// @param x          simd value to reduce.
/// @param mask       lane selection mask.
/// @param binary_op  `std::bit_and<>` operation.
/// @return AND of the selected lanes, or all-ones if no lane is selected.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
    const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
    std::bit_and<> binary_op) noexcept {
  return reduce(x, mask, static_cast<T>(~T(0)), binary_op);
}

/// Fallback masked bitwise-OR reduction.
///
/// Forwards to the generic masked `reduce` with `T(0)` as the identity
/// element (returned by the generic overload when `none_of(mask)` holds).
///
/// The identity is written as `T(0)` rather than the `int` literal `0`: the
/// generic overload deduces `T` from both `x` and the identity argument, so
/// an `int` literal would produce conflicting deductions for any `T` other
/// than `int`.
///
/// @param x          simd value to reduce.
/// @param mask       lane selection mask.
/// @param binary_op  `std::bit_or<>` operation.
/// @return OR of the selected lanes, or zero if no lane is selected.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
    const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
    std::bit_or<> binary_op) noexcept {
  return reduce(x, mask, T(0), binary_op);
}

/// Fallback masked bitwise-XOR reduction.
///
/// Forwards to the generic masked `reduce` with `T(0)` as the identity
/// element (returned by the generic overload when `none_of(mask)` holds).
///
/// The identity is written as `T(0)` rather than the `int` literal `0`: the
/// generic overload deduces `T` from both `x` and the identity argument, so
/// an `int` literal would produce conflicting deductions for any `T` other
/// than `int`.
///
/// @param x          simd value to reduce.
/// @param mask       lane selection mask.
/// @param binary_op  `std::bit_xor<>` operation.
/// @return XOR of the selected lanes, or zero if no lane is selected.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce(
    const simd<T, Abi>& x, const typename simd<T, Abi>::mask_type& mask,
    std::bit_xor<> binary_op) noexcept {
  return reduce(x, mask, T(0), binary_op);
}

/// Fallback minimum reduction over a whole simd value.
///
/// Wraps `x` via `where(true, x)` and returns `hmin` of that
/// where-expression.
///
/// @param x  simd value to reduce.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_min(
    const simd<T, Abi>& x) noexcept {
  return hmin(where(true, x));
}

/// Fallback masked minimum reduction.
///
/// Returns `hmin` of the where-expression `where(mask, x)`.
///
/// @param x     simd value to reduce.
/// @param mask  lane selection mask.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_min(
    const simd<T, Abi>& x,
    const typename simd<T, Abi>::mask_type& mask) noexcept {
  return hmin(where(mask, x));
}

/// Fallback maximum reduction over a whole simd value.
///
/// Wraps `x` via `where(true, x)` and returns `hmax` of that
/// where-expression.
///
/// @param x  simd value to reduce.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_max(
    const simd<T, Abi>& x) noexcept {
  return hmax(where(true, x));
}

/// Fallback masked maximum reduction.
///
/// Returns `hmax` of the where-expression `where(mask, x)`.
///
/// @param x     simd value to reduce.
/// @param mask  lane selection mask.
template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_max(
    const simd<T, Abi>& x,
    const typename simd<T, Abi>::mask_type& mask) noexcept {
  return hmax(where(mask, x));
}

} // namespace Experimental
} // namespace Kokkos

Expand Down
19 changes: 13 additions & 6 deletions simd/src/Kokkos_SIMD_Common_Math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,26 @@ hmax(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
return result;
}

template <class T, class Abi>
template <class T, class Abi, class BinaryOperation = std::plus<>>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
reduce(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x, T,
std::plus<>) {
reduce(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x,
BinaryOperation op = {}) {
auto const& v = x.impl_get_value();
auto const& m = x.impl_get_mask();
auto result = Kokkos::reduction_identity<T>::sum();
for (std::size_t i = 0; i < v.size(); ++i) {
if (m[i]) result += v[i];
auto result = v[0];
for (std::size_t i = 1; i < v.size(); ++i) {
if (m[i]) result = op(result, v[i]);
}
return result;
}

/// Three-argument sum reduction over a where-expression.
///
/// Both the identity value and the `std::plus<>` argument are accepted but
/// not used; the call is forwarded to the two-argument where-expression
/// `reduce` with a freshly constructed `std::plus<>`.
///
/// @param x  where-expression to reduce.
template <class T, class Abi>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
reduce(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x,
       T /*identity (unused)*/, std::plus<> /*op (unused)*/) {
  return reduce(x, std::plus<>{});
}

} // namespace Experimental

template <class T, class Abi>
Expand Down
221 changes: 147 additions & 74 deletions simd/unit_tests/include/SIMDTesting_Ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,35 +331,38 @@ class log_op {

class hmin {
public:
template <typename T>
auto on_host(T const& a) const {
return Kokkos::Experimental::hmin(a);
}
template <typename T>
auto on_host_serial(T const& a) const {
using DataType = typename T::value_type::value_type;

auto const& v = a.impl_get_value();
auto const& m = a.impl_get_mask();
auto result = Kokkos::reduction_identity<DataType>::min();
for (std::size_t i = 0; i < v.size(); ++i) {
template <typename T, typename MaskType = bool>
KOKKOS_INLINE_FUNCTION auto on_host(T const& a, MaskType mask = true) const {
auto w = Kokkos::Experimental::where(mask, a);
return Kokkos::Experimental::hmin(w);
}
template <typename T, typename MaskType = bool>
KOKKOS_INLINE_FUNCTION auto on_host_serial(T const& a,
MaskType mask = true) const {
auto w = Kokkos::Experimental::where(mask, a);
auto const& v = w.impl_get_value();
auto const& m = w.impl_get_mask();
auto result = v[0];
for (std::size_t i = 1; i < v.size(); ++i) {
if (m[i]) result = Kokkos::min(result, v[i]);
}
return result;
}

template <typename T>
KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const {
return Kokkos::Experimental::hmin(a);
}
template <typename T>
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const {
using DataType = typename T::value_type::value_type;

auto const& v = a.impl_get_value();
auto const& m = a.impl_get_mask();
auto result = Kokkos::reduction_identity<DataType>::min();
for (std::size_t i = 0; i < v.size(); ++i) {
template <typename T, typename MaskType = bool>
KOKKOS_INLINE_FUNCTION auto on_device(T const& a,
MaskType mask = true) const {
auto w = Kokkos::Experimental::where(mask, a);
return Kokkos::Experimental::hmin(w);
}
template <typename T, typename MaskType = bool>
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a,
MaskType mask = true) const {
auto w = Kokkos::Experimental::where(mask, a);
auto const& v = w.impl_get_value();
auto const& m = w.impl_get_mask();
auto result = v[0];
for (std::size_t i = 1; i < v.size(); ++i) {
if (m[i]) result = Kokkos::min(result, v[i]);
}
return result;
Expand All @@ -368,77 +371,147 @@ class hmin {

class hmax {
public:
template <typename T>
auto on_host(T const& a) const {
return Kokkos::Experimental::hmax(a);
template <typename T, typename MaskType = bool>
KOKKOS_INLINE_FUNCTION auto on_host(T const& a, MaskType mask = true) const {
auto w = Kokkos::Experimental::where(mask, a);
return Kokkos::Experimental::hmax(w);
}
template <typename T, typename MaskType = bool>
KOKKOS_INLINE_FUNCTION auto on_host_serial(T const& a,
MaskType mask = true) const {
auto w = Kokkos::Experimental::where(mask, a);
auto const& v = w.impl_get_value();
auto const& m = w.impl_get_mask();
auto result = v[0];
for (std::size_t i = 1; i < v.size(); ++i) {
if (m[i]) result = Kokkos::max(result, v[i]);
}
return result;
}
template <typename T>
auto on_host_serial(T const& a) const {
using DataType = typename T::value_type::value_type;

auto const& v = a.impl_get_value();
auto const& m = a.impl_get_mask();
auto result = Kokkos::reduction_identity<DataType>::max();
for (std::size_t i = 0; i < v.size(); ++i) {
template <typename T, typename MaskType = bool>
KOKKOS_INLINE_FUNCTION auto on_device(T const& a,
MaskType mask = true) const {
auto w = Kokkos::Experimental::where(mask, a);
return Kokkos::Experimental::hmax(w);
}
template <typename T, typename MaskType = bool>
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a,
MaskType mask = true) const {
auto w = Kokkos::Experimental::where(mask, a);
auto const& v = w.impl_get_value();
auto const& m = w.impl_get_mask();
auto result = v[0];
for (std::size_t i = 1; i < v.size(); ++i) {
if (m[i]) result = Kokkos::max(result, v[i]);
}
return result;
}
};

template <typename T>
KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const {
return Kokkos::Experimental::hmax(a);
template <typename BinaryOperation = std::plus<>>
class reduce_where_expr {
public:
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_host(T const& a, MaskType mask) const {
auto w = Kokkos::Experimental::where(mask, a);
return Kokkos::Experimental::reduce(w, BinaryOperation());
}
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_host_serial(T const& a, MaskType mask) const {
auto w = Kokkos::Experimental::where(mask, a);
auto const& v = w.impl_get_value();
auto const& m = w.impl_get_mask();
auto result = v[0];
for (std::size_t i = 1; i < v.size(); ++i) {
if (m[i]) result = BinaryOperation()(result, v[i]);
}
return result;
}
template <typename T>
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const {
using DataType = typename T::value_type::value_type;

auto const& v = a.impl_get_value();
auto const& m = a.impl_get_mask();
auto result = Kokkos::reduction_identity<DataType>::max();
for (std::size_t i = 0; i < v.size(); ++i) {
if (m[i]) result = Kokkos::max(result, v[i]);
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_device(T const& a, MaskType mask) const {
auto w = Kokkos::Experimental::where(mask, a);
return Kokkos::Experimental::reduce(w, BinaryOperation());
}
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a,
MaskType mask) const {
auto w = Kokkos::Experimental::where(mask, a);
auto const& v = w.impl_get_value();
auto const& m = w.impl_get_mask();
auto result = v[0];
for (std::size_t i = 1; i < v.size(); ++i) {
if (m[i]) result = BinaryOperation()(result, v[i]);
}
return result;
}
};

class reduce {
class reduce_min {
public:
template <typename T>
auto on_host(T const& a) const {
using DataType = typename T::value_type::value_type;
return Kokkos::Experimental::reduce(a, DataType(0), std::plus<>());
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_host(T const& a, MaskType mask) const {
return Kokkos::Experimental::reduce_min(a, mask);
}
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_host_serial(T const& a, MaskType mask) const {
return hmin().on_host_serial(a, mask);
}
template <typename T>
auto on_host_serial(T const& a) const {
using DataType = typename T::value_type::value_type;

auto const& v = a.impl_get_value();
auto const& m = a.impl_get_mask();
auto result = Kokkos::reduction_identity<DataType>::sum();
for (std::size_t i = 0; i < v.size(); ++i) {
if (m[i]) result += v[i];
}
return result;
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_device(T const& a, MaskType mask) const {
return Kokkos::Experimental::reduce_min(a, mask);
}
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a,
MaskType mask) const {
return hmin().on_device_serial(a, mask);
}
};

template <typename T>
KOKKOS_INLINE_FUNCTION auto on_device(T const& a) const {
using DataType = typename T::value_type::value_type;
return Kokkos::Experimental::reduce(a, DataType(0), std::plus<>());
class reduce_max {
public:
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_host(T const& a, MaskType mask) const {
return Kokkos::Experimental::reduce_max(a, mask);
}
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_host_serial(T const& a, MaskType mask) const {
return hmax().on_host_serial(a, mask);
}
template <typename T>
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a) const {
using DataType = typename T::value_type::value_type;

auto const& v = a.impl_get_value();
auto const& m = a.impl_get_mask();
auto result = Kokkos::reduction_identity<DataType>::sum();
for (std::size_t i = 0; i < v.size(); ++i) {
if (m[i]) result += v[i];
}
return result;
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_device(T const& a, MaskType mask) const {
return Kokkos::Experimental::reduce_max(a, mask);
}
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a,
MaskType mask) const {
return hmax().on_device_serial(a, mask);
}
};

template <typename BinaryOperation = std::plus<>>
class reduce {
public:
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_host(T const& a, MaskType mask) const {
return Kokkos::Experimental::reduce(a, mask, BinaryOperation());
}
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_host_serial(T const& a, MaskType mask) const {
return reduce_where_expr<BinaryOperation>().on_host_serial(a, mask);
}

template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_device(T const& a, MaskType mask) const {
return Kokkos::Experimental::reduce(a, mask, BinaryOperation());
}
template <typename T, typename MaskType>
KOKKOS_INLINE_FUNCTION auto on_device_serial(T const& a,
MaskType mask) const {
return reduce_where_expr<BinaryOperation>().on_device_serial(a, mask);
}
};

Expand Down
Loading

0 comments on commit 1e1202d

Please sign in to comment.