Skip to content

Commit

Permalink
Rebased and added reduce functions to newly added simd types
Browse files Browse the repository at this point in the history
  • Loading branch information
ldh4 committed Sep 17, 2024
1 parent 293f6f0 commit 959bf66
Show file tree
Hide file tree
Showing 8 changed files with 471 additions and 107 deletions.
10 changes: 10 additions & 0 deletions simd/src/Kokkos_SIMD_AVX2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1533,6 +1533,11 @@ class simd<std::int32_t, simd_abi::avx2_fixed_size<4>> {
gen(std::integral_constant<std::size_t, 2>()),
gen(std::integral_constant<std::size_t, 3>()))) {
}
template <typename FlagType>
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd(
value_type const* ptr, FlagType flag) {
copy_from(ptr, flag);
}
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd(
__m128i const& value_in)
: m_value(value_in) {}
Expand Down Expand Up @@ -1736,6 +1741,11 @@ class simd<std::int32_t, simd_abi::avx2_fixed_size<8>> {
gen(std::integral_constant<std::size_t, 5>()),
gen(std::integral_constant<std::size_t, 6>()),
gen(std::integral_constant<std::size_t, 7>()))) {}
template <typename FlagType>
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd(
value_type const* ptr, FlagType flag) {
copy_from(ptr, flag);
}
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd(
__m256i const& value_in)
: m_value(value_in) {}
Expand Down
215 changes: 203 additions & 12 deletions simd/src/Kokkos_SIMD_AVX512.hpp

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions simd/src/Kokkos_SIMD_Common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,30 +389,30 @@ template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_min(
const simd<T, Abi>& x) noexcept {
auto v = where(true, x);
return hmin(v);
return reduce_min(v);
}

template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_min(
const simd<T, Abi>& x,
const typename simd<T, Abi>::mask_type& mask) noexcept {
auto v = where(mask, x);
return hmin(v);
return reduce_min(v);
}

template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_max(
const simd<T, Abi>& x) noexcept {
auto v = where(true, x);
return hmax(v);
return reduce_max(v);
}

template <class T, class Abi>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr T reduce_max(
const simd<T, Abi>& x,
const typename simd<T, Abi>::mask_type& mask) noexcept {
auto v = where(mask, x);
return hmax(v);
return reduce_max(v);
}

} // namespace Experimental
Expand Down
28 changes: 26 additions & 2 deletions simd/src/Kokkos_SIMD_Common_Math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ template <class M, class T>
class const_where_expression;

template <typename T, typename Abi>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
[[nodiscard]] KOKKOS_DEPRECATED KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
hmin(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
auto const& v = x.impl_get_value();
auto const& m = x.impl_get_mask();
Expand All @@ -45,7 +45,7 @@ hmin(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
}

template <class T, class Abi>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
[[nodiscard]] KOKKOS_DEPRECATED KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
hmax(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
auto const& v = x.impl_get_value();
auto const& m = x.impl_get_mask();
Expand All @@ -56,6 +56,30 @@ hmax(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
return result;
}

template <typename T, typename Abi>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
reduce_min(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
auto const& v = x.impl_get_value();
auto const& m = x.impl_get_mask();
auto result = Kokkos::reduction_identity<T>::min();
for (std::size_t i = 0; i < v.size(); ++i) {
if (m[i]) result = Kokkos::min(result, v[i]);
}
return result;
}

template <class T, class Abi>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
reduce_max(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x) {
auto const& v = x.impl_get_value();
auto const& m = x.impl_get_mask();
auto result = Kokkos::reduction_identity<T>::max();
for (std::size_t i = 0; i < v.size(); ++i) {
if (m[i]) result = Kokkos::max(result, v[i]);
}
return result;
}

template <class T, class Abi, class BinaryOperation = std::plus<>>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION T
reduce(const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> const& x,
Expand Down
177 changes: 177 additions & 0 deletions simd/src/Kokkos_SIMD_NEON.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,11 @@ class simd<float, simd_abi::neon_fixed_size<4>> {
m_value = vsetq_lane_f32(gen(std::integral_constant<std::size_t, 3>()),
m_value, 3);
}
template <typename FlagType>
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd(
value_type const* ptr, FlagType flag) {
copy_from(ptr, flag);
}
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd(
float32x4_t const& value_in)
: m_value(value_in) {}
Expand Down Expand Up @@ -1472,6 +1477,11 @@ class simd<std::int32_t, simd_abi::neon_fixed_size<4>> {
m_value = vsetq_lane_s32(gen(std::integral_constant<std::size_t, 3>()),
m_value, 3);
}
template <typename FlagType>
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd(
value_type const* ptr, FlagType flag) {
copy_from(ptr, flag);
}
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd(
int32x4_t const& value_in)
: m_value(value_in) {}
Expand Down Expand Up @@ -2712,6 +2722,173 @@ class where_expression<simd_mask<std::uint64_t, simd_abi::neon_fixed_size<2>>,
}
};

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::int32_t reduce_min(
const_where_expression<
simd_mask<std::int32_t, simd_abi::neon_fixed_size<2>>,
simd<std::int32_t, simd_abi::neon_fixed_size<2>>> const& x) {
return vminv_s32(static_cast<int32x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::int32_t reduce_max(
const_where_expression<
simd_mask<std::int32_t, simd_abi::neon_fixed_size<2>>,
simd<std::int32_t, simd_abi::neon_fixed_size<2>>> const& x) {
return vmaxv_s32(static_cast<int32x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::int32_t reduce(
const_where_expression<
simd_mask<std::int32_t, simd_abi::neon_fixed_size<2>>,
simd<std::int32_t, simd_abi::neon_fixed_size<2>>> const& x,
std::int32_t, std::plus<>) {
return vaddv_s32(static_cast<int32x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::int32_t reduce_min(
const_where_expression<
simd_mask<std::int32_t, simd_abi::neon_fixed_size<4>>,
simd<std::int32_t, simd_abi::neon_fixed_size<4>>> const& x) {
return vminvq_s32(static_cast<int32x4_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::int32_t reduce_max(
const_where_expression<
simd_mask<std::int32_t, simd_abi::neon_fixed_size<4>>,
simd<std::int32_t, simd_abi::neon_fixed_size<4>>> const& x) {
return vmaxvq_s32(static_cast<int32x4_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::int32_t reduce(
const_where_expression<
simd_mask<std::int32_t, simd_abi::neon_fixed_size<4>>,
simd<std::int32_t, simd_abi::neon_fixed_size<4>>> const& x,
std::int32_t, std::plus<>) {
return vaddvq_s32(static_cast<int32x4_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::uint32_t reduce_min(
const_where_expression<
simd_mask<std::uint32_t, simd_abi::neon_fixed_size<2>>,
simd<std::uint32_t, simd_abi::neon_fixed_size<2>>> const& x) {
return vminv_u32(static_cast<uint32x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::uint32_t reduce_max(
const_where_expression<
simd_mask<std::uint32_t, simd_abi::neon_fixed_size<2>>,
simd<std::uint32_t, simd_abi::neon_fixed_size<2>>> const& x) {
return vmaxv_u32(static_cast<uint32x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::uint32_t reduce(
const_where_expression<
simd_mask<std::uint32_t, simd_abi::neon_fixed_size<2>>,
simd<std::uint32_t, simd_abi::neon_fixed_size<2>>> const& x,
std::uint32_t, std::plus<>) {
return vaddv_u32(static_cast<uint32x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::uint32_t reduce_min(
const_where_expression<
simd_mask<std::uint32_t, simd_abi::neon_fixed_size<4>>,
simd<std::uint32_t, simd_abi::neon_fixed_size<4>>> const& x) {
return vminvq_u32(static_cast<uint32x4_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::uint32_t reduce_max(
const_where_expression<
simd_mask<std::uint32_t, simd_abi::neon_fixed_size<4>>,
simd<std::uint32_t, simd_abi::neon_fixed_size<4>>> const& x) {
return vmaxvq_u32(static_cast<uint32x4_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::uint32_t reduce(
const_where_expression<
simd_mask<std::uint32_t, simd_abi::neon_fixed_size<4>>,
simd<std::uint32_t, simd_abi::neon_fixed_size<4>>> const& x,
std::uint32_t, std::plus<>) {
return vaddvq_u32(static_cast<uint32x4_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::int64_t reduce(
const_where_expression<
simd_mask<std::int64_t, simd_abi::neon_fixed_size<2>>,
simd<std::int64_t, simd_abi::neon_fixed_size<2>>> const& x,
std::int64_t, std::plus<>) {
return vaddvq_s64(static_cast<int64x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::uint64_t reduce(
const_where_expression<
simd_mask<std::uint64_t, simd_abi::neon_fixed_size<2>>,
simd<std::uint64_t, simd_abi::neon_fixed_size<2>>> const& x,
std::uint64_t, std::plus<>) {
return vaddvq_u64(static_cast<uint64x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION double reduce_min(
const_where_expression<simd_mask<double, simd_abi::neon_fixed_size<2>>,
simd<double, simd_abi::neon_fixed_size<2>>> const&
x) {
return vminvq_f64(static_cast<float64x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION double reduce_max(
const_where_expression<simd_mask<double, simd_abi::neon_fixed_size<2>>,
simd<double, simd_abi::neon_fixed_size<2>>> const&
x) {
return vmaxvq_f64(static_cast<float64x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION double reduce(
const_where_expression<simd_mask<double, simd_abi::neon_fixed_size<2>>,
simd<double, simd_abi::neon_fixed_size<2>>> const& x,
double, std::plus<>) {
return vaddvq_f64(static_cast<float64x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION float reduce_min(
const_where_expression<simd_mask<float, simd_abi::neon_fixed_size<2>>,
simd<float, simd_abi::neon_fixed_size<2>>> const&
x) {
return vminv_f32(static_cast<float32x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION float reduce_max(
const_where_expression<simd_mask<float, simd_abi::neon_fixed_size<2>>,
simd<float, simd_abi::neon_fixed_size<2>>> const&
x) {
return vmaxv_f32(static_cast<float32x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION float reduce(
const_where_expression<simd_mask<float, simd_abi::neon_fixed_size<2>>,
simd<float, simd_abi::neon_fixed_size<2>>> const& x,
float, std::plus<>) {
return vaddv_f32(static_cast<float32x2_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION float reduce_min(
const_where_expression<simd_mask<float, simd_abi::neon_fixed_size<4>>,
simd<float, simd_abi::neon_fixed_size<4>>> const&
x) {
return vminvq_f32(static_cast<float32x4_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION float reduce_max(
const_where_expression<simd_mask<float, simd_abi::neon_fixed_size<4>>,
simd<float, simd_abi::neon_fixed_size<4>>> const&
x) {
return vmaxvq_f32(static_cast<float32x4_t>(x.impl_get_value()));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION float reduce(
const_where_expression<simd_mask<float, simd_abi::neon_fixed_size<4>>,
simd<float, simd_abi::neon_fixed_size<4>>> const& x,
float, std::plus<>) {
return vaddvq_f32(static_cast<float32x4_t>(x.impl_get_value()));
}

} // namespace Experimental
} // namespace Kokkos

Expand Down
20 changes: 19 additions & 1 deletion simd/src/Kokkos_SIMD_Scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ reduce(const_where_expression<simd_mask<T, simd_abi::scalar>,
}

template <class T>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION T
[[nodiscard]] KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION T
hmax(const_where_expression<simd_mask<T, simd_abi::scalar>,
simd<T, simd_abi::scalar>> const& x) {
return static_cast<bool>(x.impl_get_mask())
Expand All @@ -437,13 +437,31 @@ hmax(const_where_expression<simd_mask<T, simd_abi::scalar>,

template <class T>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION T
reduce_max(const_where_expression<simd_mask<T, simd_abi::scalar>,
simd<T, simd_abi::scalar>> const& x) {
return static_cast<bool>(x.impl_get_mask())
? static_cast<T>(x.impl_get_value())
: Kokkos::reduction_identity<T>::max();
}

template <class T>
[[nodiscard]] KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION T
hmin(const_where_expression<simd_mask<T, simd_abi::scalar>,
simd<T, simd_abi::scalar>> const& x) {
return static_cast<bool>(x.impl_get_mask())
? static_cast<T>(x.impl_get_value())
: Kokkos::reduction_identity<T>::min();
}

template <class T>
[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION T
reduce_min(const_where_expression<simd_mask<T, simd_abi::scalar>,
simd<T, simd_abi::scalar>> const& x) {
return static_cast<bool>(x.impl_get_mask())
? static_cast<T>(x.impl_get_value())
: Kokkos::reduction_identity<T>::min();
}

} // namespace Experimental
} // namespace Kokkos

Expand Down
Loading

0 comments on commit 959bf66

Please sign in to comment.