Skip to content

Commit

Permalink
Add 16-bit and 8-bit mullo.
Browse files Browse the repository at this point in the history
  • Loading branch information
kouchy committed Jul 2, 2018
1 parent 9d6340e commit c4af2cd
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/mipp_impl_AVX.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -2085,6 +2085,11 @@
inline reg mul<int32_t>(const reg v1, const reg v2) {
return _mm256_castsi256_ps(_mm256_mullo_epi32(_mm256_castps_si256(v1), _mm256_castps_si256(v2)));
}

// Lane-wise 16-bit integer multiplication on a 256-bit register.
// The operands arrive in the generic register type, so their bits are
// reinterpreted as integer lanes before the multiply and reinterpreted back
// afterwards (the casts do not convert values).
template <>
inline reg mul<int16_t>(const reg v1, const reg v2) {
	const __m256i lhs = _mm256_castps_si256(v1);
	const __m256i rhs = _mm256_castps_si256(v2);
	// mullo keeps the low 16 bits of each 32-bit intermediate product.
	const __m256i product = _mm256_mullo_epi16(lhs, rhs);
	return _mm256_castsi256_ps(product);
}
#endif

// ------------------------------------------------------------------------------------------------------------ div
Expand Down
7 changes: 7 additions & 0 deletions src/mipp_impl_AVX512.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -2563,6 +2563,13 @@
return _mm512_castsi512_ps(_mm512_mullo_epi32(_mm512_castps_si512(v1), _mm512_castps_si512(v2)));
}

#if defined(__AVX512BW__)
// Lane-wise 16-bit integer multiplication on a 512-bit register.
// Only compiled when the AVX-512 BW extension is enabled, because the
// 16-bit mullo intrinsic used below belongs to that extension.
template <>
inline reg mul<int16_t>(const reg v1, const reg v2) {
	// Reinterpret the register bits as integer lanes (no value conversion).
	const __m512i lhs = _mm512_castps_si512(v1);
	const __m512i rhs = _mm512_castps_si512(v2);
	// mullo keeps the low 16 bits of each 32-bit intermediate product.
	const __m512i product = _mm512_mullo_epi16(lhs, rhs);
	return _mm512_castsi512_ps(product);
}
#endif

// ------------------------------------------------------------------------------------------------------------ div
#if defined(__AVX512F__)
template <>
Expand Down
10 changes: 10 additions & 0 deletions src/mipp_impl_NEON.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1798,6 +1798,16 @@
return (reg) vmulq_s32((int32x4_t) v1, (int32x4_t) v2);
}

// Lane-wise multiplication of signed 16-bit integers on a NEON register.
template <>
inline reg mul<int16_t>(const reg v1, const reg v2) {
	// View both operands as vectors of signed 16-bit lanes.
	const int16x8_t lhs = (int16x8_t) v1;
	const int16x8_t rhs = (int16x8_t) v2;
	const int16x8_t product = vmulq_s16(lhs, rhs);
	// Hand the result back in the generic register type.
	return (reg) product;
}

// Lane-wise multiplication of signed 8-bit integers on a NEON register.
template <>
inline reg mul<int8_t>(const reg v1, const reg v2) {
	// View both operands as vectors of signed 8-bit lanes.
	const int8x16_t lhs = (int8x16_t) v1;
	const int8x16_t rhs = (int8x16_t) v2;
	const int8x16_t product = vmulq_s8(lhs, rhs);
	// Hand the result back in the generic register type.
	return (reg) product;
}

// ------------------------------------------------------------------------------------------------------------ div
#ifdef __aarch64__
template <>
Expand Down
7 changes: 7 additions & 0 deletions src/mipp_impl_SSE.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1973,6 +1973,13 @@
}
#endif

#ifdef __SSE2__
// Lane-wise 16-bit integer multiplication on a 128-bit register.
// Guarded by SSE2 because the integer intrinsics used below require it.
template <>
inline reg mul<int16_t>(const reg v1, const reg v2) {
	// Reinterpret the register bits as integer lanes (no value conversion).
	const __m128i lhs = _mm_castps_si128(v1);
	const __m128i rhs = _mm_castps_si128(v2);
	// mullo keeps the low 16 bits of each 32-bit intermediate product.
	const __m128i product = _mm_mullo_epi16(lhs, rhs);
	return _mm_castsi128_ps(product);
}
#endif

// ------------------------------------------------------------------------------------------------------------ div
template <>
inline reg div<float>(const reg v1, const reg v2) {
Expand Down
54 changes: 54 additions & 0 deletions tests/src/arithmetic_operations/mul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,26 @@ void test_reg_mul()
REQUIRE(*((T*)&r3 +i) == res);
#endif
}

std::iota(inputs1, inputs1 + mipp::N<T>(), std::numeric_limits<T>::max() - mipp::N<T>());
std::iota(inputs2, inputs2 + mipp::N<T>(), std::numeric_limits<T>::max() - mipp::N<T>());

std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);

r1 = mipp::load<T>(inputs1);
r2 = mipp::load<T>(inputs2);
r3 = mipp::mul <T>(r1, r2);

for (auto i = 0; i < mipp::N<T>(); i++)
{
T res = inputs1[i] * inputs2[i];
#if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
REQUIRE(*((T*)&r3 +i) == Approx(res));
#else
REQUIRE(*((T*)&r3 +i) == res);
#endif
}
}

#ifndef MIPP_NO
Expand All @@ -44,6 +64,12 @@ TEST_CASE("Multiplication - mipp::reg", "[mipp::mul]")
#if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41)
SECTION("datatype = int32_t") { test_reg_mul<int32_t>(); }
#endif
#if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2)
SECTION("datatype = int16_t") { test_reg_mul<int16_t>(); }
#endif
#endif
#if defined(MIPP_NEON)
SECTION("datatype = int8_t") { test_reg_mul<int8_t>(); }
#endif
}
#endif
Expand Down Expand Up @@ -72,8 +98,29 @@ void test_Reg_mul()
REQUIRE(r3[i] == res);
#endif
}

std::iota(inputs1, inputs1 + mipp::N<T>(), std::numeric_limits<T>::max() - mipp::N<T>());
std::iota(inputs2, inputs2 + mipp::N<T>(), std::numeric_limits<T>::max() - mipp::N<T>());

std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);

r1 = inputs1;
r2 = inputs2;
r3 = r1 * r2;

for (auto i = 0; i < mipp::N<T>(); i++)
{
T res = inputs1[i] * inputs2[i];
#if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
REQUIRE(r3[i] == Approx(res));
#else
REQUIRE(r3[i] == res);
#endif
}
}

#ifndef MIPP_NO
TEST_CASE("Multiplication - mipp::Reg", "[mipp::mul]")
{
#if defined(MIPP_64BIT)
Expand All @@ -85,8 +132,15 @@ TEST_CASE("Multiplication - mipp::Reg", "[mipp::mul]")
#if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41)
SECTION("datatype = int32_t") { test_Reg_mul<int32_t>(); }
#endif
#if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2)
SECTION("datatype = int16_t") { test_Reg_mul<int16_t>(); }
#endif
#endif
#if defined(MIPP_NEON)
SECTION("datatype = int8_t") { test_Reg_mul<int8_t>(); }
#endif
}
#endif

template <typename T>
void test_reg_maskz_mul()
Expand Down

0 comments on commit c4af2cd

Please sign in to comment.