Add 16-bit integer `mul` (mullo) for AVX, AVX-512, NEON and SSE, and 8-bit integer `mul` for NEON only.

OlivierHartmann · Jul 2, 2018 · c4af2cd · c4af2cd
1 parent 9d6340e
commit c4af2cd
Show file tree

Hide file tree

Showing 5 changed files with 83 additions and 0 deletions.
diff --git a/src/mipp_impl_AVX.hxx b/src/mipp_impl_AVX.hxx
@@ -2085,6 +2085,11 @@
 	inline reg mul<int32_t>(const reg v1, const reg v2) {
 		return _mm256_castsi256_ps(_mm256_mullo_epi32(_mm256_castps_si256(v1), _mm256_castps_si256(v2)));
 	}
+
+	// int16_t multiply: reinterpret both registers as packed integers, multiply
+	// them lane by lane with _mm256_mullo_epi16 (the low 16 bits of each product
+	// are kept), then reinterpret the result back to the float-typed register.
+	template <>
+	inline reg mul<int16_t>(const reg v1, const reg v2) {
+		const __m256i lhs     = _mm256_castps_si256(v1);
+		const __m256i rhs     = _mm256_castps_si256(v2);
+		const __m256i product = _mm256_mullo_epi16(lhs, rhs);
+		return _mm256_castsi256_ps(product);
+	}
 #endif
 
 	// ------------------------------------------------------------------------------------------------------------ div

diff --git a/src/mipp_impl_AVX512.hxx b/src/mipp_impl_AVX512.hxx
@@ -2563,6 +2563,13 @@
 		return _mm512_castsi512_ps(_mm512_mullo_epi32(_mm512_castps_si512(v1), _mm512_castps_si512(v2)));
 	}
 
+#if defined(__AVX512BW__)
+	// int16_t multiply: reinterpret both registers as packed integers, multiply
+	// them lane by lane with _mm512_mullo_epi16 (the low 16 bits of each product
+	// are kept), then reinterpret the result back to the float-typed register.
+	template <>
+	inline reg mul<int16_t>(const reg v1, const reg v2) {
+		const __m512i lhs     = _mm512_castps_si512(v1);
+		const __m512i rhs     = _mm512_castps_si512(v2);
+		const __m512i product = _mm512_mullo_epi16(lhs, rhs);
+		return _mm512_castsi512_ps(product);
+	}
+#endif
+
 	// ------------------------------------------------------------------------------------------------------------ div
 #if defined(__AVX512F__)
 	template <>

diff --git a/src/mipp_impl_NEON.hxx b/src/mipp_impl_NEON.hxx
@@ -1798,6 +1798,16 @@
 		return (reg) vmulq_s32((int32x4_t) v1, (int32x4_t) v2);
 	}
 
+	// int16_t multiply: view both registers as int16x8_t, multiply them lane by
+	// lane with vmulq_s16, and cast the product back to the generic register type.
+	template <>
+	inline reg mul<int16_t>(const reg v1, const reg v2) {
+		const int16x8_t lhs = (int16x8_t) v1;
+		const int16x8_t rhs = (int16x8_t) v2;
+		return (reg) vmulq_s16(lhs, rhs);
+	}
+
+	// int8_t multiply: view both registers as int8x16_t, multiply them lane by
+	// lane with vmulq_s8, and cast the product back to the generic register type.
+	template <>
+	inline reg mul<int8_t>(const reg v1, const reg v2) {
+		const int8x16_t lhs = (int8x16_t) v1;
+		const int8x16_t rhs = (int8x16_t) v2;
+		return (reg) vmulq_s8(lhs, rhs);
+	}
+
 	// ------------------------------------------------------------------------------------------------------------ div
 #ifdef __aarch64__
 	template <>

diff --git a/src/mipp_impl_SSE.hxx b/src/mipp_impl_SSE.hxx
@@ -1973,6 +1973,13 @@
 	}
 #endif
 
+#ifdef __SSE2__
+	// int16_t multiply: reinterpret both registers as packed integers, multiply
+	// them lane by lane with _mm_mullo_epi16 (the low 16 bits of each product are
+	// kept), then reinterpret the result back to the float-typed register.
+	template <>
+	inline reg mul<int16_t>(const reg v1, const reg v2) {
+		const __m128i lhs     = _mm_castps_si128(v1);
+		const __m128i rhs     = _mm_castps_si128(v2);
+		const __m128i product = _mm_mullo_epi16(lhs, rhs);
+		return _mm_castsi128_ps(product);
+	}
+#endif
+
 	// ------------------------------------------------------------------------------------------------------------ div
 	template <>
 	inline reg div<float>(const reg v1, const reg v2) {

diff --git a/tests/src/arithmetic_operations/mul.cpp b/tests/src/arithmetic_operations/mul.cpp
@@ -30,6 +30,26 @@ void test_reg_mul()
 		REQUIRE(*((T*)&r3 +i) == res);
 #endif
 	}
+	/* Second pass: refill both inputs with consecutive values just below
+	 * std::numeric_limits<T>::max(), shuffle them, and multiply again so that,
+	 * for integer T, every product exceeds the range of T.
+	 * NOTE(review): assuming inputs1/inputs2 hold elements of type T, the scalar
+	 * reference product computed below overflows a signed type when T is not
+	 * promoted to a wider int (e.g. int32_t), which is undefined behaviour in
+	 * C++ - confirm, or compute the reference in an unsigned type.
+	 */
+	std::iota(inputs1, inputs1 + mipp::N<T>(), std::numeric_limits<T>::max() - mipp::N<T>());
+	std::iota(inputs2, inputs2 + mipp::N<T>(), std::numeric_limits<T>::max() - mipp::N<T>());
+
+	std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
+	std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
+
+	r1 = mipp::load<T>(inputs1);
+	r2 = mipp::load<T>(inputs2);
+	r3 = mipp::mul <T>(r1, r2);
+
+	for (auto i = 0; i < mipp::N<T>(); i++)
+	{
+		T res = inputs1[i] * inputs2[i];
+#if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
+		REQUIRE(*((T*)&r3 +i) == Approx(res));
+#else
+		REQUIRE(*((T*)&r3 +i) == res);
+#endif
+	}
 }
 
 #ifndef MIPP_NO
@@ -44,6 +64,12 @@ TEST_CASE("Multiplication - mipp::reg", "[mipp::mul]")
 #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41)
 	SECTION("datatype = int32_t") { test_reg_mul<int32_t>(); }
 #endif
+#if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2)
+	SECTION("datatype = int16_t") { test_reg_mul<int16_t>(); }
+#endif
+#endif
+#if defined(MIPP_NEON)
+	SECTION("datatype = int8_t") { test_reg_mul<int8_t>(); }
 #endif
 }
 #endif
@@ -72,8 +98,29 @@ void test_Reg_mul()
 		REQUIRE(r3[i] == res);
 #endif
 	}
+	/* Second pass: refill both inputs with consecutive values just below
+	 * std::numeric_limits<T>::max(), shuffle them, and multiply again through
+	 * operator* so that, for integer T, every product exceeds the range of T.
+	 * NOTE(review): assuming inputs1/inputs2 hold elements of type T, the scalar
+	 * reference product computed below overflows a signed type when T is not
+	 * promoted to a wider int (e.g. int32_t), which is undefined behaviour in
+	 * C++ - confirm, or compute the reference in an unsigned type.
+	 */
+	std::iota(inputs1, inputs1 + mipp::N<T>(), std::numeric_limits<T>::max() - mipp::N<T>());
+	std::iota(inputs2, inputs2 + mipp::N<T>(), std::numeric_limits<T>::max() - mipp::N<T>());
+
+	std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
+	std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
+
+	r1 = inputs1;
+	r2 = inputs2;
+	r3 = r1 * r2;
+
+	for (auto i = 0; i < mipp::N<T>(); i++)
+	{
+		T res = inputs1[i] * inputs2[i];
+#if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
+		REQUIRE(r3[i] == Approx(res));
+#else
+		REQUIRE(r3[i] == res);
+#endif
+	}
 }
 
+#ifndef MIPP_NO
 TEST_CASE("Multiplication - mipp::Reg", "[mipp::mul]")
 {
 #if defined(MIPP_64BIT)
@@ -85,8 +132,15 @@ TEST_CASE("Multiplication - mipp::Reg", "[mipp::mul]")
 #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41)
 	SECTION("datatype = int32_t") { test_Reg_mul<int32_t>(); }
 #endif
+#if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2)
+	SECTION("datatype = int16_t") { test_Reg_mul<int16_t>(); }
+#endif
+#endif
+#if defined(MIPP_NEON)
+	SECTION("datatype = int8_t") { test_Reg_mul<int8_t>(); }
 #endif
 }
+#endif
 
 template <typename T>
 void test_reg_maskz_mul()