diff --git a/src/layer/x86/shufflechannel_x86.cpp b/src/layer/x86/shufflechannel_x86.cpp index f4326289b8b..8afb22b2e2e 100644 --- a/src/layer/x86/shufflechannel_x86.cpp +++ b/src/layer/x86/shufflechannel_x86.cpp @@ -343,9 +343,9 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt for (int i = 0; i < size; i++) { __m256 _p0 = _mm256_loadu_ps(ptr0); - // macro `_mm256_loadu2_m128` is declared in Intel® Intrinsics Guide but somehow missed in - // __m256 _p1 = _mm256_loadu2_m128(ptr2, ptr1); - __m256 _p1 = _mm256_set_m128(_mm_loadu_ps(ptr2), _mm_loadu_ps(ptr1)); + + __m256 _p1 = _mm256_castps128_ps256(_mm_loadu_ps(ptr1)); + _p1 = _mm256_insertf128_ps(_p1, _mm_loadu_ps(ptr2), 1); __m256 _lo = _mm256_unpacklo_ps(_p0, _p1); __m256 _hi = _mm256_unpackhi_ps(_p0, _p1);