14
14
// //////////////////////////////////////////////////////////////////////////////
15
15
// //////////////////////////////////////////////////////////////////////////////
16
16
// Dependencies
17
- #include " PublicLibs/ArchSpecificLibs/Shuffle/x86_128/AdjacentLanePermute_x86_128.h"
18
- #include " PublicLibs/ArchSpecificLibs/Shuffle/x86_256/AdjacentLanePermute_x86_256.h"
17
+ #include " PublicLibs/ArchSpecificLibs/AdjacentLanePermute/AdjacentLanePermute_256_x86_AVX2.h"
19
18
#include " DigitViewer2/RawToDecKernels/Kernels_dec_to_i64_x64_AVX2.h"
20
19
#include " DigitViewer2/RawToDecKernels/Kernels_i64_to_dec_x64_AVX2.h"
21
20
namespace DigitViewer2 {
@@ -34,27 +33,19 @@ YM_FORCE_INLINE bool dec_to_i64_u4_x64_AVX2(__m256i* T, const char* raw, upL_t b
34
33
do {
35
34
__m256i a0, b0, c0;
36
35
37
- c0 = _mm256_setr_m128i (
38
- SIMD::mm_splitload_si128 (raw + 0 , raw + 19 ),
39
- SIMD::mm_splitload_si128 (raw + 38 , raw + 57 )
40
- );
41
- #if 0
42
- b0 = _mm256_setr_m128i(
43
- SIMD::mm_splitload_si128(raw + 3, raw + 22),
44
- SIMD::mm_splitload_si128(raw + 41, raw + 60)
45
- );
46
- a0 = _mm256_setr_m128i(
47
- SIMD::mm_splitload_si128(raw + 11, raw + 30),
48
- SIMD::mm_splitload_si128(raw + 49, raw + 68)
49
- );
50
- #else
36
+ {
37
+ __m128i x0, x1;
38
+ SIMD::splitload (x0, raw + 0 , raw + 19 );
39
+ SIMD::splitload (x1, raw + 38 , raw + 57 );
40
+ c0 = _mm256_setr_m128i (x0, x1);
41
+ }
42
+
51
43
__m256i r0;
52
- r0 = SIMD::mm256_splitload_si256 ( raw + 3 , raw + 41 );
53
- a0 = SIMD::mm256_splitload_si256 ( raw + 22 , raw + 60 );
44
+ SIMD::splitload (r0, raw + 3 , raw + 41 );
45
+ SIMD::splitload (a0, raw + 22 , raw + 60 );
54
46
55
47
b0 = _mm256_unpacklo_epi64 (r0, a0);
56
48
a0 = _mm256_unpackhi_epi64 (r0, a0);
57
- #endif
58
49
59
50
c0 = _mm256_and_si256 (c0, _mm256_set1_epi64x (0x0000000000ffffff ));
60
51
@@ -83,20 +74,13 @@ YM_FORCE_INLINE void i64_to_dec_u4_x64_AVX2(char* raw, const __m256i* T, upL_t b
83
74
__m256i a0, b0, c0;
84
75
RawToDec::i64_to_dec_x64_AVX2 (_mm256_loadu_si256 (T), a0, b0, c0);
85
76
86
- SIMD::mm_splitstore_si128 (raw + 0 , raw + 19 , _mm256_castsi256_si128 (c0));
87
- SIMD::mm_splitstore_si128 (raw + 38 , raw + 57 , _mm256_extracti128_si256 (c0, 1 ));
88
- #if 0
89
- SIMD::mm_splitstore_si128(raw + 3, raw + 22, _mm256_castsi256_si128(b0));
90
- SIMD::mm_splitstore_si128(raw + 41, raw + 60, _mm256_extracti128_si256(b0, 1));
77
+ SIMD::splitstore (_mm256_castsi256_si128 (c0), raw + 0 , raw + 19 );
78
+ SIMD::splitstore (_mm256_extracti128_si256 (c0, 1 ), raw + 38 , raw + 57 );
91
79
92
- SIMD::mm_splitstore_si128(raw + 11, raw + 30, _mm256_castsi256_si128(a0));
93
- SIMD::mm_splitstore_si128(raw + 49, raw + 68, _mm256_extracti128_si256(a0, 1));
94
- #else
95
80
__m256i y0 = _mm256_unpacklo_epi64 (b0, a0);
96
81
__m256i y1 = _mm256_unpackhi_epi64 (b0, a0);
97
- SIMD::mm256_splitstore_si256 (raw + 3 , raw + 41 , y0 );
98
- SIMD::mm256_splitstore_si256 (raw + 22 , raw + 60 , y1 );
99
- #endif
82
+ SIMD::splitstore (y0 , raw + 3 , raw + 41 );
83
+ SIMD::splitstore (y1 , raw + 22 , raw + 60 );
100
84
101
85
raw += 76 ;
102
86
T += 1 ;
0 commit comments