Skip to content

Commit 2da54f2

Browse files
committed
Update to v0.8.5.
1 parent cd52bd8 commit 2da54f2

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

57 files changed

+1832
-219
lines changed

trunk/Digits/Pi.txt

+2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@
9595
62,831,853,071,796 | 7817924264 // UAS Grisons (2021)
9696

9797
100,000,000,000,000 | 3095295560 // Emma Haruka Iwao (2022)
98+
105,000,000,000,000 | 8558373926 // StorageReview.com (2024)
99+
202,112,290,000,000 | 3622511852 // StorageReview.com (2024)
98100

99101

100102
// Super Pi Sizes

trunk/Source/DigitViewer2/DigitCount/Kernels/DigitCount_x64_AVX2.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// Dependencies
1717
#include <immintrin.h>
1818
#include "PublicLibs/CompilerSettings.h"
19-
#include "PublicLibs/ArchSpecificLibs/Shuffle/x86_256/Transpose_64x4x4_x86_AVX2.h"
19+
#include "PublicLibs/ArchSpecificLibs/Shuffle/Transpose_64x4x4_x86_AVX2.h"
2020
namespace DigitViewer2{
2121
namespace DigitCount{
2222
////////////////////////////////////////////////////////////////////////////////
@@ -93,7 +93,7 @@ void accumulate_b32_AVX2_64x4(u64_t digits[4], const __m256i* raw_digits, upL_t
9393
sum1 = reduce_u8_to_u64_AVX2(sum1);
9494
sum2 = reduce_u8_to_u64_AVX2(sum2);
9595
sum3 = reduce_u8_to_u64_AVX2(sum3);
96-
SIMD::transpose_i64_4x4_AVX2(sum0, sum1, sum2, sum3);
96+
SIMD::transpose_i64x4x4_x86_AVX2(sum0, sum1, sum2, sum3);
9797

9898
sum0 = _mm256_add_epi64(sum0, sum1);
9999
sum2 = _mm256_add_epi64(sum2, sum3);

trunk/Source/DigitViewer2/DigitCount/Kernels/DigitCount_x64_AVX512-BW.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
#include <immintrin.h>
1818
#include "PublicLibs/CompilerSettings.h"
1919
#include "PublicLibs/ArchSpecificLibs/x86_AVX512.h"
20-
#include "PublicLibs/ArchSpecificLibs/Shuffle/x86_512/Transpose_64x8x8_x86_AVX512.h"
20+
#include "PublicLibs/ArchSpecificLibs/Shuffle/Transpose_64x8x8_x86_AVX512.h"
2121
namespace DigitViewer2{
2222
namespace DigitCount{
2323
////////////////////////////////////////////////////////////////////////////////
@@ -91,7 +91,7 @@ YM_FORCE_INLINE void accumulate_b64_AVX512_64x10(u64_t digits[10], const __m512i
9191
sum8 = reduce_u8_to_u64_AVX512(sum8);
9292
sum9 = reduce_u8_to_u64_AVX512(sum9);
9393

94-
SIMD::transpose_i64_8x8_AVX512(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
94+
SIMD::transpose_i64x8x8_x86_AVX512(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
9595

9696
sum0 = _mm512_add_epi64(sum0, sum1);
9797
sum2 = _mm512_add_epi64(sum2, sum3);
@@ -155,7 +155,7 @@ void accumulate_b64_AVX512_64x8(u64_t digits[8], const __m512i* raw_digits, upL_
155155
sum5 = reduce_u8_to_u64_AVX512(sum5);
156156
sum6 = reduce_u8_to_u64_AVX512(sum6);
157157
sum7 = reduce_u8_to_u64_AVX512(sum7);
158-
SIMD::transpose_i64_8x8_AVX512(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
158+
SIMD::transpose_i64x8x8_x86_AVX512(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
159159

160160
sum0 = _mm512_add_epi64(sum0, sum1);
161161
sum2 = _mm512_add_epi64(sum2, sum3);
@@ -238,7 +238,7 @@ YM_FORCE_INLINE void accumulate_b64_AVX512_64x16(u64_t digits[16], const __m512i
238238
sum5 = reduce_u8_to_u64_AVX512(sum5);
239239
sum6 = reduce_u8_to_u64_AVX512(sum6);
240240
sum7 = reduce_u8_to_u64_AVX512(sum7);
241-
SIMD::transpose_i64_8x8_AVX512(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
241+
SIMD::transpose_i64x8x8_x86_AVX512(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
242242
sum8 = reduce_u8_to_u64_AVX512(sum8);
243243
sum9 = reduce_u8_to_u64_AVX512(sum9);
244244
sumA = reduce_u8_to_u64_AVX512(sumA);
@@ -247,7 +247,7 @@ YM_FORCE_INLINE void accumulate_b64_AVX512_64x16(u64_t digits[16], const __m512i
247247
sumD = reduce_u8_to_u64_AVX512(sumD);
248248
sumE = reduce_u8_to_u64_AVX512(sumE);
249249
sumF = reduce_u8_to_u64_AVX512(sumF);
250-
SIMD::transpose_i64_8x8_AVX512(sum8, sum9, sumA, sumB, sumC, sumD, sumE, sumF);
250+
SIMD::transpose_i64x8x8_x86_AVX512(sum8, sum9, sumA, sumB, sumC, sumD, sumE, sumF);
251251

252252
sum0 = _mm512_add_epi64(sum0, sum1);
253253
sum2 = _mm512_add_epi64(sum2, sum3);

trunk/Source/DigitViewer2/RawToCompressed/Kernels/i64ToDec_x64_AVX2.h

+14-30
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@
1414
////////////////////////////////////////////////////////////////////////////////
1515
////////////////////////////////////////////////////////////////////////////////
1616
// Dependencies
17-
#include "PublicLibs/ArchSpecificLibs/Shuffle/x86_128/AdjacentLanePermute_x86_128.h"
18-
#include "PublicLibs/ArchSpecificLibs/Shuffle/x86_256/AdjacentLanePermute_x86_256.h"
17+
#include "PublicLibs/ArchSpecificLibs/AdjacentLanePermute/AdjacentLanePermute_256_x86_AVX2.h"
1918
#include "DigitViewer2/RawToDecKernels/Kernels_dec_to_i64_x64_AVX2.h"
2019
#include "DigitViewer2/RawToDecKernels/Kernels_i64_to_dec_x64_AVX2.h"
2120
namespace DigitViewer2{
@@ -34,27 +33,19 @@ YM_FORCE_INLINE bool dec_to_i64_u4_x64_AVX2(__m256i* T, const char* raw, upL_t b
3433
do{
3534
__m256i a0, b0, c0;
3635

37-
c0 = _mm256_setr_m128i(
38-
SIMD::mm_splitload_si128(raw + 0, raw + 19),
39-
SIMD::mm_splitload_si128(raw + 38, raw + 57)
40-
);
41-
#if 0
42-
b0 = _mm256_setr_m128i(
43-
SIMD::mm_splitload_si128(raw + 3, raw + 22),
44-
SIMD::mm_splitload_si128(raw + 41, raw + 60)
45-
);
46-
a0 = _mm256_setr_m128i(
47-
SIMD::mm_splitload_si128(raw + 11, raw + 30),
48-
SIMD::mm_splitload_si128(raw + 49, raw + 68)
49-
);
50-
#else
36+
{
37+
__m128i x0, x1;
38+
SIMD::splitload(x0, raw + 0, raw + 19);
39+
SIMD::splitload(x1, raw + 38, raw + 57);
40+
c0 = _mm256_setr_m128i(x0, x1);
41+
}
42+
5143
__m256i r0;
52-
r0 = SIMD::mm256_splitload_si256(raw + 3, raw + 41);
53-
a0 = SIMD::mm256_splitload_si256(raw + 22, raw + 60);
44+
SIMD::splitload(r0, raw + 3, raw + 41);
45+
SIMD::splitload(a0, raw + 22, raw + 60);
5446

5547
b0 = _mm256_unpacklo_epi64(r0, a0);
5648
a0 = _mm256_unpackhi_epi64(r0, a0);
57-
#endif
5849

5950
c0 = _mm256_and_si256(c0, _mm256_set1_epi64x(0x0000000000ffffff));
6051

@@ -83,20 +74,13 @@ YM_FORCE_INLINE void i64_to_dec_u4_x64_AVX2(char* raw, const __m256i* T, upL_t b
8374
__m256i a0, b0, c0;
8475
RawToDec::i64_to_dec_x64_AVX2(_mm256_loadu_si256(T), a0, b0, c0);
8576

86-
SIMD::mm_splitstore_si128(raw + 0, raw + 19, _mm256_castsi256_si128(c0));
87-
SIMD::mm_splitstore_si128(raw + 38, raw + 57, _mm256_extracti128_si256(c0, 1));
88-
#if 0
89-
SIMD::mm_splitstore_si128(raw + 3, raw + 22, _mm256_castsi256_si128(b0));
90-
SIMD::mm_splitstore_si128(raw + 41, raw + 60, _mm256_extracti128_si256(b0, 1));
77+
SIMD::splitstore(_mm256_castsi256_si128(c0), raw + 0, raw + 19);
78+
SIMD::splitstore(_mm256_extracti128_si256(c0, 1), raw + 38, raw + 57);
9179

92-
SIMD::mm_splitstore_si128(raw + 11, raw + 30, _mm256_castsi256_si128(a0));
93-
SIMD::mm_splitstore_si128(raw + 49, raw + 68, _mm256_extracti128_si256(a0, 1));
94-
#else
9580
__m256i y0 = _mm256_unpacklo_epi64(b0, a0);
9681
__m256i y1 = _mm256_unpackhi_epi64(b0, a0);
97-
SIMD::mm256_splitstore_si256(raw + 3, raw + 41, y0);
98-
SIMD::mm256_splitstore_si256(raw + 22, raw + 60, y1);
99-
#endif
82+
SIMD::splitstore(y0, raw + 3, raw + 41);
83+
SIMD::splitstore(y1, raw + 22, raw + 60);
10084

10185
raw += 76;
10286
T += 1;

trunk/Source/DigitViewer2/RawToCompressed/Kernels/i64ToDec_x64_AVX512-BW.h

+10-9
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
////////////////////////////////////////////////////////////////////////////////
1515
////////////////////////////////////////////////////////////////////////////////
1616
// Dependencies
17-
#include "PublicLibs/ArchSpecificLibs/Shuffle/x86_256/AdjacentLanePermute_x86_256.h"
17+
#include "PublicLibs/ArchSpecificLibs/AdjacentLanePermute/AdjacentLanePermute_256_x86_AVX2.h"
1818
#include "DigitViewer2/RawToDecKernels/Kernels_dec_to_i64_x64_AVX512-BW.h"
1919
#include "DigitViewer2/RawToDecKernels/Kernels_i64_to_dec_x64_AVX512-BW.h"
2020
namespace DigitViewer2{
@@ -44,10 +44,11 @@ YM_FORCE_INLINE bool dec_to_i64_u8_x64_AVX512BW(__m512i* T, const char* raw, upL
4444
a0 = _mm512_i64gather_epi64(GATHER, raw + 11, 1);
4545
#else
4646
{
47-
__m256i r0 = SIMD::mm256_splitload_si256(raw + 3, raw + 41);
48-
__m256i r1 = SIMD::mm256_splitload_si256(raw + 22, raw + 60);
49-
__m256i r2 = SIMD::mm256_splitload_si256(raw + 79, raw + 117);
50-
__m256i r3 = SIMD::mm256_splitload_si256(raw + 98, raw + 136);
47+
__m256i r0, r1, r2, r3;
48+
SIMD::splitload(r0, raw + 3, raw + 41);
49+
SIMD::splitload(r1, raw + 22, raw + 60);
50+
SIMD::splitload(r2, raw + 79, raw + 117);
51+
SIMD::splitload(r3, raw + 98, raw + 136);
5152

5253
__m512i z0 = _mm512_inserti64x4(_mm512_castsi256_si512(r0), r2, 1);
5354
__m512i z1 = _mm512_inserti64x4(_mm512_castsi256_si512(r1), r3, 1);
@@ -88,10 +89,10 @@ YM_FORCE_INLINE void i64_to_dec_u8_x64_AVX512BW(char* raw, const __m512i* T, upL
8889
__m512i z0 = _mm512_unpacklo_epi64(b0, a0);
8990
__m512i z1 = _mm512_unpackhi_epi64(b0, a0);
9091

91-
SIMD::mm256_splitstore_si256(raw + 3, raw + 41, _mm512_castsi512_si256(z0));
92-
SIMD::mm256_splitstore_si256(raw + 22, raw + 60, _mm512_castsi512_si256(z1));
93-
SIMD::mm256_splitstore_si256(raw + 79, raw + 117, _mm512_extracti64x4_epi64(z0, 1));
94-
SIMD::mm256_splitstore_si256(raw + 98, raw + 136, _mm512_extracti64x4_epi64(z1, 1));
92+
SIMD::splitstore(_mm512_castsi512_si256(z0), raw + 3, raw + 41);
93+
SIMD::splitstore(_mm512_castsi512_si256(z1), raw + 22, raw + 60);
94+
SIMD::splitstore(_mm512_extracti64x4_epi64(z0, 1), raw + 79, raw + 117);
95+
SIMD::splitstore(_mm512_extracti64x4_epi64(z1, 1), raw + 98, raw + 136);
9596
#endif
9697

9798
raw += 152;

trunk/Source/DigitViewer2/RawToCompressed/Kernels/i64ToDec_x64_SSE41.h

+16-13
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
////////////////////////////////////////////////////////////////////////////////
1515
////////////////////////////////////////////////////////////////////////////////
1616
// Dependencies
17-
#include "PublicLibs/ArchSpecificLibs/Shuffle/x86_128/AdjacentLanePermute_x86_128.h"
17+
#include "PublicLibs/ArchSpecificLibs/AdjacentLanePermute/AdjacentLanePermute_128_x86_SSE2.h"
1818
#include "DigitViewer2/RawToDecKernels/Kernels_dec_to_i64_x64_SSE41.h"
1919
#include "DigitViewer2/RawToDecKernels/Kernels_i64_to_dec_x64_SSE41.h"
2020
namespace DigitViewer2{
@@ -31,9 +31,10 @@ YM_FORCE_INLINE bool dec_to_i64_u2_x64_SSE41(__m128i* T, const char* raw, upL_t
3131

3232
__m128i bad = _mm_setzero_si128();
3333
do{
34-
__m128i c0 = SIMD::mm_splitload_si128(raw + 0, raw + 19);
35-
__m128i b0 = SIMD::mm_splitload_si128(raw + 3, raw + 22);
36-
__m128i a0 = SIMD::mm_splitload_si128(raw + 11, raw + 30);
34+
__m128i a0, b0, c0;
35+
SIMD::splitload(c0, raw + 0, raw + 19);
36+
SIMD::splitload(b0, raw + 3, raw + 22);
37+
SIMD::splitload(a0, raw + 11, raw + 30);
3738
c0 = _mm_and_si128(c0, _mm_set1_epi64x(0x0000000000ffffff));
3839

3940
__m128i t0;
@@ -55,12 +56,14 @@ YM_FORCE_INLINE bool dec_to_i64_u4_x64_SSE41(__m128i* T, const char* raw, upL_t
5556

5657
__m128i bad = _mm_setzero_si128();
5758
do{
58-
__m128i c0 = SIMD::mm_splitload_si128(raw + 0, raw + 19);
59-
__m128i b0 = SIMD::mm_splitload_si128(raw + 3, raw + 22);
60-
__m128i a0 = SIMD::mm_splitload_si128(raw + 11, raw + 30);
61-
__m128i c1 = SIMD::mm_splitload_si128(raw + 38, raw + 57);
62-
__m128i b1 = SIMD::mm_splitload_si128(raw + 41, raw + 60);
63-
__m128i a1 = SIMD::mm_splitload_si128(raw + 49, raw + 68);
59+
__m128i a0, b0, c0;
60+
__m128i a1, b1, c1;
61+
SIMD::splitload(c0, raw + 0, raw + 19);
62+
SIMD::splitload(b0, raw + 3, raw + 22);
63+
SIMD::splitload(a0, raw + 11, raw + 30);
64+
SIMD::splitload(c1, raw + 38, raw + 57);
65+
SIMD::splitload(b1, raw + 41, raw + 60);
66+
SIMD::splitload(a1, raw + 49, raw + 68);
6467

6568
c0 = _mm_and_si128(c0, _mm_set1_epi64x(0x0000000000ffffff));
6669
c1 = _mm_and_si128(c1, _mm_set1_epi64x(0x0000000000ffffff));
@@ -95,9 +98,9 @@ YM_FORCE_INLINE void i64_to_dec_u2_x64_SSE41(char* raw, const __m128i* T, upL_t
9598
__m128i a0, b0, c0;
9699
RawToDec::i64_to_dec_x64_SSE41(_mm_loadu_si128(T), a0, b0, c0);
97100

98-
SIMD::mm_splitstore_si128(raw + 0, raw + 19, c0);
99-
SIMD::mm_splitstore_si128(raw + 3, raw + 22, b0);
100-
SIMD::mm_splitstore_si128(raw + 11, raw + 30, a0);
101+
SIMD::splitstore(c0, raw + 0, raw + 19);
102+
SIMD::splitstore(b0, raw + 3, raw + 22);
103+
SIMD::splitstore(a0, raw + 11, raw + 30);
101104

102105
raw += 38;
103106
T += 1;

trunk/Source/DigitViewer2/WordToRaw/Kernels/WordToRaw_x64_AVX2.h

+7-13
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
// Dependencies
1717
#include "PublicLibs/CompilerSettings.h"
1818
#include "PublicLibs/Types.h"
19-
#include "PublicLibs/ArchSpecificLibs/Shuffle/x86_128/AdjacentLanePermute_x86_128.h"
20-
#include "PublicLibs/ArchSpecificLibs/Shuffle/x86_256/AdjacentLanePermute_x86_256.h"
19+
#include "PublicLibs/ArchSpecificLibs/AdjacentLanePermute/AdjacentLanePermute_128_x86_SSE2.h"
20+
#include "PublicLibs/ArchSpecificLibs/AdjacentLanePermute/AdjacentLanePermute_256_x86_AVX2.h"
2121
#include "DigitViewer2/RawToDecKernels/Kernels_i64_to_dec_x64_AVX2.h"
2222
namespace DigitViewer2{
2323
namespace WordToRaw{
@@ -83,20 +83,14 @@ YM_FORCE_INLINE void w64_to_dec_u4_x64_AVX2(char* raw, const __m256i* T, upL_t b
8383
__m256i a0, b0, c0;
8484
RawToDec::i64_to_dec_x64_AVX2(T[0], a0, b0, c0);
8585

86-
SIMD::mm_splitstore_si128(raw + 57, raw + 38, _mm256_castsi256_si128(c0));
87-
SIMD::mm_splitstore_si128(raw + 19, raw + 0, _mm256_extracti128_si256(c0, 1));
88-
#if 0
89-
SIMD::mm_splitstore_si128(raw + 60, raw + 41, _mm256_castsi256_si128(b0));
90-
SIMD::mm_splitstore_si128(raw + 22, raw + 3, _mm256_extracti128_si256(b0, 1));
86+
SIMD::splitstore(_mm256_castsi256_si128(c0), raw + 57, raw + 38);
87+
SIMD::splitstore(_mm256_extracti128_si256(c0, 1), raw + 19, raw + 0);
9188

92-
SIMD::mm_splitstore_si128(raw + 68, raw + 49, _mm256_castsi256_si128(a0));
93-
SIMD::mm_splitstore_si128(raw + 30, raw + 11, _mm256_extracti128_si256(a0, 1));
94-
#else
9589
__m256i y0 = _mm256_unpacklo_epi64(b0, a0);
9690
__m256i y1 = _mm256_unpackhi_epi64(b0, a0);
97-
SIMD::mm256_splitstore_si256(raw + 60, raw + 22, y0);
98-
SIMD::mm256_splitstore_si256(raw + 41, raw + 3, y1);
99-
#endif
91+
SIMD::splitstore(y0, raw + 60, raw + 22);
92+
SIMD::splitstore(y1, raw + 41, raw + 3);
93+
10094

10195
T += 1;
10296
}while (--blocks);

trunk/Source/DigitViewer2/WordToRaw/Kernels/WordToRaw_x64_AVX512-BW.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// Dependencies
1717
#include "PublicLibs/CompilerSettings.h"
1818
#include "PublicLibs/Types.h"
19-
#include "PublicLibs/ArchSpecificLibs/Shuffle/x86_256/AdjacentLanePermute_x86_256.h"
19+
#include "PublicLibs/ArchSpecificLibs/AdjacentLanePermute/AdjacentLanePermute_256_x86_AVX2.h"
2020
#include "DigitViewer2/RawToDecKernels/Kernels_i64_to_dec_x64_AVX512-BW.h"
2121
namespace DigitViewer2{
2222
namespace WordToRaw{
@@ -94,10 +94,10 @@ YM_FORCE_INLINE void w64_to_dec_u8_x64_AVX512BW(char* raw, const __m512i* T, upL
9494
__m512i z0 = _mm512_unpacklo_epi64(b0, a0);
9595
__m512i z1 = _mm512_unpackhi_epi64(b0, a0);
9696

97-
SIMD::mm256_splitstore_si256(raw + 136, raw + 98, _mm512_castsi512_si256(z0));
98-
SIMD::mm256_splitstore_si256(raw + 117, raw + 79, _mm512_castsi512_si256(z1));
99-
SIMD::mm256_splitstore_si256(raw + 60, raw + 22, _mm512_extracti64x4_epi64(z0, 1));
100-
SIMD::mm256_splitstore_si256(raw + 41, raw + 3, _mm512_extracti64x4_epi64(z1, 1));
97+
SIMD::splitstore(_mm512_castsi512_si256(z0), raw + 136, raw + 98);
98+
SIMD::splitstore(_mm512_castsi512_si256(z1), raw + 117, raw + 79);
99+
SIMD::splitstore(_mm512_extracti64x4_epi64(z0, 1), raw + 60, raw + 22);
100+
SIMD::splitstore(_mm512_extracti64x4_epi64(z1, 1), raw + 41, raw + 3);
101101
#endif
102102

103103
T += 1;

trunk/Source/DigitViewer2/WordToRaw/Kernels/WordToRaw_x64_SSE41.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// Dependencies
1717
#include "PublicLibs/CompilerSettings.h"
1818
#include "PublicLibs/Types.h"
19-
#include "PublicLibs/ArchSpecificLibs/Shuffle/x86_128/AdjacentLanePermute_x86_128.h"
19+
#include "PublicLibs/ArchSpecificLibs/AdjacentLanePermute/AdjacentLanePermute_128_x86_SSE2.h"
2020
#include "DigitViewer2/RawToDecKernels/Kernels_i64_to_dec_x64_SSE41.h"
2121
namespace DigitViewer2{
2222
namespace WordToRaw{
@@ -83,9 +83,9 @@ YM_FORCE_INLINE void w64_to_dec_u2_x64_SSE41(char* raw, const __m128i* T, upL_t
8383
__m128i a0, b0, c0;
8484
RawToDec::i64_to_dec_x64_SSE41(T[0], a0, b0, c0);
8585

86-
SIMD::mm_splitstore_si128(raw + 19, raw + 0, c0);
87-
SIMD::mm_splitstore_si128(raw + 22, raw + 3, b0);
88-
SIMD::mm_splitstore_si128(raw + 30, raw + 11, a0);
86+
SIMD::splitstore(c0, raw + 19, raw + 0);
87+
SIMD::splitstore(b0, raw + 22, raw + 3);
88+
SIMD::splitstore(a0, raw + 30, raw + 11);
8989

9090
T += 1;
9191
}while (--blocks);

trunk/Source/Launcher/Launcher.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -42,19 +42,19 @@ bool pause_on_warning = true;
4242
////////////////////////////////////////////////////////////////////////////////
4343
////////////////////////////////////////////////////////////////////////////////
4444
void handle_pause(CommandLine::Parameters& cmds){
45-
auto& value = cmds.CurrentValue();
45+
auto& value = cmds.current_value();
4646
cmds.advance();
4747
int pause_on_exit = static_cast<int>(StringTools::parse_sL_text(value));
4848
pause_on_warning = pause_on_exit > -1;
4949
Console::pause_on_error = pause_on_exit > -2;
5050
}
5151
void handle_colors(CommandLine::Parameters& cmds){
52-
auto& value = cmds.CurrentValue();
52+
auto& value = cmds.current_value();
5353
cmds.advance();
5454
Console::enable_colors = StringTools::parse_sL_text(value) > 0;
5555
}
5656
void handle_height(CommandLine::Parameters& cmds){
57-
auto& value = cmds.CurrentValue();
57+
auto& value = cmds.current_value();
5858
cmds.advance();
5959
int height = static_cast<int>(StringTools::parse_sL_text(value));
6060
Console::set_console_window_size(80, height);

trunk/Source/Launcher/Vendor-AMD.h

+20
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,26 @@ void dispatch_AMD(const cpu_x86& features){
2727
if (features.HW_AVX512_VP2INTERSECT){
2828
dispatch_2024_x64_Zen5(features);
2929
}
30+
#else
31+
if (features.HW_AVX512_VP2INTERSECT && can_run_2024_x64_Zen5(features)){
32+
Console::println("", 'Y');
33+
Console::println("Detected a Zen5 processor!");
34+
Console::println();
35+
Console::println("Optimizations are not yet available for this processor as they are pending");
36+
Console::println("final hardware for proper tuning.");
37+
Console::println();
38+
Console::println(" https://twitter.com/Mysticial/status/1797478508157124717");
39+
Console::println();
40+
Console::println("As the release date for Zen5 approaches, please stay tuned for an update");
41+
Console::println("to this release that will have Zen5 optimizations.");
42+
Console::println();
43+
Console::println("Falling back to the Zen4 binary...");
44+
Console::println();
45+
Console::set_color('w');
46+
if (pause_on_warning){
47+
Console::pause();
48+
}
49+
}
3050
#endif
3151
#ifdef YCR_ENABLE_2022_x64_Zen4
3252
dispatch_2022_x64_Zen4(features);

0 commit comments

Comments
 (0)