Skip to content

Commit 188f1eb

Browse files
deps: update simdjson to 3.12.2
PR-URL: #57084
Reviewed-By: Luigi Pinca <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
1 parent 96457b4 commit 188f1eb

File tree

2 files changed

+28
-12
lines changed

2 files changed

+28
-12
lines changed

deps/simdjson/simdjson.cpp

+13-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2025-01-27 20:34:35 -0500. Do not edit! */
1+
/* auto-generated on 2025-02-14 16:11:36 -0500. Do not edit! */
22
/* including simdjson.cpp: */
33
/* begin file simdjson.cpp */
44
#define SIMDJSON_SRC_SIMDJSON_CPP
@@ -20813,14 +20813,18 @@ namespace simd {
2081320813

2081420814
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
2081520815
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
20816-
// Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
20816+
// Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
2081720817
// get written.
2081820818
// Design consideration: it seems like a function with the
2081920819
// signature simd8<L> compress(uint64_t mask) would be
2082020820
// sensible, but the AVX ISA makes this kind of approach difficult.
2082120821
template<typename L>
2082220822
simdjson_inline void compress(uint64_t mask, L * output) const {
20823-
_mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20823+
// we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
20824+
// (AMD Zen4 has terrible performance with it, it is effectively broken)
20825+
// _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20826+
__m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
20827+
_mm512_storeu_si512(output, compressed); // could use a mask
2082420828
}
2082520829

2082620830
template<typename L>
@@ -23443,14 +23447,18 @@ namespace simd {
2344323447

2344423448
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
2344523449
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
23446-
// Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
23450+
// Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
2344723451
// get written.
2344823452
// Design consideration: it seems like a function with the
2344923453
// signature simd8<L> compress(uint64_t mask) would be
2345023454
// sensible, but the AVX ISA makes this kind of approach difficult.
2345123455
template<typename L>
2345223456
simdjson_inline void compress(uint64_t mask, L * output) const {
23453-
_mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23457+
// we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
23458+
// (AMD Zen4 has terrible performance with it, it is effectively broken)
23459+
// _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23460+
__m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
23461+
_mm512_storeu_si512(output, compressed); // could use a mask
2345423462
}
2345523463

2345623464
template<typename L>

deps/simdjson/simdjson.h

+15-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2025-01-27 20:34:35 -0500. Do not edit! */
1+
/* auto-generated on 2025-02-14 16:11:36 -0500. Do not edit! */
22
/* including simdjson.h: */
33
/* begin file simdjson.h */
44
#ifndef SIMDJSON_H
@@ -2437,7 +2437,7 @@ namespace std {
24372437
#define SIMDJSON_SIMDJSON_VERSION_H
24382438

24392439
/** The version of simdjson being used (major.minor.revision) */
2440-
#define SIMDJSON_VERSION "3.12.0"
2440+
#define SIMDJSON_VERSION "3.12.2"
24412441

24422442
namespace simdjson {
24432443
enum {
@@ -2452,7 +2452,7 @@ enum {
24522452
/**
24532453
* The revision (major.minor.REVISION) of simdjson being used.
24542454
*/
2455-
SIMDJSON_VERSION_REVISION = 0
2455+
SIMDJSON_VERSION_REVISION = 2
24562456
};
24572457
} // namespace simdjson
24582458

@@ -17948,14 +17948,18 @@ namespace simd {
1794817948

1794917949
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
1795017950
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
17951-
// Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
17951+
// Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
1795217952
// get written.
1795317953
// Design consideration: it seems like a function with the
1795417954
// signature simd8<L> compress(uint64_t mask) would be
1795517955
// sensible, but the AVX ISA makes this kind of approach difficult.
1795617956
template<typename L>
1795717957
simdjson_inline void compress(uint64_t mask, L * output) const {
17958-
_mm512_mask_compressstoreu_epi8 (output,~mask,*this);
17958+
// we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
17959+
// (AMD Zen4 has terrible performance with it, it is effectively broken)
17960+
// _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
17961+
__m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
17962+
_mm512_storeu_si512(output, compressed); // could use a mask
1795917963
}
1796017964

1796117965
template<typename L>
@@ -65401,14 +65405,18 @@ namespace simd {
6540165405

6540265406
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
6540365407
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
65404-
// Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
65408+
// Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
6540565409
// get written.
6540665410
// Design consideration: it seems like a function with the
6540765411
// signature simd8<L> compress(uint64_t mask) would be
6540865412
// sensible, but the AVX ISA makes this kind of approach difficult.
6540965413
template<typename L>
6541065414
simdjson_inline void compress(uint64_t mask, L * output) const {
65411-
_mm512_mask_compressstoreu_epi8 (output,~mask,*this);
65415+
// we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
65416+
// (AMD Zen4 has terrible performance with it, it is effectively broken)
65417+
// _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
65418+
__m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
65419+
_mm512_storeu_si512(output, compressed); // could use a mask
6541265420
}
6541365421

6541465422
template<typename L>

0 commit comments

Comments
 (0)