From a3dd2627cc74a96e4a12a285e4051cfa72da8c29 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 20:34:25 +0100 Subject: [PATCH 01/46] wip --- ext/json/json_encoder.c | 208 +++++++++++++++++++++++++++------------- 1 file changed, 143 insertions(+), 65 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 53dd4cae2574a..c7bf460e9bd61 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -30,6 +30,10 @@ #include "zend_property_hooks.h" #include "zend_lazy_objects.h" +#include +# pragma GCC push_options +# pragma GCC target ("sse4.2") + static const char digits[] = "0123456789abcdef"; static zend_always_inline bool php_json_check_stack_limit(void) @@ -366,6 +370,68 @@ static zend_result php_json_encode_array(smart_str *buf, zval *val, int options, } /* }}} */ +static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, unsigned char us, int options) +{ + switch (us) { + case '"': + if (options & PHP_JSON_HEX_QUOT) { + smart_str_appendl(buf, "\\u0022", 6); + } else { + smart_str_appendl(buf, "\\\"", 2); + } + break; + + case '\\': + smart_str_appendl(buf, "\\\\", 2); + break; + + case '/': + if (options & PHP_JSON_UNESCAPED_SLASHES) { + smart_str_appendc(buf, '/'); + } else { + smart_str_appendl(buf, "\\/", 2); + } + break; + + case '<': + if (options & PHP_JSON_HEX_TAG) { + smart_str_appendl(buf, "\\u003C", 6); + } else { + smart_str_appendc(buf, '<'); + } + break; + + case '>': + if (options & PHP_JSON_HEX_TAG) { + smart_str_appendl(buf, "\\u003E", 6); + } else { + smart_str_appendc(buf, '>'); + } + break; + + case '&': + if (options & PHP_JSON_HEX_AMP) { + smart_str_appendl(buf, "\\u0026", 6); + } else { + smart_str_appendc(buf, '&'); + } + break; + + case '\'': + if (options & PHP_JSON_HEX_APOS) { + smart_str_appendl(buf, "\\u0027", 6); + } else { + smart_str_appendc(buf, '\''); + } + break; + + default: + return false; + } + + return true; +} + zend_result php_json_escape_string( smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder) /* {{{ */ @@ -408,21 +474,79 @@ zend_result php_json_escape_string( 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; + while (len >= sizeof(__m128i)) { + const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); + const __m128i input_range = _mm_cmpgt_epi8(input, _mm_set1_epi8(31)); + + int input_range_mask = _mm_movemask_epi8(input_range); + if (input_range_mask != 0xffff) { + int shift = __builtin_clz(~input_range_mask); + pos += shift; + len -= shift; + break; + } + +#if 0 + const __m128i result_34 = _mm_cmpeq_epi8(input, _mm_set1_epi8(34)); + const __m128i result_38 = _mm_cmpeq_epi8(input, _mm_set1_epi8(38)); + const __m128i result_39 = _mm_cmpeq_epi8(input, _mm_set1_epi8(39)); + const __m128i result_47 = _mm_cmpeq_epi8(input, _mm_set1_epi8(47)); + const __m128i result_60 = _mm_cmpeq_epi8(input, _mm_set1_epi8(60)); + const __m128i result_62 = _mm_cmpeq_epi8(input, _mm_set1_epi8(62)); + const __m128i result_92 = _mm_cmpeq_epi8(input, _mm_set1_epi8(92)); + + const __m128i result_34_38 = _mm_or_si128(result_34, result_38); + const __m128i result_39_47 = _mm_or_si128(result_39, result_47); + const __m128i result_60_62 = _mm_or_si128(result_60, result_62); + + const __m128i result_34_38_39_47 = _mm_or_si128(result_34_38, result_39_47); + const __m128i result_60_62_92 = _mm_or_si128(result_60_62, result_92); + + const __m128i result_individual_bytes = _mm_or_si128(result_34_38_39_47, result_60_62_92); + int mask = _mm_movemask_epi8(result_individual_bytes); +#else + const __m128i result_individual_bytes = _mm_cmpistrm(_mm_setr_epi8(34, 38, 39, 47, 60, 62, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0), input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); + int mask = _mm_cvtsi128_si32(result_individual_bytes); +#endif + int acc = 0; + if (mask != 0) { + do { + int toggle = mask & -mask; + int bit = __builtin_ctz(mask); + mask ^= toggle; + + int len = bit - acc; + smart_str_appendl(buf, s, len + pos); + + acc += len + 1; + pos += len; + us = (unsigned char) s[pos++]; + s += pos; + pos = 0; + + bool handled = php_json_printable_ascii_escape(buf, us, options); + ZEND_ASSERT(handled == true); + } while (mask != 0); + } + + len -= sizeof(__m128i); + pos += sizeof(__m128i) - acc; + } + + if (!len) { + break; + } + us = (unsigned char)s[pos]; if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) { pos++; len--; - if (len == 0) { - smart_str_appendl(buf, s, pos); - break; - } } else { if (pos) { smart_str_appendl(buf, s, pos); s += pos; pos = 0; } - us = (unsigned char)s[0]; if (UNEXPECTED(us >= 0x80)) { zend_result status; us = php_next_utf8_char((unsigned char *)s, len, &pos, &status); @@ -485,26 +609,6 @@ zend_result php_json_escape_string( } else { s++; switch (us) { - case '"': - if (options & PHP_JSON_HEX_QUOT) { - smart_str_appendl(buf, "\\u0022", 6); - } else { - smart_str_appendl(buf, "\\\"", 2); - } - break; - - case '\\': - smart_str_appendl(buf, "\\\\", 2); - break; - - case '/': - if (options & PHP_JSON_UNESCAPED_SLASHES) { - smart_str_appendc(buf, '/'); - } else { - smart_str_appendl(buf, "\\/", 2); - } - break; - case '\b': smart_str_appendl(buf, "\\b", 2); break; @@ -525,47 +629,17 @@ zend_result php_json_escape_string( smart_str_appendl(buf, "\\t", 2); break; - case '<': - if (options & PHP_JSON_HEX_TAG) { - smart_str_appendl(buf, "\\u003C", 6); - } else { - smart_str_appendc(buf, '<'); - } - break; - - case '>': - if (options & PHP_JSON_HEX_TAG) { - smart_str_appendl(buf, "\\u003E", 6); - } else { - smart_str_appendc(buf, '>'); - } - break; - - case '&': - if (options & PHP_JSON_HEX_AMP) { - smart_str_appendl(buf, "\\u0026", 6); - } else { - smart_str_appendc(buf, '&'); - } - break; - - case '\'': - if (options & PHP_JSON_HEX_APOS) { - smart_str_appendl(buf, "\\u0027", 6); - } else { - smart_str_appendc(buf, '\''); - } - break; - default: - ZEND_ASSERT(us < ' '); - dst = smart_str_extend(buf, 6); - dst[0] = '\\'; - dst[1] = 'u'; - dst[2] = '0'; - dst[3] = '0'; - dst[4] = digits[(us >> 4) & 0xf]; - dst[5] = digits[us & 0xf]; + if (!php_json_printable_ascii_escape(buf, us, options)) { + ZEND_ASSERT(us < ' '); + dst = smart_str_extend(buf, 6); + dst[0] = '\\'; + dst[1] = 'u'; + dst[2] = '0'; + dst[3] = '0'; + dst[4] = digits[(us >> 4) & 0xf]; + dst[5] = digits[us & 0xf]; + } break; } len--; @@ -573,6 +647,10 @@ zend_result php_json_escape_string( } } while (len); + if (pos) { + smart_str_appendl(buf, s, pos); + } + smart_str_appendc(buf, '"'); return SUCCESS; From 144b0e1675b26ee403f0c2ca459385695fb9401e Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 20:41:27 +0100 Subject: [PATCH 02/46] shift opt --- ext/json/json_encoder.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index c7bf460e9bd61..fa4449f2b6f11 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -511,16 +511,14 @@ zend_result php_json_escape_string( int acc = 0; if (mask != 0) { do { - int toggle = mask & -mask; - int bit = __builtin_ctz(mask); - mask ^= toggle; + int len = __builtin_ctz(mask); + mask >>= len + 1; - int len = bit - acc; smart_str_appendl(buf, s, len + pos); acc += len + 1; pos += len; - us = (unsigned char) s[pos++]; + us = (unsigned char)s[pos++]; s += pos; pos = 0; From 7d485a9b27e8055aba96820b6a58b01bb06409de Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 20:52:20 +0100 Subject: [PATCH 03/46] Get rid of acc --- ext/json/json_encoder.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index fa4449f2b6f11..8acf57da2da20 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -508,27 +508,31 @@ zend_result php_json_escape_string( const __m128i result_individual_bytes = _mm_cmpistrm(_mm_setr_epi8(34, 38, 39, 47, 60, 62, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0), input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); int mask = _mm_cvtsi128_si32(result_individual_bytes); #endif - int acc = 0; if (mask != 0) { + int shift = __builtin_clz(mask) - 16; do { + /* Note that we shift the input forward, so we have to shift the mask as well, + * beyond the to-be-escaped character */ int len = __builtin_ctz(mask); mask >>= len + 1; smart_str_appendl(buf, s, len + pos); - acc += len + 1; pos += len; - us = (unsigned char)s[pos++]; - s += pos; + us = (unsigned char)s[pos]; + s += pos + 1; /* skip 'us' */ pos = 0; bool handled = php_json_printable_ascii_escape(buf, us, options); ZEND_ASSERT(handled == true); } while (mask != 0); + + pos += shift; + } else { + pos += sizeof(__m128i); } len -= sizeof(__m128i); - pos += sizeof(__m128i) - acc; } if (!len) { From 58f30ff1ee29bedd38ce1680b02fb126ab33ec20 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 20:56:54 +0100 Subject: [PATCH 04/46] SSE2 guard --- ext/json/json_encoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 8acf57da2da20..9bc75560e5d60 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -474,6 +474,7 @@ zend_result php_json_escape_string( 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; +#ifdef __SSE2__ while (len >= sizeof(__m128i)) { const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); const __m128i input_range = _mm_cmpgt_epi8(input, _mm_set1_epi8(31)); @@ -538,6 +539,7 @@ zend_result php_json_escape_string( if (!len) { break; } +#endif us = (unsigned char)s[pos]; if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) { From 6a010584e5438b324df57602ba1bd15bfad98a71 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 20:58:48 +0100 Subject: [PATCH 05/46] use ascii --- ext/json/json_encoder.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 9bc75560e5d60..19d823d2eba09 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -487,14 +487,14 @@ zend_result php_json_escape_string( break; } -#if 0 - const __m128i result_34 = _mm_cmpeq_epi8(input, _mm_set1_epi8(34)); - const __m128i result_38 = _mm_cmpeq_epi8(input, _mm_set1_epi8(38)); - const __m128i result_39 = _mm_cmpeq_epi8(input, _mm_set1_epi8(39)); - const __m128i result_47 = _mm_cmpeq_epi8(input, _mm_set1_epi8(47)); - const __m128i result_60 = _mm_cmpeq_epi8(input, _mm_set1_epi8(60)); - const __m128i result_62 = _mm_cmpeq_epi8(input, _mm_set1_epi8(62)); - const __m128i result_92 = _mm_cmpeq_epi8(input, _mm_set1_epi8(92)); +#if 1 + const __m128i result_34 = _mm_cmpeq_epi8(input, _mm_set1_epi8('"')); + const __m128i result_38 = _mm_cmpeq_epi8(input, _mm_set1_epi8('&')); + const __m128i result_39 = _mm_cmpeq_epi8(input, _mm_set1_epi8('\'')); + const __m128i result_47 = _mm_cmpeq_epi8(input, _mm_set1_epi8('/')); + const __m128i result_60 = _mm_cmpeq_epi8(input, _mm_set1_epi8('<')); + const __m128i result_62 = _mm_cmpeq_epi8(input, _mm_set1_epi8('>')); + const __m128i result_92 = _mm_cmpeq_epi8(input, _mm_set1_epi8('\\')); const __m128i result_34_38 = _mm_or_si128(result_34, result_38); const __m128i result_39_47 = _mm_or_si128(result_39, result_47); @@ -506,7 +506,7 @@ zend_result php_json_escape_string( const __m128i result_individual_bytes = _mm_or_si128(result_34_38_39_47, result_60_62_92); int mask = _mm_movemask_epi8(result_individual_bytes); #else - const __m128i result_individual_bytes = _mm_cmpistrm(_mm_setr_epi8(34, 38, 39, 47, 60, 62, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0), input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); + const __m128i result_individual_bytes = _mm_cmpistrm(_mm_setr_epi8('"', '&', '\'', '/', '<', '>', '\\', 0, 0, 0, 0, 0, 0, 0, 0, 0), input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); int mask = _mm_cvtsi128_si32(result_individual_bytes); #endif if (mask != 0) { From 55a0b0e1b568bde39d3a56a35847f5dc78839793 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 21:05:11 +0100 Subject: [PATCH 06/46] dynamic mask --- ext/json/json_encoder.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 19d823d2eba09..45f99a8d76708 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -432,6 +432,20 @@ static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, u return true; } +#ifdef __SSE2__ +// TODO: may be unused +static zend_always_inline __m128i php_json_create_sse_escape_mask(int options) +{ + const char sentinel = 1; /* outside of the printable range, so no false matches are possible */ + const char amp = (options & PHP_JSON_HEX_AMP) ? '&' : sentinel; + const char apos = (options & PHP_JSON_HEX_APOS) ? '\'' : sentinel; + const char slash = !(options & PHP_JSON_UNESCAPED_SLASHES) ? '/' : sentinel; + const char tag1 = (options & PHP_JSON_HEX_TAG) ? '<' : sentinel; + const char tag2 = (options & PHP_JSON_HEX_TAG) ? '>' : sentinel; + return _mm_setr_epi8('"', amp, apos, slash, tag1, tag2, '\\', 0, 0, 0, 0, 0, 0, 0, 0, 0); +} +#endif + zend_result php_json_escape_string( smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder) /* {{{ */ @@ -469,6 +483,11 @@ zend_result php_json_escape_string( pos = 0; +#ifdef __SSE2__ + const __m128i sse_escape_mask = php_json_create_sse_escape_mask(options); + (void) sse_escape_mask; +#endif + do { static const uint32_t charmap[8] = { 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, @@ -487,7 +506,7 @@ zend_result php_json_escape_string( break; } -#if 1 +#if 0 const __m128i result_34 = _mm_cmpeq_epi8(input, _mm_set1_epi8('"')); const __m128i result_38 = _mm_cmpeq_epi8(input, _mm_set1_epi8('&')); const __m128i result_39 = _mm_cmpeq_epi8(input, _mm_set1_epi8('\'')); @@ -506,7 +525,7 @@ zend_result php_json_escape_string( const __m128i result_individual_bytes = _mm_or_si128(result_34_38_39_47, result_60_62_92); int mask = _mm_movemask_epi8(result_individual_bytes); #else - const __m128i result_individual_bytes = _mm_cmpistrm(_mm_setr_epi8('"', '&', '\'', '/', '<', '>', '\\', 0, 0, 0, 0, 0, 0, 0, 0, 0), input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); + const __m128i result_individual_bytes = _mm_cmpistrm(sse_escape_mask, input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); int mask = _mm_cvtsi128_si32(result_individual_bytes); #endif if (mask != 0) { From 2a2008ef690d2181e6a546d25369077771b56944 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 21:06:49 +0100 Subject: [PATCH 07/46] comment --- ext/json/json_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 45f99a8d76708..4eda96778f84a 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -540,7 +540,7 @@ zend_result php_json_escape_string( pos += len; us = (unsigned char)s[pos]; - s += pos + 1; /* skip 'us' */ + s += pos + 1; /* skip 'us' too */ pos = 0; bool handled = php_json_printable_ascii_escape(buf, us, options); From 7c966a6f3439bb2d2222ac0d91dc0f93639fccaf Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 21:38:03 +0100 Subject: [PATCH 08/46] wip --- ext/json/json_encoder.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 4eda96778f84a..fc57e6a6f39ef 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -492,15 +492,22 @@ zend_result php_json_escape_string( static const uint32_t charmap[8] = { 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; - +// printf("pos %d\n", pos); #ifdef __SSE2__ while (len >= sizeof(__m128i)) { const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); - const __m128i input_range = _mm_cmpgt_epi8(input, _mm_set1_epi8(31)); + const __m128i input_range = _mm_cmplt_epi8(input, _mm_set1_epi8(32)); + + //const uint8_t *raw = (const uint8_t *) &input_range; + //for (int i =0;i<16;i++) { + // printf("%x ", raw[i]); + //} + //printf("\n"); + // TODO: problem if the first UTF-8 char comes before the first escape char int input_range_mask = _mm_movemask_epi8(input_range); - if (input_range_mask != 0xffff) { - int shift = __builtin_clz(~input_range_mask); + if (input_range_mask != 0) { + int shift = __builtin_ctz(input_range_mask); pos += shift; len -= shift; break; From 65f3b7e10c347aa611e76c0d9805a6903700e32d Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 21:58:52 +0100 Subject: [PATCH 09/46] wip --- ext/json/json_encoder.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index fc57e6a6f39ef..9af8a735963db 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -492,7 +492,7 @@ zend_result php_json_escape_string( static const uint32_t charmap[8] = { 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; -// printf("pos %d\n", pos); + #ifdef __SSE2__ while (len >= sizeof(__m128i)) { const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); @@ -505,11 +505,12 @@ zend_result php_json_escape_string( //printf("\n"); // TODO: problem if the first UTF-8 char comes before the first escape char + // and getting this right+performant is hard, so for now we just don't shift. int input_range_mask = _mm_movemask_epi8(input_range); if (input_range_mask != 0) { - int shift = __builtin_ctz(input_range_mask); - pos += shift; - len -= shift; + //int shift = __builtin_ctz(input_range_mask); + //pos += shift; + //len -= shift; break; } From 124396a1ed7b03d910d863e8d84e36f6035e3b11 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 22:08:40 +0100 Subject: [PATCH 10/46] potential solution --- ext/json/json_encoder.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 9af8a735963db..61388f7ddc1b4 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -504,14 +504,11 @@ zend_result php_json_escape_string( //} //printf("\n"); - // TODO: problem if the first UTF-8 char comes before the first escape char - // and getting this right+performant is hard, so for now we just don't shift. + int max_shift = 16; + int input_range_mask = _mm_movemask_epi8(input_range); if (input_range_mask != 0) { - //int shift = __builtin_ctz(input_range_mask); - //pos += shift; - //len -= shift; - break; + max_shift = __builtin_ctz(input_range_mask); } #if 0 @@ -537,7 +534,13 @@ zend_result php_json_escape_string( int mask = _mm_cvtsi128_si32(result_individual_bytes); #endif if (mask != 0) { - int shift = __builtin_clz(mask) - 16; + if (max_shift < 16) { + int shift = __builtin_ctz(mask); /* first offending character */ + pos += MIN(max_shift, shift); + len -= MIN(max_shift, shift); + break; + } + int shift = __builtin_clz(mask) - 16; /* skips over everything */ do { /* Note that we shift the input forward, so we have to shift the mask as well, * beyond the to-be-escaped character */ @@ -557,6 +560,11 @@ zend_result php_json_escape_string( pos += shift; } else { + if (max_shift < 16) { + pos += max_shift; + len -= max_shift; + break; + } pos += sizeof(__m128i); } From 182616153cbf1cfa46c6ae51c03b2e618b4628c0 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 22:11:07 +0100 Subject: [PATCH 11/46] remove some debug --- ext/json/json_encoder.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 61388f7ddc1b4..5386718f541d6 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -498,12 +498,6 @@ zend_result php_json_escape_string( const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); const __m128i input_range = _mm_cmplt_epi8(input, _mm_set1_epi8(32)); - //const uint8_t *raw = (const uint8_t *) &input_range; - //for (int i =0;i<16;i++) { - // printf("%x ", raw[i]); - //} - //printf("\n"); - int max_shift = 16; int input_range_mask = _mm_movemask_epi8(input_range); From 5a2c0347d76b19040052622c6adb4206c735a7c8 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 22:12:38 +0100 Subject: [PATCH 12/46] correct ifdefs, without resolver support --- ext/json/json_encoder.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 5386718f541d6..cea263739f251 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -30,9 +30,9 @@ #include "zend_property_hooks.h" #include "zend_lazy_objects.h" -#include -# pragma GCC push_options -# pragma GCC target ("sse4.2") +#ifdef ZEND_INTRIN_SSE4_2_NATIVE +# include +#endif static const char digits[] = "0123456789abcdef"; @@ -432,8 +432,7 @@ static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, u return true; } -#ifdef __SSE2__ -// TODO: may be unused +#ifdef ZEND_INTRIN_SSE4_2_NATIVE static zend_always_inline __m128i php_json_create_sse_escape_mask(int options) { const char sentinel = 1; /* outside of the printable range, so no false matches are possible */ @@ -483,9 +482,8 @@ zend_result php_json_escape_string( pos = 0; -#ifdef __SSE2__ +#ifdef ZEND_INTRIN_SSE4_2_NATIVE const __m128i sse_escape_mask = php_json_create_sse_escape_mask(options); - (void) sse_escape_mask; #endif do { @@ -505,7 +503,10 @@ zend_result php_json_escape_string( max_shift = __builtin_ctz(input_range_mask); } -#if 0 +#ifdef ZEND_INTRIN_SSE4_2_NATIVE /* TODO: resolver support */ + const __m128i result_individual_bytes = _mm_cmpistrm(sse_escape_mask, input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); + int mask = _mm_cvtsi128_si32(result_individual_bytes); +#else const __m128i result_34 = _mm_cmpeq_epi8(input, _mm_set1_epi8('"')); const __m128i result_38 = _mm_cmpeq_epi8(input, _mm_set1_epi8('&')); const __m128i result_39 = _mm_cmpeq_epi8(input, _mm_set1_epi8('\'')); @@ -523,9 +524,6 @@ zend_result php_json_escape_string( const __m128i result_individual_bytes = _mm_or_si128(result_34_38_39_47, result_60_62_92); int mask = _mm_movemask_epi8(result_individual_bytes); -#else - const __m128i result_individual_bytes = _mm_cmpistrm(sse_escape_mask, input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - int mask = _mm_cvtsi128_si32(result_individual_bytes); #endif if (mask != 0) { if (max_shift < 16) { From 326b982c5bc4c84194eac60e7ccd201c3c28946e Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 22:17:16 +0100 Subject: [PATCH 13/46] Attempt to use standard bitset stuff --- ext/json/json_encoder.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index cea263739f251..0df910b704133 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -29,6 +29,7 @@ #include "zend_enum.h" #include "zend_property_hooks.h" #include "zend_lazy_objects.h" +#include "zend_bitset.h" #ifdef ZEND_INTRIN_SSE4_2_NATIVE # include @@ -500,7 +501,7 @@ zend_result php_json_escape_string( int input_range_mask = _mm_movemask_epi8(input_range); if (input_range_mask != 0) { - max_shift = __builtin_ctz(input_range_mask); + max_shift = zend_ulong_ntz(input_range_mask); } #ifdef ZEND_INTRIN_SSE4_2_NATIVE /* TODO: resolver support */ @@ -527,16 +528,16 @@ zend_result php_json_escape_string( #endif if (mask != 0) { if (max_shift < 16) { - int shift = __builtin_ctz(mask); /* first offending character */ + int shift = zend_ulong_ntz(mask); /* first offending character */ pos += MIN(max_shift, shift); len -= MIN(max_shift, shift); break; } - int shift = __builtin_clz(mask) - 16; /* skips over everything */ + int shift = zend_ulong_nlz(mask) - 16 - (SIZEOF_ZEND_LONG == 8 ? 32 : 0); /* skips over everything */ do { /* Note that we shift the input forward, so we have to shift the mask as well, * beyond the to-be-escaped character */ - int len = __builtin_ctz(mask); + int len = zend_ulong_ntz(mask); mask >>= len + 1; smart_str_appendl(buf, s, len + pos); From db54e3f32721ffa60a1f55a28871d563387fa991 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 22:34:09 +0100 Subject: [PATCH 14/46] preliminary resolver support (needs more work) --- ext/json/json_encoder.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 0df910b704133..049be6ef26c4d 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -31,9 +31,12 @@ #include "zend_lazy_objects.h" #include "zend_bitset.h" -#ifdef ZEND_INTRIN_SSE4_2_NATIVE +#if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) # include #endif +#ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO +# include "zend_cpuinfo.h" +#endif static const char digits[] = "0123456789abcdef"; @@ -433,7 +436,7 @@ static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, u return true; } -#ifdef ZEND_INTRIN_SSE4_2_NATIVE +#if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) static zend_always_inline __m128i php_json_create_sse_escape_mask(int options) { const char sentinel = 1; /* outside of the printable range, so no false matches are possible */ @@ -446,6 +449,28 @@ static zend_always_inline __m128i php_json_create_sse_escape_mask(int options) } #endif +#ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO +static int php_json_sse42_compute_escape_intersection(const __m128i mask, const __m128i input) __attribute__((ifunc("resolve_json_escape_intersection"))); + +typedef int (*php_json_compute_escape_intersection_t)(const __m128i mask, const __m128i input); + +ZEND_INTRIN_SSE4_2_FUNC_DECL(int php_json_sse42_compute_escape_intersection_real(const __m128i mask, const __m128i input)); +zend_always_inline int php_json_sse42_compute_escape_intersection_real(const __m128i mask, const __m128i input) +{ + const __m128i result_individual_bytes = _mm_cmpistrm(mask, input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); + return _mm_cvtsi128_si32(result_individual_bytes); +} + +ZEND_NO_SANITIZE_ADDRESS +ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */ +static php_json_compute_escape_intersection_t resolve_json_escape_intersection(void) { + if (zend_cpu_supports_sse42()) { + return php_json_sse42_compute_escape_intersection_real; + } + return NULL/*php_json_sse42_compute_escape_intersection_fallback*/; // TODO: implement this fallback +} +#endif + zend_result php_json_escape_string( smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder) /* {{{ */ @@ -483,7 +508,7 @@ zend_result php_json_escape_string( pos = 0; -#ifdef ZEND_INTRIN_SSE4_2_NATIVE +#if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) const __m128i sse_escape_mask = php_json_create_sse_escape_mask(options); #endif @@ -504,9 +529,10 @@ zend_result php_json_escape_string( max_shift = zend_ulong_ntz(input_range_mask); } -#ifdef ZEND_INTRIN_SSE4_2_NATIVE /* TODO: resolver support */ - const __m128i result_individual_bytes = _mm_cmpistrm(sse_escape_mask, input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - int mask = _mm_cvtsi128_si32(result_individual_bytes); +#ifdef ZEND_INTRIN_SSE4_2_NATIVE + int mask = php_json_sse42_compute_escape_intersection_real(sse_escape_mask, input); +#elif defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) + int mask = php_json_sse42_compute_escape_intersection(sse_escape_mask, input); #else const __m128i result_34 = _mm_cmpeq_epi8(input, _mm_set1_epi8('"')); const __m128i result_38 = _mm_cmpeq_epi8(input, _mm_set1_epi8('&')); From 8bcd6bbc6436e6b9e40eaccb29765d778ffb329c Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 3 Feb 2025 22:34:54 +0100 Subject: [PATCH 15/46] fix native build --- ext/json/json_encoder.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 049be6ef26c4d..db6406dcf85a7 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -447,12 +447,6 @@ static zend_always_inline __m128i php_json_create_sse_escape_mask(int options) const char tag2 = (options & PHP_JSON_HEX_TAG) ? '>' : sentinel; return _mm_setr_epi8('"', amp, apos, slash, tag1, tag2, '\\', 0, 0, 0, 0, 0, 0, 0, 0, 0); } -#endif - -#ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO -static int php_json_sse42_compute_escape_intersection(const __m128i mask, const __m128i input) __attribute__((ifunc("resolve_json_escape_intersection"))); - -typedef int (*php_json_compute_escape_intersection_t)(const __m128i mask, const __m128i input); ZEND_INTRIN_SSE4_2_FUNC_DECL(int php_json_sse42_compute_escape_intersection_real(const __m128i mask, const __m128i input)); zend_always_inline int php_json_sse42_compute_escape_intersection_real(const __m128i mask, const __m128i input) @@ -460,6 +454,12 @@ zend_always_inline int php_json_sse42_compute_escape_intersection_real(const __m const __m128i result_individual_bytes = _mm_cmpistrm(mask, input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); return _mm_cvtsi128_si32(result_individual_bytes); } +#endif + +#ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO +static int php_json_sse42_compute_escape_intersection(const __m128i mask, const __m128i input) __attribute__((ifunc("resolve_json_escape_intersection"))); + +typedef int (*php_json_compute_escape_intersection_t)(const __m128i mask, const __m128i input); ZEND_NO_SANITIZE_ADDRESS ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */ From d7f2562e0985fe9b427aa6f5c36ef8cfc12b3964 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 4 Feb 2025 07:55:10 +0100 Subject: [PATCH 16/46] let ci run without max_shift trick to compare perf --- ext/json/json_encoder.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index db6406dcf85a7..52c6174c6491b 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -522,11 +522,9 @@ zend_result php_json_escape_string( const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); const __m128i input_range = _mm_cmplt_epi8(input, _mm_set1_epi8(32)); - int max_shift = 16; - int input_range_mask = _mm_movemask_epi8(input_range); if (input_range_mask != 0) { - max_shift = zend_ulong_ntz(input_range_mask); + break; } #ifdef ZEND_INTRIN_SSE4_2_NATIVE @@ -553,12 +551,6 @@ zend_result php_json_escape_string( int mask = _mm_movemask_epi8(result_individual_bytes); #endif if (mask != 0) { - if (max_shift < 16) { - int shift = zend_ulong_ntz(mask); /* first offending character */ - pos += MIN(max_shift, shift); - len -= MIN(max_shift, shift); - break; - } int shift = zend_ulong_nlz(mask) - 16 - (SIZEOF_ZEND_LONG == 8 ? 32 : 0); /* skips over everything */ do { /* Note that we shift the input forward, so we have to shift the mask as well, @@ -579,11 +571,6 @@ zend_result php_json_escape_string( pos += shift; } else { - if (max_shift < 16) { - pos += max_shift; - len -= max_shift; - break; - } pos += sizeof(__m128i); } From 2b11554daf74ebd83722eb786368a2c19a13cf49 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 4 Feb 2025 08:42:09 +0100 Subject: [PATCH 17/46] Revert "let ci run without max_shift trick to compare perf" This reverts commit d7f2562e0985fe9b427aa6f5c36ef8cfc12b3964. --- ext/json/json_encoder.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 52c6174c6491b..db6406dcf85a7 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -522,9 +522,11 @@ zend_result php_json_escape_string( const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); const __m128i input_range = _mm_cmplt_epi8(input, _mm_set1_epi8(32)); + int max_shift = 16; + int input_range_mask = _mm_movemask_epi8(input_range); if (input_range_mask != 0) { - break; + max_shift = zend_ulong_ntz(input_range_mask); } #ifdef ZEND_INTRIN_SSE4_2_NATIVE @@ -551,6 +553,12 @@ zend_result php_json_escape_string( int mask = _mm_movemask_epi8(result_individual_bytes); #endif if (mask != 0) { + if (max_shift < 16) { + int shift = zend_ulong_ntz(mask); /* first offending character */ + pos += MIN(max_shift, shift); + len -= MIN(max_shift, shift); + break; + } int shift = zend_ulong_nlz(mask) - 16 - (SIZEOF_ZEND_LONG == 8 ? 32 : 0); /* skips over everything */ do { /* Note that we shift the input forward, so we have to shift the mask as well, @@ -571,6 +579,11 @@ zend_result php_json_escape_string( pos += shift; } else { + if (max_shift < 16) { + pos += max_shift; + len -= max_shift; + break; + } pos += sizeof(__m128i); } From ef72f33a87775a95014457a501b7968253f13c95 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 4 Feb 2025 19:07:31 +0100 Subject: [PATCH 18/46] Reduce overhead of worst case to 1.5x --- ext/json/json_encoder.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index db6406dcf85a7..7bf4cd40f0582 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -520,6 +520,7 @@ zend_result php_json_escape_string( #ifdef __SSE2__ while (len >= sizeof(__m128i)) { const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); + /* signed compare, so checks for unsigned ASCII >= 0x80 as well */ const __m128i input_range = _mm_cmplt_epi8(input, _mm_set1_epi8(32)); int max_shift = 16; @@ -559,25 +560,26 @@ zend_result php_json_escape_string( len -= MIN(max_shift, shift); break; } + int shift = zend_ulong_nlz(mask) - 16 - (SIZEOF_ZEND_LONG == 8 ? 32 : 0); /* skips over everything */ + smart_str_appendl(buf, s, pos); + s += pos; + pos = shift; + do { /* Note that we shift the input forward, so we have to shift the mask as well, * beyond the to-be-escaped character */ int len = zend_ulong_ntz(mask); mask >>= len + 1; - smart_str_appendl(buf, s, len + pos); + smart_str_appendl(buf, s, len); - pos += len; - us = (unsigned char)s[pos]; - s += pos + 1; /* skip 'us' too */ - pos = 0; + us = (unsigned char)s[len]; + s += len + 1; /* skip 'us' too */ bool handled = php_json_printable_ascii_escape(buf, us, options); ZEND_ASSERT(handled == true); } while (mask != 0); - - pos += shift; } else { if (max_shift < 16) { pos += max_shift; From e3baa233d74ed772685bf7785e4ad9624fd5acdc Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 4 Feb 2025 20:33:18 +0100 Subject: [PATCH 19/46] wip1 --- ext/json/json_encoder.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 7bf4cd40f0582..4e1ffa4c38da4 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -602,17 +602,23 @@ zend_result php_json_escape_string( pos++; len--; } else { - if (pos) { - smart_str_appendl(buf, s, pos); - s += pos; - pos = 0; - } if (UNEXPECTED(us >= 0x80)) { zend_result status; - us = php_next_utf8_char((unsigned char *)s, len, &pos, &status); + size_t pos_old = pos; + const char *cur = s+pos; + pos = 0; + us = php_next_utf8_char((unsigned char *)cur, len, &pos, &status); + pos += pos_old; + len -= pos - pos_old; /* check whether UTF8 character is correct */ if (UNEXPECTED(status != SUCCESS)) { + if (pos_old && (options & (PHP_JSON_INVALID_UTF8_IGNORE|PHP_JSON_INVALID_UTF8_SUBSTITUTE))) { + smart_str_appendl(buf, s, pos_old); + s += pos; + pos = 0; + } + if (options & PHP_JSON_INVALID_UTF8_IGNORE) { /* ignore invalid UTF8 character */ } else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) { @@ -637,8 +643,14 @@ zend_result php_json_escape_string( } else if ((options & PHP_JSON_UNESCAPED_UNICODE) && ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS) || us < 0x2028 || us > 0x2029)) { - smart_str_appendl(buf, s, pos); + /* No need to emit any bytes, just move the cursor. */ } else { + if (pos_old) { + smart_str_appendl(buf, s, pos_old); + } + s += pos; + pos = 0; + /* From http://en.wikipedia.org/wiki/UTF16 */ if (us >= 0x10000) { unsigned int next_us; @@ -663,10 +675,12 @@ zend_result php_json_escape_string( dst[4] = digits[(us >> 4) & 0xf]; dst[5] = digits[us & 0xf]; } - s += pos; - len -= pos; - pos = 0; } else { + if (pos) { + smart_str_appendl(buf, s, pos); + s += pos; + pos = 0; + } s++; switch (us) { case '\b': From 3c8b68e70b6184243977b2a3418a349100f4e825 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 4 Feb 2025 21:14:16 +0100 Subject: [PATCH 20/46] cheaper pos compute --- ext/json/json_encoder.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 4e1ffa4c38da4..355129d87d46f 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -561,10 +561,9 @@ zend_result php_json_escape_string( break; } - int shift = zend_ulong_nlz(mask) - 16 - (SIZEOF_ZEND_LONG == 8 ? 32 : 0); /* skips over everything */ smart_str_appendl(buf, s, pos); s += pos; - pos = shift; + const char *s_backup = s; do { /* Note that we shift the input forward, so we have to shift the mask as well, @@ -580,6 +579,8 @@ zend_result php_json_escape_string( bool handled = php_json_printable_ascii_escape(buf, us, options); ZEND_ASSERT(handled == true); } while (mask != 0); + + pos = 16 - (s - s_backup); } else { if (max_shift < 16) { pos += max_shift; From 4d16463d2169ff025aac8e4bd1ba9b1b45181d58 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 4 Feb 2025 21:16:36 +0100 Subject: [PATCH 21/46] no magic nrs --- ext/json/json_encoder.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 355129d87d46f..c72b81fb31991 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -523,7 +523,7 @@ zend_result php_json_escape_string( /* signed compare, so checks for unsigned ASCII >= 0x80 as well */ const __m128i input_range = _mm_cmplt_epi8(input, _mm_set1_epi8(32)); - int max_shift = 16; + int max_shift = sizeof(__m128i); int input_range_mask = _mm_movemask_epi8(input_range); if (input_range_mask != 0) { @@ -554,7 +554,7 @@ zend_result php_json_escape_string( int mask = _mm_movemask_epi8(result_individual_bytes); #endif if (mask != 0) { - if (max_shift < 16) { + if (max_shift < sizeof(__m128i)) { int shift = zend_ulong_ntz(mask); /* first offending character */ pos += MIN(max_shift, shift); len -= MIN(max_shift, shift); @@ -580,9 +580,9 @@ zend_result php_json_escape_string( ZEND_ASSERT(handled == true); } while (mask != 0); - pos = 16 - (s - s_backup); + pos = sizeof(__m128i) - (s - s_backup); } else { - if (max_shift < 16) { + if (max_shift < sizeof(__m128i)) { pos += max_shift; len -= max_shift; break; From 27a89e0844b755419f911f2e29a9f783cf8e097b Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 4 Feb 2025 21:23:47 +0100 Subject: [PATCH 22/46] simple heuristic --- ext/json/json_encoder.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index c72b81fb31991..06f6a711a9e18 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -528,6 +528,10 @@ zend_result php_json_escape_string( int input_range_mask = _mm_movemask_epi8(input_range); if (input_range_mask != 0) { max_shift = zend_ulong_ntz(input_range_mask); + if (max_shift <= 1) { + /* not worth it */ + break; + } } #ifdef ZEND_INTRIN_SSE4_2_NATIVE From b071dbad7f6a4482d8256e1cb759982c1f46b482 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 5 Feb 2025 19:06:14 +0100 Subject: [PATCH 23/46] various small improvements --- ext/json/json_encoder.c | 144 ++++++++++++++++++++++++++-------------- 1 file changed, 93 insertions(+), 51 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 06f6a711a9e18..d78a909456f10 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -374,32 +374,43 @@ static zend_result php_json_encode_array(smart_str *buf, zval *val, int options, } /* }}} */ +/* Outlined smart_str_appendl() to avoid performance loss due to code bloat */ +// TODO: now I don't outline it anymore... +static void php_json_append(smart_str *dest, const char *src, size_t len) +{ + /* smart_str has a minimum size of the input length, + * this avoids generating initial allocation code */ + ZEND_ASSERT(dest->s); + + smart_str_appendl(dest, src, len); +} + static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, unsigned char us, int options) { switch (us) { case '"': if (options & PHP_JSON_HEX_QUOT) { - smart_str_appendl(buf, "\\u0022", 6); + php_json_append(buf, "\\u0022", 6); } else { - smart_str_appendl(buf, "\\\"", 2); + php_json_append(buf, "\\\"", 2); } break; case '\\': - smart_str_appendl(buf, "\\\\", 2); + php_json_append(buf, "\\\\", 2); break; case '/': if (options & PHP_JSON_UNESCAPED_SLASHES) { smart_str_appendc(buf, '/'); } else { - smart_str_appendl(buf, "\\/", 2); + php_json_append(buf, "\\/", 2); } break; case '<': if (options & PHP_JSON_HEX_TAG) { - smart_str_appendl(buf, "\\u003C", 6); + php_json_append(buf, "\\u003C", 6); } else { smart_str_appendc(buf, '<'); } @@ -407,7 +418,7 @@ static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, u case '>': if (options & PHP_JSON_HEX_TAG) { - smart_str_appendl(buf, "\\u003E", 6); + php_json_append(buf, "\\u003E", 6); } else { smart_str_appendc(buf, '>'); } @@ -415,7 +426,7 @@ static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, u case '&': if (options & PHP_JSON_HEX_AMP) { - smart_str_appendl(buf, "\\u0026", 6); + php_json_append(buf, "\\u0026", 6); } else { smart_str_appendc(buf, '&'); } @@ -423,7 +434,7 @@ static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, u case '\'': if (options & PHP_JSON_HEX_APOS) { - smart_str_appendl(buf, "\\u0027", 6); + php_json_append(buf, "\\u0027", 6); } else { smart_str_appendc(buf, '\''); } @@ -436,16 +447,63 @@ static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, u return true; } +#ifdef __SSE2__ +static zend_always_inline int php_json_sse2_compute_escape_intersection(const __m128i mask, const __m128i input) +{ + (void) mask; + + const __m128i result_34 = _mm_cmpeq_epi8(input, _mm_set1_epi8('"')); + const __m128i result_38 = _mm_cmpeq_epi8(input, _mm_set1_epi8('&')); + const __m128i result_39 = _mm_cmpeq_epi8(input, _mm_set1_epi8('\'')); + const __m128i result_47 = _mm_cmpeq_epi8(input, _mm_set1_epi8('/')); + const __m128i result_60 = _mm_cmpeq_epi8(input, _mm_set1_epi8('<')); + const __m128i result_62 = _mm_cmpeq_epi8(input, _mm_set1_epi8('>')); + const __m128i result_92 = _mm_cmpeq_epi8(input, _mm_set1_epi8('\\')); + + const __m128i result_34_38 = _mm_or_si128(result_34, result_38); + const __m128i result_39_47 = _mm_or_si128(result_39, result_47); + const __m128i result_60_62 = _mm_or_si128(result_60, result_62); + + const __m128i result_34_38_39_47 = _mm_or_si128(result_34_38, result_39_47); + const __m128i result_60_62_92 = _mm_or_si128(result_60_62, result_92); + + const __m128i result_individual_bytes = _mm_or_si128(result_34_38_39_47, result_60_62_92); + return _mm_movemask_epi8(result_individual_bytes); +} +#endif + #if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) +static const char php_json_escape_noslashes_lut[2][8][16] = { + /* !PHP_JSON_UNESCAPED_SLASHES */ + { + [0] = {'"', '\\', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_AMP] = {'"', '\\', '&', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_APOS] = {'"', '\\', '\'', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_AMP|PHP_JSON_HEX_APOS] = {'"', '\\', '&', '\'', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_TAG] = {'"', '\\', '<', '>', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_AMP|PHP_JSON_HEX_TAG] = {'"', '\\', '&', '<', '>', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_APOS|PHP_JSON_HEX_TAG] = {'"', '\\', '\'', '<', '>', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_AMP|PHP_JSON_HEX_APOS|PHP_JSON_HEX_TAG] = {'"', '\\', '&', '\'', '<', '>', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0} + }, + + /* PHP_JSON_UNESCAPED_SLASHES */ + { + [0] = {'"', '\\', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_AMP] = {'"', '\\', '&', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_APOS] = {'"', '\\', '\'', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_AMP|PHP_JSON_HEX_APOS] = {'"', '\\', '&', '\'', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_TAG] = {'"', '\\', '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_AMP|PHP_JSON_HEX_TAG] = {'"', '\\', '&', '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_APOS|PHP_JSON_HEX_TAG] = {'"', '\\', '\'', '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + [PHP_JSON_HEX_AMP|PHP_JSON_HEX_APOS|PHP_JSON_HEX_TAG] = {'"', '\\', '&', '\'', '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + } +}; + static zend_always_inline __m128i php_json_create_sse_escape_mask(int options) { - const char sentinel = 1; /* outside of the printable range, so no false matches are possible */ - const char amp = (options & PHP_JSON_HEX_AMP) ? '&' : sentinel; - const char apos = (options & PHP_JSON_HEX_APOS) ? '\'' : sentinel; - const char slash = !(options & PHP_JSON_UNESCAPED_SLASHES) ? '/' : sentinel; - const char tag1 = (options & PHP_JSON_HEX_TAG) ? '<' : sentinel; - const char tag2 = (options & PHP_JSON_HEX_TAG) ? '>' : sentinel; - return _mm_setr_epi8('"', amp, apos, slash, tag1, tag2, '\\', 0, 0, 0, 0, 0, 0, 0, 0, 0); + const int slashes = (options & PHP_JSON_UNESCAPED_SLASHES) ? 1 : 0; + const int masked = options & (PHP_JSON_HEX_AMP|PHP_JSON_HEX_APOS|PHP_JSON_HEX_TAG); + return *(const __m128i *) &php_json_escape_noslashes_lut[slashes][masked]; } ZEND_INTRIN_SSE4_2_FUNC_DECL(int php_json_sse42_compute_escape_intersection_real(const __m128i mask, const __m128i input)); @@ -467,7 +525,7 @@ static php_json_compute_escape_intersection_t resolve_json_escape_intersection(v if (zend_cpu_supports_sse42()) { return php_json_sse42_compute_escape_intersection_real; } - return NULL/*php_json_sse42_compute_escape_intersection_fallback*/; // TODO: implement this fallback + return php_json_sse2_compute_escape_intersection; } #endif @@ -500,10 +558,10 @@ zend_result php_json_escape_string( } } - checkpoint = buf->s ? ZSTR_LEN(buf->s) : 0; /* pre-allocate for string length plus 2 quotes */ smart_str_alloc(buf, len+2, 0); + checkpoint = ZSTR_LEN(buf->s); smart_str_appendc(buf, '"'); pos = 0; @@ -520,7 +578,7 @@ zend_result php_json_escape_string( #ifdef __SSE2__ while (len >= sizeof(__m128i)) { const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); - /* signed compare, so checks for unsigned ASCII >= 0x80 as well */ + /* signed compare, so checks for unsigned bytes >= 0x80 as well */ const __m128i input_range = _mm_cmplt_epi8(input, _mm_set1_epi8(32)); int max_shift = sizeof(__m128i); @@ -539,23 +597,7 @@ zend_result php_json_escape_string( #elif defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) int mask = php_json_sse42_compute_escape_intersection(sse_escape_mask, input); #else - const __m128i result_34 = _mm_cmpeq_epi8(input, _mm_set1_epi8('"')); - const __m128i result_38 = _mm_cmpeq_epi8(input, _mm_set1_epi8('&')); - const __m128i result_39 = _mm_cmpeq_epi8(input, _mm_set1_epi8('\'')); - const __m128i result_47 = _mm_cmpeq_epi8(input, _mm_set1_epi8('/')); - const __m128i result_60 = _mm_cmpeq_epi8(input, _mm_set1_epi8('<')); - const __m128i result_62 = _mm_cmpeq_epi8(input, _mm_set1_epi8('>')); - const __m128i result_92 = _mm_cmpeq_epi8(input, _mm_set1_epi8('\\')); - - const __m128i result_34_38 = _mm_or_si128(result_34, result_38); - const __m128i result_39_47 = _mm_or_si128(result_39, result_47); - const __m128i result_60_62 = _mm_or_si128(result_60, result_62); - - const __m128i result_34_38_39_47 = _mm_or_si128(result_34_38, result_39_47); - const __m128i result_60_62_92 = _mm_or_si128(result_60_62, result_92); - - const __m128i result_individual_bytes = _mm_or_si128(result_34_38_39_47, result_60_62_92); - int mask = _mm_movemask_epi8(result_individual_bytes); + int mask = php_json_sse2_compute_escape_intersection(_mm_setzero_si128(), input); #endif if (mask != 0) { if (max_shift < sizeof(__m128i)) { @@ -565,7 +607,7 @@ zend_result php_json_escape_string( break; } - smart_str_appendl(buf, s, pos); + php_json_append(buf, s, pos); s += pos; const char *s_backup = s; @@ -575,7 +617,7 @@ zend_result php_json_escape_string( int len = zend_ulong_ntz(mask); mask >>= len + 1; - smart_str_appendl(buf, s, len); + php_json_append(buf, s, len); us = (unsigned char)s[len]; s += len + 1; /* skip 'us' too */ @@ -610,16 +652,16 @@ zend_result php_json_escape_string( if (UNEXPECTED(us >= 0x80)) { zend_result status; size_t pos_old = pos; - const char *cur = s+pos; + const char *cur = s + pos; pos = 0; us = php_next_utf8_char((unsigned char *)cur, len, &pos, &status); pos += pos_old; len -= pos - pos_old; /* check whether UTF8 character is correct */ - if (UNEXPECTED(status != SUCCESS)) { + if (UNEXPECTED(!us)) { if (pos_old && (options & (PHP_JSON_INVALID_UTF8_IGNORE|PHP_JSON_INVALID_UTF8_SUBSTITUTE))) { - smart_str_appendl(buf, s, pos_old); + php_json_append(buf, s, pos_old); s += pos; pos = 0; } @@ -629,15 +671,15 @@ zend_result php_json_escape_string( } else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) { /* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */ if (options & PHP_JSON_UNESCAPED_UNICODE) { - smart_str_appendl(buf, "\xef\xbf\xbd", 3); + php_json_append(buf, "\xef\xbf\xbd", 3); } else { - smart_str_appendl(buf, "\\ufffd", 6); + php_json_append(buf, "\\ufffd", 6); } } else { ZSTR_LEN(buf->s) = checkpoint; encoder->error_code = PHP_JSON_ERROR_UTF8; if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) { - smart_str_appendl(buf, "null", 4); + php_json_append(buf, "null", 4); } return FAILURE; } @@ -651,7 +693,7 @@ zend_result php_json_escape_string( /* No need to emit any bytes, just move the cursor. */ } else { if (pos_old) { - smart_str_appendl(buf, s, pos_old); + php_json_append(buf, s, pos_old); } s += pos; pos = 0; @@ -682,30 +724,30 @@ zend_result php_json_escape_string( } } else { if (pos) { - smart_str_appendl(buf, s, pos); + php_json_append(buf, s, pos); s += pos; pos = 0; } s++; switch (us) { case '\b': - smart_str_appendl(buf, "\\b", 2); + php_json_append(buf, "\\b", 2); break; case '\f': - smart_str_appendl(buf, "\\f", 2); + php_json_append(buf, "\\f", 2); break; case '\n': - smart_str_appendl(buf, "\\n", 2); + php_json_append(buf, "\\n", 2); break; case '\r': - smart_str_appendl(buf, "\\r", 2); + php_json_append(buf, "\\r", 2); break; case '\t': - smart_str_appendl(buf, "\\t", 2); + php_json_append(buf, "\\t", 2); break; default: @@ -727,7 +769,7 @@ zend_result php_json_escape_string( } while (len); if (pos) { - smart_str_appendl(buf, s, pos); + php_json_append(buf, s, pos); } smart_str_appendc(buf, '"'); From 2ae769e2d4b36fe66533892e7bfdd743be508075 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 5 Feb 2025 19:36:02 +0100 Subject: [PATCH 24/46] save ci resources --- .github/workflows/push.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index b92a55b58b87d..fa0e1c21c0dc6 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -137,7 +137,7 @@ jobs: if: ${{ !matrix.asan }} uses: ./.github/actions/verify-generated-files LINUX_X32: - if: github.repository == 'php/php-src' || github.event_name == 'pull_request' + if: false name: LINUX_X32_DEBUG_ZTS runs-on: ubuntu-latest timeout-minutes: 50 @@ -193,7 +193,7 @@ jobs: -d zend_extension=opcache.so -d opcache.enable_cli=1 MACOS_DEBUG_NTS: - if: github.repository == 'php/php-src' || github.event_name == 'pull_request' + if: false strategy: fail-fast: false matrix: @@ -234,7 +234,7 @@ jobs: - name: Verify generated files are up to date uses: ./.github/actions/verify-generated-files WINDOWS: - if: github.repository == 'php/php-src' || github.event_name == 'pull_request' + if: false name: WINDOWS_X64_ZTS runs-on: windows-2022 timeout-minutes: 50 From 10bd63ad1b7ba4f1765046f3cc9cf7a40833d22b Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 5 Feb 2025 19:36:27 +0100 Subject: [PATCH 25/46] test with always inline --- ext/json/json_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index d78a909456f10..de9e12d269c09 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -376,7 +376,7 @@ static zend_result php_json_encode_array(smart_str *buf, zval *val, int options, /* Outlined smart_str_appendl() to avoid performance loss due to code bloat */ // TODO: now I don't outline it anymore... -static void php_json_append(smart_str *dest, const char *src, size_t len) +static zend_always_inline void php_json_append(smart_str *dest, const char *src, size_t len) { /* smart_str has a minimum size of the input length, * this avoids generating initial allocation code */ From 5df25a45ca6c34c6e7fdb8dc5ea267244f11fcee Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:21:17 +0100 Subject: [PATCH 26/46] tweak --- ext/json/json_encoder.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index de9e12d269c09..2cf67b38815ee 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -374,8 +374,7 @@ static zend_result php_json_encode_array(smart_str *buf, zval *val, int options, } /* }}} */ -/* Outlined smart_str_appendl() to avoid performance loss due to code bloat */ -// TODO: now I don't outline it anymore... +/* Specialization of smart_str_appendl() to avoid performance loss due to code bloat */ static zend_always_inline void php_json_append(smart_str *dest, const char *src, size_t len) { /* smart_str has a minimum size of the input length, @@ -387,6 +386,8 @@ static zend_always_inline void php_json_append(smart_str *dest, const char *src, static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, unsigned char us, int options) { + ZEND_ASSERT(buf->s); + switch (us) { case '"': if (options & PHP_JSON_HEX_QUOT) { From d5c5b9fab0f924cf9bf933101a7edf181a40fffb Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 5 Feb 2025 21:28:04 +0100 Subject: [PATCH 27/46] code layout trick (vtune dsb improvement) --- ext/json/json_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 2cf67b38815ee..8e85971050d2c 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -601,7 +601,7 @@ zend_result php_json_escape_string( int mask = php_json_sse2_compute_escape_intersection(_mm_setzero_si128(), input); #endif if (mask != 0) { - if (max_shift < sizeof(__m128i)) { + if (UNEXPECTED(max_shift < sizeof(__m128i))) { int shift = zend_ulong_ntz(mask); /* first offending character */ pos += MIN(max_shift, shift); len -= MIN(max_shift, shift); From ceb8443871c2fbfa74f83ff4421b641395007201 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 5 Feb 2025 23:32:47 +0100 Subject: [PATCH 28/46] skip extra check --- ext/json/json_encoder.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 8e85971050d2c..c0ccb237dbbb3 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -587,9 +587,10 @@ zend_result php_json_escape_string( int input_range_mask = _mm_movemask_epi8(input_range); if (input_range_mask != 0) { max_shift = zend_ulong_ntz(input_range_mask); - if (max_shift <= 1) { + if (UNEXPECTED(max_shift <= 1)) { /* not worth it */ - break; + us = (unsigned char)s[pos]; + goto fallback; } } @@ -650,6 +651,7 @@ zend_result php_json_escape_string( pos++; len--; } else { +fallback:; if (UNEXPECTED(us >= 0x80)) { zend_result status; size_t pos_old = pos; From 81efe6bcb8bfceb3cb2e7212c6b1981ad18ab825 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 5 Feb 2025 23:36:08 +0100 Subject: [PATCH 29/46] tweak --- ext/json/json_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index c0ccb237dbbb3..4e200e62335b7 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -586,12 +586,12 @@ zend_result php_json_escape_string( int input_range_mask = _mm_movemask_epi8(input_range); if (input_range_mask != 0) { - max_shift = zend_ulong_ntz(input_range_mask); - if (UNEXPECTED(max_shift <= 1)) { + if (UNEXPECTED(input_range_mask & 1)) { /* not worth it */ us = (unsigned char)s[pos]; goto fallback; } + max_shift = zend_ulong_ntz(input_range_mask); } #ifdef ZEND_INTRIN_SSE4_2_NATIVE From 1d7109d05caf410831fdddba5612b7c6bf5df12c Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 20:58:27 +0100 Subject: [PATCH 30/46] abstract away --- ext/json/json_encoder.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 4e200e62335b7..0a7c4b0ec0b07 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -38,6 +38,10 @@ # include "zend_cpuinfo.h" #endif +#ifdef __SSE2__ +# define JSON_USE_SIMD +#endif + static const char digits[] = "0123456789abcdef"; static zend_always_inline bool php_json_check_stack_limit(void) @@ -448,7 +452,7 @@ static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, u return true; } -#ifdef __SSE2__ +#ifdef JSON_USE_SIMD static zend_always_inline int php_json_sse2_compute_escape_intersection(const __m128i mask, const __m128i input) { (void) mask; @@ -576,7 +580,7 @@ zend_result php_json_escape_string( 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; -#ifdef __SSE2__ +#ifdef JSON_USE_SIMD while (len >= sizeof(__m128i)) { const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); /* signed compare, so checks for unsigned bytes >= 0x80 as well */ From 45e91f59c9853ee25e531bd841585aa97125033e Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 20:58:32 +0100 Subject: [PATCH 31/46] mark branch --- ext/json/json_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 0a7c4b0ec0b07..c787b763153b1 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -634,7 +634,7 @@ zend_result php_json_escape_string( pos = sizeof(__m128i) - (s - s_backup); } else { - if (max_shift < sizeof(__m128i)) { + if (UNEXPECTED(max_shift < sizeof(__m128i))) { pos += max_shift; len -= max_shift; break; From dfd6de05eb3c1a2ef65db19921f4c265b7967c20 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 21:03:29 +0100 Subject: [PATCH 32/46] split off --- ext/json/json_encoder.c | 148 ++++++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 68 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index c787b763153b1..7f9e2d3012f10 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -526,7 +526,7 @@ typedef int (*php_json_compute_escape_intersection_t)(const __m128i mask, const ZEND_NO_SANITIZE_ADDRESS ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */ -static php_json_compute_escape_intersection_t resolve_json_escape_intersection(void) { +static php_json_compute_escape_intersection_t resolve_json_escape_intersection(void) { // TODO: rename if (zend_cpu_supports_sse42()) { return php_json_sse42_compute_escape_intersection_real; } @@ -534,6 +534,82 @@ static php_json_compute_escape_intersection_t resolve_json_escape_intersection(v } #endif +#ifdef JSON_USE_SIMD +typedef enum php_json_simd_result { + PHP_JSON_STOP, + PHP_JSON_SLOW, + PHP_JSON_NON_ASCII, +} php_json_simd_result; + +static zend_always_inline php_json_simd_result php_json_process_simd_block(smart_str *buf, const __m128i sse_escape_mask, const char **s, size_t *pos, size_t *len, int options) +{ + while (*len >= sizeof(__m128i)) { + const __m128i input = _mm_loadu_si128((const __m128i *) (*s + *pos)); + /* signed compare, so checks for unsigned bytes >= 0x80 as well */ + const __m128i input_range = _mm_cmplt_epi8(input, _mm_set1_epi8(32)); + + int max_shift = sizeof(__m128i); + + int input_range_mask = _mm_movemask_epi8(input_range); + if (input_range_mask != 0) { + if (UNEXPECTED(input_range_mask & 1)) { + /* not worth it */ + return PHP_JSON_NON_ASCII; + } + max_shift = zend_ulong_ntz(input_range_mask); + } + +#ifdef ZEND_INTRIN_SSE4_2_NATIVE + int mask = php_json_sse42_compute_escape_intersection_real(sse_escape_mask, input); +#elif defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) + int mask = php_json_sse42_compute_escape_intersection(sse_escape_mask, input); +#else + int mask = php_json_sse2_compute_escape_intersection(_mm_setzero_si128(), input); +#endif + if (mask != 0) { + if (UNEXPECTED(max_shift < sizeof(__m128i))) { + int shift = zend_ulong_ntz(mask); /* first offending character */ + *pos += MIN(max_shift, shift); + *len -= MIN(max_shift, shift); + return PHP_JSON_SLOW; + } + + php_json_append(buf, *s, *pos); + *s += *pos; + const char *s_backup = *s; + + do { + /* Note that we shift the input forward, so we have to shift the mask as well, + * beyond the to-be-escaped character */ + int len = zend_ulong_ntz(mask); + mask >>= len + 1; + + php_json_append(buf, *s, len); + + unsigned char us = (unsigned char)(*s)[len]; + *s += len + 1; /* skip 'us' too */ + + bool handled = php_json_printable_ascii_escape(buf, us, options); + ZEND_ASSERT(handled == true); + } while (mask != 0); + + *pos = sizeof(__m128i) - (*s - s_backup); + } else { + if (UNEXPECTED(max_shift < sizeof(__m128i))) { + *pos += max_shift; + *len -= max_shift; + return PHP_JSON_SLOW; + } + *pos += sizeof(__m128i); + } + + *len -= sizeof(__m128i); + } + + return !*len ? PHP_JSON_STOP : PHP_JSON_SLOW; +} +#endif + zend_result php_json_escape_string( smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder) /* {{{ */ @@ -581,81 +657,17 @@ zend_result php_json_escape_string( 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; #ifdef JSON_USE_SIMD - while (len >= sizeof(__m128i)) { - const __m128i input = _mm_loadu_si128((__m128i *) (s + pos)); - /* signed compare, so checks for unsigned bytes >= 0x80 as well */ - const __m128i input_range = _mm_cmplt_epi8(input, _mm_set1_epi8(32)); - - int max_shift = sizeof(__m128i); - - int input_range_mask = _mm_movemask_epi8(input_range); - if (input_range_mask != 0) { - if (UNEXPECTED(input_range_mask & 1)) { - /* not worth it */ - us = (unsigned char)s[pos]; - goto fallback; - } - max_shift = zend_ulong_ntz(input_range_mask); - } - -#ifdef ZEND_INTRIN_SSE4_2_NATIVE - int mask = php_json_sse42_compute_escape_intersection_real(sse_escape_mask, input); -#elif defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) - int mask = php_json_sse42_compute_escape_intersection(sse_escape_mask, input); -#else - int mask = php_json_sse2_compute_escape_intersection(_mm_setzero_si128(), input); -#endif - if (mask != 0) { - if (UNEXPECTED(max_shift < sizeof(__m128i))) { - int shift = zend_ulong_ntz(mask); /* first offending character */ - pos += MIN(max_shift, shift); - len -= MIN(max_shift, shift); - break; - } - - php_json_append(buf, s, pos); - s += pos; - const char *s_backup = s; - - do { - /* Note that we shift the input forward, so we have to shift the mask as well, - * beyond the to-be-escaped character */ - int len = zend_ulong_ntz(mask); - mask >>= len + 1; - - php_json_append(buf, s, len); - - us = (unsigned char)s[len]; - s += len + 1; /* skip 'us' too */ - - bool handled = php_json_printable_ascii_escape(buf, us, options); - ZEND_ASSERT(handled == true); - } while (mask != 0); - - pos = sizeof(__m128i) - (s - s_backup); - } else { - if (UNEXPECTED(max_shift < sizeof(__m128i))) { - pos += max_shift; - len -= max_shift; - break; - } - pos += sizeof(__m128i); - } - - len -= sizeof(__m128i); - } - - if (!len) { + php_json_simd_result result = php_json_process_simd_block(buf, sse_escape_mask, &s, &pos, &len, options); + if (UNEXPECTED(result == PHP_JSON_STOP)) { break; } #endif us = (unsigned char)s[pos]; - if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) { + if (EXPECTED(result != PHP_JSON_NON_ASCII && !ZEND_BIT_TEST(charmap, us))) { pos++; len--; } else { -fallback:; if (UNEXPECTED(us >= 0x80)) { zend_result status; size_t pos_old = pos; From ff4ef5b2c2c9aec16ebbcac6b2f357aeac9e5486 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 21:03:49 +0100 Subject: [PATCH 33/46] cs --- ext/json/json_encoder.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 7f9e2d3012f10..76ff4364d6714 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -520,13 +520,13 @@ zend_always_inline int php_json_sse42_compute_escape_intersection_real(const __m #endif #ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO -static int php_json_sse42_compute_escape_intersection(const __m128i mask, const __m128i input) __attribute__((ifunc("resolve_json_escape_intersection"))); +static int php_json_sse42_compute_escape_intersection(const __m128i mask, const __m128i input) __attribute__((ifunc("php_json_resolve_escape_intersection"))); typedef int (*php_json_compute_escape_intersection_t)(const __m128i mask, const __m128i input); ZEND_NO_SANITIZE_ADDRESS ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */ -static php_json_compute_escape_intersection_t resolve_json_escape_intersection(void) { // TODO: rename +static php_json_compute_escape_intersection_t php_json_resolve_escape_intersection(void) { if (zend_cpu_supports_sse42()) { return php_json_sse42_compute_escape_intersection_real; } @@ -541,7 +541,9 @@ typedef enum php_json_simd_result { PHP_JSON_NON_ASCII, } php_json_simd_result; -static zend_always_inline php_json_simd_result php_json_process_simd_block(smart_str *buf, const __m128i sse_escape_mask, const char **s, size_t *pos, size_t *len, int options) +static zend_always_inline php_json_simd_result php_json_process_simd_block( + smart_str *buf, const __m128i sse_escape_mask, const char **s, size_t *pos, size_t *len, int options +) { while (*len >= sizeof(__m128i)) { const __m128i input = _mm_loadu_si128((const __m128i *) (*s + *pos)); From 57efb3a9b832c925d9730b66bfb311837bbde3c5 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 21:05:32 +0100 Subject: [PATCH 34/46] fix mask on sse2 builds --- ext/json/json_encoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 76ff4364d6714..8593a311b0b9b 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -651,6 +651,8 @@ zend_result php_json_escape_string( #if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) const __m128i sse_escape_mask = php_json_create_sse_escape_mask(options); +#elif defined(JSON_USE_SIMD) + const __m128i sse_escape_mask = _mm_setzero_si128(); #endif do { From df0117e4af305b94102d2fde371cc52922a1935b Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 21:26:06 +0100 Subject: [PATCH 35/46] test --- ext/json/json_encoder.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 8593a311b0b9b..0cd76afb4f278 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -379,7 +379,7 @@ static zend_result php_json_encode_array(smart_str *buf, zval *val, int options, /* }}} */ /* Specialization of smart_str_appendl() to avoid performance loss due to code bloat */ -static zend_always_inline void php_json_append(smart_str *dest, const char *src, size_t len) +static void php_json_append(smart_str *dest, const char *src, size_t len) { /* smart_str has a minimum size of the input length, * this avoids generating initial allocation code */ @@ -542,7 +542,12 @@ typedef enum php_json_simd_result { } php_json_simd_result; static zend_always_inline php_json_simd_result php_json_process_simd_block( - smart_str *buf, const __m128i sse_escape_mask, const char **s, size_t *pos, size_t *len, int options + smart_str *buf, + const __m128i sse_escape_mask, + const char **s, + size_t *restrict pos, + size_t *restrict len, + int options ) { while (*len >= sizeof(__m128i)) { @@ -584,20 +589,24 @@ static zend_always_inline php_json_simd_result php_json_process_simd_block( /* Note that we shift the input forward, so we have to shift the mask as well, * beyond the to-be-escaped character */ int len = zend_ulong_ntz(mask); - mask >>= len + 1; + mask >>= len; php_json_append(buf, *s, len); - unsigned char us = (unsigned char)(*s)[len]; - *s += len + 1; /* skip 'us' too */ + *s += len; /* skip 'us' too */ - bool handled = php_json_printable_ascii_escape(buf, us, options); - ZEND_ASSERT(handled == true); + /* Mitigate long run performance */ + do { + unsigned char us = (unsigned char)(*s)[0]; + (*s)++; + bool handled = php_json_printable_ascii_escape(buf, us, options); + ZEND_ASSERT(handled == true); + } while ((mask >>= 1) & 1); } while (mask != 0); *pos = sizeof(__m128i) - (*s - s_backup); } else { - if (UNEXPECTED(max_shift < sizeof(__m128i))) { + if (/*UNEXPECTED*/(max_shift < sizeof(__m128i))) { *pos += max_shift; *len -= max_shift; return PHP_JSON_SLOW; @@ -608,7 +617,7 @@ static zend_always_inline php_json_simd_result php_json_process_simd_block( *len -= sizeof(__m128i); } - return !*len ? PHP_JSON_STOP : PHP_JSON_SLOW; + return UNEXPECTED(!*len) ? PHP_JSON_STOP : PHP_JSON_SLOW; } #endif @@ -660,8 +669,9 @@ zend_result php_json_escape_string( 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; + php_json_simd_result result = PHP_JSON_SLOW; #ifdef JSON_USE_SIMD - php_json_simd_result result = php_json_process_simd_block(buf, sse_escape_mask, &s, &pos, &len, options); + result = php_json_process_simd_block(buf, sse_escape_mask, &s, &pos, &len, options); if (UNEXPECTED(result == PHP_JSON_STOP)) { break; } @@ -721,6 +731,8 @@ zend_result php_json_escape_string( s += pos; pos = 0; + ZEND_ASSERT(buf->s); + /* From http://en.wikipedia.org/wiki/UTF16 */ if (us >= 0x10000) { unsigned int next_us; From 246b413c2ea8971575ef6fe0c3f44af7304ee947 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 21:36:59 +0100 Subject: [PATCH 36/46] tweaks --- ext/json/json_encoder.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 0cd76afb4f278..e9e0a6a9aa645 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -678,7 +678,7 @@ zend_result php_json_escape_string( #endif us = (unsigned char)s[pos]; - if (EXPECTED(result != PHP_JSON_NON_ASCII && !ZEND_BIT_TEST(charmap, us))) { + if (result != PHP_JSON_NON_ASCII && EXPECTED(!ZEND_BIT_TEST(charmap, us))) { pos++; len--; } else { @@ -695,9 +695,9 @@ zend_result php_json_escape_string( if (UNEXPECTED(!us)) { if (pos_old && (options & (PHP_JSON_INVALID_UTF8_IGNORE|PHP_JSON_INVALID_UTF8_SUBSTITUTE))) { php_json_append(buf, s, pos_old); - s += pos; - pos = 0; } + s += pos; + pos = 0; if (options & PHP_JSON_INVALID_UTF8_IGNORE) { /* ignore invalid UTF8 character */ From 847497f842ebcd05ad56d12eca04e95acbe4c534 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 21:44:51 +0100 Subject: [PATCH 37/46] tweak --- ext/json/json_encoder.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index e9e0a6a9aa645..75c4f6f8e0aa4 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -379,7 +379,7 @@ static zend_result php_json_encode_array(smart_str *buf, zval *val, int options, /* }}} */ /* Specialization of smart_str_appendl() to avoid performance loss due to code bloat */ -static void php_json_append(smart_str *dest, const char *src, size_t len) +static zend_always_inline void php_json_append(smart_str *dest, const char *src, size_t len) { /* smart_str has a minimum size of the input length, * this avoids generating initial allocation code */ @@ -688,8 +688,8 @@ zend_result php_json_escape_string( const char *cur = s + pos; pos = 0; us = php_next_utf8_char((unsigned char *)cur, len, &pos, &status); + len -= pos; pos += pos_old; - len -= pos - pos_old; /* check whether UTF8 character is correct */ if (UNEXPECTED(!us)) { @@ -807,6 +807,7 @@ zend_result php_json_escape_string( php_json_append(buf, s, pos); } + ZEND_ASSERT(buf->s); smart_str_appendc(buf, '"'); return SUCCESS; From 901a95743e72f8eee375abc708478a92e39ca6c1 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 22:05:33 +0100 Subject: [PATCH 38/46] flag --- ext/json/json_encoder.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 75c4f6f8e0aa4..8d854d8977643 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -587,8 +587,19 @@ static zend_always_inline php_json_simd_result php_json_process_simd_block( do { /* Note that we shift the input forward, so we have to shift the mask as well, - * beyond the to-be-escaped character */ + * beyond the to-be-escaped character */ int len = zend_ulong_ntz(mask); +#if 1 + mask >>= len + 1; + + php_json_append(buf, *s, len); + + unsigned char us = (unsigned char)(*s)[len]; + *s += len + 1; /* skip 'us' too */ + + bool handled = php_json_printable_ascii_escape(buf, us, options); + ZEND_ASSERT(handled == true); +#else mask >>= len; php_json_append(buf, *s, len); @@ -602,6 +613,7 @@ static zend_always_inline php_json_simd_result php_json_process_simd_block( bool handled = php_json_printable_ascii_escape(buf, us, options); ZEND_ASSERT(handled == true); } while ((mask >>= 1) & 1); +#endif } while (mask != 0); *pos = sizeof(__m128i) - (*s - s_backup); From 8947f096ee8b8bfef5dfa6c624a8c0715629d027 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 22:24:48 +0100 Subject: [PATCH 39/46] tighter code layout --- ext/json/json_encoder.c | 50 +++++++++++++---------------------------- 1 file changed, 15 insertions(+), 35 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 8d854d8977643..fee654942d7cf 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -519,7 +519,7 @@ zend_always_inline int php_json_sse42_compute_escape_intersection_real(const __m } #endif -#ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO +#if defined(JSON_USE_SIMD) && defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) static int php_json_sse42_compute_escape_intersection(const __m128i mask, const __m128i input) __attribute__((ifunc("php_json_resolve_escape_intersection"))); typedef int (*php_json_compute_escape_intersection_t)(const __m128i mask, const __m128i input); @@ -534,13 +534,13 @@ static php_json_compute_escape_intersection_t php_json_resolve_escape_intersecti } #endif -#ifdef JSON_USE_SIMD typedef enum php_json_simd_result { PHP_JSON_STOP, PHP_JSON_SLOW, PHP_JSON_NON_ASCII, } php_json_simd_result; +#ifdef JSON_USE_SIMD static zend_always_inline php_json_simd_result php_json_process_simd_block( smart_str *buf, const __m128i sse_escape_mask, @@ -585,40 +585,18 @@ static zend_always_inline php_json_simd_result php_json_process_simd_block( *s += *pos; const char *s_backup = *s; - do { - /* Note that we shift the input forward, so we have to shift the mask as well, - * beyond the to-be-escaped character */ - int len = zend_ulong_ntz(mask); -#if 1 - mask >>= len + 1; - - php_json_append(buf, *s, len); - - unsigned char us = (unsigned char)(*s)[len]; - *s += len + 1; /* skip 'us' too */ - - bool handled = php_json_printable_ascii_escape(buf, us, options); - ZEND_ASSERT(handled == true); -#else - mask >>= len; - - php_json_append(buf, *s, len); - - *s += len; /* skip 'us' too */ - - /* Mitigate long run performance */ - do { - unsigned char us = (unsigned char)(*s)[0]; - (*s)++; - bool handled = php_json_printable_ascii_escape(buf, us, options); - ZEND_ASSERT(handled == true); - } while ((mask >>= 1) & 1); -#endif - } while (mask != 0); + for (; mask; mask >>= 1, *s += 1) { + if (UNEXPECTED(mask & 1)) { + bool handled = php_json_printable_ascii_escape(buf, (*s)[0], options); + ZEND_ASSERT(handled); + } else { + smart_str_appendc(buf, (*s)[0]); + } + } *pos = sizeof(__m128i) - (*s - s_backup); } else { - if (/*UNEXPECTED*/(max_shift < sizeof(__m128i))) { + if (max_shift < sizeof(__m128i)) { *pos += max_shift; *len -= max_shift; return PHP_JSON_SLOW; @@ -670,10 +648,12 @@ zend_result php_json_escape_string( pos = 0; -#if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) +#ifdef JSON_USE_SIMD +# if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) const __m128i sse_escape_mask = php_json_create_sse_escape_mask(options); -#elif defined(JSON_USE_SIMD) +# else const __m128i sse_escape_mask = _mm_setzero_si128(); +# endif #endif do { From 4c41ad397594c92ddde30933f06cfde5debbbfe0 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 22:27:45 +0100 Subject: [PATCH 40/46] Remove check --- ext/json/json_encoder.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index fee654942d7cf..f6d3664f3c9b8 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -795,9 +795,7 @@ zend_result php_json_escape_string( } } while (len); - if (pos) { - php_json_append(buf, s, pos); - } + php_json_append(buf, s, pos); ZEND_ASSERT(buf->s); smart_str_appendc(buf, '"'); From 40cd08f00c281b5dd50ae3201d6f0f9976cfcb2c Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 22:37:09 +0100 Subject: [PATCH 41/46] Tweak --- ext/json/json_encoder.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index f6d3664f3c9b8..9395aa6009ac4 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -585,11 +585,14 @@ static zend_always_inline php_json_simd_result php_json_process_simd_block( *s += *pos; const char *s_backup = *s; + /* It's more important to keep this loop tight than to optimize this with + * a trailing zero count. */ for (; mask; mask >>= 1, *s += 1) { if (UNEXPECTED(mask & 1)) { bool handled = php_json_printable_ascii_escape(buf, (*s)[0], options); ZEND_ASSERT(handled); } else { + ZEND_ASSERT(buf->s); smart_str_appendc(buf, (*s)[0]); } } From dfb690df4322a96c4f330347ac9d2843d043672c Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 23:15:36 +0100 Subject: [PATCH 42/46] Code layout and comment tweak --- ext/json/json_encoder.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 9395aa6009ac4..e4d86a8fff5b0 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -381,7 +381,7 @@ static zend_result php_json_encode_array(smart_str *buf, zval *val, int options, /* Specialization of smart_str_appendl() to avoid performance loss due to code bloat */ static zend_always_inline void php_json_append(smart_str *dest, const char *src, size_t len) { - /* smart_str has a minimum size of the input length, + /* dest has a minimum size of the input length, * this avoids generating initial allocation code */ ZEND_ASSERT(dest->s); @@ -720,9 +720,7 @@ zend_result php_json_escape_string( || us < 0x2028 || us > 0x2029)) { /* No need to emit any bytes, just move the cursor. */ } else { - if (pos_old) { - php_json_append(buf, s, pos_old); - } + php_json_append(buf, s, pos_old); s += pos; pos = 0; From bd6e462fbefe20a198cebe473fc5d031fe86c500 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 6 Feb 2025 23:51:39 +0100 Subject: [PATCH 43/46] test with indirect function ptr --- ext/json/json_encoder.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index e4d86a8fff5b0..2f9acf7a96884 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -539,7 +539,7 @@ typedef enum php_json_simd_result { PHP_JSON_SLOW, PHP_JSON_NON_ASCII, } php_json_simd_result; - +php_json_compute_escape_intersection_t foo = php_json_sse42_compute_escape_intersection_real; #ifdef JSON_USE_SIMD static zend_always_inline php_json_simd_result php_json_process_simd_block( smart_str *buf, @@ -550,6 +550,7 @@ static zend_always_inline php_json_simd_result php_json_process_simd_block( int options ) { + const php_json_compute_escape_intersection_t dontchangeme = foo; while (*len >= sizeof(__m128i)) { const __m128i input = _mm_loadu_si128((const __m128i *) (*s + *pos)); /* signed compare, so checks for unsigned bytes >= 0x80 as well */ @@ -566,13 +567,14 @@ static zend_always_inline php_json_simd_result php_json_process_simd_block( max_shift = zend_ulong_ntz(input_range_mask); } -#ifdef ZEND_INTRIN_SSE4_2_NATIVE - int mask = php_json_sse42_compute_escape_intersection_real(sse_escape_mask, input); -#elif defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) - int mask = php_json_sse42_compute_escape_intersection(sse_escape_mask, input); -#else - int mask = php_json_sse2_compute_escape_intersection(_mm_setzero_si128(), input); -#endif +// #ifdef ZEND_INTRIN_SSE4_2_NATIVE + // int mask = php_json_sse42_compute_escape_intersection_real(sse_escape_mask, input); +// #elif defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) + // int mask = php_json_sse42_compute_escape_intersection(sse_escape_mask, input); +// #else + // int mask = php_json_sse2_compute_escape_intersection(_mm_setzero_si128(), input); +// #endif + int mask = dontchangeme(sse_escape_mask, input); if (mask != 0) { if (UNEXPECTED(max_shift < sizeof(__m128i))) { int shift = zend_ulong_ntz(mask); /* first offending character */ From 8d5a38137e6552468db086a818bda4a7fb827297 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 7 Feb 2025 00:11:30 +0100 Subject: [PATCH 44/46] Revert "test with indirect function ptr" This reverts commit bd6e462fbefe20a198cebe473fc5d031fe86c500. --- ext/json/json_encoder.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 2f9acf7a96884..e4d86a8fff5b0 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -539,7 +539,7 @@ typedef enum php_json_simd_result { PHP_JSON_SLOW, PHP_JSON_NON_ASCII, } php_json_simd_result; -php_json_compute_escape_intersection_t foo = php_json_sse42_compute_escape_intersection_real; + #ifdef JSON_USE_SIMD static zend_always_inline php_json_simd_result php_json_process_simd_block( smart_str *buf, @@ -550,7 +550,6 @@ static zend_always_inline php_json_simd_result php_json_process_simd_block( int options ) { - const php_json_compute_escape_intersection_t dontchangeme = foo; while (*len >= sizeof(__m128i)) { const __m128i input = _mm_loadu_si128((const __m128i *) (*s + *pos)); /* signed compare, so checks for unsigned bytes >= 0x80 as well */ @@ -567,14 +566,13 @@ static zend_always_inline php_json_simd_result php_json_process_simd_block( max_shift = zend_ulong_ntz(input_range_mask); } -// #ifdef ZEND_INTRIN_SSE4_2_NATIVE - // int mask = php_json_sse42_compute_escape_intersection_real(sse_escape_mask, input); -// #elif defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) - // int mask = php_json_sse42_compute_escape_intersection(sse_escape_mask, input); -// #else - // int mask = php_json_sse2_compute_escape_intersection(_mm_setzero_si128(), input); -// #endif - int mask = dontchangeme(sse_escape_mask, input); +#ifdef ZEND_INTRIN_SSE4_2_NATIVE + int mask = php_json_sse42_compute_escape_intersection_real(sse_escape_mask, input); +#elif defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO) + int mask = php_json_sse42_compute_escape_intersection(sse_escape_mask, input); +#else + int mask = php_json_sse2_compute_escape_intersection(_mm_setzero_si128(), input); +#endif if (mask != 0) { if (UNEXPECTED(max_shift < sizeof(__m128i))) { int shift = zend_ulong_ntz(mask); /* first offending character */ From d4297de5da5c6da09339c37a4651009a944b4221 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 7 Feb 2025 00:21:21 +0100 Subject: [PATCH 45/46] code layout --- ext/json/json_encoder.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index e4d86a8fff5b0..ae6161ecf9464 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -673,7 +673,7 @@ zend_result php_json_escape_string( #endif us = (unsigned char)s[pos]; - if (result != PHP_JSON_NON_ASCII && EXPECTED(!ZEND_BIT_TEST(charmap, us))) { + if (EXPECTED(result != PHP_JSON_NON_ASCII && !ZEND_BIT_TEST(charmap, us))) { pos++; len--; } else { @@ -727,13 +727,13 @@ zend_result php_json_escape_string( ZEND_ASSERT(buf->s); /* From http://en.wikipedia.org/wiki/UTF16 */ + dst = smart_str_extend(buf, 6 + ((us >= 0x10000) ? 6 : 0)); if (us >= 0x10000) { unsigned int next_us; us -= 0x10000; next_us = (unsigned short)((us & 0x3ff) | 0xdc00); us = (unsigned short)((us >> 10) | 0xd800); - dst = smart_str_extend(buf, 6); dst[0] = '\\'; dst[1] = 'u'; dst[2] = digits[(us >> 12) & 0xf]; @@ -741,8 +741,8 @@ zend_result php_json_escape_string( dst[4] = digits[(us >> 4) & 0xf]; dst[5] = digits[us & 0xf]; us = next_us; + dst += 6; } - dst = smart_str_extend(buf, 6); dst[0] = '\\'; dst[1] = 'u'; dst[2] = digits[(us >> 12) & 0xf]; From bc48fb8457c7787d27c31a1132a5001121b07b26 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 7 Feb 2025 17:41:59 +0100 Subject: [PATCH 46/46] wip --- ext/json/json_encoder.c | 6 +- ext/standard/html.c | 152 ++++++++++++++++++++++++---------------- ext/standard/html.h | 1 + 3 files changed, 94 insertions(+), 65 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index ae6161ecf9464..2bd620313a01f 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -544,7 +544,7 @@ typedef enum php_json_simd_result { static zend_always_inline php_json_simd_result php_json_process_simd_block( smart_str *buf, const __m128i sse_escape_mask, - const char **s, + const char **restrict s, size_t *restrict pos, size_t *restrict len, int options @@ -666,6 +666,7 @@ zend_result php_json_escape_string( php_json_simd_result result = PHP_JSON_SLOW; #ifdef JSON_USE_SIMD +// TODO: html.c change (incl UNEXPECTED) & mss dit manueel terug inlinen? result = php_json_process_simd_block(buf, sse_escape_mask, &s, &pos, &len, options); if (UNEXPECTED(result == PHP_JSON_STOP)) { break; @@ -678,11 +679,10 @@ zend_result php_json_escape_string( len--; } else { if (UNEXPECTED(us >= 0x80)) { - zend_result status; size_t pos_old = pos; const char *cur = s + pos; pos = 0; - us = php_next_utf8_char((unsigned char *)cur, len, &pos, &status); + us = php_next_utf8_char_ex((unsigned char *)cur, us, len, &pos); len -= pos; pos += pos_old; diff --git a/ext/standard/html.c b/ext/standard/html.c index 0c6231d590d88..90c49c3691635 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -53,12 +53,16 @@ (all) = (all) && !CHARSET_PARTIAL_SUPPORT((charset)) && ((doctype) != ENT_HTML_DOC_XML1); \ } while (0) -#define MB_FAILURE(pos, advance) do { \ +#define MB_FAILURE_NO_STATUS(pos, advance) do { \ *cursor = pos + (advance); \ - *status = FAILURE; \ return 0; \ } while (0) +#define MB_FAILURE(pos, advance) do { \ + *status = FAILURE; \ + MB_FAILURE_NO_STATUS(pos, advance); \ +} while (0) + #define CHECK_LEN(pos, chars_need) ((str_len - (pos)) >= (chars_need)) /* valid as single byte character or leading byte */ @@ -85,6 +89,85 @@ static char *get_default_charset(void) { } /* }}} */ +PHPAPI unsigned int php_next_utf8_char_ex( + const unsigned char *str, + unsigned char c, + size_t str_len, + size_t *cursor) +{ + size_t pos = *cursor; + unsigned int this_char = 0; + + /* We'll follow strategy 2. from section 3.6.1 of UTR #36: + * "In a reported illegal byte sequence, do not include any + * non-initial byte that encodes a valid character or is a leading + * byte for a valid sequence." */ + + ZEND_ASSERT(c >= 0x80); + + if (UNEXPECTED(c < 0xc2)) { + MB_FAILURE_NO_STATUS(pos, 1); + } else if (c < 0xe0) { + if (UNEXPECTED(!CHECK_LEN(pos, 2))) + MB_FAILURE_NO_STATUS(pos, 1); + + if (UNEXPECTED(!utf8_trail(str[pos + 1]))) { + MB_FAILURE_NO_STATUS(pos, utf8_lead(str[pos + 1]) ? 1 : 2); + } + this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f); + if (UNEXPECTED(this_char < 0x80)) { /* non-shortest form */ + MB_FAILURE_NO_STATUS(pos, 2); + } + pos += 2; + } else if (c < 0xf0) { + size_t avail = str_len - pos; + + if (UNEXPECTED(avail < 3 || + !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]))) { + if (avail < 2 || utf8_lead(str[pos + 1])) + MB_FAILURE_NO_STATUS(pos, 1); + else if (avail < 3 || utf8_lead(str[pos + 2])) + MB_FAILURE_NO_STATUS(pos, 2); + else + MB_FAILURE_NO_STATUS(pos, 3); + } + + this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); + if (UNEXPECTED(this_char < 0x800)) { /* non-shortest form */ + MB_FAILURE_NO_STATUS(pos, 3); + } else if (UNEXPECTED(this_char >= 0xd800 && this_char <= 0xdfff)) { /* surrogate */ + MB_FAILURE_NO_STATUS(pos, 3); + } + pos += 3; + } else if (c < 0xf5) { + size_t avail = str_len - pos; + + if (UNEXPECTED(avail < 4 || + !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]) || + !utf8_trail(str[pos + 3]))) { + if (avail < 2 || utf8_lead(str[pos + 1])) + MB_FAILURE_NO_STATUS(pos, 1); + else if (avail < 3 || utf8_lead(str[pos + 2])) + MB_FAILURE_NO_STATUS(pos, 2); + else if (avail < 4 || utf8_lead(str[pos + 3])) + MB_FAILURE_NO_STATUS(pos, 3); + else + MB_FAILURE_NO_STATUS(pos, 4); + } + + this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); + if (UNEXPECTED(this_char < 0x10000 || this_char > 0x10FFFF)) { /* non-shortest form or outside range */ + MB_FAILURE_NO_STATUS(pos, 4); + } + pos += 4; + } else { + MB_FAILURE_NO_STATUS(pos, 1); + } + + *cursor = pos; + return this_char; +} + /* {{{ get_next_char */ static inline unsigned int get_next_char( enum entity_charset charset, @@ -105,72 +188,17 @@ static inline unsigned int get_next_char( switch (charset) { case cs_utf_8: { - /* We'll follow strategy 2. from section 3.6.1 of UTR #36: - * "In a reported illegal byte sequence, do not include any - * non-initial byte that encodes a valid character or is a leading - * byte for a valid sequence." */ unsigned char c; c = str[pos]; if (c < 0x80) { this_char = c; pos++; - } else if (c < 0xc2) { - MB_FAILURE(pos, 1); - } else if (c < 0xe0) { - if (!CHECK_LEN(pos, 2)) - MB_FAILURE(pos, 1); - - if (!utf8_trail(str[pos + 1])) { - MB_FAILURE(pos, utf8_lead(str[pos + 1]) ? 1 : 2); - } - this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f); - if (this_char < 0x80) { /* non-shortest form */ - MB_FAILURE(pos, 2); - } - pos += 2; - } else if (c < 0xf0) { - size_t avail = str_len - pos; - - if (avail < 3 || - !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2])) { - if (avail < 2 || utf8_lead(str[pos + 1])) - MB_FAILURE(pos, 1); - else if (avail < 3 || utf8_lead(str[pos + 2])) - MB_FAILURE(pos, 2); - else - MB_FAILURE(pos, 3); - } - - this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); - if (this_char < 0x800) { /* non-shortest form */ - MB_FAILURE(pos, 3); - } else if (this_char >= 0xd800 && this_char <= 0xdfff) { /* surrogate */ - MB_FAILURE(pos, 3); - } - pos += 3; - } else if (c < 0xf5) { - size_t avail = str_len - pos; - - if (avail < 4 || - !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]) || - !utf8_trail(str[pos + 3])) { - if (avail < 2 || utf8_lead(str[pos + 1])) - MB_FAILURE(pos, 1); - else if (avail < 3 || utf8_lead(str[pos + 2])) - MB_FAILURE(pos, 2); - else if (avail < 4 || utf8_lead(str[pos + 3])) - MB_FAILURE(pos, 3); - else - MB_FAILURE(pos, 4); - } - - this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); - if (this_char < 0x10000 || this_char > 0x10FFFF) { /* non-shortest form or outside range */ - MB_FAILURE(pos, 4); - } - pos += 4; } else { - MB_FAILURE(pos, 1); + this_char = php_next_utf8_char_ex(str, c, str_len, cursor); + if (UNEXPECTED(this_char == 0)) { + *status = FAILURE; + } + return this_char; } } break; diff --git a/ext/standard/html.h b/ext/standard/html.h index 40c595ba5d89c..05cd726f82516 100644 --- a/ext/standard/html.h +++ b/ext/standard/html.h @@ -48,5 +48,6 @@ PHPAPI zend_string *php_escape_html_entities(const unsigned char *old, size_t ol PHPAPI zend_string *php_escape_html_entities_ex(const unsigned char *old, size_t oldlen, int all, int flags, const char *hint_charset, bool double_encode, bool quiet); PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int flags, const char *hint_charset); PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, zend_result *status); +PHPAPI unsigned int php_next_utf8_char_ex(const unsigned char *str, unsigned char c, size_t str_len, size_t *cursor); #endif /* HTML_H */