diff --git a/ext/dom/lexbor/patches/0001-Remove-unused-upper-case-tag-static-data.patch b/ext/dom/lexbor/patches/0001-Remove-unused-upper-case-tag-static-data.patch new file mode 100644 index 0000000000000..529649a369c8b --- /dev/null +++ b/ext/dom/lexbor/patches/0001-Remove-unused-upper-case-tag-static-data.patch @@ -0,0 +1,53 @@ +From 24b52ec63eb55adb1c039e58dd3e1156f01083b2 Mon Sep 17 00:00:00 2001 +From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> +Date: Wed, 29 Nov 2023 21:26:47 +0100 +Subject: [PATCH 1/2] Remove unused upper case tag static data + +--- + source/lexbor/tag/res.h | 2 ++ + source/lexbor/tag/tag.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/source/lexbor/tag/res.h b/source/lexbor/tag/res.h +index c7190c5..4ad1f37 100644 +--- a/source/lexbor/tag/res.h ++++ b/source/lexbor/tag/res.h +@@ -224,6 +224,7 @@ static const lxb_tag_data_t lxb_tag_res_data_default[LXB_TAG__LAST_ENTRY] = + {{.u.short_str = "xmp", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true} + }; + ++#if 0 + static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY] = + { + {{.u.short_str = "#UNDEF", .length = 6, .next = NULL}, LXB_TAG__UNDEF, 1, true}, +@@ -423,6 +424,7 @@ static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY] + {{.u.short_str = "WBR", .length = 3, .next = NULL}, LXB_TAG_WBR, 1, true}, + {{.u.short_str = "XMP", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true} + }; ++#endif + + static const lexbor_shs_entry_t lxb_tag_res_shs_data_default[] = + { +diff --git a/source/lexbor/tag/tag.c b/source/lexbor/tag/tag.c +index f8fcdf0..0571957 100755 +--- a/source/lexbor/tag/tag.c ++++ b/source/lexbor/tag/tag.c +@@ -92,6 +92,7 @@ lxb_tag_data_by_name(lexbor_hash_t *hash, const lxb_char_t *name, size_t len) + lexbor_hash_search_lower, name, len); + } + ++#if 0 + const lxb_tag_data_t * + lxb_tag_data_by_name_upper(lexbor_hash_t *hash, + const lxb_char_t *name, size_t len) +@@ -114,6 +115,7 @@ lxb_tag_data_by_name_upper(lexbor_hash_t *hash, + return (const lxb_tag_data_t *) lexbor_hash_search(hash, + lexbor_hash_search_upper, name, len); + } ++#endif + + /* + * No inline functions for ABI. +-- +2.43.0 + diff --git a/ext/dom/lexbor/patches/0001-Shrink-size-of-static-binary-search-tree.patch b/ext/dom/lexbor/patches/0001-Shrink-size-of-static-binary-search-tree.patch new file mode 100644 index 0000000000000..3c0af9f7a721f --- /dev/null +++ b/ext/dom/lexbor/patches/0001-Shrink-size-of-static-binary-search-tree.patch @@ -0,0 +1,115 @@ +From 7fde66f32dcfbdc5df97fbffe411c0d7fd60fa50 Mon Sep 17 00:00:00 2001 +From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> +Date: Wed, 29 Nov 2023 21:29:31 +0100 +Subject: [PATCH 2/2] Shrink size of static binary search tree + +This also makes it more efficient on the data cache. +--- + source/lexbor/core/sbst.h | 10 +++++----- + source/lexbor/html/tokenizer/state.c | 2 +- + utils/lexbor/html/tmp/tokenizer_res.h | 2 +- + utils/lexbor/html/tokenizer_entities_bst.py | 8 ++++---- + utils/lexbor/lexbor/LXB.py | 2 +- + 5 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/source/lexbor/core/sbst.h b/source/lexbor/core/sbst.h +index b0fbc54..40e0e91 100755 +--- a/source/lexbor/core/sbst.h ++++ b/source/lexbor/core/sbst.h +@@ -19,12 +19,12 @@ extern "C" { + typedef struct { + lxb_char_t key; + +- void *value; +- size_t value_len; ++ lxb_char_t value[6]; ++ unsigned char value_len; + +- size_t left; +- size_t right; +- size_t next; ++ unsigned short left; ++ unsigned short right; ++ unsigned short next; + } + lexbor_sbst_entry_static_t; + +diff --git a/source/lexbor/html/tokenizer/state.c b/source/lexbor/html/tokenizer/state.c +index 70ca391..2f3414f 100755 +--- a/source/lexbor/html/tokenizer/state.c ++++ b/source/lexbor/html/tokenizer/state.c +@@ -1815,7 +1815,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz, + goto done; + } + +- if (entry->value != NULL) { ++ if (entry->value[0] != 0) { + tkz->entity_end = (tkz->pos + (data - begin)) - tkz->start; + tkz->entity_match = entry; + } +diff --git a/utils/lexbor/html/tmp/tokenizer_res.h b/utils/lexbor/html/tmp/tokenizer_res.h +index b3701d5..73ab66e 100755 +--- a/utils/lexbor/html/tmp/tokenizer_res.h ++++ b/utils/lexbor/html/tmp/tokenizer_res.h +@@ -6,7 +6,7 @@ + + /* + * Caution!!! Important!!! +- * This file generated by the script ++ * This file is generated by the script + * "utils/lexbor/html/tokenizer_entities_bst.py"! + * Do not change this file! + */ +diff --git a/utils/lexbor/html/tokenizer_entities_bst.py b/utils/lexbor/html/tokenizer_entities_bst.py +index ee7dcb4..7cd1335 100755 +--- a/utils/lexbor/html/tokenizer_entities_bst.py ++++ b/utils/lexbor/html/tokenizer_entities_bst.py +@@ -1,6 +1,6 @@ + + import json +-import sys, re, os ++import sys, os + + # Find and append run script run dir to module search path + ABS_PATH = os.path.dirname(os.path.abspath(__file__)) +@@ -62,7 +62,7 @@ def entities_bst_create_layer(name, entry, index): + + def entities_bst_create(index): + bst = {} +- bst[0] = ["\0", 0, 0, 0, "NULL"] ++ bst[0] = ["\0", 0, 0, 0, "{0}"] + + begin = 1 + idx = end = entities_bst_create_tree(index, bst, begin) +@@ -114,7 +114,7 @@ def entities_bst_create_tree(index, bst, idx): + assert len(index[ split[0] ]['values']) < 2, 'Double values' + + if len(index[ split[0] ]['values']) == 0: +- value = "NULL" ++ value = "{0}" + else: + value = '"{}"'.format(toHex(index[ split[0] ]['values'][0]['characters'])) + +@@ -210,5 +210,5 @@ def entities_bst_print(bst): + + if __name__ == "__main__": + entities_bst("tmp/tokenizer_res.h", +- "../../../source/lexbor/html/tokenizer_res.h", ++ "../../../source/lexbor/html/tokenizer/res.h", + "data/entities.json"); +diff --git a/utils/lexbor/lexbor/LXB.py b/utils/lexbor/lexbor/LXB.py +index 3e75812..b068ea3 100755 +--- a/utils/lexbor/lexbor/LXB.py ++++ b/utils/lexbor/lexbor/LXB.py +@@ -27,7 +27,7 @@ class Temp: + + for line in fh: + for name in self.patterns: +- line = re.sub(name, '\n'.join(self.patterns[name]), line) ++ line = line.replace(name, '\n'.join(self.patterns[name])) + self.buffer.append(line) + fh.close() + +-- +2.43.0 +