forked from php/php-src
Showing 2 changed files with 168 additions and 0 deletions.
ext/dom/lexbor/patches/0001-Remove-unused-upper-case-tag-static-data.patch (53 additions, 0 deletions)
@@ -0,0 +1,53 @@
From 24b52ec63eb55adb1c039e58dd3e1156f01083b2 Mon Sep 17 00:00:00 2001
From: Niels Dossche <[email protected]>
Date: Wed, 29 Nov 2023 21:26:47 +0100
Subject: [PATCH 1/2] Remove unused upper case tag static data

---
 source/lexbor/tag/res.h | 2 ++
 source/lexbor/tag/tag.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/source/lexbor/tag/res.h b/source/lexbor/tag/res.h
index c7190c5..4ad1f37 100644
--- a/source/lexbor/tag/res.h
+++ b/source/lexbor/tag/res.h
@@ -224,6 +224,7 @@ static const lxb_tag_data_t lxb_tag_res_data_default[LXB_TAG__LAST_ENTRY] =
     {{.u.short_str = "xmp", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true}
 };
 
+#if 0
 static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY] =
 {
     {{.u.short_str = "#UNDEF", .length = 6, .next = NULL}, LXB_TAG__UNDEF, 1, true},
@@ -423,6 +424,7 @@ static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY]
     {{.u.short_str = "WBR", .length = 3, .next = NULL}, LXB_TAG_WBR, 1, true},
     {{.u.short_str = "XMP", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true}
 };
+#endif
 
 static const lexbor_shs_entry_t lxb_tag_res_shs_data_default[] =
 {
diff --git a/source/lexbor/tag/tag.c b/source/lexbor/tag/tag.c
index f8fcdf0..0571957 100755
--- a/source/lexbor/tag/tag.c
+++ b/source/lexbor/tag/tag.c
@@ -92,6 +92,7 @@ lxb_tag_data_by_name(lexbor_hash_t *hash, const lxb_char_t *name, size_t len)
                                            lexbor_hash_search_lower, name, len);
 }
 
+#if 0
 const lxb_tag_data_t *
 lxb_tag_data_by_name_upper(lexbor_hash_t *hash,
                            const lxb_char_t *name, size_t len)
@@ -114,6 +115,7 @@ lxb_tag_data_by_name_upper(lexbor_hash_t *hash,
     return (const lxb_tag_data_t *) lexbor_hash_search(hash,
                                                        lexbor_hash_search_upper, name, len);
 }
+#endif
 
 /*
  * No inline functions for ABI.
--
2.43.0
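For context on the first patch: wrapping a static table or function in #if 0 ... #endif makes the preprocessor discard it before compilation, so the upper-case tag data and lxb_tag_data_by_name_upper() contribute nothing to the built extension. The fragment below is an illustrative sketch only, not part of the patch; demo_tag_data_t and the demo_* names are made-up stand-ins for lexbor's real lxb_tag_data_t entries.

#include <stddef.h>
#include <stdbool.h>

/* Hypothetical stand-in for lexbor's tag entry type. */
typedef struct {
    const char *short_str;
    size_t      length;
    bool        interesting;
} demo_tag_data_t;

static const demo_tag_data_t demo_lower[] = {
    { "xmp", 3, true },
};

#if 0
/* Everything between #if 0 and #endif is dropped by the preprocessor, so this
 * table occupies no space in the object file and needs no other source changes. */
static const demo_tag_data_t demo_upper[] = {
    { "XMP", 3, true },
};
#endif

/* Keep the remaining table referenced so the compiler does not warn about it. */
size_t demo_lower_count(void)
{
    return sizeof(demo_lower) / sizeof(demo_lower[0]);
}

Guarding the code with the preprocessor instead of deleting it keeps the vendored sources closer to upstream lexbor, which should make the patch easier to maintain.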
ext/dom/lexbor/patches/0001-Shrink-size-of-static-binary-search-tree.patch (115 additions, 0 deletions)
@@ -0,0 +1,115 @@
From 7fde66f32dcfbdc5df97fbffe411c0d7fd60fa50 Mon Sep 17 00:00:00 2001
From: Niels Dossche <[email protected]>
Date: Wed, 29 Nov 2023 21:29:31 +0100
Subject: [PATCH 2/2] Shrink size of static binary search tree

This also makes it more efficient on the data cache.
---
 source/lexbor/core/sbst.h | 10 +++++-----
 source/lexbor/html/tokenizer/state.c | 2 +-
 utils/lexbor/html/tmp/tokenizer_res.h | 2 +-
 utils/lexbor/html/tokenizer_entities_bst.py | 8 ++++----
 utils/lexbor/lexbor/LXB.py | 2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/source/lexbor/core/sbst.h b/source/lexbor/core/sbst.h
index b0fbc54..40e0e91 100755
--- a/source/lexbor/core/sbst.h
+++ b/source/lexbor/core/sbst.h
@@ -19,12 +19,12 @@ extern "C" {
 typedef struct {
     lxb_char_t key;
 
-    void *value;
-    size_t value_len;
+    lxb_char_t value[6];
+    unsigned char value_len;
 
-    size_t left;
-    size_t right;
-    size_t next;
+    unsigned short left;
+    unsigned short right;
+    unsigned short next;
 }
 lexbor_sbst_entry_static_t;
 
diff --git a/source/lexbor/html/tokenizer/state.c b/source/lexbor/html/tokenizer/state.c
index 70ca391..2f3414f 100755
--- a/source/lexbor/html/tokenizer/state.c
+++ b/source/lexbor/html/tokenizer/state.c
@@ -1815,7 +1815,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz,
             goto done;
         }
 
-        if (entry->value != NULL) {
+        if (entry->value[0] != 0) {
             tkz->entity_end = (tkz->pos + (data - begin)) - tkz->start;
             tkz->entity_match = entry;
         }
diff --git a/utils/lexbor/html/tmp/tokenizer_res.h b/utils/lexbor/html/tmp/tokenizer_res.h
index b3701d5..73ab66e 100755
--- a/utils/lexbor/html/tmp/tokenizer_res.h
+++ b/utils/lexbor/html/tmp/tokenizer_res.h
@@ -6,7 +6,7 @@
 
 /*
  * Caution!!! Important!!!
- * This file generated by the script
+ * This file is generated by the script
  * "utils/lexbor/html/tokenizer_entities_bst.py"!
  * Do not change this file!
 */
diff --git a/utils/lexbor/html/tokenizer_entities_bst.py b/utils/lexbor/html/tokenizer_entities_bst.py
index ee7dcb4..7cd1335 100755
--- a/utils/lexbor/html/tokenizer_entities_bst.py
+++ b/utils/lexbor/html/tokenizer_entities_bst.py
@@ -1,6 +1,6 @@
 
 import json
-import sys, re, os
+import sys, os
 
 # Find and append run script run dir to module search path
 ABS_PATH = os.path.dirname(os.path.abspath(__file__))
@@ -62,7 +62,7 @@ def entities_bst_create_layer(name, entry, index):
 
 def entities_bst_create(index):
     bst = {}
-    bst[0] = ["\0", 0, 0, 0, "NULL"]
+    bst[0] = ["\0", 0, 0, 0, "{0}"]
 
     begin = 1
     idx = end = entities_bst_create_tree(index, bst, begin)
@@ -114,7 +114,7 @@ def entities_bst_create_tree(index, bst, idx):
     assert len(index[ split[0] ]['values']) < 2, 'Double values'
 
     if len(index[ split[0] ]['values']) == 0:
-        value = "NULL"
+        value = "{0}"
     else:
         value = '"{}"'.format(toHex(index[ split[0] ]['values'][0]['characters']))
 
@@ -210,5 +210,5 @@ def entities_bst_print(bst):
 
 if __name__ == "__main__":
     entities_bst("tmp/tokenizer_res.h",
-                 "../../../source/lexbor/html/tokenizer_res.h",
+                 "../../../source/lexbor/html/tokenizer/res.h",
                  "data/entities.json");
diff --git a/utils/lexbor/lexbor/LXB.py b/utils/lexbor/lexbor/LXB.py
index 3e75812..b068ea3 100755
--- a/utils/lexbor/lexbor/LXB.py
+++ b/utils/lexbor/lexbor/LXB.py
@@ -27,7 +27,7 @@ class Temp:
 
         for line in fh:
             for name in self.patterns:
-                line = re.sub(name, '\n'.join(self.patterns[name]), line)
+                line = line.replace(name, '\n'.join(self.patterns[name]))
             self.buffer.append(line)
         fh.close()
 
--
2.43.0
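To make the size claim in the second patch concrete: on a common LP64 ABI the old lexbor_sbst_entry_static_t needs 48 bytes per node (a 1-byte key padded up, then a pointer and four size_t fields), while the patched layout packs the key, a 6-byte inline value, its length and three 16-bit indices into 14 bytes, so the static entity tree shrinks to roughly a third of its size and more of it fits in the data cache. Storing the value inline is also why state.c now tests entry->value[0] != 0 instead of entry->value != NULL, and why the generator scripts emit "{0}" rather than "NULL" for empty values. The program below is a minimal sketch, not part of the commit; it redeclares both layouts under stand-in names (sbst_entry_old_t / sbst_entry_new_t) just to print their sizes.

#include <stdio.h>
#include <stddef.h>

typedef unsigned char lxb_char_t;   /* mirrors lexbor's typedef */

typedef struct {                    /* entry layout before the patch */
    lxb_char_t key;
    void *value;
    size_t value_len;
    size_t left;
    size_t right;
    size_t next;
} sbst_entry_old_t;

typedef struct {                    /* entry layout after the patch */
    lxb_char_t key;
    lxb_char_t value[6];
    unsigned char value_len;
    unsigned short left;
    unsigned short right;
    unsigned short next;
} sbst_entry_new_t;

int main(void)
{
    /* On LP64 this typically prints 48 and 14 bytes: the old struct pays
     * 7 bytes of padding after key plus five 8-byte fields, while the new
     * one packs everything into 14 bytes with 2-byte alignment. */
    printf("old entry: %zu bytes, new entry: %zu bytes\n",
           sizeof(sbst_entry_old_t), sizeof(sbst_entry_new_t));
    return 0;
}

The unsigned short left/right/next fields do assume the generated tree stays below 65536 entries; the HTML named-character-reference table is well below that limit, so the narrower indices are safe there.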