Skip to content

Commit

Permalink
[ci skip] Add missing patch files
Browse files (browse the repository at this point in the history)
  • Loading branch information
nielsdos committed Dec 3, 2023
1 parent 7d5b754 commit 775fb31
Show file tree
Hide file tree
Showing 2 changed files with 168 additions and 0 deletions.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,53 @@
From 24b52ec63eb55adb1c039e58dd3e1156f01083b2 Mon Sep 17 00:00:00 2001
From: Niels Dossche <[email protected]>
Date: Wed, 29 Nov 2023 21:26:47 +0100
Subject: [PATCH 1/2] Remove unused upper case tag static data

---
source/lexbor/tag/res.h | 2 ++
source/lexbor/tag/tag.c | 2 ++
2 files changed, 4 insertions(+)

diff --git a/source/lexbor/tag/res.h b/source/lexbor/tag/res.h
index c7190c5..4ad1f37 100644
--- a/source/lexbor/tag/res.h
+++ b/source/lexbor/tag/res.h
@@ -224,6 +224,7 @@ static const lxb_tag_data_t lxb_tag_res_data_default[LXB_TAG__LAST_ENTRY] =
{{.u.short_str = "xmp", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true}
};

+#if 0
static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY] =
{
{{.u.short_str = "#UNDEF", .length = 6, .next = NULL}, LXB_TAG__UNDEF, 1, true},
@@ -423,6 +424,7 @@ static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY]
{{.u.short_str = "WBR", .length = 3, .next = NULL}, LXB_TAG_WBR, 1, true},
{{.u.short_str = "XMP", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true}
};
+#endif

static const lexbor_shs_entry_t lxb_tag_res_shs_data_default[] =
{
diff --git a/source/lexbor/tag/tag.c b/source/lexbor/tag/tag.c
index f8fcdf0..0571957 100755
--- a/source/lexbor/tag/tag.c
+++ b/source/lexbor/tag/tag.c
@@ -92,6 +92,7 @@ lxb_tag_data_by_name(lexbor_hash_t *hash, const lxb_char_t *name, size_t len)
lexbor_hash_search_lower, name, len);
}

+#if 0
const lxb_tag_data_t *
lxb_tag_data_by_name_upper(lexbor_hash_t *hash,
const lxb_char_t *name, size_t len)
@@ -114,6 +115,7 @@ lxb_tag_data_by_name_upper(lexbor_hash_t *hash,
return (const lxb_tag_data_t *) lexbor_hash_search(hash,
lexbor_hash_search_upper, name, len);
}
+#endif

/*
* No inline functions for ABI.
--
2.43.0

Original file line number | Diff line number | Diff line change
@@ -0,0 +1,115 @@
From 7fde66f32dcfbdc5df97fbffe411c0d7fd60fa50 Mon Sep 17 00:00:00 2001
From: Niels Dossche <[email protected]>
Date: Wed, 29 Nov 2023 21:29:31 +0100
Subject: [PATCH 2/2] Shrink size of static binary search tree

This also makes it more efficient on the data cache.
---
source/lexbor/core/sbst.h | 10 +++++-----
source/lexbor/html/tokenizer/state.c | 2 +-
utils/lexbor/html/tmp/tokenizer_res.h | 2 +-
utils/lexbor/html/tokenizer_entities_bst.py | 8 ++++----
utils/lexbor/lexbor/LXB.py | 2 +-
5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/source/lexbor/core/sbst.h b/source/lexbor/core/sbst.h
index b0fbc54..40e0e91 100755
--- a/source/lexbor/core/sbst.h
+++ b/source/lexbor/core/sbst.h
@@ -19,12 +19,12 @@ extern "C" {
typedef struct {
lxb_char_t key;

- void *value;
- size_t value_len;
+ lxb_char_t value[6];
+ unsigned char value_len;

- size_t left;
- size_t right;
- size_t next;
+ unsigned short left;
+ unsigned short right;
+ unsigned short next;
}
lexbor_sbst_entry_static_t;

diff --git a/source/lexbor/html/tokenizer/state.c b/source/lexbor/html/tokenizer/state.c
index 70ca391..2f3414f 100755
--- a/source/lexbor/html/tokenizer/state.c
+++ b/source/lexbor/html/tokenizer/state.c
@@ -1815,7 +1815,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz,
goto done;
}

- if (entry->value != NULL) {
+ if (entry->value[0] != 0) {
tkz->entity_end = (tkz->pos + (data - begin)) - tkz->start;
tkz->entity_match = entry;
}
diff --git a/utils/lexbor/html/tmp/tokenizer_res.h b/utils/lexbor/html/tmp/tokenizer_res.h
index b3701d5..73ab66e 100755
--- a/utils/lexbor/html/tmp/tokenizer_res.h
+++ b/utils/lexbor/html/tmp/tokenizer_res.h
@@ -6,7 +6,7 @@

/*
* Caution!!! Important!!!
- * This file generated by the script
+ * This file is generated by the script
* "utils/lexbor/html/tokenizer_entities_bst.py"!
* Do not change this file!
*/
diff --git a/utils/lexbor/html/tokenizer_entities_bst.py b/utils/lexbor/html/tokenizer_entities_bst.py
index ee7dcb4..7cd1335 100755
--- a/utils/lexbor/html/tokenizer_entities_bst.py
+++ b/utils/lexbor/html/tokenizer_entities_bst.py
@@ -1,6 +1,6 @@

import json
-import sys, re, os
+import sys, os

# Find and append run script run dir to module search path
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
@@ -62,7 +62,7 @@ def entities_bst_create_layer(name, entry, index):

def entities_bst_create(index):
bst = {}
- bst[0] = ["\0", 0, 0, 0, "NULL"]
+ bst[0] = ["\0", 0, 0, 0, "{0}"]

begin = 1
idx = end = entities_bst_create_tree(index, bst, begin)
@@ -114,7 +114,7 @@ def entities_bst_create_tree(index, bst, idx):
assert len(index[ split[0] ]['values']) < 2, 'Double values'

if len(index[ split[0] ]['values']) == 0:
- value = "NULL"
+ value = "{0}"
else:
value = '"{}"'.format(toHex(index[ split[0] ]['values'][0]['characters']))

@@ -210,5 +210,5 @@ def entities_bst_print(bst):

if __name__ == "__main__":
entities_bst("tmp/tokenizer_res.h",
- "../../../source/lexbor/html/tokenizer_res.h",
+ "../../../source/lexbor/html/tokenizer/res.h",
"data/entities.json");
diff --git a/utils/lexbor/lexbor/LXB.py b/utils/lexbor/lexbor/LXB.py
index 3e75812..b068ea3 100755
--- a/utils/lexbor/lexbor/LXB.py
+++ b/utils/lexbor/lexbor/LXB.py
@@ -27,7 +27,7 @@ class Temp:

for line in fh:
for name in self.patterns:
- line = re.sub(name, '\n'.join(self.patterns[name]), line)
+ line = line.replace(name, '\n'.join(self.patterns[name]))
self.buffer.append(line)
fh.close()

--
2.43.0

0 comments on commit 775fb31

Please sign in to comment.