Skip to content

Commit 54f87df

Browse files
jelly authored and martinpitt committed
lib: rewrite html2po in Python
Extracting translatable strings from HTML pages is now easy enough without having to look at other attributes, and this removes a dependency on htmlparser and node_modules.
1 parent eac8295 commit 54f87df

File tree

6 files changed

+108
-220
lines changed

6 files changed

+108
-220
lines changed

node_modules

Submodule node_modules updated 1209 files

package.json

-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
"eslint-plugin-react-hooks": "4.6.2",
4545
"gettext-parser": "8.0.0",
4646
"glob": "11.0.1",
47-
"htmlparser": "1.7.7",
4847
"jed": "1.1.1",
4948
"qunit": "2.24.1",
5049
"qunit-tap": "1.5.1",

pkg/lib/html2po

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
#!/usr/bin/python3
2+
3+
# Extracts translatable strings from HTML files in the following forms:
4+
#
5+
# <tag translate>String</tag>
6+
# <tag translate context="value">String</tag>
7+
#
8+
# Note that some consumers of these translations may not support all of the
9+
# strings, depending on the code that actually uses them to translate the HTML.
10+
11+
12+
import argparse
13+
import collections.abc
14+
import json
15+
import pathlib
16+
from collections import defaultdict
17+
from html.parser import HTMLParser
18+
19+
# Fixed preamble written at the top of the generated PO template.  Every line
# is a raw string so that the trailing ``\n`` stays a literal backslash + "n"
# (the PO escape sequence) rather than becoming a real newline.
PO_HEADER = "\n".join((
    r'msgid ""',
    r'msgstr ""',
    r'"Project-Id-Version: PACKAGE_VERSION\n"',
    r'"MIME-Version: 1.0\n"',
    r'"Content-Type: text/plain; charset=UTF-8\n"',
    r'"Content-Transfer-Encoding: 8bit\n"',
    r'"X-Generator: Cockpit html2po\n"',
)) + "\n"
27+
28+
29+
class MyHTMLParser(HTMLParser):
30+
current_attr: tuple[str | None, str] | None = None
31+
filename: str
32+
33+
def __init__(self, filename: str):
34+
self.strings = defaultdict[tuple[str | None, str], set[str]](set)
35+
self.filename = filename
36+
super().__init__()
37+
38+
def handle_starttag(self, tag, attrs):
39+
translatable = False
40+
context = None
41+
42+
# attrs => (key, value) where value needs to be split
43+
for (attr, value) in attrs:
44+
if attr == 'translate-context':
45+
context = value
46+
elif attr == 'context':
47+
context = value
48+
elif attr == 'translate' and (value is None or 'yes' in value.split(' ')):
49+
translatable = True
50+
51+
if not translatable:
52+
return
53+
54+
lineno = self.getpos()[0]
55+
filename = f'{self.filename}:{lineno}'
56+
self.current_attr = (context, filename)
57+
58+
def handle_data(self, data: str):
59+
if self.current_attr and data:
60+
context, filename = self.current_attr
61+
self.strings[context, data].add(filename)
62+
self.current_attr = None
63+
64+
65+
def _location_key(location: str) -> tuple[str, int]:
    """Sort key for a ``"<filename>:<line>"`` reference: by file, then line number."""
    path, _, lineno = location.rpartition(':')
    return path, int(lineno)


def _format_po(strings: collections.abc.Mapping[tuple[str | None, str], set[str]]) -> str:
    """Render the collected ``(context, msgid) -> locations`` map as PO template text."""
    chunks = [PO_HEADER]
    for (context, msgid), locations in strings.items():
        # Sets have no stable iteration order; sort the references so the
        # generated file is reproducible between runs.
        chunks.append(f"\n#: {' '.join(sorted(locations, key=_location_key))}\n")
        # json.dumps() escapes quotes and backslashes in the text or context.
        # ensure_ascii=False keeps non-ASCII characters as literal UTF-8
        # instead of \uXXXX escapes, matching the charset in PO_HEADER.
        if context:
            chunks.append(f'msgctxt {json.dumps(context, ensure_ascii=False)}\n')
        chunks.append(f'msgid {json.dumps(msgid, ensure_ascii=False)}\n')
        chunks.append('msgstr ""\n')
    return ''.join(chunks)


def main() -> None:
    """Command-line entry point: extract translatable strings into a PO template.

    Parses every FILE given on the command line (resolved against
    ``--directory`` when given), merges the strings found by MyHTMLParser and
    writes the PO header followed by one entry per ``(context, msgid)`` pair.
    The result goes to ``--output``, or to standard output when no output
    file is given.
    """
    import sys  # only needed for the stdout fallback below

    parser = argparse.ArgumentParser(prog='html2po',
                                     description='Extracts translatable strings from HTML files')
    parser.add_argument('-d', '--directory', help='Base directory for input files')
    parser.add_argument('-o', '--output', help='Output file')
    parser.add_argument('files', nargs='+', help='One or more input files', type=pathlib.Path, metavar='FILE')

    args = parser.parse_args()
    strings = defaultdict[tuple[str | None, str], set[str]](set)

    files: collections.abc.Iterable[pathlib.Path] = args.files
    for file in files:
        # Qualify the filename if necessary; references in the output keep
        # the unqualified name.
        full_path = pathlib.Path(args.directory) / file if args.directory else file

        htmlparser = MyHTMLParser(str(file))
        # Read as UTF-8 regardless of the locale.
        with open(full_path, encoding='utf-8') as fp:
            htmlparser.feed(fp.read())
        for ctx_msgid, locations in htmlparser.strings.items():
            strings[ctx_msgid].update(locations)

    text = _format_po(strings)
    if args.output:
        # The header declares charset=UTF-8, so write UTF-8 explicitly.
        with open(args.output, 'w', encoding='utf-8') as fp:
            fp.write(text)
    else:
        sys.stdout.write(text)


if __name__ == "__main__":
    main()

pkg/lib/html2po.js

-217
This file was deleted.

po/Makefile.am

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ PO_INPUTS = $(addsuffix .po,$(PO_LINGUAS))
88
# Extract translate attribute, Glade style, angular-gettext HTML translations
99
po/cockpit.html.pot: $(srcdir)/package-lock.json
1010
$(AM_V_GEN) mkdir -p $(dir $@) && \
11-
$(srcdir)/pkg/lib/html2po.js -d $(srcdir) -o $@ \
11+
$(srcdir)/pkg/lib/html2po -d $(srcdir) -o $@ \
1212
$$(cd $(srcdir) && find pkg/ -name '*.html')
1313

1414
# Extract cockpit style javascript translations

tools/vulture_suppressions/html2po.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
class MyHTMLParser:
2+
...
3+
4+
5+
MyHTMLParser.handle_starttag # type: ignore[attr-defined]
6+
7+
MyHTMLParser.handle_data # type: ignore[attr-defined]

0 commit comments

Comments
 (0)