forked from kiwix/libkiwix
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
i18n data is kept in and generated from JSON files
Introduced a new resource compiler script kiwix-compile-i18n that processes i18n string data stored in JSON files and generates sorted C++ tables of string keys and values for all languages.
- Loading branch information
1 parent
d029c2b
commit 507e111
Showing
11 changed files
with
230 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
usr/share/man/man1/kiwix-compile-resources.1* | ||
usr/share/man/man1/kiwix-compile-i18n.1* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
#!/usr/bin/env python3 | ||
|
||
''' | ||
Copyright 2022 Veloman Yunkan <[email protected]> | ||
This program is free software; you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation; either version 3 of the License, or any | ||
later version. | ||
This program is distributed in the hope that it will be useful, but | ||
WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program; if not, write to the Free Software | ||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||
02110-1301, USA. | ||
''' | ||
|
||
import argparse | ||
import os.path | ||
import re | ||
import json | ||
|
||
def to_identifier(name):
    """Turn an arbitrary string into a valid C/C++ identifier fragment.

    Every character other than an ASCII letter or digit is replaced with
    an underscore.  If the result is empty or starts with a digit, an
    underscore is prepended so that the result is always a non-empty
    string that does not begin with a digit.
    """
    ident = re.sub(r'[^0-9a-zA-Z]', '_', name)
    # The emptiness check must come first: indexing ident[0] on an empty
    # string would raise IndexError.
    if not ident or ident[0].isnumeric():
        return "_" + ident
    return ident
|
||
def lang_code(filename):
    """Derive the language code identifier from a resource file path.

    The directory part and the extension of `filename` are dropped and
    the remaining stem is passed through to_identifier().  The mapping
    from file name to language code is also printed to standard output.
    """
    base_name = os.path.basename(filename)
    stem, _extension = os.path.splitext(base_name)
    code = to_identifier(stem)
    print(base_name, '->', code)
    return code
|
||
from string import Template | ||
|
||
def expand_cxx_template(t, **kwargs):
    """Expand the $-placeholders of template text `t`.

    `t` uses string.Template syntax; each placeholder is replaced with
    the value of the keyword argument of the same name.  A placeholder
    without a matching keyword argument raises KeyError.
    """
    template = Template(t)
    return template.substitute(**kwargs)
|
||
def cxx_string_literal(s):
    """Return `s` as the source text of a C++ UTF-8 (u8"...") string literal.

    The result is pure ASCII: quotes, backslashes and control characters
    are escaped by json.dumps(), and every character from U+007F upwards
    is written as a universal-character-name.
    """
    def universal_character_name(match):
        code_point = ord(match.group())
        # JSON would spell a non-BMP character as a UTF-16 surrogate
        # pair (two \uXXXX escapes), but surrogate code points are not
        # valid universal-character-names in C++; the 8-digit \U form
        # must be used instead.
        if code_point > 0xFFFF:
            return '\\U{:08x}'.format(code_point)
        return '\\u{:04x}'.format(code_point)

    # For everything json.dumps() escapes with ensure_ascii=False (quotes,
    # backslashes, control characters) the JSON and C++ escape rules agree.
    quoted = json.dumps(s, ensure_ascii=False)
    return 'u8' + re.sub(r'[^\x00-\x7e]', universal_character_name, quoted)
|
||
# C++ template of the string table of a single language.  $TABLE_NAME and
# $TABLE_ENTRIES are filled in by Resource.get_string_table().
string_table_cxx_template = ''' | ||
const I18nString $TABLE_NAME[] = { | ||
$TABLE_ENTRIES | ||
}; | ||
''' | ||
|
||
# C++ template of one entry of the language table.  $LANG_STRING_LITERAL
# and $STRING_TABLE_NAME are filled in by Resource.get_lang_table_entry().
lang_table_entry_cxx_template = ''' | ||
{ | ||
$LANG_STRING_LITERAL, | ||
ARRAY_ELEMENT_COUNT($STRING_TABLE_NAME), | ||
$STRING_TABLE_NAME | ||
}''' | ||
|
||
# C++ template of the whole generated file.  $STRING_DATA, $LANG_TABLE and
# $LANG_COUNT are filled in by gen_c_file().
cxxfile_template = '''// This file is automatically generated. Do not modify it. | ||
#include "server/i18n.h" | ||
namespace kiwix { | ||
namespace i18n { | ||
namespace | ||
{ | ||
$STRING_DATA | ||
} // unnamed namespace | ||
#define ARRAY_ELEMENT_COUNT(a) (sizeof(a)/sizeof(a[0])) | ||
extern const I18nStringTable stringTables[] = { | ||
$LANG_TABLE | ||
}; | ||
extern const size_t langCount = $LANG_COUNT; | ||
} // namespace i18n | ||
} // namespace kiwix | ||
''' | ||
|
||
class Resource:
    """A per-language i18n resource: a JSON file of translated strings.

    Attributes set by the constructor:
      filename  -- the resource file name with surrounding whitespace removed
      lang_code -- identifier derived from the file name by lang_code()
      data      -- the raw text of the resource file
    """

    def __init__(self, base_dirs, filename):
        """Load `filename` from the first directory in `base_dirs` that has it.

        Raises Exception if the file exists in none of the directories.
        """
        filename = filename.strip()
        self.filename = filename
        self.lang_code = lang_code(filename)
        self.data = self._read_first_match(base_dirs, filename)

    @staticmethod
    def _read_first_match(base_dirs, filename):
        """Return the text of `filename` from the first base dir containing it."""
        for base_dir in base_dirs:
            try:
                # Decode explicitly as UTF-8 (the encoding JSON files are
                # exchanged in) rather than with the locale's preferred
                # encoding, which may be ASCII or a legacy code page.
                with open(os.path.join(base_dir, filename), 'r',
                          encoding='utf-8') as f:
                    return f.read()
            except FileNotFoundError:
                continue
        raise Exception("Impossible to find {}".format(filename))

    def get_string_table_name(self):
        """Return the name of the C++ array holding this language's strings."""
        return "string_table_for_" + self.lang_code

    def get_string_table(self):
        """Return the C++ definition of this language's string table."""
        table_entries = ",\n ".join(self.get_string_table_entries())
        return expand_cxx_template(string_table_cxx_template,
                                   TABLE_NAME=self.get_string_table_name(),
                                   TABLE_ENTRIES=table_entries)

    def get_string_table_entries(self):
        """Yield one C++ initializer '{ key, value }' per translated string.

        Entries are produced in sorted key order; the "@metadata" entry of
        the JSON file is not a translated string and is skipped.
        """
        d = json.loads(self.data)
        for k in sorted(d):
            if k == "@metadata":
                continue
            key_string = cxx_string_literal(k)
            value_string = cxx_string_literal(d[k])
            yield '{ ' + key_string + ', ' + value_string + ' }'

    def get_lang_table_entry(self):
        """Return this language's C++ initializer for the language table."""
        return expand_cxx_template(
            lang_table_entry_cxx_template,
            LANG_STRING_LITERAL=cxx_string_literal(self.lang_code),
            STRING_TABLE_NAME=self.get_string_table_name())
|
||
|
||
|
||
def gen_c_file(resources):
    """Return the text of the generated C++ file for `resources`.

    For every resource, its string table definition and its language
    table entry are collected (in the order of `resources`) and inserted
    into cxxfile_template together with the number of resources.
    """
    # Both pieces are requested per resource, one resource at a time.
    per_resource = [(res.get_string_table(), res.get_lang_table_entry())
                    for res in resources]
    string_tables = "\n".join(table for table, _ in per_resource)
    lang_entries = ",\n ".join(entry for _, entry in per_resource)
    return expand_cxx_template(cxxfile_template,
                               STRING_DATA=string_tables,
                               LANG_TABLE=lang_entries,
                               LANG_COUNT=len(resources))
|
||
|
||
|
||
# Script entry point: read the list of i18n resource files named on the
# command line and write the generated C++ file.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--cxxfile',
                        required=True,
                        help='The Cpp file name to generate')
    parser.add_argument('i18n_resource_file',
                        help='The list of resources to compile.')
    args = parser.parse_args()

    # Resource paths in the list are resolved relative to the directory
    # of the list file itself.
    base_dir = os.path.dirname(os.path.realpath(args.i18n_resource_file))
    with open(args.i18n_resource_file, 'r') as f:
        # Blank lines (e.g. a trailing empty line) do not name a resource;
        # passing them on would make Resource fail on an empty file name.
        resources = [Resource([base_dir], filename)
                     for filename in f
                     if filename.strip()]

    with open(args.cxxfile, 'w') as f:
        f.write(gen_c_file(resources))
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
.TH KIWIX-COMPILE-I18N "1" "January 2022" "Kiwix" "User Commands" | ||
.SH NAME | ||
kiwix-compile-i18n \- helper to compile Kiwix i18n (internationalization) data | ||
.SH SYNOPSIS | ||
\fBkiwix\-compile\-i18n\fR [\-h] \-\-cxxfile CXXFILE i18n_resource_file\fR | ||
.SH DESCRIPTION | ||
.TP | ||
i18n_resource_file | ||
The list of i18n resources to compile. | ||
.TP | ||
\fB\-h\fR, \fB\-\-help\fR | ||
show a help message and exit | ||
.TP | ||
\fB\-\-cxxfile\fR CXXFILE | ||
The Cpp file name to generate | ||
.TP | ||
.SH AUTHOR | ||
Veloman Yunkan <[email protected]> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
{ | ||
"@metadata": { | ||
"authors": [ | ||
] | ||
}, | ||
"name":"English", | ||
"suggest-full-text-search": "containing '{{{SEARCH_TERMS}}}'..." | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"@metadata": { | ||
"authors": [ | ||
"Veloman Yunkan" | ||
] | ||
}, | ||
"name": "Current language to which the string is being translated.", | ||
"suggest-full-text-search": "Text appearing in the suggestion list that, when selected, runs a full text search instead of the title search" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
i18n/en.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters