Skip to content

Commit

Permalink
Update i18n readme section on editing.
Browse files Browse the repository at this point in the history
Add options to lang.py  -L for language select.  
Add experimental lang.py option -A for machine translation via argostranslate local downloaded models.
  • Loading branch information
itsTheFae committed Jan 5, 2025
1 parent 7224484 commit af381bd
Show file tree
Hide file tree
Showing 2 changed files with 191 additions and 12 deletions.
185 changes: 178 additions & 7 deletions i18n/lang.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,14 @@ def compile(self):

print("Compiling existing PO files to MO...")
for po_file in self.basedir.glob(self._po_file_pattern):
locale = po_file.parent.parent.name
if self.args.lang and self.args.lang != locale:
continue

mo_file = po_file.with_suffix(".mo")
po = polib.pofile(po_file)
po.save_as_mofile(mo_file)
locale = po_file.parent.parent.name

fname = po_file.name
ptl = self._colorize_percent(po.percent_translated())
print(f"Compiled: {C_BWHITE}{locale}{C_END} - {fname} - {ptl} translated")
Expand Down Expand Up @@ -280,6 +284,9 @@ def stats(self, save_json: bool = False, save_badges: bool = False):
pot_msgs = polib.pofile(self._msgs_diff_path)
for po_file in self.basedir.glob(self._po_file_pattern):
locale = po_file.parent.parent.name
if self.args.lang and self.args.lang != locale:
continue

locale_set.add(locale)

if locale != last_locale:
Expand Down Expand Up @@ -323,15 +330,15 @@ def stats(self, save_json: bool = False, save_badges: bool = False):
pct = completed / total * 100
print(f"\nOverall Completion: {pct:.1f}%\n")

if save_json:
if save_json and not self.args.lang:
data["MUSICBOT"] = {
"completion": pct,
"languages": ",".join(locale_set),
"completion": f"{pct:.1f}",
"languages": ", ".join(locale_set),
}
with open(self._json_stats_path, "w", encoding="utf-8") as fh:
json.dump(data, fh)

if save_badges:
if save_badges and not self.args.lang:
b_color = "red"
if pct > 60:
b_color = "yellow"
Expand Down Expand Up @@ -428,6 +435,152 @@ def update(self):
)
print("Done.")

def argostranslate(self):
    """
    Machine-translate untranslated PO entries using Argos Translate.

    For every PO file matched by the tool's PO file pattern (optionally
    limited to the locale given in ``self.args.lang``), this method:

    1. Installs or updates the English -> target Argos language pack,
       once per target language.
    2. Parses each untranslated ``msgid`` as Markdown, translates only the
       inline text nodes, and protects percent-style placeholders
       (``%s``, ``%(name)s``, ...) from being altered by the translator.
    3. Stores the result in ``msgstr``, flags the entry as
       ``machine-translated`` and saves the PO file.

    Locales whose language code is ``en`` or ``xx`` are skipped.
    Exits the process with status 1 if ``argostranslate`` or ``marko``
    cannot be imported.
    """
    self._check_polib()
    import uuid

    import polib

    print("Starting Argos machine translation process...")

    try:
        from argostranslate import (
            package as argospkg,  # pylint: disable=import-error,useless-suppression
        )
        from argostranslate import (
            translate as argostl,  # pylint: disable=import-error,useless-suppression
        )
    except Exception:  # pylint: disable=broad-exception-caught
        print("Failed to import argostranslate. Please install it with pip.")
        sys.exit(1)

    try:
        import marko
        from marko.md_renderer import MarkdownRenderer
    except Exception:  # pylint: disable=broad-exception-caught
        print("Failed to import marko. Please install it with pip.")
        sys.exit(1)

    # update argos package index.
    print("Fetching available packages.")
    argospkg.update_package_index()
    available_packages = argospkg.get_available_packages()
    installed_packages = argospkg.get_installed_packages()
    stringsubs = re.compile(r"%(?:\([a-z0-9_]+\))?[a-z0-9\.]+")

    excluded_tocodes = ["en", "xx"]
    from_code = "en"

    def find_package(packages, to_code):
        """Return the first from_code -> to_code package, or None."""
        return next(
            (
                pkg
                for pkg in packages
                if pkg.from_code == from_code and pkg.to_code == to_code
            ),
            None,
        )

    # extract locales from existing language directories.
    # then determine if we should install or update language packs for them.
    # Each entry is (po_file, to_code) so the translation pass below does not
    # need to repeat the language and exclusion filtering.
    pofile_paths = []
    prepared_codes = set()
    for po_file in self.basedir.glob(self._po_file_pattern):
        locale = po_file.parent.parent.name
        if self.args.lang and self.args.lang != locale:
            continue

        to_code = locale.split("_", maxsplit=1)[0]
        if to_code in excluded_tocodes:
            print(f"Excluded target language: {to_code}")
            continue

        pofile_paths.append((po_file, to_code))

        # A locale may own several PO files; handle its language pack once.
        if to_code in prepared_codes:
            continue
        prepared_codes.add(to_code)

        installed_package = find_package(installed_packages, to_code)
        if installed_package is not None:
            print(f"Updating language pack for: {to_code}")
            installed_package.update()
            continue

        package_to_install = find_package(available_packages, to_code)
        if package_to_install is not None:
            print(f"Installing language pack for: {to_code}")
            argospkg.install_from_path(package_to_install.download())
        else:
            print(f"Language pack may not be available for: {to_code}")

    # Helper for Markdown AST traversal.
    def marko_tl(elm, from_code, to_code):
        """Translate the text nodes under ``elm`` in place and return ``elm``."""
        # process text elements which can be translated.
        if isinstance(elm, marko.inline.InlineElement) and isinstance(
            elm.children, str
        ):
            # placeholder -> opaque numeric token; one token per distinct
            # placeholder so repeated placeholders cost no extra work.
            tokens = {}

            def protect(match):
                placeholder = match.group(0)
                if placeholder not in tokens:
                    tokens[placeholder] = f"_{uuid.uuid4().int >> 64}_"
                return tokens[placeholder]

            # Substituting at the exact match positions keeps a short
            # placeholder from clobbering the start of a longer one.
            elm_text = stringsubs.sub(protect, elm.children)

            # The translator may rewrite a token; translating the token on
            # its own gives a second spelling to look for when restoring.
            restore = {}
            for placeholder, token in tokens.items():
                restore[token] = placeholder
                ttoken = argostl.translate(token, from_code, to_code)
                # An empty needle would make str.replace insert the
                # placeholder between every character, so skip it.
                if ttoken and ttoken.strip():
                    restore[ttoken] = placeholder

            # translate the raw text for this node.
            elm_ttext = argostl.translate(elm_text, from_code, to_code)
            # fix placeholder substitutions.
            for token, placeholder in restore.items():
                elm_ttext = elm_ttext.replace(token, placeholder)
            # update the element node with translated text.
            elm.children = elm_ttext

        # process element children to search for translatable elements.
        elif hasattr(elm, "children"):
            for child in elm.children:
                marko_tl(child, from_code, to_code)
        return elm

    def match_newlines(source, text):
        """Make ``text`` start/end with a newline exactly when ``source`` does."""
        # GNU msgfmt rejects entries whose msgid and msgstr disagree on a
        # leading or trailing newline. NOTE(review): the Markdown renderer
        # appears to terminate rendered blocks with a newline - confirm.
        if source.startswith("\n"):
            if not text.startswith("\n"):
                text = "\n" + text
        else:
            text = text.lstrip("\n")
        if source.endswith("\n"):
            if not text.endswith("\n"):
                text += "\n"
        else:
            text = text.rstrip("\n")
        return text

    # parse out the markdown formatting to make strings less complex.
    # The parser/renderer pair is loop-invariant, so build it once.
    mp = marko.Markdown(renderer=MarkdownRenderer)

    # Assuming the above loop got all the language packs we need.
    # We loop over the PO files again and translate only the missing strings.
    for po_file, to_code in pofile_paths:
        po = polib.pofile(po_file)
        ut_entries = po.untranslated_entries()
        num = len(ut_entries)
        print(
            f"Translating {num} strings from {from_code} to {to_code} in {po_file.name}"
        )
        for entry in ut_entries:
            otext = entry.msgid
            md = marko_tl(mp.parse(otext), from_code, to_code)
            ttext = match_newlines(otext, mp.render(md))

            # Leave the entry untranslated (and unflagged) when the
            # translator produced nothing usable.
            if not ttext.strip():
                continue

            entry.msgstr = ttext
            if "machine-translated" not in entry.flags:
                entry.flags.append("machine-translated")
        po.save()


def main():
"""MusicBot i18n tool entry point."""
Expand All @@ -437,12 +590,20 @@ def main():
epilog=(
"For more help and support with this bot, join our discord:"
"\n https://discord.gg/bots\n\n"
"This software is provided under the MIT License."
"\n"
"This software is provided under the MIT License.\n"
"See the `LICENSE` text file for complete details."
),
)

ap.add_argument(
"-L",
dest="lang",
type=str,
help="Select a single language code to run tasks on, instead of all installed languages.",
default="",
metavar="LOCALE",
)

ap.add_argument(
"-c",
dest="do_compile",
Expand Down Expand Up @@ -506,6 +667,13 @@ def main():
help="Update existing POT files and then update existing PO files.",
)

ap.add_argument(
"-A",
dest="do_argostranslate",
action="store_true",
help="Update all missing translations with Argos-translate machine translations.",
)

_args = ap.parse_args()
_basedir = pathlib.Path(__file__).parent.resolve()

Expand Down Expand Up @@ -539,6 +707,9 @@ def main():
if _args.do_update:
langtool.update()

if _args.do_argostranslate:
langtool.argostranslate()

if _args.do_compile:
langtool.compile()

Expand Down
18 changes: 13 additions & 5 deletions i18n/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,14 @@ Of course, the `.pot` and `.po` files are just plain text. So you can edit them
To compile the `.mo` files, you generally have two options.
If you used Poedit for translations, you can also use it to compile the `.po` into a `.mo` file.

If you edited on Crowdin and downloaded the changes or are using another editor, MusicBot provides the `lang.py` script to enable compiling on any system.
If you edited using another editor, MusicBot provides an option in the `lang.py` script to enable compiling on any system.
Follow these steps to compile manually:

1. First, make sure you've downloaded the PO files into their respective language directories.
2. Make sure you have `polib` python package installed.
2. Double check the Language code in the PO file matches the code used in the language directory.
3. Make sure you have the `polib` python package installed.
You can use `pip install polib` or use your system's package manager to find and install the appropriate package.
3. Run the lang tool with `python3 lang.py -c` to compile all existing PO files.
4. Run the lang tool with `python3 lang.py -c` to compile all existing PO files.

MusicBot should now be able to use the new translations!

Expand Down Expand Up @@ -135,7 +136,10 @@ Some options require the `polib` python package in order to be used.
The script provides these command line flags:

- `-h` or `--help`
Shows the help message and exits.
Shows the help message and exits.

- `-L`
Select a single language code to operate on, instead of all installed languages.

- `-c`
Compile existing PO files into MO files.
Expand All @@ -157,8 +161,12 @@ The script provides these command line flags:
This requires the `polib` python package.

- `-s`
Show translation stats for existing PO files, such as completion and number of missing translations.
Show translation stats for existing PO files, such as completion and number of missing translations.

- `-u`
Extracts strings to POT files, then updates existing PO files with new strings.
This requires the `polib` python package.

- `-A`
Attempt to automatically translate all untranslated strings using machine translations.
This requires the `polib`, `argostranslate`, and `marko` python packages.

0 comments on commit af381bd

Please sign in to comment.