Skip to content

Commit

Permalink
Uses langcodes for lang names
Browse files Browse the repository at this point in the history
  • Loading branch information
catusphan committed Dec 6, 2024
1 parent f8e60dc commit b4479b4
Show file tree
Hide file tree
Showing 5 changed files with 236 additions and 11 deletions.
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.12
7 changes: 5 additions & 2 deletions bin/dict_summary.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import json
import argparse
import langcodes

DOWNLOAD_TAG = "v4.0" # Set the GitHub tag version that is associated with the release

Expand Down Expand Up @@ -119,8 +120,10 @@ def generate_summary(dict_dir):
download_urls = get_downloadable_files(filebase, DOWNLOAD_TAG, dict_dir)

# Get full language names in Vietnamese
source_full_name = language_names.get(metadata['Source'], f"Unknown ({metadata['Source']})")
target_full_name = language_names.get(metadata['Target'], f"Unknown ({metadata['Target']})")
source_full_name = langcodes.Language.get(metadata['Source']).display_name('vi')
# language_names.get(metadata['Source'], f"Unknown ({metadata['Source']})")
target_full_name = langcodes.Language.get(metadata['Target']).display_name('vi')
#language_names.get(metadata['Target'], f"Unknown ({metadata['Target']})")

# Append the data to the list
data.append({
Expand Down
16 changes: 7 additions & 9 deletions bin/test.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from multiprocessing import Pool
data = [
{"name": "Delphi"}, {"name": "Orion"}, {"name": "Asher"}, {"name": "Baccus"}
]
def fun(object):
print(object)
import langcodes

with Pool(4) as pool:
pool.map(fun,data)
#Prints: """
language_names = {
lang_code: langcodes.Language.get(lang_code).display_name('vi')
for lang_code in ['vi', 'en', 'fr', 'de', 'es', 'it', 'ja', 'ko', 'zh', 'ru']
}

print(language_names)
16 changes: 16 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[project]
name = "tudien"
version = "0.1.0"
description = "Tudien project "
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"beautifulsoup4>=4.11.1",
"iso-language-codes>=1.1.0",
"langcodes>=3.5.0",
"lxml>=4.9.1",
"marisa-trie>=0.7.7",
"pinyin>=0.4.0",
"pyglossary>=4.6.1",
"ruff>=0.8.2",
]
Loading

0 comments on commit b4479b4

Please sign in to comment.