Skip to content

Commit

Permalink
Move en/zh_pron_tags.json to Python code
Browse files Browse the repository at this point in the history
Delete `WiktionaryConfig.ZH_PRON_TAGS`.
  • Loading branch information
xxyzz committed Feb 23, 2024
1 parent 71a3607 commit 7a55eb3
Show file tree
Hide file tree
Showing 4 changed files with 1,002 additions and 1,437 deletions.
44 changes: 12 additions & 32 deletions src/wiktextract/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@
import json
import sys
from typing import (
Callable,
Iterable,
Optional,
TypedDict,
Union,
)

from wikitextprocessor.core import ErrorMessageData, CollatedErrorReturnData
from wikitextprocessor.core import CollatedErrorReturnData, ErrorMessageData

if sys.version_info < (3, 10):
from importlib_resources import files
Expand Down Expand Up @@ -61,7 +59,6 @@ class WiktionaryConfig:
"debugs",
"redirects",
"data_folder",
"ZH_PRON_TAGS",
"analyze_templates",
"extract_thesaurus_pages",
"save_ns_names",
Expand All @@ -72,17 +69,17 @@ def __init__(
self,
dump_file_lang_code: str = "en",
capture_language_codes: Optional[Iterable[str]] = {"en", "mul"},
capture_translations = True,
capture_pronunciation = True,
capture_linkages = True,
capture_compounds = True,
capture_redirects = True,
capture_examples = True,
capture_etymologies = True,
capture_inflections = True,
capture_descendants = True,
verbose = False,
expand_tables = False,
capture_translations=True,
capture_pronunciation=True,
capture_linkages=True,
capture_compounds=True,
capture_redirects=True,
capture_examples=True,
capture_etymologies=True,
capture_inflections=True,
capture_descendants=True,
verbose=False,
expand_tables=False,
):
if capture_language_codes is not None:
assert isinstance(capture_language_codes, (list, tuple, set))
Expand Down Expand Up @@ -123,8 +120,6 @@ def __init__(
self.debugs: list[ErrorMessageData] = []
self.redirects: SoundFileRedirects = {}
self.data_folder = files("wiktextract") / "data" / dump_file_lang_code
self.ZH_PRON_TAGS: list[str]
self.set_attr_from_json("ZH_PRON_TAGS", "zh_pron_tags.json")
self.analyze_templates = True # find templates that need pre-expand
self.extract_thesaurus_pages = True
# these namespace pages will be copied from the XML dump file and
Expand Down Expand Up @@ -161,21 +156,6 @@ def merge_return(self, ret: CollatedErrorReturnData):
self.warnings.extend(ret.get("warnings", []))
self.debugs.extend(ret.get("debugs", []))

def set_attr_from_json(
    self,
    attr_name: str,
    file_name: str,
    convert_func: Optional[Callable] = None,
) -> None:
    """Populate an attribute of this object from a JSON file.

    The file is looked up by name inside ``self.data_folder``. When it
    exists it is read as UTF-8 and parsed as JSON; when it does not, an
    empty dict is used instead. If ``convert_func`` is truthy, the value
    (including the empty-dict fallback) is passed through it before being
    stored on ``self`` under ``attr_name``.

    Args:
        attr_name: Name of the attribute to set on ``self``.
        file_name: File name relative to ``self.data_folder``.
        convert_func: Optional callable applied to the loaded value.
    """
    source = self.data_folder.joinpath(file_name)
    if source.exists():
        with source.open(encoding="utf-8") as stream:
            loaded = json.load(stream)
    else:
        # Missing data file: fall back to an empty mapping.
        loaded = {}
    if convert_func:
        loaded = convert_func(loaded)
    setattr(self, attr_name, loaded)

def load_edition_settings(self) -> None:
file_path = self.data_folder / "config.json"
if file_path.exists():
Expand Down
Loading

0 comments on commit 7a55eb3

Please sign in to comment.