Skip to content

Commit

Permalink
Remove unused code for Chinese Wiktionary from English extractor
Browse files (browse the repository at this point in the history)
  • Loading branch information
xxyzz committed Nov 28, 2023
1 parent ba2492e commit 73c0aec
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 31 deletions.
7 changes: 0 additions & 7 deletions src/wiktextract/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,6 @@ class WiktionaryConfig:
"POS_TYPES",
"OTHER_SUBTITLES",
"ZH_PRON_TAGS",
"FR_FORM_TABLES",
"DE_FORM_TABLES",
"FORM_OF_TEMPLATES",
"analyze_templates",
"extract_thesaurus_pages",
)
Expand Down Expand Up @@ -112,10 +109,6 @@ def __init__(
self.data_folder = files("wiktextract") / "data" / dump_file_lang_code
self.init_subtitles()
self.set_attr_from_json("ZH_PRON_TAGS", "zh_pron_tags.json")
if dump_file_lang_code == "zh":
self.set_attr_from_json(
"FORM_OF_TEMPLATES", "form_of_templates.json"
)
self.analyze_templates = True # find templates that need pre-expand
self.extract_thesaurus_pages = True
self.load_edition_settings()
Expand Down
24 changes: 0 additions & 24 deletions src/wiktextract/extractor/en/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1470,9 +1470,6 @@ def sense_template_fn(
v = v.strip()
if v and "<" not in v:
gloss_template_args.add(v)
if wxr.config.dump_file_lang_code == "zh":
add_form_of_tags(wxr, name,
wxr.config.FORM_OF_TEMPLATES, sense_base)
return None

def extract_link_texts(item):
Expand Down Expand Up @@ -3596,24 +3593,3 @@ def multitrans_post_fn(name, ht, text):
)
x["original_title"] = word
return ret


def add_form_of_tags(wxr, template_name: str, form_of_templates, sense_data) -> None:
    """Add "form-of" and related tags to a sense based on a template name.

    When ``template_name`` is found in ``form_of_templates``, the tag
    "form-of" is added to ``sense_data`` under the "tags" key via
    ``data_append``.  The template name is then matched against a fixed
    set of names/prefixes to add at most one group of more specific tags
    (abbreviation, alt-of, feminine, initialism, masculine, misspelling,
    obsolete).

    Args:
        wxr: Extraction context; not used by this function's body.
        template_name: Name of the template being examined.
        form_of_templates: Container of template names, used only for a
            membership (``in``) test.
        sense_data: Sense record passed to ``data_append`` /
            ``data_extend`` to receive the tags.

    Returns:
        None.  Results are recorded through the ``data_append`` /
        ``data_extend`` calls on ``sense_data``.
    """
    # https://en.wiktionary.org/wiki/Category:Form-of_templates
    if template_name in form_of_templates:
        data_append(sense_data, "tags", "form-of")

        # NOTE(review): the indentation of this chain was not recoverable
        # from the diff view; it is assumed to be nested under the
        # membership check above -- confirm against the repository.
        # Only the first matching branch runs, so a template gets at most
        # one of the specific tag groups below.
        if template_name in ("abbreviation of", "abbr of"):
            data_append(sense_data, "tags", "abbreviation")
        elif template_name.startswith(("alt ", "alternative")):
            data_append(sense_data, "tags", "alt-of")
        elif template_name.startswith(("female", "feminine")):
            data_append(sense_data, "tags", "feminine")
        elif template_name == "initialism of":
            # An initialism is tagged as an abbreviation as well.
            data_extend(sense_data, "tags", ["abbreviation", "initialism"])
        elif template_name.startswith("masculine"):
            data_append(sense_data, "tags", "masculine")
        elif template_name.startswith("misspelling"):
            data_append(sense_data, "tags", "misspelling")
        elif template_name.startswith(("obsolete", "obs ")):
            data_append(sense_data, "tags", "obsolete")

0 comments on commit 73c0aec

Please sign in to comment.