diff --git a/src/wiktextract/extractor/de/tags.py b/src/wiktextract/extractor/de/tags.py index fa9ed520..2918500f 100644 --- a/src/wiktextract/extractor/de/tags.py +++ b/src/wiktextract/extractor/de/tags.py @@ -1,6 +1,8 @@ from .models import WordEntry +# Sense tags # https://de.wiktionary.org/wiki/Vorlage:K +# https://de.wiktionary.org/wiki/Vorlage:K/Abk K_TEMPLATE_TAGS = { "Abl.": "ablative", "Ablativ": "ablative", @@ -117,6 +119,75 @@ "ostfränkisch": "East Franconian German", "pej.": "pejorative", "poet.": "poetic", + "PräpmG": "genitive prepositional", + "PmG": "genitive prepositional", + "reg.": "regional", + "refl.": "reflexive", + "reflexiv": "reflexive", + # "respektive": "", + "sal.": "casual", + "scherzh.": "jocular", + "schriftspr.": "literary", + # "schülerspr.": "", + "schwäb.": "Swabian", + "schwäbisch": "Swabian", + "Schweiz": "Swiss Standard German", + "schweiz.": "Swiss Standard German", + "schweizerisch": "Swiss Standard German", + "Schweizerdeutsch": "Swiss German", + "schweizerdeutsch": "Swiss German", + # "seemannsspr.": "", + "sein": "auxiliary verb", + # "sehr": "", # very + # "seltener": "", # rare + # "seltener auch": "", + "soldatenspr.": ["military", "slang"], + # "sonderspr.": "", + # "sonst": "", + # "sowie": "", + "spätlat.": "Late Latin", + "spätlateinisch": "Late Latin", + # "später": "", + "speziell": "special", + "südd.": "South German", + "süddt.": "South German", + # "techn.": "", + # "teils": "", + # "teilweise": "", + "tlwva.": "outdated", + "tlwvatd.": "outdated", + "trans.": "transitive", + "transitiv": "transitive", + # "über": "", + # "überwiegend": "mostly", + "übertr.": "figurative", + "ugs.": "colloquial", + # "und": "", + "ungebr.": "uncommon", + "unpers.": "impersonal", + "unpersönlich": "impersonal", + # "ursprünglich": "", + "va.": "outdated", + "vatd.": "outdated", + # "verh.": "", + "volkst.": "popular", + # "von": "", + # "vor allem": "", + # "vor allem in": "", + "vul.": "vulgar", + "vulg.": "vulgar", + "vlat.": ["vulgar", "Latin"], + "vulgärlat": ["vulgar", "Latin"], + "vulgärlateinisch": ["vulgar", "Latin"], + "wien.": "Vienna", + "wienerisch": "Vienna", + # "Wpräp": "", + # "z. B.": "", + # "z. T.": "", + # "zijn": "", + # "zum Beispiel": "", + # "zum Teil": "", + # "zumeist": "", } @@ -124,7 +195,11 @@ def translate_raw_tags(data: WordEntry) -> None: raw_tags = [] for raw_tag in data.raw_tags: if raw_tag in K_TEMPLATE_TAGS: - data.tags.append(K_TEMPLATE_TAGS[raw_tag]) + tag = K_TEMPLATE_TAGS[raw_tag] + if isinstance(tag, str): + data.tags.append(tag) + elif isinstance(tag, list): + data.tags.extend(tag) else: raw_tags.append(raw_tag) data.raw_tags = raw_tags diff --git a/tests/test_de_gloss.py b/tests/test_de_gloss.py index 1fcbdf7b..7fa2afa3 100644 --- a/tests/test_de_gloss.py +++ b/tests/test_de_gloss.py @@ -142,8 +142,8 @@ def test_k_template_multiple_tags(self): "Verb transitiv (Deutsch)", "Österreichisches Deutsch", ], - "tags": ["Austrian German"], - "raw_tags": ["trans.", "besonders", "bayrisch"], + "tags": ["transitive", "Austrian German"], + "raw_tags": ["besonders", "bayrisch"], "glosses": ["Vieh auf der Alm halten"], "senseid": "1", }, @@ -174,17 +174,19 @@ def test_italic_sense_modifier(self): [s.model_dump(exclude_defaults=True) for s in word_entry.senses], [ { - "raw_tags": ["transitiv"], + "tags": ["transitive"], "glosses": ["etwas oft haben, zu haben pflegen"], "senseid": "1", }, { - "raw_tags": ["transitiv", "Stadt/Dorf", "aktiv"], + "tags": ["transitive"], + "raw_tags": ["Stadt/Dorf", "aktiv"], "glosses": ["bewohnen, wohnen"], "senseid": "2.1", }, { - "raw_tags": ["transitiv", "Stadt/Dorf", "passiv"], + "tags": ["transitive"], + "raw_tags": ["Stadt/Dorf", "passiv"], "glosses": ["bewohnt werden, zum Wohnsitz dienen"], "senseid": "2.2", },