Skip to content

Commit

Permalink
Merge pull request #566 from xxyzz/de
Browse files (browse the repository at this point in the history)
Translate the rest arguments of de edition's "K" template
  • Loading branch information
xxyzz authored Apr 1, 2024
2 parents 8944339 + 37261aa commit 2d30b31
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 6 deletions.
77 changes: 76 additions & 1 deletion src/wiktextract/extractor/de/tags.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from .models import WordEntry

# Sense tags
# https://de.wiktionary.org/wiki/Vorlage:K
# https://de.wiktionary.org/wiki/Vorlage:K/Abk
K_TEMPLATE_TAGS = {
"Abl.": "ablative",
"Ablativ": "ablative",
Expand Down Expand Up @@ -117,14 +119,87 @@
"ostfränkisch": "East Franconian German",
"pej.": "pejorative",
"poet.": "poetic",
"PräpmG": "genitive prepositional",
"PmG": "genitive prepositional",
"reg.": "regional",
"refl.": "reflexive",
"reflexiv": "reflexive",
# "respektive": "",
"sal.": "casual",
"scherzh.": "jocular",
"schriftspr.": "literary",
# "schülerspr.": "",
"schwäb.": "Swabian",
"schwäbisch": "Swabian",
"Schweiz": "Swiss Standard German",
"schweiz.": "Swiss Standard German",
"schweizerisch": "Swiss Standard German",
"Schweizerdeutsch": "Swiss German",
"schweizerdeutsch": "Swiss German",
# "seemannsspr.": "",
"sein": "auxiliary verb",
# "sehr": "", # very
# "seltener": "", # rare
# "seltener auch": "",
"soldatenspr.": ["military", "slang"],
# "sonderspr.": "",
# "sonst": "",
# "sowie": "",
"spätlat.": "Late Latin",
"spätlateinisch": "Late Latin",
# "später": "",
"speziell": "special",
"südd.": "South German",
"süddt.": "South German",
# "techn.": "",
# "teils": "",
# "teilweise": "",
"tlwva.": "outdated",
"tlwvatd.": "outdated",
"trans.": "transitive",
"transitiv": "transitive",
# "über": "",
# "überwiegend": "mostly",
"übertr.": "figurative",
"ugs.": "colloquial",
# "und": "",
"ungebr.": "uncommon",
"unpers.": "impersonal",
"unpersönlich": "impersonal",
# "ursprünglich": "",
"va.": "outdated",
"vatd.": "outdated",
# "verh.": "",
"volkst.": "popular",
# "von": "",
# "vor allem": "",
# "vor allem in": "",
"vul.": "vulgar",
"vulg.": "vulgar",
"vlat.": ["vulgar", "Latin"],
"vulgärlat": ["vulgar", "Latin"],
"vulgärlateinisch": ["vulgar", "Latin"],
"wien.": "Vienna",
"wienerisch": "Vienna",
# "Wpräp": "",
# "z. B.": "",
# "z. T.": "",
# "zijn": "",
# "zum Beispiel": "",
# "zum Teil": "",
# "zumeist": "",
}


def translate_raw_tags(data: WordEntry) -> None:
    """Translate the known raw tags of ``data`` into normalized tags, in place.

    Every entry of ``data.raw_tags`` that is a key of ``K_TEMPLATE_TAGS`` is
    removed from ``data.raw_tags`` and its translation is added to
    ``data.tags``: a string value adds a single tag, a list value adds each
    of its items. Raw tags without a translation are kept in
    ``data.raw_tags`` in their original order.

    Args:
        data: Entry whose ``raw_tags`` and ``tags`` lists are updated.
    """
    raw_tags = []
    for raw_tag in data.raw_tags:
        if raw_tag in K_TEMPLATE_TAGS:
            tag = K_TEMPLATE_TAGS[raw_tag]
            # A translation is either one tag (str) or several tags (list);
            # lists are flattened so ``data.tags`` only ever holds strings.
            if isinstance(tag, str):
                data.tags.append(tag)
            elif isinstance(tag, list):
                data.tags.extend(tag)
        else:
            raw_tags.append(raw_tag)
    data.raw_tags = raw_tags
12 changes: 7 additions & 5 deletions tests/test_de_gloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ def test_k_template_multiple_tags(self):
"Verb transitiv (Deutsch)",
"Österreichisches Deutsch",
],
"tags": ["Austrian German"],
"raw_tags": ["trans.", "besonders", "bayrisch"],
"tags": ["transitive", "Austrian German"],
"raw_tags": ["besonders", "bayrisch"],
"glosses": ["Vieh auf der Alm halten"],
"senseid": "1",
},
Expand Down Expand Up @@ -174,17 +174,19 @@ def test_italic_sense_modifier(self):
[s.model_dump(exclude_defaults=True) for s in word_entry.senses],
[
{
"raw_tags": ["transitiv"],
"tags": ["transitive"],
"glosses": ["etwas oft haben, zu haben pflegen"],
"senseid": "1",
},
{
"raw_tags": ["transitiv", "Stadt/Dorf", "aktiv"],
"tags": ["transitive"],
"raw_tags": ["Stadt/Dorf", "aktiv"],
"glosses": ["bewohnen, wohnen"],
"senseid": "2.1",
},
{
"raw_tags": ["transitiv", "Stadt/Dorf", "passiv"],
"tags": ["transitive"],
"raw_tags": ["Stadt/Dorf", "passiv"],
"glosses": ["bewohnt werden, zum Wohnsitz dienen"],
"senseid": "2.2",
},
Expand Down

0 comments on commit 2d30b31

Please sign in to comment.