Skip to content

Commit

Permalink
Translate table header tags in fr edition "eo-conj" template
Browse files Browse the repository at this point in the history
This table has two rows of headers; clear the column headers collected from
the previous header row when the next header row is reached.
xxyzz committed Mar 5, 2024
1 parent bdc2b27 commit 4ddbd5b
Showing 3 changed files with 72 additions and 4 deletions.
9 changes: 6 additions & 3 deletions src/wiktextract/extractor/fr/inflection.py
Original file line number Diff line number Diff line change
@@ -37,6 +37,7 @@ def extract_inflection(
"nature", # de-adj
"genre", # es-accord-oa
"conjugaison présent indicatif", # avk-tab-conjug
"mode", # eo-conj
}
)
IGNORE_TABLE_HEADER_PREFIXES = (
@@ -119,6 +120,8 @@ def process_inflection_table(
and not table_data_cell_is_header(wxr, cell, page_data[-1].word)
for cell in table_row_nodes
)
if not current_row_has_data_cell:
column_headers.clear()
row_headers = []
new_rowspan_headers = []
for rowspan_text, rowspan_count in rowspan_headers:
@@ -203,7 +206,7 @@ def process_inflection_table(
if form_data.form == "":
form_data.form = table_cell_line
else:
form_data.form += " " + table_cell_line
form_data.form += "\n" + table_cell_line
for colspan_header in colspan_headers:
if (
column_cell_index >= colspan_header.index
@@ -224,9 +227,9 @@ def process_inflection_table(
if len(row_headers) > 0:
form_data.raw_tags.extend(row_headers)
if form_data.form != "":
for form in form_data.form.split(" ou "):
for form in form_data.form.splitlines():
new_form_data = form_data.model_copy(deep=True)
new_form_data.form = form
new_form_data.form = form.removeprefix("ou ")
translate_raw_tags(
new_form_data, table_template.template_name
)
16 changes: 15 additions & 1 deletion src/wiktextract/extractor/fr/tags.py
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@
"subjonctif": "subjunctive",
"conditionnel": "conditional",
"impératif": "imperative",
"volitif": "volitive",
}

VERB_FORM_TAGS: dict[str, Union[str, list[str]]] = {
@@ -152,9 +153,21 @@
"rare": "rare",
"plus rare": "rare",
"familier": "colloquial",
"par extension": "broadly",
}

GRAMMATICAL_TAGS: dict[str, str] = {
# Maps French header labels that pair a word class with a grammatical voice
# (active/passive) to the list of tags emitted for that label.
# Background on grammatical voice:
# https://en.wikipedia.org/wiki/Voice_(grammar)
VOICE_TAGS: dict[str, Union[str, list[str]]] = {
# Labels as used by the fr edition template:
# https://fr.wiktionary.org/wiki/Modèle:eo-conj
"participe actif": ["participle", "active"],
"participe passif": ["participle", "passive"],
"adverbe actif": ["adverb", "active"],
"adverbe passif": ["adverb", "passive"],
# NOTE(review): "subsuntive" looks like a misspelling of "substantive".
# The expectations in test_eo_conj use the same spelling, so any correction
# has to change both places together - confirm before renaming.
"substantif actif": ["subsuntive", "active"],
"substantif passif": ["subsuntive", "passive"],
}

GRAMMATICAL_TAGS: dict[str, Union[str, list[str]]] = {
**GENDER_TAGS,
**NUMBER_TAGS,
**MOOD_TAGS,
@@ -169,6 +182,7 @@
**JA_TAGS,
**OTHER_GRAMMATICAL_TAGS,
**SENSE_TAGS,
**VOICE_TAGS,
}


51 changes: 51 additions & 0 deletions tests/test_fr_inflection.py
Original file line number Diff line number Diff line change
@@ -617,3 +617,54 @@ def tes_avk_tab_conjug(self):
},
],
)

# Checks the forms extracted from a reduced copy of the fr edition "eo-conj"
# table. The table body used here has two separate header rows
# ("Temps ..." and "Mode ..."), each followed by one data row.
def test_eo_conj(self):
page_data = [
WordEntry(word="abdikanta", lang_code="eo", lang="Espéranto")
]
self.wxr.wtp.start_page("abdikanta")
root = self.wxr.wtp.parse("{{eo-conj|abdik|adp=1|sub=mf|subp=}}")
# Register the template page with a fixed table body so the test does not
# depend on the live template. The literal below is wikitext and must not
# be reformatted.
self.wxr.wtp.add_page(
"Modèle:eo-conj",
10,
"""{| class="flextable"
|-
! Temps
! Passé
! Present
! Futur
|-
!Substantif<br />actif
| [[abdikinto#eo|abdikinto(j,n)]]<br>[[abdikintino#eo|abdikintino(j,n)]]
|-
! Mode
! Conditionnel
! Volitif
! Infinitif
|-
! Présent
| [[abdikus#eo|abdikus]] || [[abdiku#eo|abdiku]]
|}""",
)
# Run the extractor on the first node of the parsed page; the resulting
# forms are read back from the last entry of page_data.
extract_inflection(self.wxr, page_data, root.children[0])
self.assertEqual(
[d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
[
# The single cell holding two links separated by <br> is expected to
# produce two forms that share the same tags.
{
"form": "abdikinto(j,n)",
"tags": ["past", "subsuntive", "active"],
},
{
"form": "abdikintino(j,n)",
"tags": ["past", "subsuntive", "active"],
},
# "abdikus" sits in the column headed "Passé" in the first header row
# and "Conditionnel" in the second; only "conditional" is expected, i.e.
# the first header row must no longer apply here.
{
"form": "abdikus",
"tags": ["conditional", "present"],
},
{
"form": "abdiku",
"tags": ["volitive", "present"],
},
],
)

0 comments on commit 4ddbd5b

Please sign in to comment.