diff --git a/src/wiktextract/extractor/fr/inflection.py b/src/wiktextract/extractor/fr/inflection.py
index 49da53852..36d01fc7b 100644
--- a/src/wiktextract/extractor/fr/inflection.py
+++ b/src/wiktextract/extractor/fr/inflection.py
@@ -36,6 +36,8 @@ def extract_inflection(
"nombre", # ca-accord-mixte2
"nature", # de-adj
"genre", # es-accord-oa
+ "conjugaison présent indicatif", # avk-tab-conjug
+ "mode", # eo-conj
}
)
IGNORE_TABLE_HEADER_PREFIXES = (
@@ -50,7 +52,7 @@ def extract_inflection(
}
)
IGNORE_TABLE_CELL_PREFIXES = (
- "voir conjugaison ", # en-conj
+ "voir conjugaison ", # en-conj, avk-conj
)
@@ -82,10 +84,10 @@ def table_data_cell_is_header(
def process_inflection_table(
wxr: WiktextractContext,
page_data: list[WordEntry],
- node: WikiNode,
+ table_template: TemplateNode,
) -> None:
expanded_node = wxr.wtp.parse(
- wxr.wtp.node_to_wikitext(node), expand_all=True
+ wxr.wtp.node_to_wikitext(table_template), expand_all=True
)
table_nodes = list(expanded_node.find_child(NodeKind.TABLE))
if len(table_nodes) == 0:
@@ -118,6 +120,8 @@ def process_inflection_table(
and not table_data_cell_is_header(wxr, cell, page_data[-1].word)
for cell in table_row_nodes
)
+ if not current_row_has_data_cell:
+ column_headers.clear()
row_headers = []
new_rowspan_headers = []
for rowspan_text, rowspan_count in rowspan_headers:
@@ -178,7 +182,8 @@ def process_inflection_table(
table_cell.attrs.get("colspan", 1)
)
else:
- row_headers.append(table_header_text)
+ if table_header_text not in row_headers:
+ row_headers.append(table_header_text)
if "rowspan" in table_cell.attrs:
rowspan_headers.append(
(
@@ -194,14 +199,14 @@ def process_inflection_table(
elif (
table_cell_line != page_data[-1].word
and table_cell_line not in IGNORE_TABLE_CELL
- and not table_cell_line.startswith(
+ and not table_cell_line.lower().startswith(
IGNORE_TABLE_CELL_PREFIXES
)
):
if form_data.form == "":
form_data.form = table_cell_line
else:
- form_data.form += " " + table_cell_line
+ form_data.form += "\n" + table_cell_line
for colspan_header in colspan_headers:
if (
column_cell_index >= colspan_header.index
@@ -222,10 +227,12 @@ def process_inflection_table(
if len(row_headers) > 0:
form_data.raw_tags.extend(row_headers)
if form_data.form != "":
- for form in form_data.form.split(" ou "):
+ for form in form_data.form.splitlines():
new_form_data = form_data.model_copy(deep=True)
- new_form_data.form = form
- translate_raw_tags(new_form_data)
+ new_form_data.form = form.removeprefix("ou ")
+ translate_raw_tags(
+ new_form_data, table_template.template_name
+ )
page_data[-1].forms.append(new_form_data)
colspan_text = table_cell.attrs.get("colspan", "1")
diff --git a/src/wiktextract/extractor/fr/tags.py b/src/wiktextract/extractor/fr/tags.py
index b38f91f88..12d77e74b 100644
--- a/src/wiktextract/extractor/fr/tags.py
+++ b/src/wiktextract/extractor/fr/tags.py
@@ -34,6 +34,7 @@
"subjonctif": "subjunctive",
"conditionnel": "conditional",
"impératif": "imperative",
+ "volitif": "volitive",
}
VERB_FORM_TAGS: dict[str, Union[str, list[str]]] = {
@@ -62,6 +63,7 @@
"présent": "present",
"passé": "past",
"passé simple": "past",
+ "futur": "future",
"futur simple": "future",
# https://en.wikipedia.org/wiki/Passé_composé
"passé composé": "past multiword-construction",
@@ -75,11 +77,19 @@
}
# https://en.wikipedia.org/wiki/Grammatical_person
-PERSON_TAGS: dict[str, str] = {
+PERSON_TAGS: dict[str, Union[str, list[str]]] = {
"1ᵉ personne": "first-person",
"1ʳᵉ personne": "first-person",
"2ᵉ personne": "second-person",
"3ᵉ personne": "third-person",
+ # Modèle:avk-conj: abbreviated labels giving person and number at once
+ "1ʳᵉ du sing.": ["first-person", "singular"],
+ "2ᵉ du sing.": ["second-person", "singular"],
+ "3ᵉ du sing.": ["third-person", "singular"],
+ "1ʳᵉ du plur.": ["first-person", "plural"],
+ "2ᵉ du plur.": ["second-person", "plural"],
+ "3ᵉ du plur.": ["third-person", "plural"],
+ "4ᵉ du plur.": ["fourth-person", "plural"],
}
SEMANTICS_TAGS: dict[str, str] = {
@@ -143,9 +153,21 @@
"rare": "rare",
"plus rare": "rare",
"familier": "colloquial",
+ "par extension": "broadly",
}
-GRAMMATICAL_TAGS: dict[str, str] = {
+# https://en.wikipedia.org/wiki/Voice_(grammar)
+VOICE_TAGS: dict[str, Union[str, list[str]]] = {
+ # Form kind + voice labels of https://fr.wiktionary.org/wiki/Modèle:eo-conj
+ "participe actif": ["participle", "active"],
+ "participe passif": ["participle", "passive"],
+ "adverbe actif": ["adverb", "active"],
+ "adverbe passif": ["adverb", "passive"],
+ "substantif actif": ["subsuntive", "active"],  # NOTE(review): confirm "subsuntive" spelling
+ "substantif passif": ["subsuntive", "passive"],
+}
+
+GRAMMATICAL_TAGS: dict[str, Union[str, list[str]]] = {
**GENDER_TAGS,
**NUMBER_TAGS,
**MOOD_TAGS,
@@ -160,14 +182,19 @@
**JA_TAGS,
**OTHER_GRAMMATICAL_TAGS,
**SENSE_TAGS,
+ **VOICE_TAGS,
}
-def translate_raw_tags(data: WordEntry) -> WordEntry:
+def translate_raw_tags(
+ data: WordEntry,
+ table_template_name: str = "",
+ tag_dict: dict[str, str] = GRAMMATICAL_TAGS,
+) -> WordEntry:
raw_tags = []
for raw_tag in data.raw_tags:
- if raw_tag.lower() in GRAMMATICAL_TAGS:
- tr_tag = GRAMMATICAL_TAGS[raw_tag.lower()]
+ if raw_tag.lower() in tag_dict:
+ tr_tag = tag_dict[raw_tag.lower()]
if isinstance(tr_tag, str):
data.tags.append(tr_tag)
elif isinstance(tr_tag, list):
@@ -175,4 +202,19 @@ def translate_raw_tags(data: WordEntry) -> WordEntry:
else:
raw_tags.append(raw_tag)
data.raw_tags = raw_tags
+ if table_template_name != "":
+ return convert_table_headers(data, table_template_name)
+ return data
+
+
+def convert_table_headers(data: WordEntry, template_name: str) -> WordEntry:
+ if template_name == "avk-tab-conjug":  # only template with its own header table so far
+ # https://fr.wiktionary.org/wiki/Modèle:avk-tab-conjug
+ tags = {  # raw tag text -> person tag, used for this template only
+ "1": "first-person",
+ "2": "second-person",
+ "3": "third-person",
+ "4": "fourth-person",
+ }
+ return translate_raw_tags(data, tag_dict=tags)  # no template name passed: no further conversion
return data
diff --git a/tests/test_fr_inflection.py b/tests/test_fr_inflection.py
index aab356629..c6990726c 100644
--- a/tests/test_fr_inflection.py
+++ b/tests/test_fr_inflection.py
@@ -580,3 +580,91 @@ def test_br_nom(self):
},
],
)
+
+    def test_avk_tab_conjug(self):
+        """Kotava `avk-tab-conjug`: bare person numbers as row headers.
+
+        The expanded table labels its rows "1"; the expected forms carry
+        "first-person" for them, combined with the number taken from the
+        "Singulier"/"Pluriel" column headers.
+        """
+        page_data = [WordEntry(word="aalar", lang_code="avk", lang="Kotava")]
+        self.wxr.wtp.start_page("aalar")
+        root = self.wxr.wtp.parse("{{avk-tab-conjug|aalá|aala}}")
+        self.wxr.wtp.add_page(
+            "Modèle:avk-tab-conjug",
+            10,
+            """{| class="flextable"
+|-
+| class="titre" colspan="4" align="center" | '''Conjugaison Présent Indicatif'''
+|-
+! Personne
+! Singulier
+! Personne
+! Pluriel
+|-
+! 1
+| [[aalá]]
+! 1
+| [[aalat|aala'''t''']]
+|}""",
+        )
+        extract_inflection(self.wxr, page_data, root.children[0])
+        self.assertEqual(
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
+            [
+                {"form": "aalá", "tags": ["singular", "first-person"]},
+                {"form": "aalat", "tags": ["plural", "first-person"]},
+            ],
+        )
+
+    def test_eo_conj(self):
+        """Esperanto `eo-conj`: multi-line cells and a second header row.
+
+        The two links separated by a line break in one cell are expected
+        as two forms, and the forms under the "Mode" header row take their
+        tags from that row rather than from the "Temps" row above it.
+        """
+        page_data = [
+            WordEntry(word="abdikanta", lang_code="eo", lang="Espéranto")
+        ]
+        self.wxr.wtp.start_page("abdikanta")
+        root = self.wxr.wtp.parse("{{eo-conj|abdik|adp=1|sub=mf|subp=}}")
+        self.wxr.wtp.add_page(
+            "Modèle:eo-conj",
+            10,
+            """{| class="flextable"
+|-
+! Temps
+! Passé
+! Présent
+! Futur
+|-
+!Substantif<br>actif
+| [[abdikinto#eo|abdikinto(j,n)]]<br>[[abdikintino#eo|abdikintino(j,n)]]
+|-
+! Mode
+! Conditionnel
+! Volitif
+! Infinitif
+|-
+! Présent
+| [[abdikus#eo|abdikus]] || [[abdiku#eo|abdiku]]
+|}""",
+        )
+        extract_inflection(self.wxr, page_data, root.children[0])
+        self.assertEqual(
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
+            [
+                {
+                    "form": "abdikinto(j,n)",
+                    "tags": ["past", "subsuntive", "active"],
+                },
+                {
+                    "form": "abdikintino(j,n)",
+                    "tags": ["past", "subsuntive", "active"],
+                },
+                {"form": "abdikus", "tags": ["conditional", "present"]},
+                {"form": "abdiku", "tags": ["volitive", "present"]},
+            ],
+        )