Skip to content

Commit

Permalink
Merge pull request #497 from xxyzz/fr
Browse files Browse the repository at this point in the history
Process forms table expanded from "fr-verbe-flexion" template
  • Loading branch information
xxyzz authored Feb 8, 2024
2 parents 5c4c72d + fdaccad commit 0264748
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 2 deletions.
26 changes: 24 additions & 2 deletions src/wiktextract/extractor/fr/inflection.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,21 @@ class ColspanHeader:
span: int


def table_data_cell_is_header(
    wxr: WiktextractContext, cell_node: WikiNode, page_title: str
) -> bool:
    """Tell whether a table data cell should be treated as a header cell.

    A data cell counts as a header when the first child yielded by
    ``cell_node.filter_empty_str_child()`` is a bold node whose cleaned
    text is different from ``page_title``.

    Args:
        wxr: Context object forwarded to ``clean_node``.
        cell_node: The table cell node to inspect.
        page_title: Title of the page being processed; bold text equal to
            it does not make the cell a header.
            NOTE(review): presumably because bold text matching the title
            is a word form rather than a label - confirm.

    Returns:
        ``True`` if the cell looks like a header, ``False`` otherwise
        (including when the node is not a ``TABLE_CELL`` or has no
        children to inspect).
    """
    if cell_node.kind != NodeKind.TABLE_CELL:
        return False

    # Only the very first child decides the outcome.
    first_child = next(iter(cell_node.filter_empty_str_child()), None)
    if not isinstance(first_child, WikiNode):
        return False
    if first_child.kind != NodeKind.BOLD:
        return False
    return clean_node(wxr, None, first_child) != page_title


def process_inflection_table(
wxr: WiktextractContext,
page_data: list[WordEntry],
Expand Down Expand Up @@ -83,7 +98,9 @@ def process_inflection_table(
and "invisible" not in row_node_child.attrs.get("class", "")
]
current_row_has_data_cell = any(
isinstance(cell, WikiNode) and cell.kind == NodeKind.TABLE_CELL
isinstance(cell, WikiNode)
and cell.kind == NodeKind.TABLE_CELL
and not table_data_cell_is_header(wxr, cell, page_data[-1].word)
for cell in table_row_nodes
)
row_headers = []
Expand All @@ -98,7 +115,12 @@ def process_inflection_table(
for column_num, table_cell in enumerate(table_row_nodes):
form_data = Form()
if isinstance(table_cell, WikiNode):
if table_cell.kind == NodeKind.TABLE_HEADER_CELL:
if (
table_cell.kind == NodeKind.TABLE_HEADER_CELL
or table_data_cell_is_header(
wxr, table_cell, page_data[-1].word
)
):
if any(
table_cell.find_html(
"span",
Expand Down
32 changes: 32 additions & 0 deletions tests/test_fr_inflection.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,3 +466,35 @@ def test_en_adj(self):
},
],
)

def test_fr_verbe_flexion(self):
    """Forms are extracted from a table produced by "fr-verbe-flexion".

    The template table puts its row labels in bold data cells ("|")
    rather than header cells ("!"); the test expects those labels to end
    up as tags on every extracted form.
    """
    # Wikitext returned by the mocked "fr-verbe-flexion" template.
    template_text = """{|
!colspan="3"|Voir la conjugaison du verbe ''diéser''
|-
|rowspan="2" | '''Indicatif'''
|rowspan="2" | '''Présent'''
| je <nowiki />dièse
|-
| il/elle/on dièse
|}"""
    expected_forms = [
        {"form": form_text, "tags": ["Indicatif", "Présent"]}
        for form_text in ("je dièse", "il/elle/on dièse")
    ]

    page_data = [WordEntry(word="dièse", lang_code="fr", lang="Français")]
    self.wxr.wtp.start_page("dièse")
    parsed_root = self.wxr.wtp.parse("{{fr-verbe-flexion|diéser}}")
    self.wxr.wtp.add_page("Modèle:fr-verbe-flexion", 10, template_text)
    extract_inflection(self.wxr, page_data, parsed_root.children[0])

    extracted_forms = [
        form.model_dump(exclude_defaults=True)
        for form in page_data[-1].forms
    ]
    self.assertEqual(extracted_forms, expected_forms)

0 comments on commit 0264748

Please sign in to comment.