Skip to content

Commit

Permalink
[nl] extract "-nlverb-reflex-" table template
Browse files (browse the repository at this point in the history)
  • Loading branch information
xxyzz committed Nov 19, 2024
1 parent d6317fe commit fb4c28a
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 2 deletions.
20 changes: 18 additions & 2 deletions src/wiktextract/extractor/nl/inflection.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def extract_vervoeging_page(
return
root = wxr.wtp.parse(page.body)
for t_node in root.find_child(NodeKind.TEMPLATE):
if t_node.template_name == "-nlverb-":
if t_node.template_name in ["-nlverb-", "-nlverb-reflex-"]:
extract_nlverb_template(wxr, word_entry, t_node)


Expand All @@ -113,13 +113,15 @@ class TableHeader:
"vervoeging van de bedrijvende vorm van": ["active"],
"onpersoonlijke lijdende vorm": ["impersonal", "passive"],
"lijdende vorm": ["passive"],
"vervoeging van het Nederlandse werkwoord": [],
}


def extract_nlverb_template(
wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
# https://nl.wiktionary.org/wiki/Sjabloon:-nlverb-
# Sjabloon:-nlverb-reflex-
expanded_node = wxr.wtp.parse(
wxr.wtp.node_to_wikitext(t_node), expand_all=True
)
Expand Down Expand Up @@ -154,6 +156,7 @@ def extract_nlverb_template(
col_headers.clear()
row_headers.clear()

small_tag = ""
is_row_first_node = True
for cell_node in row_node.find_child(
NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL
Expand Down Expand Up @@ -181,7 +184,10 @@ def extract_nlverb_template(
break
else:
if current_row_all_header:
if is_row_first_node:
if (
is_row_first_node
and t_node.template_name == "-nlverb-"
):
shared_raw_tags.append(cell_str)
else:
col_headers.append(
Expand All @@ -208,12 +214,22 @@ def extract_nlverb_template(
)
)
else:
has_small_tag = False
for small_node in cell_node.find_html("small"):
has_small_tag = True
if has_small_tag:
small_tag = cell_str
col_index += cell_colspan
continue
form = Form(
form=cell_str,
tags=shared_tags,
raw_tags=shared_raw_tags,
source=f"{wxr.wtp.title}/vervoeging",
)
if small_tag != "":
form.raw_tags.append(small_tag)
small_tag = ""
for row_header in row_headers:
if (
row_index >= row_header.row_index
Expand Down
8 changes: 8 additions & 0 deletions src/wiktextract/extractor/nl/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,14 @@
# Sjabloon:-denoun-
"datief": "dative",
"accusatief": "accusative",
# Sjabloon:-nlverb-reflex-
"tegenwoordige tijd": "present",
"verleden tijd": "past",
"toekomende tijd": "future",
"1": "first-person",
"2": "second-person",
"3": "third-person",
"voltooide tijd": "past",
}


Expand Down

0 comments on commit fb4c28a

Please sign in to comment.