Skip to content

Commit

Permalink
Remove boilerplate from pydantic models and avoid append_base_data()
Browse files: browse the repository at this point in the history
This work is a contribution to the EWOK project, which receives funding from LABEX ASLAN (ANR-10-LABX-0081) at the Université de Lyon, as part of the "Investissements d'Avenir" program initiated and overseen by the Agence Nationale de la Recherche (ANR) in France.
  • Loading branch information
empiriker committed Nov 29, 2023
1 parent 2d10d32 commit cbc62fe
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 26 deletions.
32 changes: 16 additions & 16 deletions src/wiktextract/extractor/es/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,22 @@ def debug(
class BaseModelWrap(BaseModel):
model_config = ConfigDict(validate_assignment=True)

def update(self, data: dict):
for k, v in data.items():
setattr(self, k, v)
return self

def get(self, key: str, _=None):
return getattr(self, key)

def __getitem__(self, item):
return getattr(self, item)

def __setitem__(self, item, value):
try:
setattr(self, item, value)
except ValidationError:
pass
# def update(self, data: dict):
# for k, v in data.items():
# setattr(self, k, v)
# return self

# def get(self, key: str, _=None):
# return getattr(self, key)

# def __getitem__(self, item):
# return getattr(self, item)

# def __setitem__(self, item, value):
# try:
# setattr(self, item, value)
# except ValidationError:
# pass


class LoggingExtraFieldsModel(BaseModelWrap):
Expand Down
17 changes: 7 additions & 10 deletions src/wiktextract/extractor/es/page.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import copy
import logging
from collections import defaultdict
from typing import Dict, List

from wikitextprocessor import NodeKind, WikiNode

from wiktextract.datautils import append_base_data
from wiktextract.extractor.es.gloss import extract_gloss
from wiktextract.extractor.es.models import PydanticLogger, WordEntry
from wiktextract.extractor.es.pronunciation import extract_pronunciation
Expand All @@ -32,7 +30,7 @@ def parse_section(
level_node: WikiNode,
) -> None:
# Page Structure: https://es.wiktionary.org/wiki/Wikcionario:Estructura
subtitle = clean_node(wxr, page_data[-1], level_node.largs)
subtitle = clean_node(wxr, base_data, level_node.largs)
wxr.wtp.start_subsection(subtitle)

pos_template_name = None
Expand Down Expand Up @@ -62,7 +60,8 @@ def process_pos_block(
pos_title: str,
):
pos_type = wxr.config.POS_SUBTITLES[pos_template_name]["pos"]
append_base_data(page_data, "pos", pos_type, base_data)
page_data.append(copy.deepcopy(base_data))
page_data[-1].pos = pos_type
page_data[-1].pos_title = pos_title
child_nodes = list(pos_level_node.filter_empty_str_child())

Expand Down Expand Up @@ -115,23 +114,21 @@ def parse_page(
# https://es.wiktionary.org/wiki/Plantilla:lengua
# https://es.wiktionary.org/wiki/Apéndice:Códigos_de_idioma
if subtitle_template.template_name == "lengua":
categories_and_links = defaultdict(list)
categories = {"categories": []}
lang_code = subtitle_template.template_parameters.get(1)
if (
wxr.config.capture_language_codes is not None
and lang_code not in wxr.config.capture_language_codes
):
continue

lang_name = clean_node(
wxr, categories_and_links, subtitle_template
)
lang_name = clean_node(wxr, categories, subtitle_template)
wxr.wtp.start_section(lang_name)
base_data = WordEntry(
lang_name=lang_name, lang_code=lang_code, word=wxr.wtp.title
)
base_data.update(categories_and_links)
page_data.append(copy.deepcopy(base_data))
base_data.categories.extend(categories["categories"])
# page_data.append(copy.deepcopy(base_data))
for level3_node in level2_node.find_child(NodeKind.LEVEL3):
parse_section(wxr, page_data, base_data, level3_node)

Expand Down

0 comments on commit cbc62fe

Please sign in to comment.