Skip to content

Commit

Permalink
Merge pull request #889 from tatuylonen/invalid-tags
Browse files: browse the repository at this point in the history
[en] Split tag validation error into two
  • Loading branch information
xxyzz authored Oct 28, 2024
2 parents f61ff83 + 945ca9e commit 7675eda
Showing 1 changed file with 26 additions and 9 deletions.
35 changes: 26 additions & 9 deletions src/wiktextract/wiktionary.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -178,8 +178,13 @@ def check_error(
lang: str | None,
pos: str | None,
msg: str,
called_from: str | None = None,
) -> None:
"""Formats and outputs an error message about data format checks."""
if called_from is None:
called_from = "wiktionary/179/20240425"
else:
called_from = "wiktionary/179/20240425" + called_from
msg += ": " + json.dumps(dt, sort_keys=True, ensure_ascii=False)
prefix = word or ""
if lang:
Expand All @@ -198,7 +203,7 @@ def check_error(
"title": word,
"section": lang,
"subsection": pos,
"called_from": "wiktionary/179/20240425",
"called_from": called_from,
"path": tuple(),
}
config.debugs.append(error_data)
Expand Down Expand Up @@ -248,14 +253,26 @@ def check_tags(
from .tags import uppercase_tags, valid_tags

if tag not in valid_tags and tag not in uppercase_tags:
check_error(
wxr,
dt,
word,
lang,
pos,
f"invalid tag {tag} not in valid_tags(or uppercase_tags)",
)
if len(tag) > 0 and tag[0].isupper():
check_error(
wxr,
dt,
word,
lang,
pos,
f"invalid uppercase tag {tag} not in or uppercase_tags",
called_from="uppercase_tags",
)
else:
check_error(
wxr,
dt,
word,
lang,
pos,
f"invalid tag {tag} not in valid_tags "
"or uppercase_tags",
)


def check_str_fields(
Expand Down

0 comments on commit 7675eda

Please sign in to comment.