Skip to content

Commit

Permalink
Temp commit
Browse files Browse the repository at this point in the history
  • Loading branch information
kristian-clausal committed Jan 4, 2024
1 parent 8b3fbbb commit 003aab5
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 14 deletions.
11 changes: 6 additions & 5 deletions src/wiktextract/extractor/en/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ def parse_sense_linkage(
w = clean_node(wxr, data, w)
for alias in ns_title_prefix_tuple(wxr, "Thesaurus"):
if w.startswith(alias):
w = w[len(alias):]
w = w[len(alias) :]
break
if not w:
break
Expand Down Expand Up @@ -795,8 +795,7 @@ def merge_base(data: WordData, base: WordData) -> None:
sortid="page/904",
)

def complementary_pop(pron: WordData, key: str
) -> WordData:
def complementary_pop(pron: WordData, key: str) -> WordData:
"""Remove unnecessary keys from dict values
in a list comprehension..."""
if key in pron:
Expand All @@ -807,7 +806,9 @@ def complementary_pop(pron: WordData, key: str
# does not match "word" or one of "forms"
if "sounds" in data and "word" in data:
accepted = [data["word"]]
accepted.extend(f["form"] for f in data.get("forms", dict())) # type:ignore
accepted.extend(
f["form"] for f in data.get("forms", dict()) # type:ignore
)
data["sounds"] = list(
s
for s in data["sounds"] # type:ignore
Expand All @@ -822,7 +823,7 @@ def complementary_pop(pron: WordData, key: str
if "pos" not in s or s["pos"] == data["pos"] # type:ignore
)

def push_sense():
def push_sense() -> bool:
"""Starts collecting data for a new word sense. This returns True
if a sense was added."""
nonlocal sense_data
Expand Down
60 changes: 51 additions & 9 deletions src/wiktextract/type_utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,56 @@
from typing import (
TypedDict,
Union,
)


# Loose recursive alias for word data: string keys mapped to a string, an
# int, a list of strings, a list of string lists, a nested WordData mapping,
# or a list of nested WordData mappings.
WordData = dict[str, Union[
str,
int,
list[str],
list[list[str]],
"WordData",
list["WordData"]
]
]
# WordData = dict[str, Union[
# str,
# int,
# list[str],
# list[list[str]],
# "WordData",
# list["WordData"]
# ]
# ]

class WordData(TypedDict, total=False):
    """Typed dictionary describing the data collected for one word entry.

    ``total=False`` makes every key optional, so a partially filled
    dictionary still satisfies the type.

    The element types of the structured lists (``Sense``, ``Sound``,
    ``Form``, ...) are written as string forward references: this module
    neither imports nor defines those names, and annotations in a class
    body are evaluated eagerly, so the bare names would raise ``NameError``
    when the module is imported.  Quoting them keeps the module importable
    until the element types are declared.
    """

    # Plain scalar fields.
    word: str
    etymology_number: int
    etymology_text: str
    hyphenation: str
    lang: str
    lang_code: str
    original_title: str
    pos: str
    source: str

    # Lists of plain strings and of external-reference records.
    categories: list[str]
    redirects: list[str]
    topics: list[str]
    wikidata: list["WikidataReference"]
    wikipedia: list["WikipediaReference"]

    # Lists of structured records; each element type is a forward reference.
    abbreviations: list["Abbreviation"]
    alt_of: list["Alt"]
    # NOTE(review): singular "antonym" while sibling keys such as "synonyms"
    # and "hypernyms" are plural -- confirm this is the intended key name.
    antonym: list["Antonym"]
    coordinate_terms: list["CoordinateTerm"]
    derived: list["Derived"]
    descendants: list["Descendant"]
    etymology_templates: list["EtymologyTemplate"]
    form_of: list["OriginalForm"]
    forms: list["Form"]
    head_templates: list["HeadTemplate"]
    holonyms: list["Holonym"]
    hypernyms: list["Hypernym"]
    hyponyms: list["Hyponym"]
    inflection_templates: list["InflectionTemplate"]
    instances: list["Instance"]
    meronyms: list["Meronym"]
    proverbs: list["Proverb"]
    related: list["Related"]
    senses: list["Sense"]
    sounds: list["Sound"]
    synonyms: list["Synonym"]
    translations: list["Translation"]
    troponyms: list["Troponym"]

0 comments on commit 003aab5

Please sign in to comment.