Skip to content

Commit

Permalink
Merge branch 'master' into wiktextract-typing
Browse files: browse the repository at this point in the history
  • Loading branch information
kristian-clausal committed Jan 2, 2024
2 parents befd5d8 + 3b6e0af commit 8c87081
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 6 deletions.
9 changes: 6 additions & 3 deletions src/wiktextract/extractor/en/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -3024,9 +3024,12 @@ def skip_template_fn(name, ht):
stack.pop()

if len(redirect_list) > 0:
new_page_data = base_data.copy()
new_page_data["redirects"] = redirect_list
page_datas.append(new_page_data)
if len(pos_data) > 0:
pos_data["redirects"] = redirect_list
else:
new_page_data = base_data.copy()
new_page_data["redirects"] = redirect_list
page_datas.append(new_page_data)

def extract_examples(others, sense_base):
"""Parses through a list of definitions and quotes to find examples.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_long.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ def test_long(self):
words[word] += 1
lang = data.get("lang", "")
self.assertGreater(len(lang), 0)
# a redirect-only Chinese character entry may not have a pos
pos = data.get("pos", "")
self.assertGreater(len(pos), 0)
langs[lang] += 1
poses[pos] += 1
if data.get("translations"):
Expand Down
30 changes: 28 additions & 2 deletions tests/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@
#
# Copyright (c) 2021 Tatu Ylonen. See file LICENSE and https://ylonen.org

import json
import unittest
from unittest.mock import patch

from wikitextprocessor import Page, Wtp

from wiktextract.config import WiktionaryConfig
from wiktextract.page import parse_page
from wiktextract.thesaurus import close_thesaurus_db
Expand Down Expand Up @@ -494,6 +492,34 @@ def test_zh_see(self, mock_get_page):
}
],
)
data = parse_page(
self.wxr,
"车",
"""
==Chinese==
===Glyph origin===
From cursive script of 車.
===Definitions===
{{zh-see|車|s}}
{{zh-see|龺|ss}}
""",
)
self.assertEqual(
data,
[
{
"etymology_templates": [],
"etymology_text": "From cursive script of 車.",
"lang": "Chinese",
"lang_code": "zh",
"redirects": ["車", "龺"],
"word": "车",
"pos": "character",
"senses": [{"tags": ["no-gloss"]}],
}
],
)

@patch(
"wikitextprocessor.Wtp.get_page",
Expand Down

0 comments on commit 8c87081

Please sign in to comment.