Skip to content

Commit

Permalink
Extract span node's title attribute from expanded qualifier template
Browse files: browse the repository at this point in the history
  • Loading branch information
xxyzz committed Jan 30, 2024
1 parent 19f65b4 commit ef6ac38
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
16 changes: 13 additions & 3 deletions src/wiktextract/extractor/zh/translation.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -121,9 +121,19 @@ def process_translation_list_item(
continue
else:
# qualifier template
tag = clean_node(wxr, None, child)
if len(tag) > 0:
tr_data.tags.append(tag.strip("()"))
expanded_template = wxr.wtp.parse(
wxr.wtp.node_to_wikitext(child), expand_all=True
)
find_title = False
for span_node in expanded_template.find_html("span"):
tag = span_node.attrs.get("title", "")
if len(tag) > 0:
tr_data.tags.append(tag.strip())
find_title = True
if not find_title:
tag = clean_node(wxr, None, child)
if len(tag) > 0:
tr_data.tags.append(tag.strip("()"))
elif isinstance(child, WikiNode) and child.kind == NodeKind.LINK:
if len(tr_data.word) > 0:
page_data[-1].translations.append(tr_data.model_copy(deep=True))
Expand Down
21 changes: 20 additions & 1 deletion tests/test_zh_translation.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -180,8 +180,21 @@ def test_language_name_template(self):

def test_l_template(self):
self.wxr.wtp.start_page("茄子")
self.wxr.wtp.add_page("Template:cs", 10, "捷克语")
self.wxr.wtp.add_page(
"Template:l",
10,
"""<span>{{{2}}}</span>
{{#if:{{{g|}}}|<span class="gender"><abbr title="陽性名詞">m</abbr></span>}}""",
)
self.wxr.wtp.add_page(
"Template:口", 10, '<span title="口语词汇">口</span>〉'
)
page_data = [WordEntry(word="茄子", lang_code="zh", lang="漢語")]
node = self.wxr.wtp.parse("* 南非語: {{l|af|eiervrug}}")
node = self.wxr.wtp.parse(
"""* 南非語: {{l|af|eiervrug}}
* {{cs}}: {{l|cs|patližán|g=m}} {{口}}"""
)
extract_translation(self.wxr, page_data, node)
self.assertEqual(
[
Expand All @@ -194,5 +207,11 @@ def test_l_template(self):
"lang": "南非語",
"word": "eiervrug",
},
{
"lang_code": "cs",
"lang": "捷克语",
"word": "patližán",
"tags": ["陽性名詞", "口语词汇"],
},
],
)

0 comments on commit ef6ac38

Please sign in to comment.