From 34712de767e9d5516f54080e677925adec9d4c43 Mon Sep 17 00:00:00 2001 From: xxyzz Date: Wed, 27 Dec 2023 15:24:10 +0800 Subject: [PATCH] =?UTF-8?q?Extract=20links=20in=20"D=C3=A9riv=C3=A9s=20dan?= =?UTF-8?q?s=20d=E2=80=99autres=20langues"=20section?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Links also could be used in lists of this block. --- src/wiktextract/extractor/fr/linkage.py | 14 ++++++++------ tests/test_fr_linkage.py | 18 ++++++++++++++++-- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/wiktextract/extractor/fr/linkage.py b/src/wiktextract/extractor/fr/linkage.py index 81fdd981..2494da81 100644 --- a/src/wiktextract/extractor/fr/linkage.py +++ b/src/wiktextract/extractor/fr/linkage.py @@ -33,12 +33,14 @@ def process_derives_autres_list( for list_item in level_node.find_child_recursively(NodeKind.LIST_ITEM): lang_code = "" lang_name = "" - for template_node in list_item.find_child(NodeKind.TEMPLATE): - if template_node.template_name == "L": - lang_code = template_node.template_parameters.get(1) - lang_name = clean_node(wxr, None, template_node) - elif template_node.template_name == "lien": - word = clean_node(wxr, None, template_node) + for node in list_item.find_child(NodeKind.TEMPLATE | NodeKind.LINK): + if isinstance(node, TemplateNode) and node.template_name == "L": + lang_code = node.template_parameters.get(1) + lang_name = clean_node(wxr, None, node) + elif node.kind == NodeKind.LINK or ( + isinstance(node, TemplateNode) and node.template_name == "lien" + ): + word = clean_node(wxr, None, node) page_data[-1].derived.append( Linkage(lang_code=lang_code, lang_name=lang_name, word=word) ) diff --git a/tests/test_fr_linkage.py b/tests/test_fr_linkage.py index da2aadb2..bb2f57c3 100644 --- a/tests/test_fr_linkage.py +++ b/tests/test_fr_linkage.py @@ -143,14 +143,23 @@ def test_sense(self): def test_derives_autres_langues_section(self): # https://fr.wiktionary.org/wiki/eau#Dérivés_dans_d’autres_langues + # https://fr.wiktionary.org/wiki/caligineux#Dérivés_dans_d’autres_langues self.wxr.wtp.add_page("Modèle:lien", 10, body="{{{1}}}") - self.wxr.wtp.add_page("Modèle:L", 10, body="Karipúna") + self.wxr.wtp.add_page( + "Modèle:L", + 10, + body="""{{#switch: {{{1}}} +| kmv = Karipúna +| en = Anglais +}}""", + ) page_data = [ WordEntry(word="test", lang_code="fr", lang_name="Français") ] self.wxr.wtp.start_page("eau") root = self.wxr.wtp.parse( - "* {{L|kmv}} : {{lien|dlo|kmv}}, {{lien|djilo|kmv}}" + """* {{L|kmv}} : {{lien|dlo|kmv}}, {{lien|djilo|kmv}} +* {{L|en}} : [[caliginous#en|caliginous]]""" ) extract_linkage(self.wxr, page_data, root, "dérivés autres langues") self.assertEqual( @@ -169,6 +178,11 @@ def test_derives_autres_langues_section(self): "lang_code": "kmv", "lang_name": "Karipúna", }, + { + "word": "caliginous", + "lang_code": "en", + "lang_name": "Anglais", + }, ], )