Skip to content

Commit

Permalink
Fix index out of range error in fr/pronunciation.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
xxyzz committed Nov 29, 2023
1 parent 201c4d5 commit b8ff771
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 11 deletions.
2 changes: 1 addition & 1 deletion src/wiktextract/extractor/fr/page.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -72,7 +72,7 @@ def parse_section(
wxr.config.capture_pronunciation
and section_type in wxr.config.OTHER_SUBTITLES["pronunciation"]
):
extract_pronunciation(wxr, page_data, level_node)
extract_pronunciation(wxr, page_data, level_node, base_data)
elif (
wxr.config.capture_linkages
and section_type in wxr.config.LINKAGE_SUBTITLES
Expand Down
16 changes: 11 additions & 5 deletions src/wiktextract/extractor/fr/pronunciation.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,19 +10,25 @@


def extract_pronunciation(
wxr: WiktextractContext, page_data: List[Dict], level_node: WikiNode
wxr: WiktextractContext,
page_data: List[Dict],
level_node: WikiNode,
base_data: Dict[str, str],
) -> None:
sound_data = []
lang_code = base_data.get("lang_code")
for list_node in level_node.find_child(NodeKind.LIST):
for list_item_node in list_node.find_child(NodeKind.LIST_ITEM):
sound_data.extend(
process_pron_list_item(
wxr, list_item_node, page_data, defaultdict(list)
wxr, list_item_node, defaultdict(list), lang_code
)
)

if len(sound_data) == 0:
return
if len(page_data) == 0:
page_data.append(deepcopy(base_data))

if level_node.kind == NodeKind.LEVEL3:
# Add extracted sound data to all sense dictionaries that have the same
Expand Down Expand Up @@ -53,10 +59,10 @@ def extract_pronunciation(
def process_pron_list_item(
wxr: WiktextractContext,
list_item_node: WikiNode,
page_data: List[Dict],
sound_data: Dict[str, Union[str, List[str]]],
lang_code: str,
) -> List[Dict[str, Union[str, List[str]]]]:
pron_key = "zh-pron" if page_data[-1].get("lang_code") == "zh" else "ipa"
pron_key = "zh-pron" if lang_code == "zh" else "ipa"

for template_node in list_item_node.find_child(NodeKind.TEMPLATE):
if template_node.template_name in PRON_TEMPLATES:
Expand All @@ -81,7 +87,7 @@ def process_pron_list_item(
):
new_sound_data = deepcopy(sound_data)
process_pron_list_item(
wxr, nest_list_item, page_data, new_sound_data
wxr, nest_list_item, new_sound_data, lang_code
)
if pron_key in new_sound_data:
returned_data.append(new_sound_data)
Expand Down
17 changes: 12 additions & 5 deletions tests/test_fr_pronunciation.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_pron_list(self):
root = self.wxr.wtp.parse(
"=== Prononciation ===\n* {{pron|bɔ̃.ʒuʁ|fr}}\n** {{écouter|France (Paris)|bõ.ʒuːʁ|audio=Fr-bonjour.ogg|lang=fr}}"
)
extract_pronunciation(self.wxr, page_data, root.children[0])
extract_pronunciation(self.wxr, page_data, root.children[0], {})
self.assertEqual(
page_data,
[
Expand Down Expand Up @@ -60,13 +60,18 @@ def test_pron_list(self):
)

def test_str_pron(self):
page_data = [defaultdict(list, {"lang_code": "zh"})]
page_data = []
self.wxr.wtp.add_page("Modèle:Yale-zh", 10, body="Yale")
self.wxr.wtp.start_page("")
root = self.wxr.wtp.parse(
"=== {{S|prononciation}} ===\n* '''cantonais''' {{pron||yue}}\n** {{Yale-zh}} : nei⁵hou²"
)
extract_pronunciation(self.wxr, page_data, root.children[0])
extract_pronunciation(
self.wxr,
page_data,
root.children[0],
defaultdict(list, {"lang_code": "zh"}),
)
self.assertEqual(
page_data[0].get("sounds"),
[{"tags": ["cantonais", "Yale"], "zh-pron": "nei⁵hou²"}],
Expand All @@ -78,14 +83,16 @@ def test_no_ipa(self):
files.
Test wikitext from https://fr.wiktionary.org/wiki/mars
"""
page_data = [defaultdict(list)]
page_data = []
self.wxr.wtp.start_page("")
root = self.wxr.wtp.parse(
"""=== {{S|prononciation}} ===
{{ébauche-pron|sv}}
* {{écouter|lang=sv|Suède||audio=LL-Q9027 (swe)-Moonhouse-mars.wav}}"""
)
extract_pronunciation(self.wxr, page_data, root.children[0])
extract_pronunciation(
self.wxr, page_data, root.children[0], defaultdict(list)
)
self.assertEqual(
page_data[0].get("sounds"),
[
Expand Down

0 comments on commit b8ff771

Please sign in to comment.