From e3af868a3b7dd1d190b6a4eb58e9e9c331d05f5a Mon Sep 17 00:00:00 2001 From: Empiriker Date: Thu, 5 Oct 2023 09:45:11 +0300 Subject: [PATCH] Test German parse_section() with at least some sense data --- tests/test_de_page.py | 93 ++++--------------------------------------- 1 file changed, 8 insertions(+), 85 deletions(-) diff --git a/tests/test_de_page.py b/tests/test_de_page.py index 0de6e4d9..4451898c 100644 --- a/tests/test_de_page.py +++ b/tests/test_de_page.py @@ -1,7 +1,6 @@ # Tests for parsing a page from the German Wiktionary import unittest -from unittest.mock import patch from collections import defaultdict @@ -81,57 +80,10 @@ def test_de_parse_page_skipping_head_templates(self): ], ) - def mock_append_base_data_side_effects( - self, page_data, field: str, value, base_data - ) -> None: - import copy - - if page_data[-1].get(field) is not None: - if len(page_data[-1]["senses"]) > 0: - # append new dictionary if the last dictionary has sense data and - # also has the same key - page_data.append(copy.deepcopy(base_data)) - elif isinstance(page_data[-1].get(field), list): - page_data[-1][field] += value - else: - page_data.append(copy.deepcopy(base_data)) - - else: - page_data[-1][field] = value - - @patch("wiktextract.extractor.de.page.append_base_data") - def test_de_parse_section_with_mock(self, mock_append_base_data): - mock_append_base_data.side_effect = ( - self.mock_append_base_data_side_effects - ) - - self.wxr.wtp.add_page("Vorlage:Wortart", 10, "") - page_text = """ -=== {{Wortart|Adjektiv|Englisch}}, {{Wortart|Adverb|Englisch}} === -=== {{Wortart|Verb|Englisch}} === -=== {{Wortart|Substantiv|Englisch}} === -""" - self.wxr.wtp.start_page("") - root = self.wxr.wtp.parse( - page_text, - pre_expand=True, - ) - - base_data = defaultdict(list, {"lang_code": "de"}) - page_data = [defaultdict(list, {"lang_code": "de"})] - parse_section(self.wxr, page_data, base_data, root.children) - - self.assertEqual( - page_data, - [ - {"lang_code": "de", "pos": "adj", "senses": []}, - {"lang_code": "de", "pos": "adv", "senses": []}, - {"lang_code": "de", "pos": "verb", "senses": []}, - {"lang_code": "de", "pos": "noun"}, - ], - ) - - def test_de_parse_section_with_senses(self): + # The way append_base_data() works requires the presence of a sense + # dictionary before starting a new pos section. Therefore, we need to add + # at least one sense data point to the test case. + def test_de_parse_section(self): self.wxr.wtp.add_page("Vorlage:Wortart", 10, "") self.wxr.wtp.add_page("Vorlage:Bedeutungen", 10, "") page_text = """ @@ -140,10 +92,10 @@ def test_de_parse_section_with_senses(self): :[1] gloss1 === {{Wortart|Verb|Englisch}} === {{Bedeutungen}} -:[1] gloss1 +:[1] gloss2 === {{Wortart|Substantiv|Englisch}} === {{Bedeutungen}} -:[1] gloss1 +:[1] gloss3 """ self.wxr.wtp.start_page("") @@ -182,7 +134,7 @@ def test_de_parse_section_with_senses(self): "pos": "verb", "senses": [ { - "glosses": ["gloss1"], + "glosses": ["gloss2"], }, ], }, @@ -191,42 +143,13 @@ def test_de_parse_section_with_senses(self): "pos": "noun", "senses": [ { - "glosses": ["gloss1"], + "glosses": ["gloss3"], }, ], }, ], ) - def test_de_parse_section_without_mock(self): - self.wxr.wtp.add_page("Vorlage:Wortart", 10, "") - page_text = """ -=== {{Wortart|Adjektiv|Englisch}}, {{Wortart|Adverb|Englisch}} === -=== {{Wortart|Verb|Englisch}} === -=== {{Wortart|Substantiv|Englisch}} === -""" - self.wxr.wtp.start_page("") - root = self.wxr.wtp.parse( - page_text, - pre_expand=True, - ) - - base_data = defaultdict(list, {"lang_code": "de"}) - page_data = [defaultdict(list, {"lang_code": "de"})] - parse_section(self.wxr, page_data, base_data, root.children) - - self.assertEqual( - page_data, - [ - {"lang_code": "de", "pos": "adj", "senses": []}, - {"lang_code": "de", "pos": "adv", "senses": []}, - {"lang_code": "de", "pos": "verb", "senses": []}, - {"lang_code": "de", "pos": "noun"}, - ], - ) - # Assertion fails. The resulting page_data is just: - # [{'lang_code': 'de', 'pos': 'adj', 'senses': []}] - def test_de_fix_level_hierarchy_of_subsections(self): self.wxr.wtp.add_page("Vorlage:Englisch Substantiv Übersicht", 10, "") self.wxr.wtp.add_page("Vorlage:Worttrennung", 10, "")