Skip to content

Commit

Permalink
Test German parse_section() with at least some sense data
Browse files Browse the repository at this point in the history
  • Loading branch information
empiriker committed Oct 5, 2023
1 parent e6d3bfa commit e3af868
Showing 1 changed file with 8 additions and 85 deletions.
93 changes: 8 additions & 85 deletions tests/test_de_page.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Tests for parsing a page from the German Wiktionary

import unittest
from unittest.mock import patch

from collections import defaultdict

Expand Down Expand Up @@ -81,57 +80,10 @@ def test_de_parse_page_skipping_head_templates(self):
],
)

def mock_append_base_data_side_effects(
    self, page_data, field: str, value, base_data
) -> None:
    """Side effect installed on the patched ``append_base_data`` mock.

    Works on the last dictionary of ``page_data``:

    * ``field`` is unset (``.get`` returns ``None``): store ``value``.
    * ``field`` is set, the entry has no senses, and the stored value
      is a list: extend that list with ``value``.
    * otherwise: append a deep copy of ``base_data`` as a new entry.

    The list is mutated in place; nothing is returned.
    """
    import copy

    last_entry = page_data[-1]
    current = last_entry.get(field)

    if current is None:
        last_entry[field] = value
        return

    # Index "senses" directly instead of using .get(): when the entry is a
    # defaultdict(list) this lookup creates an empty "senses" list on it.
    has_senses = len(last_entry["senses"]) > 0

    if not has_senses and isinstance(current, list):
        last_entry[field] += value
        return

    # Either the entry already carries sense data, or the field holds a
    # non-list value: start a fresh entry from the base data.
    page_data.append(copy.deepcopy(base_data))

@patch("wiktextract.extractor.de.page.append_base_data")
def test_de_parse_section_with_mock(self, mock_append_base_data):
    """Run parse_section() with append_base_data() replaced by the mock.

    The page consists of three headings without any sense data; the
    first heading carries two Wortart templates. The test expects one
    entry per template: adj, adv, verb and noun.
    """
    mock_append_base_data.side_effect = (
        self.mock_append_base_data_side_effects
    )

    self.wxr.wtp.add_page("Vorlage:Wortart", 10, "")
    page_text = (
        "\n"
        "=== {{Wortart|Adjektiv|Englisch}}, {{Wortart|Adverb|Englisch}} ===\n"
        "=== {{Wortart|Verb|Englisch}} ===\n"
        "=== {{Wortart|Substantiv|Englisch}} ===\n"
    )
    self.wxr.wtp.start_page("")
    root = self.wxr.wtp.parse(page_text, pre_expand=True)

    base_data = defaultdict(list, {"lang_code": "de"})
    page_data = [defaultdict(list, {"lang_code": "de"})]
    parse_section(self.wxr, page_data, base_data, root.children)

    # All but the last entry are expected to carry an empty "senses" list.
    expected = [
        {"lang_code": "de", "pos": pos, "senses": []}
        for pos in ("adj", "adv", "verb")
    ]
    expected.append({"lang_code": "de", "pos": "noun"})

    self.assertEqual(page_data, expected)

def test_de_parse_section_with_senses(self):
# The way append_base_data() works requires the presence of a sense
# dictionary before starting a new pos section. Therefore, we need to add
# at least one sense data point to the test case.
def test_de_parse_section(self):
self.wxr.wtp.add_page("Vorlage:Wortart", 10, "")
self.wxr.wtp.add_page("Vorlage:Bedeutungen", 10, "")
page_text = """
Expand All @@ -140,10 +92,10 @@ def test_de_parse_section_with_senses(self):
:[1] gloss1
=== {{Wortart|Verb|Englisch}} ===
{{Bedeutungen}}
:[1] gloss1
:[1] gloss2
=== {{Wortart|Substantiv|Englisch}} ===
{{Bedeutungen}}
:[1] gloss1
:[1] gloss3
"""
self.wxr.wtp.start_page("")
Expand Down Expand Up @@ -182,7 +134,7 @@ def test_de_parse_section_with_senses(self):
"pos": "verb",
"senses": [
{
"glosses": ["gloss1"],
"glosses": ["gloss2"],
},
],
},
Expand All @@ -191,42 +143,13 @@ def test_de_parse_section_with_senses(self):
"pos": "noun",
"senses": [
{
"glosses": ["gloss1"],
"glosses": ["gloss3"],
},
],
},
],
)

@unittest.expectedFailure
def test_de_parse_section_without_mock(self):
    """Run parse_section() on headings that have no sense data.

    Known failure, hence ``expectedFailure``: the assertion below does
    not hold. The resulting page_data is just
    ``[{'lang_code': 'de', 'pos': 'adj', 'senses': []}]`` instead of
    one entry per Wortart template.
    NOTE(review): presumably caused by append_base_data() needing sense
    data before a new pos entry is started -- confirm against the
    extractor.
    """
    self.wxr.wtp.add_page("Vorlage:Wortart", 10, "")
    page_text = (
        "\n"
        "=== {{Wortart|Adjektiv|Englisch}}, {{Wortart|Adverb|Englisch}} ===\n"
        "=== {{Wortart|Verb|Englisch}} ===\n"
        "=== {{Wortart|Substantiv|Englisch}} ===\n"
    )
    self.wxr.wtp.start_page("")
    root = self.wxr.wtp.parse(
        page_text,
        pre_expand=True,
    )

    base_data = defaultdict(list, {"lang_code": "de"})
    page_data = [defaultdict(list, {"lang_code": "de"})]
    parse_section(self.wxr, page_data, base_data, root.children)

    self.assertEqual(
        page_data,
        [
            {"lang_code": "de", "pos": "adj", "senses": []},
            {"lang_code": "de", "pos": "adv", "senses": []},
            {"lang_code": "de", "pos": "verb", "senses": []},
            {"lang_code": "de", "pos": "noun"},
        ],
    )

def test_de_fix_level_hierarchy_of_subsections(self):
self.wxr.wtp.add_page("Vorlage:Englisch Substantiv Übersicht", 10, "")
self.wxr.wtp.add_page("Vorlage:Worttrennung", 10, "")
Expand Down

0 comments on commit e3af868

Please sign in to comment.