Skip to content

Commit

Permalink
Add temporarily different variants of test_de_parse_section
Browse files Browse the repository at this point in the history
  • Loading branch information
empiriker committed Oct 1, 2023
1 parent 41d3ce6 commit e6d3bfa
Showing 1 changed file with 97 additions and 1 deletion.
98 changes: 97 additions & 1 deletion tests/test_de_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def mock_append_base_data_side_effects(
page_data[-1][field] = value

@patch("wiktextract.extractor.de.page.append_base_data")
def test_de_parse_section(self, mock_append_base_data):
def test_de_parse_section_with_mock(self, mock_append_base_data):
mock_append_base_data.side_effect = (
self.mock_append_base_data_side_effects
)
Expand Down Expand Up @@ -131,6 +131,102 @@ def test_de_parse_section(self, mock_append_base_data):
],
)

def test_de_parse_section_with_senses(self):
    """Check parse_section output for POS headings that each carry a gloss.

    The wikitext has three level-3 headings (the first one names two
    parts of speech), each followed by a ``{{Bedeutungen}}`` block with a
    single gloss line. Four entries are expected in ``page_data``.
    """
    wtp = self.wxr.wtp
    # Register the templates used by the wikitext with empty bodies.
    for template_title in ("Vorlage:Wortart", "Vorlage:Bedeutungen"):
        wtp.add_page(template_title, 10, "")

    # The wikitext lines stay at column 0 so they are passed to the
    # parser exactly as written.
    wikitext = """
=== {{Wortart|Adjektiv|Englisch}}, {{Wortart|Adverb|Englisch}} ===
{{Bedeutungen}}
:[1] gloss1
=== {{Wortart|Verb|Englisch}} ===
{{Bedeutungen}}
:[1] gloss1
=== {{Wortart|Substantiv|Englisch}} ===
{{Bedeutungen}}
:[1] gloss1
"""
    wtp.start_page("")
    tree = wtp.parse(wikitext, pre_expand=True)

    base_entry = defaultdict(list, {"lang_code": "de"})
    collected = [defaultdict(list, {"lang_code": "de"})]
    parse_section(self.wxr, collected, base_entry, tree.children)

    # One entry per part of speech, in heading order, each with the
    # same single gloss.
    expected = [
        {
            "lang_code": "de",
            "pos": pos,
            "senses": [
                {
                    "glosses": ["gloss1"],
                },
            ],
        }
        for pos in ("adj", "adv", "verb", "noun")
    ]
    self.assertEqual(collected, expected)

def test_de_parse_section_without_mock(self):
    """Run parse_section on bare POS headings without patching anything.

    The wikitext contains only three level-3 headings (the first one
    names two parts of speech) and no section content.
    """
    wtp = self.wxr.wtp
    # Register the heading template with an empty body.
    wtp.add_page("Vorlage:Wortart", 10, "")

    # The wikitext lines stay at column 0 so they are passed to the
    # parser exactly as written.
    wikitext = """
=== {{Wortart|Adjektiv|Englisch}}, {{Wortart|Adverb|Englisch}} ===
=== {{Wortart|Verb|Englisch}} ===
=== {{Wortart|Substantiv|Englisch}} ===
"""
    wtp.start_page("")
    tree = wtp.parse(wikitext, pre_expand=True)

    base_entry = defaultdict(list, {"lang_code": "de"})
    collected = [defaultdict(list, {"lang_code": "de"})]
    parse_section(self.wxr, collected, base_entry, tree.children)

    expected = [
        {"lang_code": "de", "pos": "adj", "senses": []},
        {"lang_code": "de", "pos": "adv", "senses": []},
        {"lang_code": "de", "pos": "verb", "senses": []},
        {"lang_code": "de", "pos": "noun"},
    ]
    # NOTE: this assertion currently fails. The resulting page_data is
    # just:
    # [{'lang_code': 'de', 'pos': 'adj', 'senses': []}]
    self.assertEqual(collected, expected)

def test_de_fix_level_hierarchy_of_subsections(self):
self.wxr.wtp.add_page("Vorlage:Englisch Substantiv Übersicht", 10, "")
self.wxr.wtp.add_page("Vorlage:Worttrennung", 10, "")
Expand Down

0 comments on commit e6d3bfa

Please sign in to comment.