Skip to content

Commit

Permalink
Merge pull request #437 from xxyzz/fr
Browse files (browse the repository at this point in the history)
Fix a bug that appends all senses to the same WordEntry object
  • Loading branch information
xxyzz authored Dec 26, 2023
2 parents 87921d1 + 12e4d8c commit ae805ae
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/wiktextract/extractor/fr/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def process_pos_block(
pos_title: str,
):
pos_type = wxr.config.POS_SUBTITLES[pos_argument]["pos"]
- if len(page_data) == 0 or "pos" not in page_data[-1].model_fields_set:
+ if len(page_data) == 0 or "pos" in page_data[-1].model_fields_set:
page_data.append(base_data.model_copy(deep=True))
page_data[-1].pos = pos_type
page_data[-1].pos_title = pos_title
Expand Down
81 changes: 72 additions & 9 deletions tests/test_fr_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,44 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()

def test_fr_parse_page(self):
self.wxr.wtp.add_page("Modèle:langue", 10, "Français")
self.wxr.wtp.add_page("Modèle:S", 10, "Nom commun")
# https://fr.wiktionary.org/wiki/anthracite
self.wxr.wtp.add_page(
"Modèle:langue",
10,
"{{#switch: {{{1}}} | fr = Français | en = Anglais }}",
)
self.wxr.wtp.add_page(
"Modèle:S",
10,
"""{{#switch: {{{1}}}
| étymologie = Étymologie
| nom = Nom commun
| adjectif = Adjectif
}}""",
)
self.wxr.wtp.add_page("Modèle:roches", 10, "''(Pétrographie)''")
self.wxr.wtp.add_page("Modèle:indénombrable", 10, "''(Indénombrable)''")

page_data = parse_page(
self.wxr,
"exemple",
"""
== {{langue|fr}} ==
"anthracite",
"""== {{langue|fr}} ==
=== {{S|étymologie}} ===
: (1549) Du latin anthracites.
=== {{S|nom|fr}} ===
'''exemple'''
""",
# {{roches|fr}} [[variété|Variété]] de [[charbon de terre]], à [[reflet]] [[métallique]] et à [[combustion]] [[lent]]e.
=== {{S|adjectif|fr}} ===
# De couleur anthracite, gris très foncé, du nom de la variété de charbon du même nom.
== {{langue|en}} ==
=== {{S|étymologie}} ===
: Du latin anthracites.
=== {{S|nom|en}} ===
# {{indénombrable|en}} [[anthracite#fr|Anthracite]].""",
)
self.assertEqual(
page_data,
Expand All @@ -42,7 +70,42 @@ def test_fr_parse_page(self):
"lang_code": "fr",
"pos": "noun",
"pos_title": "Nom commun",
"word": "exemple",
}
"word": "anthracite",
"senses": [
{
"glosses": [
"Variété de charbon de terre, à reflet métallique et à combustion lente."
],
"tags": ["Pétrographie"],
}
],
"etymology_texts": ["(1549) Du latin anthracites."],
},
{
"lang_name": "Français",
"lang_code": "fr",
"pos": "adj",
"pos_title": "Adjectif",
"word": "anthracite",
"senses": [
{
"glosses": [
"De couleur anthracite, gris très foncé, du nom de la variété de charbon du même nom."
]
}
],
"etymology_texts": ["(1549) Du latin anthracites."],
},
{
"lang_name": "Anglais",
"lang_code": "en",
"pos": "noun",
"pos_title": "Nom commun",
"word": "anthracite",
"senses": [
{"glosses": ["Anthracite."], "tags": ["Indénombrable"]}
],
"etymology_texts": ["Du latin anthracites."],
},
],
)

0 comments on commit ae805ae

Please sign in to comment.