Skip to content

Commit

Permalink
[it] extract tag data from "Term" template in gloss list
Browse files (browse the repository at this point in the history)
and add more section templates
  • Loading branch information
xxyzz committed Dec 11, 2024
1 parent b64f6dc commit e948032
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 4 deletions.
9 changes: 8 additions & 1 deletion src/wiktextract/extractor/it/analyze_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
"Template:-uso-",
"Template:-var-",
"Template:-alter-",
"Template:-chat-",
"Template:-chat-", # pos
"Template:-coni-",
"Template:-decl-",
"Template:-der-",
Expand All @@ -76,6 +76,13 @@
"Template:-pron-",
"Template:-prov-",
"Template:-trascrizione-", # pos
# https://it.wiktionary.org/wiki/Categoria:Template_vocabolo
"Template:-etim-",
"Template:-trad-",
"Template:-ant-",
"Template:-cod-", # pos
"Template:-carhi-", # pos
"Template:-quote-",
}


Expand Down
12 changes: 10 additions & 2 deletions src/wiktextract/extractor/it/pos.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from wikitextprocessor import LevelNode, NodeKind, WikiNode
from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode

from ...page import clean_node
from ...wxr_context import WiktextractContext
Expand Down Expand Up @@ -35,7 +35,15 @@ def extract_gloss_list_item(
gloss_nodes = []
sense = Sense()
for node in list_item.children:
if not (isinstance(node, WikiNode) and node.kind == NodeKind.LIST):
if isinstance(node, TemplateNode):
match node.template_name:
case "Term":
raw_tag = clean_node(wxr, sense, node).strip("() \n")
if raw_tag != "":
sense.raw_tags.append(raw_tag)
case _:
gloss_nodes.append(node)
elif not (isinstance(node, WikiNode) and node.kind == NodeKind.LIST):
gloss_nodes.append(node)
gloss_str = clean_node(wxr, sense, gloss_nodes)
if gloss_str != "":
Expand Down
2 changes: 2 additions & 0 deletions src/wiktextract/extractor/it/section_titles.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,6 @@
"pos": "abbrev",
"tags": ["abbreviation"],
},
"Codice / Simbolo": {"pos": "symbol"},
"Carattere hiragana": {"pos": "character", "tags": ["hiragana"]},
}
3 changes: 2 additions & 1 deletion tests/test_it_gloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def test_gloss_list(self):
"pos_title": "Sostantivo",
"senses": [
{
"glosses": ["(mammalogia) animale"],
"glosses": ["animale"],
"raw_tags": ["mammalogia"],
"categories": ["Mammalogia-IT"],
}
],
Expand Down

0 comments on commit e948032

Please sign in to comment.