diff --git a/src/wiktextract/extractor/it/analyze_template.py b/src/wiktextract/extractor/it/analyze_template.py index 621d8d3f..d76e34af 100644 --- a/src/wiktextract/extractor/it/analyze_template.py +++ b/src/wiktextract/extractor/it/analyze_template.py @@ -67,7 +67,7 @@ "Template:-uso-", "Template:-var-", "Template:-alter-", - "Template:-chat-", + "Template:-chat-", # pos "Template:-coni-", "Template:-decl-", "Template:-der-", @@ -76,6 +76,13 @@ "Template:-pron-", "Template:-prov-", "Template:-trascrizione-", # pos + # https://it.wiktionary.org/wiki/Categoria:Template_vocabolo + "Template:-etim-", + "Template:-trad-", + "Template:-ant-", + "Template:-cod-", # pos + "Template:-carhi-", # pos + "Template:-quote-", } diff --git a/src/wiktextract/extractor/it/pos.py b/src/wiktextract/extractor/it/pos.py index 2a8f0de6..c77e427f 100644 --- a/src/wiktextract/extractor/it/pos.py +++ b/src/wiktextract/extractor/it/pos.py @@ -1,4 +1,4 @@ -from wikitextprocessor import LevelNode, NodeKind, WikiNode +from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode from ...page import clean_node from ...wxr_context import WiktextractContext @@ -35,7 +35,15 @@ def extract_gloss_list_item( gloss_nodes = [] sense = Sense() for node in list_item.children: - if not (isinstance(node, WikiNode) and node.kind == NodeKind.LIST): + if isinstance(node, TemplateNode): + match node.template_name: + case "Term": + raw_tag = clean_node(wxr, sense, node).strip("() \n") + if raw_tag != "": + sense.raw_tags.append(raw_tag) + case _: + gloss_nodes.append(node) + elif not (isinstance(node, WikiNode) and node.kind == NodeKind.LIST): gloss_nodes.append(node) gloss_str = clean_node(wxr, sense, gloss_nodes) if gloss_str != "": diff --git a/src/wiktextract/extractor/it/section_titles.py b/src/wiktextract/extractor/it/section_titles.py index 087f10ab..1fc81b45 100644 --- a/src/wiktextract/extractor/it/section_titles.py +++ b/src/wiktextract/extractor/it/section_titles.py @@ -59,4 +59,6 @@ "pos": "abbrev", "tags": ["abbreviation"], }, + "Codice / Simbolo": {"pos": "symbol"}, + "Carattere hiragana": {"pos": "character", "tags": ["hiragana"]}, } diff --git a/tests/test_it_gloss.py b/tests/test_it_gloss.py index b6db0da0..2863cbfd 100644 --- a/tests/test_it_gloss.py +++ b/tests/test_it_gloss.py @@ -46,7 +46,8 @@ def test_gloss_list(self): "pos_title": "Sostantivo", "senses": [ { - "glosses": ["(mammalogia) animale"], + "glosses": ["animale"], + "raw_tags": ["mammalogia"], "categories": ["Mammalogia-IT"], } ],