From a78495556a37b744ff3b3594347fc5b8706fb305 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 16 Dec 2024 12:00:45 +0800
Subject: [PATCH 1/6] [it] save hyphenation list data to the `hyphenations` field

---
 src/wiktextract/extractor/it/models.py |  7 +++++-
 src/wiktextract/extractor/it/sound.py  | 32 ++++++++++++++++++++++----
 tests/test_it_sound.py                 | 30 ++++++++++++++++++++++--
 3 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/src/wiktextract/extractor/it/models.py b/src/wiktextract/extractor/it/models.py
index 1e42a0f5..83e9cdd1 100644
--- a/src/wiktextract/extractor/it/models.py
+++ b/src/wiktextract/extractor/it/models.py
@@ -62,6 +62,11 @@ class Sound(ItalianBaseModel):
     raw_tags: list[str] = []
 
 
+class Hyphenation(ItalianBaseModel):
+    hyphenation: str = ""
+    sense: str = ""
+
+
 class WordEntry(ItalianBaseModel):
     model_config = ConfigDict(title="Italian Wiktionary")
     word: str = Field(description="Word string", min_length=1)
@@ -77,5 +82,5 @@ class WordEntry(ItalianBaseModel):
     forms: list[Form] = []
     etymology_texts: list[str] = []
     etymology_examples: list[Example] = []
-    hyphenation: str = ""
+    hyphenations: list[Hyphenation] = []
     sounds: list[Sound] = []
diff --git a/src/wiktextract/extractor/it/sound.py b/src/wiktextract/extractor/it/sound.py
index 08b9074d..cd40cfd9 100644
--- a/src/wiktextract/extractor/it/sound.py
+++ b/src/wiktextract/extractor/it/sound.py
@@ -3,19 +3,41 @@
 from ...page import clean_node
 from ...wxr_context import WiktextractContext
 from ..share import set_sound_file_url_fields
-from .models import Sound, WordEntry
+from .models import Hyphenation, Sound, WordEntry
 
 
 def extract_hyphenation_section(
     wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
 ) -> None:
-    hyphenation = ""
+    # https://it.wiktionary.org/wiki/Aiuto:Sillabazione
+    hyphenations = []
     for list_node in level_node.find_child(NodeKind.LIST):
-        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
-            hyphenation = clean_node(wxr, None, list_item.children)
+        match list_node.sarg:
+            case ";":
+                for list_item in list_node.find_child(NodeKind.LIST_ITEM):
+                    h_str = clean_node(wxr, None, list_item.children)
+                    if h_str != "":
+                        hyphenations.append(Hyphenation(hyphenation=h_str))
+                        break
+            case "*":
+                for list_item in list_node.find_child(NodeKind.LIST_ITEM):
+                    h_data = Hyphenation()
+                    for node in list_item.find_child(
+                        NodeKind.ITALIC | NodeKind.BOLD
+                    ):
+                        match node.kind:
+                            case NodeKind.ITALIC:
+                                h_data.sense = clean_node(
+                                    wxr, None, node
+                                ).strip("()")
+                            case NodeKind.BOLD:
+                                h_data.hyphenation = clean_node(wxr, None, node)
+                    if h_data.hyphenation != "":
+                        hyphenations.append(h_data)
+
     for data in page_data:
         if data.lang_code == page_data[-1].lang_code:
-            data.hyphenation = hyphenation
+            data.hyphenations.extend(hyphenations)
 
 
 def extract_pronunciation_section(
diff --git a/tests/test_it_sound.py b/tests/test_it_sound.py
index 62c695b2..bef39981 100644
--- a/tests/test_it_sound.py
+++ b/tests/test_it_sound.py
@@ -18,7 +18,7 @@ def setUp(self) -> None:
             ),
         )
 
-    def test_hyphenation(self):
+    def test_hyphenation_single_list(self):
         self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
         data = parse_page(
             self.wxr,
@@ -29,7 +29,7 @@ def test_hyphenation(self):
 ===Sillabazione===
 ; cà | ne""",
         )
-        self.assertEqual(data[0]["hyphenation"], "cà | ne")
+        self.assertEqual(data[0]["hyphenations"], [{"hyphenation": "cà | ne"}])
 
     def test_ipa_audio_templates(self):
         self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
@@ -46,3 +46,29 @@ def test_ipa_audio_templates(self):
         sound = data[0]["sounds"][0]
         self.assertEqual(sound["ipa"], "/ˈkaːne/")
         self.assertEqual(sound["audio"], "it-cane.ogg")
+
+    def test_hyphenation_lists(self):
+        self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
+        data = parse_page(
+            self.wxr,
+            "pesca",
+            """== {{-it-}} ==
+===Sostantivo===
+# [[frutto]] del [[pesco]]
+===Sillabazione===
+* ''(il frutto e significati correlati)'' '''pè | sca'''
+* ''(l'atto del pescare e significati correlati)'' '''pé | sca'''""",
+        )
+        self.assertEqual(
+            data[0]["hyphenations"],
+            [
+                {
+                    "hyphenation": "pè | sca",
+                    "sense": "il frutto e significati correlati",
+                },
+                {
+                    "hyphenation": "pé | sca",
+                    "sense": "l'atto del pescare e significati correlati",
+                },
+            ],
+        )

From 042f583ae249588248d9d81a989dc51480f761f8 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 16 Dec 2024 12:06:45 +0800
Subject: [PATCH 2/6] [it] extract direct bold child nodes in hyphenation
 section

---
 src/wiktextract/extractor/it/sound.py |  6 ++++++
 tests/test_it_sound.py                | 19 +++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/src/wiktextract/extractor/it/sound.py b/src/wiktextract/extractor/it/sound.py
index cd40cfd9..6bc147bc 100644
--- a/src/wiktextract/extractor/it/sound.py
+++ b/src/wiktextract/extractor/it/sound.py
@@ -35,6 +35,12 @@ def extract_hyphenation_section(
                     if h_data.hyphenation != "":
                         hyphenations.append(h_data)
 
+    # no list
+    for node in level_node.find_child(NodeKind.BOLD):
+        h_str = clean_node(wxr, None, node)
+        if h_str != "":
+            hyphenations.append(Hyphenation(hyphenation=h_str))
+
     for data in page_data:
         if data.lang_code == page_data[-1].lang_code:
             data.hyphenations.extend(hyphenations)
diff --git a/tests/test_it_sound.py b/tests/test_it_sound.py
index bef39981..4dbe4b5b 100644
--- a/tests/test_it_sound.py
+++ b/tests/test_it_sound.py
@@ -72,3 +72,22 @@ def test_hyphenation_lists(self):
                 },
             ],
         )
+
+    def test_hyphenation_no_list(self):
+        self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
+        data = parse_page(
+            self.wxr,
+            "cespita",
+            """== {{-it-}} ==
+===Sostantivo===
+# [[variante]] di [[ceppita]]
+===Sillabazione===
+'''cè | spi | ta''' o '''cé | spi | ta'''""",
+        )
+        self.assertEqual(
+            data[0]["hyphenations"],
+            [
+                {"hyphenation": "cè | spi | ta"},
+                {"hyphenation": "cé | spi | ta"},
+            ],
+        )

From db51476c9baffdb7415ea583b5ca75479398f1fd Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 16 Dec 2024 14:13:18 +0800
Subject: [PATCH 3/6] [it] extract the remaining arguments of template "IPA"

---
 src/wiktextract/extractor/it/sound.py | 20 ++++++++++++++------
 tests/test_it_sound.py                | 20 ++++++++++++++++++++
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/wiktextract/extractor/it/sound.py b/src/wiktextract/extractor/it/sound.py
index 6bc147bc..84a6f811 100644
--- a/src/wiktextract/extractor/it/sound.py
+++ b/src/wiktextract/extractor/it/sound.py
@@ -52,12 +52,20 @@ def extract_pronunciation_section(
     sounds = []
     for t_node in level_node.find_child(NodeKind.TEMPLATE):
         match t_node.template_name.lower():
-            case "ipa":
-                ipa = clean_node(
-                    wxr, None, t_node.template_parameters.get(1, "")
-                )
-                if ipa != "":
-                    sounds.append(Sound(ipa=ipa))
+            case "ipa" | "sampa":
+                # https://it.wiktionary.org/wiki/Template:IPA
+                # https://it.wiktionary.org/wiki/Template:SAMPA
+                for arg_name in range(1, 5):
+                    if arg_name not in t_node.template_parameters:
+                        break
+                    ipa = clean_node(
+                        wxr, None, t_node.template_parameters.get(arg_name, "")
+                    )
+                    if ipa != "":
+                        sound = Sound(ipa=ipa)
+                        if t_node.template_name.lower() == "sampa":
+                            sound.tags.append("SAMPA")
+                        sounds.append(sound)
             case "audio":
                 sound_file = clean_node(
                     wxr, None, t_node.template_parameters.get(1, "")
diff --git a/tests/test_it_sound.py b/tests/test_it_sound.py
index 4dbe4b5b..091c9489 100644
--- a/tests/test_it_sound.py
+++ b/tests/test_it_sound.py
@@ -91,3 +91,23 @@ def test_hyphenation_no_list(self):
                 {"hyphenation": "cé | spi | ta"},
             ],
         )
+
+    def test_sampa(self):
+        self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
+        data = parse_page(
+            self.wxr,
+            "Italia",
+            """== {{-it-}} ==
+===Nome proprio===
+# [[stato]]
+===Pronuncia===
+{{IPA|/iˈtalja/|/iˈtaː.li̯a/}}, {{SAMPA|/i"talja/}}
+{{Audio|It-Italia.ogg}}""",
+        )
+        self.assertEqual(
+            data[0]["sounds"][:2],
+            [{"ipa": "/iˈtalja/"}, {"ipa": "/iˈtaː.li̯a/"}],
+        )
+        self.assertEqual(data[0]["sounds"][2]["ipa"], '/i"talja/')
+        self.assertEqual(data[0]["sounds"][2]["tags"], ["SAMPA"])
+        self.assertEqual(data[0]["sounds"][2]["audio"], "It-Italia.ogg")

From 52fb94c47a18958ebe0b58158ce89b12f8656dd2 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 16 Dec 2024 15:02:32 +0800
Subject: [PATCH 4/6] [it] extract lists in pronunciation section

---
 src/wiktextract/extractor/it/models.py |  1 +
 src/wiktextract/extractor/it/sound.py  | 89 ++++++++++++++++++--------
 tests/test_it_sound.py                 | 17 +++++
 3 files changed, 80 insertions(+), 27 deletions(-)

diff --git a/src/wiktextract/extractor/it/models.py b/src/wiktextract/extractor/it/models.py
index 83e9cdd1..701f53b2 100644
--- a/src/wiktextract/extractor/it/models.py
+++ b/src/wiktextract/extractor/it/models.py
@@ -60,6 +60,7 @@ class Sound(ItalianBaseModel):
     flac_url: str = ""
     tags: list[str] = []
     raw_tags: list[str] = []
+    sense: str = ""
 
 
 class Hyphenation(ItalianBaseModel):
diff --git a/src/wiktextract/extractor/it/sound.py b/src/wiktextract/extractor/it/sound.py
index 84a6f811..daed473e 100644
--- a/src/wiktextract/extractor/it/sound.py
+++ b/src/wiktextract/extractor/it/sound.py
@@ -1,4 +1,4 @@
-from wikitextprocessor import LevelNode, NodeKind
+from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
 
 from ...page import clean_node
 from ...wxr_context import WiktextractContext
@@ -49,35 +49,70 @@ def extract_hyphenation_section(
 def extract_pronunciation_section(
     wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
 ) -> None:
+    # https://it.wiktionary.org/wiki/Aiuto:Pronuncia
     sounds = []
+    for list_node in level_node.find_child(NodeKind.LIST):
+        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
+            extract_sound_list_item(wxr, list_item, sounds)
+
+    # no list
     for t_node in level_node.find_child(NodeKind.TEMPLATE):
-        match t_node.template_name.lower():
-            case "ipa" | "sampa":
-                # https://it.wiktionary.org/wiki/Template:IPA
-                # https://it.wiktionary.org/wiki/Template:SAMPA
-                for arg_name in range(1, 5):
-                    if arg_name not in t_node.template_parameters:
-                        break
-                    ipa = clean_node(
-                        wxr, None, t_node.template_parameters.get(arg_name, "")
-                    )
-                    if ipa != "":
-                        sound = Sound(ipa=ipa)
-                        if t_node.template_name.lower() == "sampa":
-                            sound.tags.append("SAMPA")
-                        sounds.append(sound)
-            case "audio":
-                sound_file = clean_node(
-                    wxr, None, t_node.template_parameters.get(1, "")
-                )
-                if sound_file != "":
-                    if len(sounds) > 0:
-                        set_sound_file_url_fields(wxr, sound_file, sounds[-1])
-                    else:
-                        sound = Sound()
-                        set_sound_file_url_fields(wxr, sound_file, sound)
-                        sounds.append(sound)
+        extract_sound_template(wxr, t_node, sounds, "")
 
     for data in page_data:
         if data.lang_code == page_data[-1].lang_code:
             data.sounds.extend(sounds)
+
+
+def extract_sound_list_item(
+    wxr: WiktextractContext, list_item: WikiNode, sounds: list[Sound]
+) -> None:
+    sense = ""
+    for node in list_item.find_child(NodeKind.ITALIC | NodeKind.TEMPLATE):
+        match node.kind:
+            case NodeKind.ITALIC:
+                sense = clean_node(wxr, None, node).strip("()")
+            case NodeKind.TEMPLATE:
+                extract_sound_template(wxr, node, sounds, sense)
+
+
+def extract_sound_template(
+    wxr: WiktextractContext,
+    t_node: TemplateNode,
+    sounds: list[Sound],
+    sense: str,
+) -> None:
+    match t_node.template_name:
+        case "IPA" | "SAMPA":
+            # https://it.wiktionary.org/wiki/Template:IPA
+            # https://it.wiktionary.org/wiki/Template:SAMPA
+            for arg_name in range(1, 5):
+                if arg_name not in t_node.template_parameters:
+                    break
+                ipa = clean_node(
+                    wxr, None, t_node.template_parameters.get(arg_name, "")
+                )
+                if ipa != "":
+                    sound = Sound(ipa=ipa, sense=sense)
+                    if t_node.template_name.lower() == "sampa":
+                        sound.tags.append("SAMPA")
+                    sounds.append(sound)
+        case "Audio" | "audio":
+            # https://it.wiktionary.org/wiki/Template:Audio
+            sound_file = clean_node(
+                wxr, None, t_node.template_parameters.get(1, "")
+            )
+            raw_tag = clean_node(
+                wxr, None, t_node.template_parameters.get(2, "")
+            )
+            if sound_file != "":
+                if len(sounds) > 0:
+                    set_sound_file_url_fields(wxr, sound_file, sounds[-1])
+                    if raw_tag != "":
+                        sounds[-1].raw_tags.append(raw_tag)
+                else:
+                    sound = Sound(sense=sense)
+                    set_sound_file_url_fields(wxr, sound_file, sound)
+                    if raw_tag != "":
+                        sound.raw_tags.append(raw_tag)
+                    sounds.append(sound)
diff --git a/tests/test_it_sound.py b/tests/test_it_sound.py
index 091c9489..72ca53bc 100644
--- a/tests/test_it_sound.py
+++ b/tests/test_it_sound.py
@@ -111,3 +111,20 @@ def test_sampa(self):
         self.assertEqual(data[0]["sounds"][2]["ipa"], '/i"talja/')
         self.assertEqual(data[0]["sounds"][2]["tags"], ["SAMPA"])
         self.assertEqual(data[0]["sounds"][2]["audio"], "It-Italia.ogg")
+
+    def test_sound_list(self):
+        self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
+        data = parse_page(
+            self.wxr,
+            "pesca",
+            """== {{-it-}} ==
+===Nome proprio===
+# [[frutto]]
+===Pronuncia===
+* ''(il frutto e significati correlati)'' {{IPA|/ˈpɛska/}} {{Audio|It-pesca_(frutto).ogg}}""",
+        )
+        self.assertEqual(
+            data[0]["sounds"][0]["sense"], "il frutto e significati correlati"
+        )
+        self.assertEqual(data[0]["sounds"][0]["ipa"], "/ˈpɛska/")
+        self.assertEqual(data[0]["sounds"][0]["audio"], "It-pesca_(frutto).ogg")

From f84cb54f57f5deb8233194364029db34485a00a5 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 16 Dec 2024 15:38:21 +0800
Subject: [PATCH 5/6] [it] extract "Glossa" tag template in sound lists

---
 src/wiktextract/extractor/it/pos.py   | 12 +++++-------
 src/wiktextract/extractor/it/sound.py | 13 +++++++++----
 tests/test_it_sound.py                | 16 ++++++++++++++++
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/src/wiktextract/extractor/it/pos.py b/src/wiktextract/extractor/it/pos.py
index f50a3605..f863ff48 100644
--- a/src/wiktextract/extractor/it/pos.py
+++ b/src/wiktextract/extractor/it/pos.py
@@ -50,13 +50,11 @@ def extract_gloss_list_item(
     sense = Sense()
     for node in list_item.children:
         if isinstance(node, TemplateNode):
-            match node.template_name:
-                case "Term":
-                    raw_tag = clean_node(wxr, sense, node).strip("() \n")
-                    if raw_tag != "":
-                        sense.raw_tags.append(raw_tag)
-                case _:
-                    gloss_nodes.append(node)
+            t_str = clean_node(wxr, sense, node)
+            if t_str.startswith("(") and t_str.endswith(")"):
+                sense.raw_tags.append(t_str.strip("()"))
+            else:
+                gloss_nodes.append(t_str)
         elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
             if node.sarg.endswith("*"):
                 for example_list_item in node.find_child(NodeKind.LIST_ITEM):
diff --git a/src/wiktextract/extractor/it/sound.py b/src/wiktextract/extractor/it/sound.py
index daed473e..a507ad01 100644
--- a/src/wiktextract/extractor/it/sound.py
+++ b/src/wiktextract/extractor/it/sound.py
@@ -57,7 +57,7 @@ def extract_pronunciation_section(
 
     # no list
     for t_node in level_node.find_child(NodeKind.TEMPLATE):
-        extract_sound_template(wxr, t_node, sounds, "")
+        extract_sound_template(wxr, t_node, sounds, "", [])
 
     for data in page_data:
         if data.lang_code == page_data[-1].lang_code:
@@ -68,12 +68,16 @@ def extract_sound_list_item(
     wxr: WiktextractContext, list_item: WikiNode, sounds: list[Sound]
 ) -> None:
     sense = ""
+    raw_tags = []
     for node in list_item.find_child(NodeKind.ITALIC | NodeKind.TEMPLATE):
         match node.kind:
             case NodeKind.ITALIC:
                 sense = clean_node(wxr, None, node).strip("()")
             case NodeKind.TEMPLATE:
-                extract_sound_template(wxr, node, sounds, sense)
+                if node.template_name.lower() == "glossa":
+                    raw_tags.append(clean_node(wxr, None, node).strip("()"))
+                else:
+                    extract_sound_template(wxr, node, sounds, sense, raw_tags)
 
 
 def extract_sound_template(
@@ -81,6 +85,7 @@ def extract_sound_template(
     t_node: TemplateNode,
     sounds: list[Sound],
     sense: str,
+    raw_tags: list[str],
 ) -> None:
     match t_node.template_name:
         case "IPA" | "SAMPA":
@@ -93,7 +98,7 @@ def extract_sound_template(
                     wxr, None, t_node.template_parameters.get(arg_name, "")
                 )
                 if ipa != "":
-                    sound = Sound(ipa=ipa, sense=sense)
+                    sound = Sound(ipa=ipa, sense=sense, raw_tags=raw_tags)
                     if t_node.template_name.lower() == "sampa":
                         sound.tags.append("SAMPA")
                     sounds.append(sound)
@@ -111,7 +116,7 @@ def extract_sound_template(
                     if raw_tag != "":
                         sounds[-1].raw_tags.append(raw_tag)
                 else:
-                    sound = Sound(sense=sense)
+                    sound = Sound(sense=sense, raw_tags=raw_tags)
                     set_sound_file_url_fields(wxr, sound_file, sound)
                     if raw_tag != "":
                         sound.raw_tags.append(raw_tag)
diff --git a/tests/test_it_sound.py b/tests/test_it_sound.py
index 72ca53bc..30ba4a95 100644
--- a/tests/test_it_sound.py
+++ b/tests/test_it_sound.py
@@ -128,3 +128,19 @@ def test_sound_list(self):
         )
         self.assertEqual(data[0]["sounds"][0]["ipa"], "/ˈpɛska/")
         self.assertEqual(data[0]["sounds"][0]["audio"], "It-pesca_(frutto).ogg")
+
+    def test_glossa_tag(self):
+        self.wxr.wtp.add_page("Template:-en-", 10, "Inglese")
+        self.wxr.wtp.add_page("Template:glossa", 10, "({{{1}}})")
+        data = parse_page(
+            self.wxr,
+            "large",
+            """== {{-en-}} ==
+===Aggettivo===
+# [[largo]]
+===Pronuncia===
+*{{glossa|UK}} {{IPA|/lɑːd͡ʒ/}}""",
+        )
+        self.assertEqual(
+            data[0]["sounds"], [{"raw_tags": ["UK"], "ipa": "/lɑːd͡ʒ/"}]
+        )

From b92d96e5a9575f3d4cb43e0023c05da0767d02b6 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 16 Dec 2024 17:06:12 +0800
Subject: [PATCH 6/6] [it] extract linkage sections

---
 src/wiktextract/extractor/it/etymology.py     |  1 +
 src/wiktextract/extractor/it/linkage.py       | 50 +++++++++++++++++++
 src/wiktextract/extractor/it/models.py        | 13 +++++
 src/wiktextract/extractor/it/page.py          |  7 ++-
 .../extractor/it/section_titles.py            | 15 ++++++
 tests/test_it_linkage.py                      | 44 ++++++++++++++++
 6 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 src/wiktextract/extractor/it/linkage.py
 create mode 100644 tests/test_it_linkage.py

diff --git a/src/wiktextract/extractor/it/etymology.py b/src/wiktextract/extractor/it/etymology.py
index 8092e7af..b108f35c 100644
--- a/src/wiktextract/extractor/it/etymology.py
+++ b/src/wiktextract/extractor/it/etymology.py
@@ -8,6 +8,7 @@
 def extract_etymology_section(
     wxr: WiktextractContext, page_data: list[WordEntry], level_node: LevelNode
 ) -> None:
+    # https://it.wiktionary.org/wiki/Aiuto:Etimologia
     etymology_texts = []
     for list_node in level_node.find_child(NodeKind.LIST):
         for list_item in list_node.find_child(NodeKind.LIST_ITEM):
diff --git a/src/wiktextract/extractor/it/linkage.py b/src/wiktextract/extractor/it/linkage.py
new file mode 100644
index 00000000..75abd5d5
--- /dev/null
+++ b/src/wiktextract/extractor/it/linkage.py
@@ -0,0 +1,50 @@
+from wikitextprocessor import LevelNode, NodeKind, WikiNode
+
+from ...page import clean_node
+from ...wxr_context import WiktextractContext
+from .models import Linkage, WordEntry
+
+
+def extract_linkage_section(
+    wxr: WiktextractContext,
+    page_data: list[WordEntry],
+    level_node: LevelNode,
+    linkage_type: str,
+) -> None:
+    linkages = []
+    for list_node in level_node.find_child(NodeKind.LIST):
+        for list_item in list_node.find_child(NodeKind.LIST_ITEM):
+            linkages.extend(extract_linkage_list_item(wxr, list_item))
+
+    for data in page_data:
+        if data.lang_code == page_data[-1].lang_code:
+            getattr(data, linkage_type).extend(linkages)
+
+
+def extract_linkage_list_item(
+    wxr: WiktextractContext, list_item: WikiNode
+) -> list[Linkage]:
+    raw_tags = []
+    linkages = []
+    for node in list_item.children:
+        if isinstance(node, WikiNode):
+            match node.kind:
+                case NodeKind.LINK:
+                    node_str = clean_node(wxr, None, node)
+                    if node_str != "":
+                        linkages.append(
+                            Linkage(word=node_str, raw_tags=raw_tags)
+                        )
+                        raw_tags.clear()
+                case NodeKind.TEMPLATE | NodeKind.ITALIC:
+                    node_str = clean_node(wxr, None, node)
+                    if node_str.startswith("(") and node_str.endswith(")"):
+                        raw_tags.append(node_str.strip("()"))
+        elif isinstance(node, str):
+            for word_str in node.split(","):
+                word_str = word_str.strip()
+                if word_str != "":
+                    linkages.append(Linkage(word=word_str, raw_tags=raw_tags))
+                    raw_tags.clear()
+
+    return linkages
diff --git a/src/wiktextract/extractor/it/models.py b/src/wiktextract/extractor/it/models.py
index 701f53b2..7ba272a7 100644
--- a/src/wiktextract/extractor/it/models.py
+++ b/src/wiktextract/extractor/it/models.py
@@ -68,6 +68,12 @@ class Hyphenation(ItalianBaseModel):
     sense: str = ""
 
 
+class Linkage(ItalianBaseModel):
+    word: str
+    tags: list[str] = []
+    raw_tags: list[str] = []
+
+
 class WordEntry(ItalianBaseModel):
     model_config = ConfigDict(title="Italian Wiktionary")
     word: str = Field(description="Word string", min_length=1)
@@ -85,3 +91,10 @@ class WordEntry(ItalianBaseModel):
     etymology_examples: list[Example] = []
     hyphenations: list[Hyphenation] = []
     sounds: list[Sound] = []
+    synonyms: list[Linkage] = []
+    antonyms: list[Linkage] = []
+    derived: list[Linkage] = []
+    related: list[Linkage] = []
+    hyponyms: list[Linkage] = []
+    hypernyms: list[Linkage] = []
+    proverbs: list[Linkage] = []
diff --git a/src/wiktextract/extractor/it/page.py b/src/wiktextract/extractor/it/page.py
index 7817a40b..68f51ce2 100644
--- a/src/wiktextract/extractor/it/page.py
+++ b/src/wiktextract/extractor/it/page.py
@@ -5,9 +5,10 @@
 from ...page import clean_node
 from ...wxr_context import WiktextractContext
 from .etymology import extract_citation_section, extract_etymology_section
+from .linkage import extract_linkage_section
 from .models import Sense, WordEntry
 from .pos import extract_pos_section
-from .section_titles import POS_DATA
+from .section_titles import LINKAGE_SECTIONS, POS_DATA
 from .sound import extract_hyphenation_section, extract_pronunciation_section
 from .translation import extract_translation_section
 
@@ -31,6 +32,10 @@ def parse_section(
         extract_hyphenation_section(wxr, page_data, level_node)
     elif title_text == "Pronuncia":
         extract_pronunciation_section(wxr, page_data, level_node)
+    elif title_text in LINKAGE_SECTIONS:
+        extract_linkage_section(
+            wxr, page_data, level_node, LINKAGE_SECTIONS[title_text]
+        )
 
     for next_level in level_node.find_child(LEVEL_KIND_FLAGS):
         parse_section(wxr, page_data, base_data, next_level)
diff --git a/src/wiktextract/extractor/it/section_titles.py b/src/wiktextract/extractor/it/section_titles.py
index 1fc81b45..b5360d75 100644
--- a/src/wiktextract/extractor/it/section_titles.py
+++ b/src/wiktextract/extractor/it/section_titles.py
@@ -62,3 +62,18 @@
     "Codice / Simbolo": {"pos": "symbol"},
     "Carattere hiragana": {"pos": "character", "tags": ["hiragana"]},
 }
+
+
+LINKAGE_SECTIONS = {
+    "Sinonimi": "synonyms",
+    "Contrari": "antonyms",
+    "Derivati": "derived",
+    "Termini correlati": "related",
+    "Varianti": "related",
+    "Alterati": "related",
+    "Iponimi": "hyponyms",
+    "Iperonimi": "hypernyms",
+    "Da non confondere con": "related",
+    "Proverbi e modi di dire": "proverbs",
+    "Parole derivate": "derived",
+}
diff --git a/tests/test_it_linkage.py b/tests/test_it_linkage.py
new file mode 100644
index 00000000..3aabea24
--- /dev/null
+++ b/tests/test_it_linkage.py
@@ -0,0 +1,44 @@
+from unittest import TestCase
+
+from wikitextprocessor import Wtp
+
+from wiktextract.config import WiktionaryConfig
+from wiktextract.extractor.it.page import parse_page
+from wiktextract.wxr_context import WiktextractContext
+
+
+class TestItLinkage(TestCase):
+    maxDiff = None
+
+    def setUp(self) -> None:
+        self.wxr = WiktextractContext(
+            Wtp(lang_code="it"),
+            WiktionaryConfig(
+                dump_file_lang_code="it", capture_language_codes=None
+            ),
+        )
+
+    def test_synonyms(self):
+        self.wxr.wtp.add_page("Template:-it-", 10, "Italiano")
+        self.wxr.wtp.add_page(
+            "Template:Fig", 10, "<small>(''senso figurato'')</small>"
+        )
+        data = parse_page(
+            self.wxr,
+            "cane",
+            """== {{-it-}} ==
+===Sostantivo===
+# [[animale]]
+===Sinonimi===
+* [[animale]], amico dell’uomo
+* {{Fig}} ''(di freddo)'' [[forte]], [[intenso]]""",
+        )
+        self.assertEqual(
+            data[0]["synonyms"],
+            [
+                {"word": "animale"},
+                {"word": "amico dell’uomo"},
+                {"word": "forte", "raw_tags": ["senso figurato", "di freddo"]},
+                {"word": "intenso"},
+            ],
+        )