Changed Abbreviation Extension's glossary option to accept a dictionary instead of a file.

Changed `load_glossary` to merge in a dictionary, overwriting any existing records. Added a `reset_glossary` method to clear the glossary. Removed the `use_last_abbr` option, since `glossary` can be used to resolve the use case `use_last_abbr` was written for.
Python-Markdown · Jun 5, 2024 · 831449b · 831449b
1 parent 2009c02
commit 831449b
Show file tree

Hide file tree

Showing 2 changed files with 56 additions and 74 deletions.
diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
@@ -22,7 +22,6 @@
 
 from __future__ import annotations
 
-import codecs
 from . import Extension
 from ..util import parseBoolValue
 from ..blockprocessors import BlockProcessor
@@ -44,15 +43,10 @@ class AbbrExtension(Extension):
     def __init__(self, **kwargs):
         """ Initiate Extension and set up configs. """
         self.config = {
-            'use_last_abbr': [
-                True,
-                'True to use the last instance of an abbreviation, rather than the first instance.'
-                'Default: `True`.'
-            ],
             'glossary': [
-                '',
-                'Path to the Markdown file containing abbreviations to be applied to every page.'
-                "Default: `''`"
+                {},
+                'A dictionary where the `key` is the abbreviation and the `value` is the definition.'
+                "Default: `{}`"
             ],
         }
         """ Default configuration options. """
@@ -66,28 +60,19 @@ def reset(self):
         if (self.glossary):
             self.abbrs.update(self.glossary)
 
-    def load_glossary(self, md: Markdown, filename: str):
-        if filename and isinstance(filename, str):
-            input_file = codecs.open(filename, mode="r", encoding='utf-8')
-            text = input_file.read()
-            input_file.close()
-            text = text.lstrip('\ufeff')  # remove the byte-order mark
-            try:
-                text = str(text)
-            except UnicodeDecodeError as e:  # pragma: no cover
-                # Customize error message while maintaining original traceback
-                e.reason += '. -- Note: Markdown only accepts Unicode input!'
-                raise
-            lines = text.split("\n")
-
-            bp = AbbrBlockprocessor(md.parser, self.glossary, self.getConfigs())
-            for line in lines:
-                    bp.run(None, [line])
+    def reset_glossary(self):
+        """ Clear all abbreviations from the glossary. """
+        self.glossary.clear()
+
+    def load_glossary(self, dictionary : dict[str, str]):
+        """Merge `dictionary` into the glossary. Abbreviations already in the glossary keep their existing definition."""
+        if dictionary:
+            self.glossary = {**dictionary, **self.glossary}
 
     def extendMarkdown(self, md):
         """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
         if (self.config['glossary'][0]):
-            self.load_glossary(md, self.config['glossary'][0])
+            self.load_glossary(self.config['glossary'][0])
         self.abbrs.update(self.glossary)
         md.registerExtension(self)
         md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7)
@@ -144,9 +129,8 @@ class AbbrBlockprocessor(BlockProcessor):
 
     RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
 
-    def __init__(self, parser: BlockParser, abbrs: dict, config: dict[str, Any]):
+    def __init__(self, parser: BlockParser, abbrs: dict, config: dict):
         self.abbrs: dict = abbrs
-        self.use_last_abbr: bool = parseBoolValue(config["use_last_abbr"])
         super().__init__(parser)
 
     def test(self, parent: etree.Element, block: str) -> bool:
@@ -163,8 +147,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         if m:
             abbr = m.group('abbr').strip()
             title = m.group('title').strip()
-            if self.use_last_abbr or abbr not in self.abbrs:
-                self.abbrs[abbr] = title
+            self.abbrs[abbr] = title
             if block[m.end():].strip():
                 # Add any content after match back to blocks as separate block
                 blocks.insert(0, block[m.end():].lstrip('\n'))

diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py
@@ -21,12 +21,11 @@
 """
 
 import os
-from tempfile import mkstemp
-import atexit
 from markdown.test_tools import TestCase
 from markdown import Markdown
 from markdown.extensions.abbr import AbbrExtension
 
+
 class TestAbbr(TestCase):
     maxDiff = None
 
@@ -121,55 +120,51 @@ def test_abbr_multiple_nested(self):
             )
         )
 
-    def test_abbr_override(self):
-        self.assertMarkdownRenders(
-            self.dedent(
-                """
-                ABBR
+    def test_abbr_glossary(self):
 
-                *[ABBR]: Ignored
-                *[ABBR]: The override
-                """
-            ),
-            self.dedent(
-                """
-                <p><abbr title="The override">ABBR</abbr></p>
-                """
-            ),
-            extensions=[AbbrExtension(use_last_abbr=True)]
-        )
+        glossary = {
+            "ABBR" : "Abbreviation",
+            "abbr" : "Abbreviation",
+            "HTML" : "Hyper Text Markup Language",
+            "W3C" : "World Wide Web Consortium"
+        }
 
-    def test_abbr_override_Ignored(self):
         self.assertMarkdownRenders(
             self.dedent(
                 """
                 ABBR
+                abbr
 
-                *[ABBR]: Abbreviation
-                *[ABBR]: Override Ignored
+                HTML
+                W3C
                 """
             ),
             self.dedent(
                 """
-                <p><abbr title="Abbreviation">ABBR</abbr></p>
+                <p><abbr title="Abbreviation">ABBR</abbr>
+                <abbr title="Abbreviation">abbr</abbr></p>
+                <p><abbr title="Hyper Text Markup Language">HTML</abbr>
+                <abbr title="World Wide Web Consortium">W3C</abbr></p>
                 """
             ),
-            extensions=[AbbrExtension(use_last_abbr=False)]
+            extensions=[AbbrExtension(glossary=glossary)]
         )
 
-    def test_abbr_glossary(self):
-        # Create temporary glossary file and set a trigger to guarantee it is deleted even if this test fails
-        temp_file, glossary_file = mkstemp(suffix='.md')
-        os.close(temp_file)
-        cleanup_trigger = atexit.register(os.remove, glossary_file)
-
-        with open(glossary_file, 'w', encoding='utf-8') as temp_file:
-            temp_file.writelines([
-                "*[ABBR]: Abbreviation\n",
-                "*[abbr]: Abbreviation\n",
-                "*[HTML]: Hyper Text Markup Language\n",
-                "*[W3C]:  World Wide Web Consortium\n"
-            ])
+    def test_abbr_glossary_2(self):
+
+        glossary = {
+            "ABBR" : "Abbreviation",
+            "abbr" : "Abbreviation",
+            "HTML" : "Hyper Text Markup Language",
+            "W3C" : "World Wide Web Consortium"
+        }
+
+        glossary_2 = {
+            "ABBR" : "New Abbreviation"
+        }
+
+        abbr_ext = AbbrExtension(glossary=glossary)
+        abbr_ext.load_glossary(glossary_2)
 
         self.assertMarkdownRenders(
             self.dedent(
@@ -181,15 +176,12 @@ def test_abbr_glossary(self):
             ),
             self.dedent(
                 """
-                <p><abbr title="Abbreviation">ABBR</abbr> <abbr title="Abbreviation">abbr</abbr></p>
+                <p><abbr title="New Abbreviation">ABBR</abbr> <abbr title="Abbreviation">abbr</abbr></p>
                 <p><abbr title="Hyper Text Markup Language">HTML</abbr> <abbr title="World Wide Web Consortium">W3C</abbr></p>
                 """
             ),
-            extensions=[AbbrExtension(glossary=glossary_file)]
+            extensions=[abbr_ext]
         )
-        # cleanup
-        os.remove(glossary_file)
-        atexit.unregister(cleanup_trigger)
 
     def test_abbr_nested(self):
         self.assertMarkdownRenders(
@@ -443,15 +435,18 @@ def test_abbr_superset_vs_subset(self):
             self.dedent(
                 """
                 abbr, SS, and abbr-SS should have different definitions.
-                
+
                 *[abbr]: Abbreviation Definition
                 *[abbr-SS]: Abbreviation Superset Definition
                 *[SS]: Superset Definition
                 """
             ),
             self.dedent(
                 """
-                <p><abbr title="Abbreviation Definition">abbr</abbr>, <abbr title="Superset Definition">SS</abbr>, and <abbr title="Abbreviation Superset Definition">abbr-SS</abbr> should have different definitions.</p>
+                <p><abbr title="Abbreviation Definition">abbr</abbr>, """
+                + """<abbr title="Superset Definition">SS</abbr>, """
+                + """and <abbr title="Abbreviation Superset Definition">abbr-SS</abbr> """
+                + """should have different definitions.</p>
                 """
             )
         )
@@ -487,3 +482,7 @@ def test_abbr_reset(self):
         self.assertEqual(ext.abbrs, {})
         md.convert('*[foo]: Foo Definition')
         self.assertEqual(ext.abbrs, {'foo': 'Foo Definition'})
+
+import unittest
+if __name__ == '__main__':
+    unittest.main()