diff --git a/docs/changelog.md b/docs/changelog.md index 4c308899..082ca6d5 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -23,6 +23,21 @@ better reflects what it is. `AbbrPreprocessor` has been deprecated. A call to `Markdown.reset()` now clears all previously defined abbreviations. +Abbreviations are now sorted by length before executing `AbbrTreeprocessor` +to ensure that multi-word abbreviations are implemented even if an abbreviation +exists for one of those component words. (#1465) + +Abbreviations without a definition are now ignored. This avoids applying +abbr tags to text without a title value. + +Added an optional `glossary` configuration option to the abbreviations extension. +This provides a simple and efficient way to apply a dictionary of abbreviations +to every page. + +Abbreviations can now be disabled by setting their definition to `""` or `''`. +This can be useful when using the `glossary` option. + + ### Fixed * Fixed links to source code on GitHub from the documentation (#1453). diff --git a/docs/extensions/abbreviations.md b/docs/extensions/abbreviations.md index 8a35e526..2e8fd74a 100644 --- a/docs/extensions/abbreviations.md +++ b/docs/extensions/abbreviations.md @@ -46,10 +46,25 @@ Usage See [Extensions](index.md) for general extension usage. Use `abbr` as the name of the extension. -This extension does not accept any special configuration options. +The following options are provided to configure the output: + +* **`glossary`**: + A dictionary where the `key` is the abbreviation and the `value` is the definition. A trivial example: ```python markdown.markdown(some_text, extensions=['abbr']) ``` + +Disabling Abbreviations +----------------------- + +When using the `glossary` option, there may be times when you need to turn off +a specific abbreviation. To do this, set the abbreviation to `''` or `""`. + +```md +The HTML abbreviation is disabled on this page. + +*[HTML]: '' +``` \ No newline at end of file diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index 1f81cab3..693c3bba 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -41,15 +41,38 @@ class AbbrExtension(Extension): def __init__(self, **kwargs): """ Initiate Extension and set up configs. """ + self.config = { + 'glossary': [ + {}, + 'A dictionary where the `key` is the abbreviation and the `value` is the definition.' + "Default: `{}`" + ], + } + """ Default configuration options. """ super().__init__(**kwargs) self.abbrs = {} + self.glossary = {} def reset(self): """ Clear all previously defined abbreviations. """ self.abbrs.clear() + if (self.glossary): + self.abbrs.update(self.glossary) + + def reset_glossary(self): + """ Clear all abbreviations from the glossary. """ + self.glossary.clear() + + def load_glossary(self, dictionary: dict[str, str]): + """Adds `dictionary` to our glossary. Any abbreviations that already exist will be overwritten.""" + if dictionary: + self.glossary = {**dictionary, **self.glossary} def extendMarkdown(self, md): """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """ + if (self.config['glossary'][0]): + self.load_glossary(self.config['glossary'][0]) + self.abbrs.update(self.glossary) md.registerExtension(self) md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7) md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs), 'abbr', 16) @@ -69,13 +92,14 @@ def iter_element(self, el: etree.Element, parent: etree.Element | None = None) - self.iter_element(child, el) if text := el.text: for m in reversed(list(self.RE.finditer(text))): - abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]}) - abbr.text = AtomicString(m.group(0)) - abbr.tail = text[m.end():] - el.insert(0, abbr) - text = text[:m.start()] + if self.abbrs[m.group(0)]: + abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]}) + abbr.text = AtomicString(m.group(0)) + abbr.tail = text[m.end():] + el.insert(0, abbr) + text = text[:m.start()] el.text = text - if parent and el.tail: + if parent is not None and el.tail: tail = el.tail index = list(parent).index(el) + 1 for m in reversed(list(self.RE.finditer(tail))): @@ -92,7 +116,9 @@ def run(self, root: etree.Element) -> etree.Element | None: # No abbreviations defined. Skip running processor. return # Build and compile regex - self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(key) for key in self.abbrs) })\\b") + abbr_list = list(self.abbrs.keys()) + abbr_list.sort(key=len, reverse=True) + self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(key) for key in abbr_list) })\\b") # Step through tree and modify on matches self.iter_element(root) @@ -120,14 +146,18 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool: if m: abbr = m.group('abbr').strip() title = m.group('title').strip() - self.abbrs[abbr] = title - if block[m.end():].strip(): - # Add any content after match back to blocks as separate block - blocks.insert(0, block[m.end():].lstrip('\n')) - if block[:m.start()].strip(): - # Add any content before match back to blocks as separate block - blocks.insert(0, block[:m.start()].rstrip('\n')) - return True + if title and abbr: + if title == "''" or title == '""': + self.abbrs.pop(abbr) + else: + self.abbrs[abbr] = title + if block[m.end():].strip(): + # Add any content after match back to blocks as separate block + blocks.insert(0, block[m.end():].lstrip('\n')) + if block[:m.start()].strip(): + # Add any content before match back to blocks as separate block + blocks.insert(0, block[:m.start()].rstrip('\n')) + return True # No match. Restore block. blocks.insert(0, block) return False diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py index 012e5718..9d3ebb27 100644 --- a/tests/test_syntax/extensions/test_abbr.py +++ b/tests/test_syntax/extensions/test_abbr.py @@ -136,6 +136,69 @@ def test_abbr_override(self): ) ) + def test_abbr_glossary(self): + + glossary = { + "ABBR": "Abbreviation", + "abbr": "Abbreviation", + "HTML": "Hyper Text Markup Language", + "W3C": "World Wide Web Consortium" + } + + self.assertMarkdownRenders( + self.dedent( + """ + ABBR + abbr + + HTML + W3C + """ + ), + self.dedent( + """ +
ABBR + abbr
+HTML + W3C
+ """ + ), + extensions=[AbbrExtension(glossary=glossary)] + ) + + def test_abbr_glossary_2(self): + + glossary = { + "ABBR": "Abbreviation", + "abbr": "Abbreviation", + "HTML": "Hyper Text Markup Language", + "W3C": "World Wide Web Consortium" + } + + glossary_2 = { + "ABBR": "New Abbreviation" + } + + abbr_ext = AbbrExtension(glossary=glossary) + abbr_ext.load_glossary(glossary_2) + + self.assertMarkdownRenders( + self.dedent( + """ + ABBR abbr HTML W3C + """ + ), + self.dedent( + """ +ABBR """ + + """abbr """ + + """HTML """ + + """W3C
+ """ + ), + extensions=[abbr_ext] + ) + def test_abbr_nested(self): self.assertMarkdownRenders( self.dedent( @@ -383,6 +446,79 @@ def test_abbr_with_attr_list(self): extensions=['abbr', 'attr_list'] ) + def test_abbr_superset_vs_subset(self): + self.assertMarkdownRenders( + self.dedent( + """ + abbr, SS, and abbr-SS should have different definitions. + + *[abbr]: Abbreviation Definition + *[abbr-SS]: Abbreviation Superset Definition + *[SS]: Superset Definition + """ + ), + self.dedent( + """ +abbr, """ + + """SS, """ + + """and abbr-SS """ + + """should have different definitions.
+ """ + ) + ) + + def test_abbr_empty(self): + self.assertMarkdownRenders( + self.dedent( + """ + *[abbr]: + Abbreviation Definition + + abbr + + *[]: Empty + + *[ ]: Empty + + *[abbr]: + + *[ABBR]: + + Testing document text. + """ + ), + self.dedent( + """ +abbr
\n""" + + """*[]: Empty
\n""" + + """*[ ]: Empty
\n""" + + """*[abbr]:
\n""" + + """*[ABBR]:
\n""" + + """Testing document text.
+ """ + ) + ) + + def test_abbr_clear(self): + self.assertMarkdownRenders( + self.dedent( + """ + *[abbr]: Abbreviation Definition + *[ABBR]: Abbreviation Definition + + abbr ABBR + + *[abbr]: "" + *[ABBR]: '' + """ + ), + self.dedent( + """ +abbr ABBR
+ """ + ) + ) + def test_abbr_reset(self): ext = AbbrExtension() md = Markdown(extensions=[ext])