From 9fed7aa8cb81f3b2cab4494ab587c3a8c41c189d Mon Sep 17 00:00:00 2001 From: Banyanael Date: Wed, 22 May 2024 21:51:45 -0400 Subject: [PATCH] Added a configuration option to `abbr` and handling for empty abbreviations. - Configuration option `use_last_abbr` allows using the first instance of a term rather than the last. This is useful for sites that auto-append a glossary to pages. - If an abbreviation does not have a definition it will skip the term instead of writing abbr tags with an empty title. --- docs/changelog.md | 14 +++++++- docs/extensions/abbreviations.md | 5 +++ markdown/extensions/abbr.py | 28 +++++++++++----- tests/test_syntax/extensions/test_abbr.py | 41 ++++++++++++++++++++++- 4 files changed, 78 insertions(+), 10 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 4710fcbc..cc99e39a 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -25,7 +25,19 @@ A call to `Markdown.reset()` now clears all previously defined abbreviations. Abbreviations are now sorted by length before executing `AbbrTreeprocessor` to ensure that multi-word abbreviations are implemented even if an abbreviation -exists for one of those component words. +exists for one of those component words. (#1465) + +Added an optional `use_last_abbr` configuration option to the abbreviations +extension. Default (`True`) maintains the existing behavior. `False` causes +the extension to only use the first instance of an abbreviation, rather than +the last. + +Empty abbreviations are now skipped by `AbbrTreeprocessor`. This avoids applying +abbr tags to text without a title value. This also allows disabling an +abbreviation, which may be useful for documents that use two terms with +identical abbreviations. 
+ + ### Fixed diff --git a/docs/extensions/abbreviations.md b/docs/extensions/abbreviations.md index 8a35e526..b067a108 100644 --- a/docs/extensions/abbreviations.md +++ b/docs/extensions/abbreviations.md @@ -1,5 +1,10 @@ title: Abbreviations Extension +ABBR + +*[ABBR]: Abbreviation +*[ABBR]: Override Ignored + Abbreviations ============= diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index cd7719ff..5c6f3595 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -23,6 +23,7 @@ from __future__ import annotations from . import Extension +from ..util import parseBoolValue from ..blockprocessors import BlockProcessor from ..inlinepatterns import InlineProcessor from ..treeprocessors import Treeprocessor @@ -41,6 +42,14 @@ class AbbrExtension(Extension): def __init__(self, **kwargs): """ Initiate Extension and set up configs. """ + self.config = { + 'use_last_abbr': [ + True, + 'True to use the last instance of an abbreviation, rather than the first instance.' + 'Default: `True`.' + ], + } + """ Default configuration options. """ super().__init__(**kwargs) self.abbrs = {} @@ -52,7 +61,7 @@ def extendMarkdown(self, md): """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. 
""" md.registerExtension(self) md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7) - md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs), 'abbr', 16) + md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs, self.getConfigs()), 'abbr', 16) class AbbrTreeprocessor(Treeprocessor): @@ -69,11 +78,12 @@ def iter_element(self, el: etree.Element, parent: etree.Element | None = None) - self.iter_element(child, el) if text := el.text: for m in reversed(list(self.RE.finditer(text))): - abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]}) - abbr.text = AtomicString(m.group(0)) - abbr.tail = text[m.end():] - el.insert(0, abbr) - text = text[:m.start()] + if self.abbrs[m.group(0)]: + abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]}) + abbr.text = AtomicString(m.group(0)) + abbr.tail = text[m.end():] + el.insert(0, abbr) + text = text[:m.start()] el.text = text if parent and el.tail: tail = el.tail @@ -104,8 +114,9 @@ class AbbrBlockprocessor(BlockProcessor): RE = re.compile(r'^[*]\[(?P[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P.*)$', re.MULTILINE) - def __init__(self, parser: BlockParser, abbrs: dict): + def __init__(self, parser: BlockParser, abbrs: dict, config: dict[str, Any]): self.abbrs: dict = abbrs + self.use_last_abbr: bool = parseBoolValue(config["use_last_abbr"]) super().__init__(parser) def test(self, parent: etree.Element, block: str) -> bool: @@ -122,7 +133,8 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool: if m: abbr = m.group('abbr').strip() title = m.group('title').strip() - self.abbrs[abbr] = title + if self.use_last_abbr or abbr not in self.abbrs: + self.abbrs[abbr] = title if block[m.end():].strip(): # Add any content after match back to blocks as separate block blocks.insert(0, block[m.end():].lstrip('\n')) diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py index b7c9fbbd..b50f427b 100644 --- 
a/tests/test_syntax/extensions/test_abbr.py +++ b/tests/test_syntax/extensions/test_abbr.py @@ -133,7 +133,26 @@ def test_abbr_override(self): """ <p><abbr title="The override">ABBR</abbr></p> """ - ) + ), + extensions=[AbbrExtension(use_last_abbr=True)] + ) + + def test_abbr_override_Ignored(self): + self.assertMarkdownRenders( + self.dedent( + """ + ABBR + + *[ABBR]: Abbreviation + *[ABBR]: Override Ignored + """ + ), + self.dedent( + """ + <p><abbr title="Abbreviation">ABBR</abbr></p> + """ + ), + extensions=[AbbrExtension(use_last_abbr=False)] ) def test_abbr_nested(self): @@ -401,6 +420,26 @@ def test_abbr_superset_vs_subset(self): ) ) + def test_abbr_empty(self): + self.assertMarkdownRenders( + self.dedent( + """ + *[abbr]: Abbreviation Definition + + abbr + + *[abbr]: + + Testing document text. + """ + ), + self.dedent( + """ + <p>abbr</p>\n<p>Testing document text.</p> + """ + ) + ) + def test_abbr_reset(self): ext = AbbrExtension() md = Markdown(extensions=[ext])