Skip to content

Commit c03724b

Browse files
committed
Refactor abbr Extension
A new `AbbrTreeprocessor` has been introduced, which replaces the now deprecated `AbbrInlineProcessor`. Abbreviation processing now happens after Attribute Lists, avoiding a conflict between the two extensions. Fixes #1460.
1 parent 993b57b commit c03724b

File tree

3 files changed

+124
-16
lines changed

3 files changed

+124
-16
lines changed

docs/changelog.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010

1111
## [unreleased]
1212

13+
### Changed
14+
15+
#### Refactor `abbr` Extension
16+
17+
A new `AbbrTreeprocessor` has been introduced, which replaces the now deprecated
18+
`AbbrInlineProcessor`. Abbreviation processing now happens after Attribute Lists,
19+
avoiding a conflict between the two extensions (#1460).
20+
1321
### Fixed
1422

1523
* Fixed links to source code on GitHub from the documentation (#1453).

markdown/extensions/abbr.py

Lines changed: 60 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
from . import Extension
2626
from ..blockprocessors import BlockProcessor
2727
from ..inlinepatterns import InlineProcessor
28-
from ..util import AtomicString
28+
from ..treeprocessors import Treeprocessor
29+
from ..util import AtomicString, deprecated
2930
import re
3031
import xml.etree.ElementTree as etree
3132

@@ -34,32 +35,79 @@ class AbbrExtension(Extension):
3435
""" Abbreviation Extension for Python-Markdown. """
3536

3637
def extendMarkdown(self, md):
37-
""" Insert `AbbrPreprocessor` before `ReferencePreprocessor`. """
38-
md.parser.blockprocessors.register(AbbrPreprocessor(md.parser), 'abbr', 16)
39-
40-
41-
class AbbrPreprocessor(BlockProcessor):
42-
""" Abbreviation Preprocessor - parse text for abbr references. """
38+
""" Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
39+
treeprocessor = AbbrTreeprocessor(md)
40+
md.treeprocessors.register(treeprocessor, 'abbr', 7)
41+
md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, treeprocessor.abbrs), 'abbr', 16)
42+
43+
44+
class AbbrTreeprocessor(Treeprocessor):
45+
""" Replace abbr text with `<abbr>` elements. """
46+
47+
def __init__(self, md: Markdown | None=None):
48+
self.abbrs = {}
49+
self.RE = None
50+
super().__init__(md)
51+
52+
def iter_element(self, el, parent=None):
53+
''' Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags. '''
54+
for child in reversed(el):
55+
self.iter_element(child, el)
56+
if text := el.text:
57+
for m in reversed(list(self.RE.finditer(text))):
58+
abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]})
59+
abbr.text = AtomicString(m.group(0))
60+
abbr.tail = text[m.end():]
61+
el.insert(0, abbr)
62+
text = text[:m.start()]
63+
el.text = text
64+
if parent and el.tail:
65+
tail = el.tail
66+
index = list(parent).index(el) + 1
67+
for m in reversed(list(self.RE.finditer(tail))):
68+
abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]})
69+
abbr.text = AtomicString(m.group(0))
70+
abbr.tail = tail[m.end():]
71+
parent.insert(index, abbr)
72+
tail = tail[:m.start()]
73+
el.tail = tail
74+
75+
def run(self, root: etree.Element) -> etree.Element | None:
76+
''' Step through tree to find known abbreviations. '''
77+
if not self.abbrs:
78+
# No abbrs defined. Skip running processor.
79+
return
80+
# Build and compile regex
81+
self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(key) for key in self.abbrs.keys()) })\\b")
82+
# Step through tree and modify on matches
83+
self.iter_element(root)
84+
return
85+
86+
87+
class AbbrBlockprocessor(BlockProcessor):
88+
""" Abbreviation Blockprocessor - parse text for abbr references. """
4389

4490
RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
4591

92+
def __init__(self, parser, abbrs):
93+
self.abbrs = abbrs
94+
super().__init__(parser)
95+
4696
def test(self, parent: etree.Element, block: str) -> bool:
4797
return True
4898

4999
def run(self, parent: etree.Element, blocks: list[str]) -> bool:
50100
"""
51101
Find and remove all Abbreviation references from the text.
52-
Each reference is set as a new `AbbrPattern` in the markdown instance.
102+
Each reference is added to the abbrs collection.
53103
54104
"""
55105
block = blocks.pop(0)
56106
m = self.RE.search(block)
57107
if m:
58108
abbr = m.group('abbr').strip()
59109
title = m.group('title').strip()
60-
self.parser.md.inlinePatterns.register(
61-
AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2
62-
)
110+
self.abbrs[abbr] = title
63111
if block[m.end():].strip():
64112
# Add any content after match back to blocks as separate block
65113
blocks.insert(0, block[m.end():].lstrip('\n'))
@@ -71,11 +119,8 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
71119
blocks.insert(0, block)
72120
return False
73121

74-
def _generate_pattern(self, text: str) -> str:
75-
""" Given a string, returns a regex pattern to match that string. """
76-
return f"(?P<abbr>\\b{ re.escape(text) }\\b)"
77-
78122

123+
@deprecated("This class will be removed in the future; use `AbbrTreeprocessor` instead.")
79124
class AbbrInlineProcessor(InlineProcessor):
80125
""" Abbreviation inline pattern. """
81126

tests/test_syntax/extensions/test_abbr.py

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def test_abbr_lower(self):
6060
)
6161
)
6262

63-
def test_abbr_multiple(self):
63+
def test_abbr_multiple_in_text(self):
6464
self.assertMarkdownRenders(
6565
self.dedent(
6666
"""
@@ -79,6 +79,44 @@ def test_abbr_multiple(self):
7979
)
8080
)
8181

82+
def test_abbr_multiple_in_tail(self):
83+
self.assertMarkdownRenders(
84+
self.dedent(
85+
"""
86+
*The* HTML specification
87+
is maintained by the W3C.
88+
89+
*[HTML]: Hyper Text Markup Language
90+
*[W3C]: World Wide Web Consortium
91+
"""
92+
),
93+
self.dedent(
94+
"""
95+
<p><em>The</em> <abbr title="Hyper Text Markup Language">HTML</abbr> specification
96+
is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.</p>
97+
"""
98+
)
99+
)
100+
101+
def test_abbr_multiple_nested(self):
102+
self.assertMarkdownRenders(
103+
self.dedent(
104+
"""
105+
The *HTML* specification
106+
is maintained by the *W3C*.
107+
108+
*[HTML]: Hyper Text Markup Language
109+
*[W3C]: World Wide Web Consortium
110+
"""
111+
),
112+
self.dedent(
113+
"""
114+
<p>The <em><abbr title="Hyper Text Markup Language">HTML</abbr></em> specification
115+
is maintained by the <em><abbr title="World Wide Web Consortium">W3C</abbr></em>.</p>
116+
"""
117+
)
118+
)
119+
82120
def test_abbr_override(self):
83121
self.assertMarkdownRenders(
84122
self.dedent(
@@ -325,3 +363,20 @@ def test_abbr_bracket(self):
325363
"""
326364
)
327365
)
366+
367+
def test_abbr_with_attr_list(self):
368+
self.assertMarkdownRenders(
369+
self.dedent(
370+
"""
371+
*[abbr]: Abbreviation Definition
372+
373+
![Image with abbr in title](abbr.png){title="Image with abbr in title"}
374+
"""
375+
),
376+
self.dedent(
377+
"""
378+
<p><img alt="Image with abbr in title" src="abbr.png" title="Image with abbr in title" /></p>
379+
"""
380+
),
381+
extensions = ['abbr', 'attr_list']
382+
)

0 commit comments

Comments
 (0)