25
25
from . import Extension
26
26
from ..blockprocessors import BlockProcessor
27
27
from ..inlinepatterns import InlineProcessor
28
- from ..util import AtomicString
28
+ from ..treeprocessors import Treeprocessor
29
+ from ..util import AtomicString , deprecated
29
30
import re
30
31
import xml .etree .ElementTree as etree
31
32
@@ -34,32 +35,79 @@ class AbbrExtension(Extension):
34
35
""" Abbreviation Extension for Python-Markdown. """
35
36
36
37
def extendMarkdown (self , md ):
37
- """ Insert `AbbrPreprocessor` before `ReferencePreprocessor`. """
38
- md .parser .blockprocessors .register (AbbrPreprocessor (md .parser ), 'abbr' , 16 )
39
-
40
-
41
- class AbbrPreprocessor (BlockProcessor ):
42
- """ Abbreviation Preprocessor - parse text for abbr references. """
38
+ """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
39
+ treeprocessor = AbbrTreeprocessor (md )
40
+ md .treeprocessors .register (treeprocessor , 'abbr' , 7 )
41
+ md .parser .blockprocessors .register (AbbrBlockprocessor (md .parser , treeprocessor .abbrs ), 'abbr' , 16 )
42
+
43
+
44
class AbbrTreeprocessor(Treeprocessor):
    """ Replace abbr text with `<abbr>` elements. """

    def __init__(self, md: Markdown | None = None):
        # Maps each abbreviation to its title. `extendMarkdown` hands this same
        # dict object to `AbbrBlockprocessor`, which fills it while parsing, so
        # it must only ever be mutated in place, never rebound.
        self.abbrs = {}
        # Compiled by `run` once the set of abbreviations is known.
        self.RE = None
        super().__init__(md)

    def _create_abbr(self, text, tail):
        ''' Return an `abbr` element for the abbreviation `text`, followed by `tail`. '''
        abbr = etree.Element('abbr', {'title': self.abbrs[text]})
        # `AtomicString` marks the text as not to be processed any further.
        abbr.text = AtomicString(text)
        abbr.tail = tail
        return abbr

    def iter_element(self, el, parent=None):
        '''
        Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags.

        `el` is the element to process and `parent` is its parent element, or
        `None` for the root. The tree is modified in place; nothing is returned.
        '''
        # Children are visited first, and in reverse, so the `abbr` elements
        # inserted below are never walked over themselves.
        for child in reversed(el):
            self.iter_element(child, el)
        # Text that is already an `AtomicString` has been marked as not to be
        # processed further, so it is left untouched.
        if (text := el.text) and not isinstance(text, AtomicString):
            # Walk the matches right to left: each new element takes the text
            # after its match as its tail and is inserted at the front, which
            # leaves the elements in document order.
            for m in reversed(list(self.RE.finditer(text))):
                el.insert(0, self._create_abbr(m.group(0), text[m.end():]))
                text = text[:m.start()]
            el.text = text
        # Compare against `None` explicitly: the truth value of an `Element`
        # reflects its number of children and testing it is deprecated.
        if parent is not None and el.tail:
            tail = el.tail
            # New elements go immediately after `el` within `parent`.
            index = list(parent).index(el) + 1
            for m in reversed(list(self.RE.finditer(tail))):
                parent.insert(index, self._create_abbr(m.group(0), tail[m.end():]))
                tail = tail[:m.start()]
            el.tail = tail

    def run(self, root: etree.Element) -> etree.Element | None:
        '''
        Step through tree to find known abbreviations.

        The tree under `root` is modified in place and `None` is always returned.
        '''
        if not self.abbrs:
            # No abbrs defined. Skip running processor.
            return
        # Build and compile a single regex matching any known abbreviation as a whole word.
        alternatives = '|'.join(re.escape(key) for key in self.abbrs)
        self.RE = re.compile(rf"\b(?:{alternatives})\b")
        # Step through tree and modify on matches
        self.iter_element(root)
85
+
86
+
87
+ class AbbrBlockprocessor (BlockProcessor ):
88
+ """ Abbreviation Blockprocessor - parse text for abbr references. """
43
89
44
90
RE = re .compile (r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$' , re .MULTILINE )
45
91
92
+ def __init__ (self , parser , abbrs ):
93
+ self .abbrs = abbrs
94
+ super ().__init__ (parser )
95
+
46
96
def test (self , parent : etree .Element , block : str ) -> bool :
47
97
return True
48
98
49
99
def run (self , parent : etree .Element , blocks : list [str ]) -> bool :
50
100
"""
51
101
Find and remove all Abbreviation references from the text.
52
- Each reference is set as a new `AbbrPattern` in the markdown instance .
102
+ Each reference is added to the abbrs collection .
53
103
54
104
"""
55
105
block = blocks .pop (0 )
56
106
m = self .RE .search (block )
57
107
if m :
58
108
abbr = m .group ('abbr' ).strip ()
59
109
title = m .group ('title' ).strip ()
60
- self .parser .md .inlinePatterns .register (
61
- AbbrInlineProcessor (self ._generate_pattern (abbr ), title ), 'abbr-%s' % abbr , 2
62
- )
110
+ self .abbrs [abbr ] = title
63
111
if block [m .end ():].strip ():
64
112
# Add any content after match back to blocks as separate block
65
113
blocks .insert (0 , block [m .end ():].lstrip ('\n ' ))
@@ -71,11 +119,8 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
71
119
blocks .insert (0 , block )
72
120
return False
73
121
74
- def _generate_pattern (self , text : str ) -> str :
75
- """ Given a string, returns a regex pattern to match that string. """
76
- return f"(?P<abbr>\\ b{ re .escape (text ) } \\ b)"
77
-
78
122
123
+ @deprecated ("This class will be removed in the future; use `AbbrTreeprocessor` instead." )
79
124
class AbbrInlineProcessor (InlineProcessor ):
80
125
""" Abbreviation inline pattern. """
81
126
0 commit comments