@@ -163,6 +163,36 @@ def handle_endtag(self, tag):
163
163
# If we only have one newline before block element, add another
164
164
if not item .endswith ('\n \n ' ) and item .endswith ('\n ' ):
165
165
self .cleandoc .append ('\n ' )
166
+
167
+ # Flatten the HTML structure of "markdown" blocks such that when they
168
+ # get parsed, content will be parsed similarly inside the blocks as it
169
+ # does outside the block. Having real HTML elements in the tree before
170
+ # the adjacent content is processed can cause unpredictable
171
+ # issues for extensions.
172
+ current = element
173
+ last = []
174
+ while current is not None :
175
+ for child in list (current ):
176
+ current .remove (child )
177
+ text = current .text if current .text is not None else ''
178
+ tail = child .tail if child .tail is not None else ''
179
+ child .tail = None
180
+ state = child .attrib .get ('markdown' , 'off' )
181
+
182
+ # If the tail is just a new line, omit it.
183
+ if tail == '\n ' :
184
+ tail = ''
185
+
186
+ # Process the block nested under the span appropriately
187
+ if state in ('span' , 'block' ):
188
+ current .text = text + '\n ' + self .md .htmlStash .store (child ) + '\n ' + tail
189
+ last .append (child )
190
+ else :
191
+ child .attrib .pop ('markdown' )
192
+ [c .attrib .pop ('markdown' , None ) for c in child .iter ()]
193
+ current .text = text + '\n ' + self .md .htmlStash .store (child ) + '\n ' + tail
194
+ current = last .pop (0 ) if last else None
195
+
166
196
self .cleandoc .append (self .md .htmlStash .store (element ))
167
197
self .cleandoc .append ('\n \n ' )
168
198
self .state = []
@@ -270,53 +300,53 @@ def parse_element_content(self, element: etree.Element) -> None:
270
300
md_attr = element .attrib .pop ('markdown' , 'off' )
271
301
272
302
if md_attr == 'block' :
273
- # Parse content as block level
274
- # The order in which the different parts are parsed (text, children, tails) is important here as the
275
- # order of elements needs to be preserved. We can't be inserting items at a later point in the current
276
- # iteration as we don't want to do raw processing on elements created from parsing Markdown text (for
277
- # example). Therefore, the order of operations is children, tails, text.
278
-
279
- # Recursively parse existing children from raw HTML
280
- for child in list (element ):
281
- self .parse_element_content (child )
282
-
283
- # Parse Markdown text in tail of children. Do this separate to avoid raw HTML parsing.
284
- # Save the position of each item to be inserted later in reverse.
285
- tails = []
286
- for pos , child in enumerate (element ):
287
- if child .tail :
288
- block = child .tail .rstrip ('\n ' )
289
- child .tail = ''
290
- # Use a dummy placeholder element.
291
- dummy = etree .Element ('div' )
292
- self .parser .parseBlocks (dummy , block .split ('\n \n ' ))
293
- children = list (dummy )
294
- children .reverse ()
295
- tails .append ((pos + 1 , children ))
296
-
297
- # Insert the elements created from the tails in reverse.
298
- tails .reverse ()
299
- for pos , tail in tails :
300
- for item in tail :
301
- element .insert (pos , item )
302
-
303
- # Parse Markdown text content. Do this last to avoid raw HTML parsing.
303
+ # Parse the block element's content as Markdown
304
304
if element .text :
305
305
block = element .text .rstrip ('\n ' )
306
306
element .text = ''
307
- # Use a dummy placeholder element as the content needs to get inserted before existing children.
308
- dummy = etree .Element ('div' )
309
- self .parser .parseBlocks (dummy , block .split ('\n \n ' ))
310
- children = list (dummy )
311
- children .reverse ()
312
- for child in children :
313
- element .insert (0 , child )
307
+ self .parser .parseBlocks (element , block .split ('\n \n ' ))
314
308
315
309
elif md_attr == 'span' :
316
- # Span level parsing will be handled by inline processors.
317
- # Walk children here to remove any `markdown` attributes.
318
- for child in list (element ):
319
- self .parse_element_content (child )
310
+ # Span elements need to be recursively processed for block elements and raw HTML
311
+ # as their content is not normally accessed by block processors, so expand stashed
312
+ # HTML under the span. Span content itself will not be parsed here, but will await
313
+ # the inline parser.
314
+ block = element .text
315
+ element .text = ''
316
+ child = None
317
+ start = 0
318
+
319
+ # Search the content for HTML placeholders and process the elements
320
+ for m in util .HTML_PLACEHOLDER_RE .finditer (block ):
321
+ index = int (m .group (1 ))
322
+ el = self .parser .md .htmlStash .rawHtmlBlocks [index ]
323
+ end = m .start ()
324
+
325
+ # Cut out the placeholder and insert the processed element back in.
326
+ if isinstance (el , etree .Element ):
327
+ if child is None :
328
+ element .text = block [start :end ]
329
+ else :
330
+ child .tail = (child .tail if child .tail is not None else '' ) + block [start :end ]
331
+ element .append (el )
332
+ self .parse_element_content (el )
333
+ child = el
334
+ self .parser .md .htmlStash .rawHtmlBlocks .pop (index )
335
+ self .parser .md .htmlStash .rawHtmlBlocks .insert (index , '' )
336
+
337
+ else :
338
+ # Not an element object, so insert content back into the element
339
+ if child is None :
340
+ element .text = block [start :end ]
341
+ else :
342
+ child .tail = (child .tail if child .tail is not None else '' )+ block [start :end ]
343
+ start = end
344
+
345
+ # Insert anything left after last element
346
+ if child is None :
347
+ element .text = block [start :]
348
+ else :
349
+ child .tail = (child .tail if child .tail is not None else '' ) + block [start :]
320
350
321
351
else :
322
352
# Disable inline parsing for everything else
@@ -336,8 +366,8 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
336
366
if isinstance (element , etree .Element ):
337
367
# We have a matched element. Process it.
338
368
blocks .pop (0 )
339
- self .parse_element_content (element )
340
369
parent .append (element )
370
+ self .parse_element_content (element )
341
371
# Cleanup stash. Replace element with empty string to avoid confusing postprocessor.
342
372
self .parser .md .htmlStash .rawHtmlBlocks .pop (index )
343
373
self .parser .md .htmlStash .rawHtmlBlocks .insert (index , '' )
0 commit comments