From 2c97732432014df61eb676cbd8fdd25424f99d6c Mon Sep 17 00:00:00 2001 From: Tab Atkins-Bittner Date: Wed, 15 Nov 2023 12:39:22 -0800 Subject: [PATCH] Allow for nodes to *decrement* the line count as well, if they print to more lines than they occupied in the source. --- bikeshed/InputSource.py | 20 ++++++++++++++------ bikeshed/constants.py | 1 + bikeshed/h/parser.py | 12 ++++++++---- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/bikeshed/InputSource.py b/bikeshed/InputSource.py index 29b67941aa..346a7fe99c 100644 --- a/bikeshed/InputSource.py +++ b/bikeshed/InputSource.py @@ -28,19 +28,27 @@ def lines(self) -> list[line.Line]: offset = 0 for i, text in enumerate(self.rawLines, 1): lineNo = i + offset - # The early HTML parser runs before Markdown, - # and in some cases removes linebreaks that were present - # in the source. When properly invoked, it inserts - # a special PUA char for each of these omitted linebreaks, - # so I can remove them here and properly increment the - # line number. + # The early HTML parser can change how nodes print, + # so they occupy a different number of lines than they + # had in the source. Markdown parser needs to know + # the correct source lines, tho, so when this happens, + # the nodes will insert special PUA chars to indicate that. + # I can remove them here and properly adjust the line number. # Current known causes of this: # * line-ending -- turned into em dashes # * multi-line start tags + # * multi-line markdown code spans; + # - the text loses its newlines + # - the original text goes into an attribute on the start + # tag now ilcc = constants.incrementLineCountChar + dlcc = constants.decrementLineCountChar if ilcc in text: offset += text.count(ilcc) text = text.replace(ilcc, "") + if dlcc in text: + offset -= text.count(dlcc) + text = text.replace(dlcc, "") ret.append(line.Line(lineNo, text)) diff --git a/bikeshed/constants.py b/bikeshed/constants.py index 08e5a37f73..98fae99b96 100644 --- a/bikeshed/constants.py +++ b/bikeshed/constants.py @@ -11,4 +11,5 @@ macroStartChar = "\uebbb" macroEndChar = "\uebbc" incrementLineCountChar = "\uebbd" +decrementLineCountChar = "\uebbf" bsComment = "" diff --git a/bikeshed/h/parser.py b/bikeshed/h/parser.py index 7f55eff9cb..c1f6982682 100644 --- a/bikeshed/h/parser.py +++ b/bikeshed/h/parser.py @@ -277,6 +277,7 @@ def initialDocumentParse(text: str, config: ParseConfig, startLine: int = 1) -> def strFromNodes(nodes: t.Iterable[ParserNode], withIlcc: bool = False) -> str: strs = [] ilcc = constants.incrementLineCountChar + dlcc = constants.decrementLineCountChar for node in nodes: if isinstance(node, Comment): # Serialize comments as a standardized, recognizable sequence @@ -287,10 +288,13 @@ def strFromNodes(nodes: t.Iterable[ParserNode], withIlcc: bool = False) -> str: continue s = str(node) if withIlcc: - numLines = s.count("\n") - diffLineNo = node.endLine - node.line - if diffLineNo > numLines: - s += ilcc * (diffLineNo - numLines) + outputExtraLines = s.count("\n") + sourceExtraLines = node.endLine - node.line + diff = sourceExtraLines - outputExtraLines + if diff > 0: + s += ilcc * diff + elif diff < 0: + s += dlcc * -diff strs.append(s) return "".join(strs)