Skip to content

Commit

Permalink
Allow macros to be parsed inside of tags, providing attributes. #2974
Browse files Browse the repository at this point in the history
  • Loading branch information
tabatkins committed Jan 17, 2025
1 parent 2e4d126 commit 13906ad
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 51 deletions.
87 changes: 57 additions & 30 deletions bikeshed/h/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,38 +543,63 @@ def parseTagName(s: Stream, start: int) -> Result[str]:


def parseAttributeList(s: Stream, start: int) -> Result[dict[str, str]]:
i = start
i = parseWhitespace(s, start).i
attr = None
attrs: dict[str, str] = {}
while True:
ws, i = parseWhitespace(s, i).vi
if ws is None:
if attr and s[i] not in ("/", ">"):
m.die(
f"No whitespace after the end of an attribute. (Saw {attr[0]}={s[i-1]}{attr[1]}{s[i-1]}{s[i:i+5]}...)",
lineNum=s.loc(i),
)
if s.eof(i):
break
startAttr = i

# Macros are allowed in attr list context, *if* they expand to an attribute list.

attr, i = parseAttribute(s, i).vi
if attr is None:
if s[i] == "[":
macroAttrs, i = parseMacroToAttrs(s, i).vi
if macroAttrs is None:
break
macroName = s[startAttr:i]
for k, v in macroAttrs.items():
if k in attrs:
m.die(
f"Attribute '{k}', coming from the {macroName} macro, already exists on the element.",
lineNum=s.loc(startAttr),
)
continue
attrs[k] = v
elif preds.isAttrNameChar(s[i]):
attr, i = parseAttribute(s, i).vi
if attr is None:
break
attrName, attrValue = attr
if attrName in attrs:
m.die(f"Attribute '{attrName}' appears twice in the tag.", lineNum=s.loc(startAttr))
return Result.fail(start)
if "[" in attrValue:
attrValue = replaceMacrosInText(
text=attrValue,
macros=s.config.macros,
s=s,
start=i,
context=f"attribute {attrName}='...'",
)
attrs[attrName] = attrValue
else:
break
attrName, attrValue = attr
if attrName in attrs:
m.die(f"Attribute '{attrName}' appears twice in the tag.", lineNum=s.loc(startAttr))
return Result.fail(start)
if "[" in attrValue:
attrValue = replaceMacrosInText(
text=attrValue,
macros=s.config.macros,
s=s,
start=i,
context=f"attribute {attrName}='...'",

ws, i = parseWhitespace(s, i).vi
if ws is None:
# We're definitely done, just see if it should be an error or not.
if s.eof(i):
# At the end of a macro, most likely
# (or the doc ended with an unclosed tag, so I'll catch that later)
break
if s[i] in ("/", ">"):
# End of a tag
break
m.die(
f"Expected whitespace between attributes. ({s[startAttr:i+5]}...)",
lineNum=s.loc(i),
)
attrs[attrName] = attrValue
break
return Result(attrs, i)


Expand Down Expand Up @@ -2157,13 +2182,14 @@ def parseMacro(
return Result(s[start:i], i)
else:
t.assert_never(context)
macroDisplay = s[start:i]
macroText = s.config.macros[macroName]
streamContext = f"macro {s[start:i]}"
streamContext = f"macro {macroDisplay}"
try:
newStream = s.subStream(context=streamContext, chars=macroText)
except RecursionError:
m.die(
f"Macro replacement for {s[start:i]} recursed more than {s.depth} levels deep; probably your text macros are accidentally recursive.",
f"Macro replacement for {macroDisplay} recursed more than {s.depth} levels deep; probably your text macros are accidentally recursive.",
lineNum=s.loc(start),
)
if context is MacroContext.Nodes:
Expand All @@ -2181,13 +2207,14 @@ def parseMacro(
if context is MacroContext.Nodes:
return Result(list(nodesFromStream(newStream, 0)), i)
elif context is MacroContext.AttrList:
res = parseAttributeList(newStream, 0)
if not newStream.eof(res.i):
attrs, attrsEnd = parseAttributeList(newStream, 0).vi
_, wsEnd = parseWhitespace(newStream, attrsEnd).vi
if not newStream.eof(wsEnd):
m.die(
f"While parsing {streamContext} as an attribute list (in {s.loc(start)}), found content that's not an attribute list.",
lineNum=newStream.loc(res.i),
f"While parsing {macroDisplay} (on {s.loc(start)}) as an attribute list, found non-attribute content: {newStream[attrsEnd:attrsEnd+10]}...",
lineNum=newStream.loc(attrsEnd),
)
return res
return Result(attrs, i)
elif context is MacroContext.Text:
macroText = replaceMacrosInText(macroText, newStream.config.macros, newStream, 0, streamContext)
return Result(macroText, i)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
LINE 432:1: Void element (<img>) with a spurious trailing /.
LINE 476:1: Void element (<img>) with a spurious trailing /.
LINE 522:1: Void element (<img>) with a spurious trailing /.
LINE 553:61: No whitespace after the end of an attribute. (Saw alt="description of the parameters of a cylinder layer"src="...)
LINE 553:61: Expected whitespace between attributes. (alt="description of the parameters of a cylinder layer"src="...)
LINE 553:61: While trying to parse a <img> start tag, ran into some unparseable stuff.
LINE 578:1: Void element (<img>) with a spurious trailing /.
LINE 614:1: Void element (<img>) with a spurious trailing /.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
LINE 632: Saw an end tag </span>, but there's no open element corresponding to it.
Open tags: <div> at 623:2, <pre> at 627:3
LINE 1025:51: No whitespace after the end of an attribute. (Saw width="200"heigh...)
LINE 1025:51: Expected whitespace between attributes. (width="200"heigh...)
LINE 1025:51: While trying to parse a <object> start tag, ran into some unparseable stuff (height="100").
LINE 1025: Saw an end tag </object>, but there's no open element corresponding to it.
Open tags: <details> at 988:2, <div> at 1012:3
LINE 1035:51: No whitespace after the end of an attribute. (Saw width="200"heigh...)
LINE 1035:51: Expected whitespace between attributes. (width="200"heigh...)
LINE 1035:51: While trying to parse a <object> start tag, ran into some unparseable stuff (height="100").
LINE 1035: Saw an end tag </object>, but there's no open element corresponding to it.
Open tags: <details> at 988:2, <div> at 1012:3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
LINE 1664:51: No whitespace after the end of an attribute. (Saw width="200"heigh...)
LINE 1664:51: Expected whitespace between attributes. (width="200"heigh...)
LINE 1664:51: While trying to parse a <object> start tag, ran into some unparseable stuff (height="100").
LINE 1664: Saw an end tag </object>, but there's no open element corresponding to it.
Open tags: <details> at 1627:2, <div> at 1651:3
LINE 1674:51: No whitespace after the end of an attribute. (Saw width="200"heigh...)
LINE 1674:51: Expected whitespace between attributes. (width="200"heigh...)
LINE 1674:51: While trying to parse a <object> start tag, ran into some unparseable stuff (height="100").
LINE 1674: Saw an end tag </object>, but there's no open element corresponding to it.
Open tags: <details> at 1627:2, <div> at 1651:3
Expand Down
24 changes: 24 additions & 0 deletions tests/macros001.bs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ Text Macro: HALF2 OO]
Text Macro: MARKUP <em>markup is allowed</em>
Text Macro: MARKDOWN *foo* `bar`
Text Macro: SPECIALCHARS "'<>
Text Macro: ATTRLIST foo="foo" bar=bar baz
Text Macro: ATTRLISTWITHJUNK foo="foo" <em>bar</em>
Text Macro: NESTEDATTRLIST [ATTRLIST] qux="qux"
</pre>

Section {#section}
Expand Down Expand Up @@ -79,6 +82,27 @@ Section {#section}
<tr>
<td>Special chars don't screw anything up.
<td title="[SPECIALCHARS]">[SPECIALCHARS]
<tr>
<td>Macros are parsed in attr-list position
<td [ATTRLIST]>[ATTRLIST] (look at all the attributes)
<tr>
<td>When used in attr-list position, the whole macro has to be attrs
<td [ATTRLISTWITHJUNK]>[ATTRLISTWITHJUNK]
<tr>
<td>attr-list macros can be nested, as usual
<td [NESTEDATTRLIST]>[NESTEDATTRLIST]
<tr>
<td>attr-list macros are still usable elsewhere, of course
<td title="[ATTRLIST]">[ATTRLIST]
<tr>
<td>attributes repeated in an attr-list are an error
<td foo="original" [ATTRLIST]>foo="original" [ATTRLIST]
<tr>
<td>and that's still true if the literal attribute comes later
<td [ATTRLIST] foo="original">[ATTRLIST] foo="original"
<tr>
<td>and also if both the base and the repeat come from macros
<td [ATTRLIST] [ATTRLIST]>[ATTRLIST] [ATTRLIST]

</table>

Expand Down
14 changes: 10 additions & 4 deletions tests/macros001.console.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
LINE 17: Text Macro names must be all-caps and alphanumeric. Got 'Foobar'
LINE 60:32: Found unmatched text macro [FOOBAR] in attribute title='...'. Correct the macro, or escape it by replacing the opening [ with &#91;.
LINE 60:41: Found unmatched text macro [FOOBAR]. Correct the macro, or escape it by replacing the opening [ with &#91;
LINE 69:22: Macro replacement in attribute title='...' recursed more than 10 levels deep; probably your text macros are accidentally recursive.
LINE 63:32: Found unmatched text macro [FOOBAR] in attribute title='...'. Correct the macro, or escape it by replacing the opening [ with &#91;.
LINE 63:41: Found unmatched text macro [FOOBAR]. Correct the macro, or escape it by replacing the opening [ with &#91;
LINE 72:22: Macro replacement in attribute title='...' recursed more than 10 levels deep; probably your text macros are accidentally recursive.
LINE 1:1 of macro [RECUR]: Macro replacement for [RECUR] recursed more than 10 levels deep; probably your text macros are accidentally recursive.
LINE 72:24: Macro replacement in attribute title='...' recursed more than 10 levels deep; probably your text macros are accidentally recursive.
LINE 75:24: Macro replacement in attribute title='...' recursed more than 10 levels deep; probably your text macros are accidentally recursive.
LINE 1:1 of macro [MUTUAL1]: Macro replacement for [MUTUAL2] recursed more than 10 levels deep; probably your text macros are accidentally recursive.
LINE 1:11 of macro [ATTRLISTWITHJUNK]: While parsing [ATTRLISTWITHJUNK] (on 90:7) as an attribute list, found non-attribute content: <em>bar</e...
LINE 99:22: Attribute 'foo', coming from the [ATTRLIST] macro, already exists on the element.
LINE 102:18: Attribute 'foo' appears twice in the tag.
LINE 105:18: Attribute 'foo', coming from the [ATTRLIST] macro, already exists on the element.
LINE 105:18: Attribute 'bar', coming from the [ATTRLIST] macro, already exists on the element.
LINE 105:18: Attribute 'baz', coming from the [ATTRLIST] macro, already exists on the element.
21 changes: 21 additions & 0 deletions tests/macros001.html
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,27 @@ <h3 class="heading settled" id="section"><span class="content">Section</span></h
<tr>
<td>Special chars don’t screw anything up.
<td title="&quot;&apos;<>">"'&lt;>
<tr>
<td>Macros are parsed in attr-list position
<td bar="bar" baz foo="foo">foo="foo" bar=bar baz (look at all the attributes)
<tr>
<td>When used in attr-list position, the whole macro has to be attrs
<td foo="foo">foo="foo" <em>bar</em>
<tr>
<td>attr-list macros can be nested, as usual
<td bar="bar" baz foo="foo" qux="qux">foo="foo" bar=bar baz qux="qux"
<tr>
<td>attr-list macros are still usable elsewhere, of course
<td title="foo=&quot;foo&quot; bar=bar baz">foo="foo" bar=bar baz
<tr>
<td>attributes repeated in an attr-list are an error
<td bar="bar" baz foo="original">foo="original" foo="foo" bar=bar baz
<tr>
<td>and that’s still true if the literal attribute comes later
<td bar="bar" baz foo="foo">foo="foo" bar=bar baz foo="original"
<tr>
<td>and also if both the base and the repeat come from macros
<td bar="bar" baz foo="foo">foo="foo" bar=bar baz foo="foo" bar=bar baz
</table>
</main>
<h2 class="no-num no-ref heading settled" id="references"><span class="content">References</span></h2>
Expand Down
21 changes: 10 additions & 11 deletions tests/markdown012.console.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,33 @@ LINE 61:16: Tried to parse a markdown link's destination/title, but ran into som
LINE 62:16: Tried to parse a markdown link's destination/title, but ran into some unexpected characters
LINE 63:91: Tried to parse a markdown link's destination/title, but ran into some unexpected characters
LINE 64:16: Tried to parse a markdown link's destination/title, but ran into some unexpected characters
LINE 64:14: While trying to parse a <url> start tag, ran into some unparseable stuff (\).
LINE 69:70: Tried to parse a markdown link's destination/title, but ran into some unexpected characters
LINE 71:15: Tried to parse a markdown link's destination/title, but ran into some unexpected characters
LINE 72:15: Tried to parse a markdown link's destination/title, but ran into some unexpected characters
LINE 73:14: The <>-wrapped destination of a markdown link can't contain a newline
LINE 75:17: Tried to parse a markdown link's destination/title, but ran into some unexpected characters
LINE 75: Saw an end tag </url>, but there's no open element corresponding to it.
Open tags: <foo> at 73:10
LINE 75: Saw an end tag </url>, but there were unclosed elements remaining before the nearest matching start tag (on line 64).
Open tags: <url> at 64:10, <foo> at 73:10
LINE 76:16: Missing required whitespace between markdown link's destination and title
LINE 76: Saw an end tag </url>, but there's no open element corresponding to it.
Open tags: <foo> at 73:10
LINE 76: Saw an end tag </url>, but there were unclosed elements remaining before the nearest matching start tag (on line 64).
Open tags: <url> at 64:10, <foo> at 73:10
LINE 78: The destination/title of a markdown link can't contain a blank line.
LINE 81: The destination/title of a markdown link can't contain a blank line.
LINE 83:14: The <>-wrapped destination of a markdown link can't contain further unescaped < characters
LINE 83:14: While trying to parse a <url> start tag, ran into some unparseable stuff (<).
LINE 84:14: The <>-wrapped destination of a markdown link can't contain further unescaped < characters
LINE 84:14: While trying to parse a <url> start tag, ran into some unparseable stuff (<).
LINE 106: Saw an end tag </em>, but there were unclosed elements remaining before the nearest matching start tag (on line 106).
Open tags: <foo> at 73:10, <em> at 106:3, [...](...) at 106:24
Open tags: <url> at 64:10, <foo> at 73:10, <em> at 106:3, [...](...) at 106:24
LINE 106: Saw an end tag </em>, but there were unclosed elements remaining before the nearest matching start tag (on line 106).
Open tags: <foo> at 73:10, <em> at 106:3, <a> at 106:24
Open tags: <url> at 64:10, <foo> at 73:10, <em> at 106:3, <a> at 106:24
LINE 107:19: [...](...) shorthand (opened on 107:3) was closed, but there were still open elements inside of it.
Open tags: <foo> at 73:10, <em> at 106:3, [...](...) at 107:3, <em> at 107:8
Open tags: <url> at 64:10, <foo> at 73:10, <em> at 106:3, [...](...) at 107:3, <em> at 107:8
LINE 107: Saw an end tag </a>, but there were unclosed elements remaining before the nearest matching start tag (on line 107).
Open tags: <foo> at 73:10, <em> at 106:3, <a> at 107:3, <em> at 107:8
Open tags: <url> at 64:10, <foo> at 73:10, <em> at 106:3, <a> at 107:3, <em> at 107:8
LINE 108: Saw an end tag </a>, but there were unclosed elements remaining before the nearest matching start tag (on line 108).
Open tags: <foo> at 73:10, <em> at 106:3, <a> at 107:3, <em> at 107:8, [...](...) at 108:3
Open tags: <url> at 64:10, <foo> at 73:10, <em> at 106:3, <a> at 107:3, <em> at 107:8, [...](...) at 108:3
LINE 108: Saw an end tag </a>, but there were unclosed elements remaining before the nearest matching start tag (on line 107).
Open tags: <foo> at 73:10, <em> at 106:3, <a> at 107:3, <em> at 107:8
Open tags: <url> at 64:10, <foo> at 73:10, <em> at 106:3, <a> at 107:3, <em> at 107:8
LINE 25: Couldn't find target anchor fragment:
[link](#fragment)
2 changes: 1 addition & 1 deletion tests/markdown012.html
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ <h2 class="heading settled" data-level="3" id="c"><span class="secno">3. </span>
<li data-md>
<p>
[link](
<urlU0005C>\&lt;>) can escape &lt; and > inside &lt;>-wrapped links</urlU0005C>
<url U0005C>\&lt;>) can escape &lt; and > inside &lt;>-wrapped links</url>
</p>
</ul>
<h2 class="heading settled" data-level="4" id="d"><span class="secno">4. </span><span class="content">Errors</span><a class="self-link" href="#d"></a></h2>
Expand Down

0 comments on commit 13906ad

Please sign in to comment.