Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
tabatkins committed Dec 6, 2023
1 parent 3f788e6 commit 4d3780d
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 65 deletions.
3 changes: 3 additions & 0 deletions bikeshed/Spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ def initMetadata(self, inputContent: InputSource.InputContent) -> None:
def earlyParse(self, inputContent: InputSource.InputContent) -> list[l.Line]:
    """
    Run the initial document parse over the input's content,
    re-serialize the resulting nodes (with ILCC markers) to text,
    and store that text on `inputContent.rawLines` as newline-terminated lines.

    As a debugging aid, dumps the repr of every raw line and of every entry
    of `inputContent.lines` to stdout.

    Returns `inputContent.lines` as it reads after `rawLines` was replaced.
    """
    parseConfig = h.ParseConfig.fromSpec(self)
    parsedNodes = h.initialDocumentParse(inputContent.content, parseConfig)
    serialized = h.strFromNodes(parsedNodes, withIlcc=True)
    inputContent.rawLines = [piece + "\n" for piece in serialized.split("\n")]
    # Debug dumps: raw lines first, then the derived lines.
    print("**** raw lines ****")
    print("\n".join(map(repr, inputContent.rawLines)))
    print("\n".join(map(repr, inputContent.lines)))
    return inputContent.lines

def checkValidity(self) -> bool:
Expand Down
1 change: 1 addition & 0 deletions bikeshed/h/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
)
from .parser import (
ParseConfig,
debugNodes,
initialDocumentParse,
parseLines,
parseText,
Expand Down
1 change: 1 addition & 0 deletions bikeshed/h/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .main import (
debugNodes,
initialDocumentParse,
linesFromNodes,
nodesFromHtml,
Expand Down
12 changes: 3 additions & 9 deletions bikeshed/h/parser/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,6 @@
from .stream import Failure, ParseConfig, ParseFailure, Result, Stream


def test() -> None:
    """
    Ad-hoc smoke test: load `test.txt` (a JSON object whose values are strings)
    from the current directory, join the values with newlines,
    and run the result through nodesFromHtml() to completion.

    The file is opened explicitly as UTF-8 so the result doesn't depend on
    the platform's locale encoding.
    """
    import json

    with open(os.path.abspath("test.txt"), encoding="utf-8") as fh:
        vals = "\n".join(json.load(fh).values())
    # Exhaust the generator; only parse errors/side effects matter here.
    list(nodesFromHtml(vals, ParseConfig()))


def nodesFromHtml(data: str, config: ParseConfig, startLine: int = 1) -> t.Generator[ParserNode, None, None]:
    """
    Wrap `data` in a Stream (whose line numbering starts at `startLine`)
    and yield every node nodesFromStream() produces from its beginning.
    """
    yield from nodesFromStream(Stream(data, startLine=startLine, config=config), 0)
Expand Down Expand Up @@ -78,7 +70,9 @@ def linesFromNodes(nodes: t.Iterable[ParserNode]) -> list[str]:

def debugNodes(nodes: t.Iterable[ParserNode]) -> list[ParserNode]:
    """
    Debugging pass-through: materialize `nodes` into a list,
    print each node's repr followed by the repr of its serialized text
    (with ILCC markers), and return the list so the call can be
    dropped into the middle of a pipeline.

    Each node is printed exactly once; the older bulk dump of all reprs
    is not repeated ahead of the per-node output.
    """
    materialized = list(nodes)
    for node in materialized:
        print(repr(node))  # noqa: T201
        print(repr(strFromNodes([node], withIlcc=True)))  # noqa: T201
    return materialized


Expand Down
15 changes: 9 additions & 6 deletions bikeshed/h/parser/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
class ParserNode(metaclass=ABCMeta):
    # Common base for everything the parser emits.
    # `line` / `endLine`: the parser fills these from Stream.line() at the
    # node's start and end offsets.
    line: int
    endLine: int
    # Label of the stream the node was parsed from; generateNodes() copies
    # it from `s.config.context` when that is set (e.g. a "macro ..." label
    # for macro substreams). None when no context was set.
    context: str | None = None


@dataclass
Expand All @@ -31,16 +32,18 @@ def __str__(self) -> str:
return self.text

def curlifyApostrophes(self, lastNode: ParserNode | None) -> RawText:
    """
    Replace straight apostrophes that are acting as apostrophes
    (rather than quotes) with the curly "’", in place.

    * A leading `'` followed by a word character is curlified when the
      preceding node is an end tag / raw element / self-closed tag,
      or is raw text that ends in a word character
      (so `<b>foo</b>'s` and split-up `foo` + `'s` both work).
    * Any `'` between two word characters inside this text is curlified.

    Empty text (in this node or in the preceding raw text) is handled
    without indexing, so it can't raise IndexError.

    Returns self, so calls can be chained.
    """
    if re.match(r"'\w", self.text):
        afterElement = isinstance(lastNode, (EndTag, RawElement, SelfClosedTag))
        # `\w\Z` == "last character is a word char"; never matches empty text.
        afterWordChar = isinstance(lastNode, RawText) and re.search(r"\w\Z", lastNode.text) is not None
        if afterElement or afterWordChar:
            self.text = "’" + self.text[1:]
    if "'" in self.text:
        self.text = re.sub(r"(\w)'(\w)", r"\1’\2", self.text)
    return self

def mismatchedLineCount(self) -> bool:
    """
    Return True when the number of newlines in `text`
    differs from the line span recorded on the node (`endLine - line`).
    """
    return self.text.count("\n") != (self.endLine - self.line)


@dataclass
class SafeText(Text):
Expand Down
135 changes: 96 additions & 39 deletions bikeshed/h/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,50 +22,82 @@


def nodesFromStream(s: Stream, start: int) -> t.Generator[ParserNode, None, None]:
    """
    Yield the nodes generateNodes() produces for `s` from `start`,
    coalescing runs of adjacent RawText nodes into a single RawText.

    A RawText node is *held* rather than yielded immediately, so that
    following RawText nodes can be appended to it. The held node is flushed
    (yielded) when:

    * a non-text node arrives,
    * a RawText arrives whose newline count doesn't match its line span
      (it is passed through on its own, unmerged),
    * a RawText arrives from a different context than the held one, or
    * the stream ends.

    Every RawText has its apostrophes curlified, using the previous node
    (held or yielded) as the left-hand context.
    """
    lastNode: ParserNode | None = None
    heldLast = False  # True iff lastNode is a RawText that hasn't been yielded yet
    for node in generateNodes(s, start):
        if isinstance(node, RawText):
            node.curlifyApostrophes(lastNode)
            contextChanged = heldLast and lastNode.context != node.context
            if not node.mismatchedLineCount() and not contextChanged:
                if heldLast and isinstance(lastNode, RawText):
                    lastNode.text += node.text
                    # Extend the span by the lines this node covers, so the
                    # merged node keeps `endLine - line == newline count`.
                    lastNode.endLine += node.endLine - node.line
                else:
                    lastNode = node
                    heldLast = True
                continue
        # Anything that can't be merged: flush held text, then emit as-is.
        if heldLast:
            yield lastNode
        yield node
        lastNode = node
        heldLast = False
    if heldLast:
        yield lastNode

def makeText(s: Stream, start: int, end: int, lastNode: ParserNode | None) -> RawText:
    """
    Build a RawText covering `s[start:end]`, with line numbers taken
    from the stream at both offsets, and curlify its apostrophes
    using `lastNode` as the preceding-node context.
    """
    textNode = RawText(line=s.line(start), endLine=s.line(end), text=s[start:end])
    return textNode.curlifyApostrophes(lastNode)
def generateNodes(s: Stream, start: int) -> t.Generator[ParserNode, None, None]:
    """
    Repeatedly call parseAnything() from `start` to the end of the stream,
    yielding each node it returns (flattening list results).

    If the stream's config carries a context, it is stamped onto every
    node before the node is yielded. Generation stops early if
    parseAnything() yields no value.
    """
    pos = start
    streamEnd = len(s)
    context = s.config.context
    while pos < streamEnd:
        parsed, pos = parseAnything(s, pos).vi
        if parsed is None:
            return
        batch = parsed if isinstance(parsed, list) else [parsed]
        for node in batch:
            if context is not None:
                node.context = context
            yield node


# Characters at which parseAnything() will attempt parseNode();
# every other character is swept into a RawText run without trying.
# Keep this in sync with what parseNode() can actually start on.
# (Same set of characters in both assignments; the later one is in effect.)
POSSIBLE_NODE_START_CHARS = "&<`~'[\\—-|"
POSSIBLE_NODE_START_CHARS = "&<`'~[\\—-|"


def parseAnything(s: Stream, start: int) -> Result[ParserNode | list[ParserNode]]:
    """
    Either returns ParserNode(s) a la parseNode(),
    or returns a RawText node up to the next POSSIBLE_NODE_START_CHAR
    (possibly starting with such a char).
    (It does not parse the next node,
    but if the possible start char it ends at
    does not, in fact, start a node,
    it can return multiple RawTexts in a row.)

    Fails (without consuming anything) only when `start` is at EOF.
    """
    if s.eof(start):
        return Result.fail(start)
    if s[start] in POSSIBLE_NODE_START_CHARS:
        res = parseNode(s, start)
        if res.err is None:
            return res
    # Not a node: always consume at least the char at `start`,
    # then run up to (not including) the next possible node start.
    i = start + 1
    end = len(s)
    # Bounds check first, so the stream is never indexed at/past its end.
    while i < end and s[i] not in POSSIBLE_NODE_START_CHARS:
        i += 1
    node = RawText(
        line=s.line(start),
        endLine=s.line(i),
        text=s[start:i],
    )
    return Result(node, i)


def parseNode(
Expand Down Expand Up @@ -896,8 +928,9 @@ def parseMacro(s: Stream, start: int) -> Result[ParserNode | list[ParserNode]]:
i,
)
macroText = s.config.macros[macroName]
context = f"macro {match[0]}"
try:
newStream = s.subStream(context=f"macro {match[0]}", chars=macroText)
newStream = s.subStream(context=context, chars=macroText)
except RecursionError:
m.die(
f"Macro replacement for {match[0]} recursed more than {s.depth} levels deep; probably your text macros are accidentally recursive.",
Expand Down Expand Up @@ -1029,3 +1062,27 @@ def parseMetadataBlock(s: Stream, start: int) -> Result[RawElement]:
endLine=s.line(i),
)
return Result(el, i)


########################
# Markdown
########################

"""
def parseMarkdownLink(s: Stream, start: int) -> Result[ParserNode]:
if s[start] != "[":
return Result.fail(start)
if s[start - 1] == "[":
return Result.fail(start)
i = start + 1
nodes, i = parseUntil(s, i, markdownLinkStopper).vi
if nodes is None:
return Result.fail(start)
return Result.fail(start)
def markdownLinkStopper(s: Stream, start: int) -> bool:
return True
"""
22 changes: 11 additions & 11 deletions tests/github/w3c/webauthn/index.console.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,51 +22,51 @@ WARNING: Image doesn't exist, so I couldn't determine its width and height: 'ima
WARNING: Image doesn't exist, so I couldn't determine its width and height: 'images/fido-attestation-structures.svg'
LINK ERROR: Obsolete biblio ref: [rfc8152] is replaced by [rfc9053]. Either update the reference, or use [rfc8152 obsolete] if this is an intentionally-obsolete reference.
LINK ERROR: Obsolete biblio ref: [rfc8152] is replaced by [RFC8152]. Either update the reference, or use [rfc8152 obsolete] if this is an intentionally-obsolete reference.
LINE ~1621: No 'dfn' refs found for 'aborted flag' with for='['AbortSignal']'.
LINE ~2622: No 'dfn' refs found for 'aborted flag' with for='['AbortSignal']'.
[=AbortSignal/aborted flag=]
LINE ~1647: No 'dfn' refs found for 'aborted flag' with for='['AbortSignal']'.
LINE ~2648: No 'dfn' refs found for 'aborted flag' with for='['AbortSignal']'.
[=AbortSignal/aborted flag=]
LINE ~2052: No 'dfn' refs found for 'aborted flag' with for='['AbortSignal']'.
LINE ~3053: No 'dfn' refs found for 'aborted flag' with for='['AbortSignal']'.
[=AbortSignal/aborted flag=]
LINE ~2082: No 'dfn' refs found for 'aborted flag' with for='['AbortSignal']'.
LINE ~3083: No 'dfn' refs found for 'aborted flag' with for='['AbortSignal']'.
[=AbortSignal/aborted flag=]
LINE ~2871: No 'dfn' refs found for 'aborted flag' with for='['AbortSignal']'.
LINE ~3872: No 'dfn' refs found for 'aborted flag' with for='['AbortSignal']'.
[=AbortSignal/aborted flag=]
LINE ~2882: Multiple possible 'window' dfn refs.
LINE ~3883: Multiple possible 'window' dfn refs.
Arbitrarily chose https://drafts.csswg.org/css-color-3/#window
To auto-select one of the following refs, insert one of these lines into a <pre class=link-defaults> block:
spec:css-color-3; type:dfn; text:window
spec:fetch; type:dfn; text:window
spec:screen-capture; type:dfn; text:window
[=Window=]
LINE ~3791: Ambiguous for-less link for 'type', please see <https://speced.github.io/bikeshed/#ambi-for> for instructions:
LINE ~4792: Ambiguous for-less link for 'type', please see <https://speced.github.io/bikeshed/#ambi-for> for instructions:
Local references:
spec:webauthn-2; type:dfn; for:public key credential source; text:type
for-less references:
spec:webcryptoapi; type:dfn; for:/; text:type
[=type=]
LINE ~4015: Ambiguous for-less link for 'grapheme cluster', please see <https://speced.github.io/bikeshed/#ambi-for> for instructions:
LINE ~5016: Ambiguous for-less link for 'grapheme cluster', please see <https://speced.github.io/bikeshed/#ambi-for> for instructions:
Local references:
spec:utr29; type:dfn; for:/; text:grapheme cluster
for-less references:
spec:i18n-glossary; type:dfn; for:/; text:grapheme cluster
spec:i18n-glossary; type:dfn; for:/; text:grapheme cluster
[=grapheme cluster=]
LINE ~4017: Ambiguous for-less link for 'grapheme cluster', please see <https://speced.github.io/bikeshed/#ambi-for> for instructions:
LINE ~5018: Ambiguous for-less link for 'grapheme cluster', please see <https://speced.github.io/bikeshed/#ambi-for> for instructions:
Local references:
spec:utr29; type:dfn; for:/; text:grapheme cluster
for-less references:
spec:i18n-glossary; type:dfn; for:/; text:grapheme cluster
spec:i18n-glossary; type:dfn; for:/; text:grapheme cluster
[=grapheme cluster=]
LINE ~4024: Ambiguous for-less link for 'grapheme cluster', please see <https://speced.github.io/bikeshed/#ambi-for> for instructions:
LINE ~5025: Ambiguous for-less link for 'grapheme cluster', please see <https://speced.github.io/bikeshed/#ambi-for> for instructions:
Local references:
spec:utr29; type:dfn; for:/; text:grapheme cluster
for-less references:
spec:i18n-glossary; type:dfn; for:/; text:grapheme cluster
spec:i18n-glossary; type:dfn; for:/; text:grapheme cluster
[=grapheme cluster=]
LINE ~4026: Ambiguous for-less link for 'grapheme cluster', please see <https://speced.github.io/bikeshed/#ambi-for> for instructions:
LINE ~5027: Ambiguous for-less link for 'grapheme cluster', please see <https://speced.github.io/bikeshed/#ambi-for> for instructions:
Local references:
spec:utr29; type:dfn; for:/; text:grapheme cluster
for-less references:
Expand Down

0 comments on commit 4d3780d

Please sign in to comment.