Skip to content

Commit

Permalink
Don't let section start token break unclosed parent node
Browse files Browse the repository at this point in the history
A section start token shouldn't pop nodes off the parser stack when it is
inside an unclosed node such as a template.
  • Loading branch information
xxyzz committed Sep 26, 2024
1 parent 66545a6 commit 8428306
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 26 deletions.
59 changes: 33 additions & 26 deletions src/wikitextprocessor/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,27 +259,27 @@ class NodeKind(enum.Flag):
)

# Node types that have arguments separated by the vertical bar (|)
HAVE_ARGS_KINDS: tuple[NodeKind, ...] = (
NodeKind.LINK,
NodeKind.TEMPLATE,
NodeKind.TEMPLATE_ARG,
NodeKind.PARSER_FN,
NodeKind.URL,
HAVE_ARGS_KIND_FLAGS = (
NodeKind.LINK
| NodeKind.TEMPLATE
| NodeKind.TEMPLATE_ARG
| NodeKind.PARSER_FN
| NodeKind.URL
)


# Node kinds that generate an error if they have not been properly closed.
MUST_CLOSE_KINDS: tuple[NodeKind, ...] = (
NodeKind.ITALIC,
NodeKind.BOLD,
NodeKind.PRE,
NodeKind.HTML,
NodeKind.LINK,
NodeKind.TEMPLATE,
NodeKind.TEMPLATE_ARG,
NodeKind.PARSER_FN,
NodeKind.URL,
NodeKind.TABLE,
MUST_CLOSE_KIND_FLAGS = (
NodeKind.ITALIC
| NodeKind.BOLD
| NodeKind.PRE
| NodeKind.HTML
| NodeKind.LINK
| NodeKind.TEMPLATE
| NodeKind.TEMPLATE_ARG
| NodeKind.PARSER_FN
| NodeKind.URL
| NodeKind.TABLE
)

# regex for finding html-tags so that we can replace single-quotes
Expand Down Expand Up @@ -714,7 +714,7 @@ def _parser_pop(ctx: "Wtp", warn_unclosed: bool) -> None:
node = ctx.parser_stack[-1]

# Warn about unclosed syntaxes.
if warn_unclosed and node.kind in MUST_CLOSE_KINDS:
if warn_unclosed and node.kind in MUST_CLOSE_KIND_FLAGS:
if node.kind == NodeKind.HTML:
ctx.debug(
"HTML tag <{}> not properly closed".format(node.sarg),
Expand Down Expand Up @@ -769,7 +769,7 @@ def _parser_pop(ctx: "Wtp", warn_unclosed: bool) -> None:

# If the node has arguments, move remaining children to be the last
# argument
if node.kind in HAVE_ARGS_KINDS:
if node.kind in HAVE_ARGS_KIND_FLAGS:
node.largs.append(node.children)
node.children = []

Expand Down Expand Up @@ -805,11 +805,11 @@ def _parser_pop(ctx: "Wtp", warn_unclosed: bool) -> None:
ctx.parser_stack.pop()


def _parser_have(ctx: "Wtp", kind: NodeKind) -> bool:
def _parser_have(ctx: "Wtp", kind_flags: NodeKind) -> bool:
"""Returns True if any node on the stack has a kind contained in the given kind flags."""
assert isinstance(kind, NodeKind)
assert isinstance(kind_flags, NodeKind)
for node in ctx.parser_stack:
if node.kind == kind:
if node.kind in kind_flags:
return True
return False

Expand Down Expand Up @@ -1000,6 +1000,8 @@ def subtitle_start_fn(ctx, token) -> None:
break
if node.kind == NodeKind.HTML and node.sarg not in ("span",):
break
if node.kind in MUST_CLOSE_KIND_FLAGS & ~NodeKind.HTML:
break
_parser_pop(ctx, True)

# Push the subtitle node. Subtitle start nodes are guaranteed to have
Expand Down Expand Up @@ -1552,14 +1554,19 @@ def vbar_fn(ctx: "Wtp", token: str) -> None:
templates, template argument references, links, etc, and it can
also separate table row cells."""
node = ctx.parser_stack[-1]
if node.kind in HAVE_ARGS_KINDS and node.kind is not NodeKind.URL:
if node.kind in HAVE_ARGS_KIND_FLAGS and node.kind is not NodeKind.URL:
# [http://url.com these do not use vbars, only one initial space]
_parser_merge_str_children(ctx)
node.largs.append(node.children)
node.children = []
return

table_cell_fn(ctx, token)
elif _parser_have(ctx, NodeKind.TABLE):
table_cell_fn(ctx, token)
elif _parser_have(ctx, HAVE_ARGS_KIND_FLAGS):
_parser_pop(ctx, True)
vbar_fn(ctx, token)
else:
text_fn(ctx, token)


def double_vbar_fn(ctx: "Wtp", token: str) -> None:
Expand All @@ -1570,7 +1577,7 @@ def double_vbar_fn(ctx: "Wtp", token: str) -> None:
contain header cells this actually generates a new header cell in
MediaWiki, so we'll do the same."""
node = ctx.parser_stack[-1]
if node.kind in HAVE_ARGS_KINDS:
if node.kind in HAVE_ARGS_KIND_FLAGS:
vbar_fn(ctx, "|")
vbar_fn(ctx, "|")
return
Expand Down
20 changes: 20 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2977,6 +2977,26 @@ def test_clean_node_lists(self):
print(cleaned)
self.assertEqual(cleaned, wikitext)

def test_section_in_template(self):
# https://fr.wiktionary.org/wiki/Conjugaison:français/bayer
# GH issue #310
self.ctx.start_page("")
root = self.ctx.parse("""{{Onglets conjugaison
| contenu1 = 1
| contenu2 = 2
=== section ===
text
| contenu3 = 3
}}""")
self.assertEqual(len(root.children), 1)
template = root.children[0]
second_arg_list = template.template_parameters.get("contenu2")
self.assertEqual(len(second_arg_list), 2)
heading_node = second_arg_list[1]
self.assertIsInstance(heading_node, WikiNode)
self.assertEqual(heading_node.kind, NodeKind.LEVEL3)
self.assertEqual(template.template_parameters.get("contenu3"), "3")


# XXX implement <nowiki/> marking for links, templates
# - https://en.wikipedia.org/wiki/Help:Wikitext#Nowiki
Expand Down

0 comments on commit 8428306

Please sign in to comment.