From d61ec5a78f9f220b2535a93d82851262a29d9f0b Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Wed, 18 Oct 2023 12:07:44 +1100 Subject: [PATCH] Update InRow state to current spec Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63242 --- .../org/jsoup/parser/HtmlTreeBuilder.java | 6 ++- .../jsoup/parser/HtmlTreeBuilderState.java | 39 +++++++++++------- src/test/resources/fuzztests/63242.html.gz | Bin 0 -> 161 bytes 3 files changed, 30 insertions(+), 15 deletions(-) create mode 100644 src/test/resources/fuzztests/63242.html.gz diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java index 3658bbba2b..e29051ba27 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java @@ -308,6 +308,7 @@ void error(HtmlTreeBuilderState state) { currentToken.tokenType(), currentToken, state)); } + /** Inserts an HTML element for the given tag. */ Element insert(final Token.StartTag startTag) { dedupeAttributes(startTag); @@ -714,7 +715,10 @@ private boolean inSpecificScope(String[] targetNames, String[] baseTypes, @Nulla // don't walk too far up the tree for (int pos = bottom; pos >= top; pos--) { - final String elName = stack.get(pos).normalName(); + Element el = stack.get(pos); + if (!el.tag().namespace().equals(NamespaceHtml)) continue; + + final String elName = el.normalName(); if (inSorted(elName, targetNames)) return true; if (inSorted(elName, baseTypes)) diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java index df2d81eb7a..21620b1a7d 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java @@ -1299,13 +1299,20 @@ boolean process(Token t, HtmlTreeBuilder tb) { Token.StartTag startTag = t.asStartTag(); String name = startTag.normalName(); - if (inSorted(name, InCellNames)) { + if 
(inSorted(name, InCellNames)) { // td, th tb.clearStackToTableRowContext(); tb.insert(startTag); tb.transition(InCell); tb.insertMarkerToFormattingElements(); - } else if (inSorted(name, InRowMissing)) { - return handleMissingTr(t, tb); + } else if (inSorted(name, InRowMissing)) { // "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr" + if (!tb.inTableScope("tr")) { + tb.error(this); + return false; + } + tb.clearStackToTableRowContext(); + tb.pop(); // tr + tb.transition(InTableBody); + return tb.process(t); } else { return anythingElse(t, tb); } @@ -1322,15 +1329,27 @@ boolean process(Token t, HtmlTreeBuilder tb) { tb.pop(); // tr tb.transition(InTableBody); } else if (name.equals("table")) { - return handleMissingTr(t, tb); - } else if (inSorted(name, InTableToBody)) { - if (!tb.inTableScope(name) || !tb.inTableScope("tr")) { + if (!tb.inTableScope("tr")) { + tb.error(this); + return false; + } + tb.clearStackToTableRowContext(); + tb.pop(); // tr + tb.transition(InTableBody); + return tb.process(t); + } else if (inSorted(name, InTableToBody)) { // "tbody", "tfoot", "thead" + if (!tb.inTableScope(name)) { tb.error(this); return false; } + if (!tb.inTableScope("tr")) { + // not an error per spec? 
+ return false; + } tb.clearStackToTableRowContext(); tb.pop(); // tr tb.transition(InTableBody); + return tb.process(t); } else if (inSorted(name, InRowIgnore)) { tb.error(this); return false; @@ -1346,14 +1365,6 @@ boolean process(Token t, HtmlTreeBuilder tb) { private boolean anythingElse(Token t, HtmlTreeBuilder tb) { return tb.process(t, InTable); } - - private boolean handleMissingTr(Token t, TreeBuilder tb) { - boolean processed = tb.processEndTag("tr"); - if (processed) - return tb.process(t); - else - return false; - } }, InCell { boolean process(Token t, HtmlTreeBuilder tb) { diff --git a/src/test/resources/fuzztests/63242.html.gz b/src/test/resources/fuzztests/63242.html.gz new file mode 100644 index 0000000000000000000000000000000000000000..9f1fda21514202e6061842b3cda82d4e6fada147 GIT binary patch literal 161 zcmV;S0ABweiwFo%B`;+F12!`sZa1q{QB@=FWAGWtLw9m>NZlw+gC00G(zNC2WB8?GF?R{XqhJdzO4*r)