diff --git a/CHANGES b/CHANGES
index 1e84076053..e3661405c4 100644
--- a/CHANGES
+++ b/CHANGES
@@ -16,6 +16,10 @@ Release 1.17.1 [PENDING]
    elements to be returned when used on elements other than the root document.
 
 
+ * Bugfix: in a sub-query such as `p:has(> span, > i)`, combinators following the `,` Or combinator would be
+   incorrectly skipped, such that the sub-query was parsed as `i` instead of `> i`.
+
+
 Release 1.16.2 [20-Oct-2023]
  * Improvement: optimized the performance of complex CSS selectors, by adding a cost-based query planner. Evaluators
    are sorted by their relative execution cost, and executed in order of lower to higher cost. This speeds the
diff --git a/src/main/java/org/jsoup/select/QueryParser.java b/src/main/java/org/jsoup/select/QueryParser.java
index 09f53bdd00..30872eb53b 100644
--- a/src/main/java/org/jsoup/select/QueryParser.java
+++ b/src/main/java/org/jsoup/select/QueryParser.java
@@ -145,18 +145,29 @@ private void combinator(char combinator) {
 
+    /**
+     * Consumes the next sub-query from the token queue. Leading combinators are kept in the result (so {@code > i}
+     * is not reduced to {@code i}); consumption stops, without consuming it, at the first combinator seen after any
+     * other content. Groups taken via chompBalanced, i.e. (...) and [...], count as content too.
+     *
+     * @return the text of the sub-query
+     */
     private String consumeSubQuery() {
         StringBuilder sq = StringUtil.borrowBuilder();
+        boolean seenNonCombinator = false; // eat until we hit a combinator after eating something else
         while (!tq.isEmpty()) {
-            if (tq.matches("("))
+            if (tq.matches("(")) {
                 sq.append("(").append(tq.chompBalanced('(', ')')).append(")");
-            else if (tq.matches("["))
+                seenNonCombinator = true; // a group is content: a following combinator must end the sub-query
+            } else if (tq.matches("[")) {
                 sq.append("[").append(tq.chompBalanced('[', ']')).append("]");
-            else if (tq.matchesAny(Combinators))
-                if (sq.length() > 0)
+                seenNonCombinator = true; // as above; previously guaranteed by the sq.length() > 0 check
+            } else if (tq.matchesAny(Combinators)) {
+                if (seenNonCombinator)
                     break;
-                else
-                    tq.consume();
-            else
+                sq.append(tq.consume()); // a leading combinator belongs to the sub-query
+            } else {
+                seenNonCombinator = true;
                 sq.append(tq.consume());
+            }
         }
         return StringUtil.releaseBuilder(sq);
     }
diff --git a/src/test/java/org/jsoup/select/QueryParserTest.java b/src/test/java/org/jsoup/select/QueryParserTest.java
index ae2f344886..51b7c925d2 100644
--- a/src/test/java/org/jsoup/select/QueryParserTest.java
+++ b/src/test/java/org/jsoup/select/QueryParserTest.java
@@ -18,10 +18,10 @@ public class QueryParserTest {
                 "
  • l2
  • " + "

    yes

    " + "");
-        assertEquals("l1 l2 yes", doc.body().select(">p>strong,>*>li>strong").text());
+        assertEquals("l1 yes", doc.body().select(">p>strong,>li>strong").text()); // selecting immediate from body
+        assertEquals("l2 yes", doc.select("body>p>strong,body>*>li>strong").text());
+        assertEquals("l2 yes", doc.select("body>*>li>strong,body>p>strong").text());
         assertEquals("l2 yes", doc.select("body>p>strong,body>*>li>strong").text());
-        assertEquals("yes", doc.select(">body>*>li>strong,>body>p>strong").text());
-        assertEquals("l2", doc.select(">body>p>strong,>body>*>li>strong").text());
     }
 
     @Test public void testImmediateParentRun() {
diff --git a/src/test/java/org/jsoup/select/SelectorTest.java b/src/test/java/org/jsoup/select/SelectorTest.java
index e941f9032b..686214aa6b 100644
--- a/src/test/java/org/jsoup/select/SelectorTest.java
+++ b/src/test/java/org/jsoup/select/SelectorTest.java
@@ -1206,4 +1206,17 @@ public void wildcardNamespaceMatchesNoNamespace() {
         Elements innerLisFromParent = li2.select("ul li");
         assertEquals(innerLis, innerLisFromParent);
     }
+
+    @Test public void rootImmediateParentSubquery() {
+        // a combinator at the start of the query is applied to the Root selector. i.e. "> p" matches a P immediately parented
+        // by the Root (which is for a top level query, or the context element in :has)
+        // in the sub query, the combinator was dropped incorrectly
+        String html = "

    A

    B

    C

    \n";
+        Document doc = Jsoup.parse(html);
+
+        Elements els = doc.select("p:has(> span, > i)"); // should match a p with an immediate span or i
+        assertEquals(2, els.size());
+        assertEquals("0", els.get(0).id());
+        assertEquals("2", els.get(1).id());
+    }
 }