From 086010a380f694cbe1773c79b6b2cc27b3f0526e Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Tue, 26 Sep 2023 08:18:25 +0000 Subject: [PATCH] HTML API: Add `matches_breadcrumbs()` method for better querying. Inside a `next_tag()` loop it can be challenging to use breadcrumbs because they are only exposed inside the call to `next_tag()` via the `$query` arg. In this patch a new method, `matches_breadcrumbs()`, is exposed which allows for querying within the `next_tag()` loop for more complicated queries. This method exposes a wildcard `*` operator to allow matching ''any HTML tag'' that the currently-matched tag is a child or descendant of. Props dmsnell, westonruter, mukesh27. Fixes #59400. git-svn-id: https://develop.svn.wordpress.org/trunk@56702 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-processor.php | 70 +++++++++++++++---- .../html-api/wpHtmlProcessorBreadcrumbs.php | 58 +++++++++++++++ 2 files changed, 114 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 52735f29e6318..31e1490d88af2 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -357,6 +357,7 @@ public function get_last_error() { * Defaults to first tag. * @type string|null $class_name Tag must contain this whole class name to match. * @type string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`. + * May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`. * } * @return bool Whether a tag was matched. */ @@ -406,26 +407,67 @@ public function next_tag( $query = null ) { $breadcrumbs = $query['breadcrumbs']; $match_offset = isset( $query['match_offset'] ) ? 
(int) $query['match_offset'] : 1; - $crumb = end( $breadcrumbs ); - $target = strtoupper( $crumb ); while ( $match_offset > 0 && $this->step() ) { - if ( $target !== $this->get_tag() ) { - continue; + if ( $this->matches_breadcrumbs( $breadcrumbs ) && 0 === --$match_offset ) { + return true; } + } - // Look up the stack to see if the breadcrumbs match. - foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { - if ( strtoupper( $crumb ) !== $node->node_name ) { - break; - } + return false; + } - $crumb = prev( $breadcrumbs ); - if ( false === $crumb && 0 === --$match_offset && ! $this->is_tag_closer() ) { - return true; - } + /** + * Indicates if the currently-matched tag matches the given breadcrumbs. + * + * A "*" represents a single-tag wildcard: it matches any one tag, but never zero tags. + * + * At some point this function _may_ support a `**` syntax for matching any number + * of unspecified tags in the breadcrumb stack. This has been intentionally left + * out, however, to keep this function simple and to avoid introducing backtracking, + * which could open up surprising performance breakdowns. + * + * Example: + * + * $processor = WP_HTML_Processor::createFragment( '
' ); + * $processor->next_tag( 'img' ); + * true === $processor->matches_breadcrumbs( array( 'figure', 'img' ) ); + * true === $processor->matches_breadcrumbs( array( 'span', 'figure', 'img' ) ); + * false === $processor->matches_breadcrumbs( array( 'span', 'img' ) ); + * true === $processor->matches_breadcrumbs( array( 'span', '*', 'img' ) ); + * + * @since 6.4.0 + * + * @param string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`. + * May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`. + * @return bool Whether the currently-matched tag is found at the given nested structure. + */ + public function matches_breadcrumbs( $breadcrumbs ) { + if ( ! $this->get_tag() ) { + return false; + } + + // Everything matches when there are zero constraints. + if ( 0 === count( $breadcrumbs ) ) { + return true; + } + + // Start at the last crumb. + $crumb = end( $breadcrumbs ); + + if ( '*' !== $crumb && $this->get_tag() !== strtoupper( $crumb ) ) { + return false; + } + + foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + $crumb = strtoupper( current( $breadcrumbs ) ); + + if ( '*' !== $crumb && $node->node_name !== $crumb ) { + return false; } - $crumb = end( $breadcrumbs ); + if ( false === prev( $breadcrumbs ) ) { + return true; + } } return false; diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 28a7414a6ed52..4f86f856e7772 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -352,6 +352,64 @@ public function data_html_target_with_breadcrumbs() { ); } + /** + * @ticket 59400 + * + * @dataProvider data_html_with_breadcrumbs_of_various_specificity + * + * @param string $html_with_target_node HTML with a node containing a "target" attribute. 
+ * @param string[] $breadcrumbs Breadcrumbs to test at the target node. + * @param bool $should_match Whether the target node should match the breadcrumbs. + */ + public function test_reports_if_tag_matches_breadcrumbs_of_various_specificity( $html_with_target_node, $breadcrumbs, $should_match ) { + $processor = WP_HTML_Processor::createFragment( $html_with_target_node ); + while ( $processor->next_tag() && null === $processor->get_attribute( 'target' ) ) { + continue; + } + + $matches = $processor->matches_breadcrumbs( $breadcrumbs ); + $path = implode( ', ', $breadcrumbs ); + if ( $should_match ) { + $this->assertTrue( $matches, "HTML tag {$processor->get_tag()} should have matched breadcrumbs but didn't: {$path}." ); + } else { + $this->assertFalse( $matches, "HTML tag {$processor->get_tag()} should not have matched breadcrumbs but did: {$path}." ); + } + } + + /** + * Data provider. + * + * @return array[] + */ + public function data_html_with_breadcrumbs_of_various_specificity() { + return array( + // Test with void elements. + 'Inner IMG' => array( '
', array( 'span', 'figure', 'img' ), true ), + 'Inner IMG wildcard' => array( '
', array( 'span', '*', 'img' ), true ), + 'Inner IMG no wildcard' => array( '
', array( 'span', 'img' ), false ), + 'Full specification' => array( '
', array( 'html', 'body', 'div', 'span', 'figure', 'img' ), true ), + 'Invalid Full specification' => array( '
', array( 'html', 'div', 'span', 'figure', 'img' ), false ), + + // Test also with non-void elements that open and close. + 'Inner P' => array( '

', array( 'span', 'figure', 'p' ), true ), + 'Inner P wildcard' => array( '

', array( 'span', '*', 'p' ), true ), + 'Inner P no wildcard' => array( '

', array( 'span', 'p' ), false ), + 'Full specification (P)' => array( '

', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), true ), + 'Invalid Full specification (P)' => array( '

', array( 'html', 'div', 'span', 'figure', 'p' ), false ), + + // Ensure that matches aren't on tag closers. Dataset names must differ from the opener cases above: a repeated array key overwrites the earlier dataset. + 'Inner P (closer)' => array( '

', array( 'span', 'figure', 'p' ), false ), + 'Inner P wildcard (closer)' => array( '

', array( 'span', '*', 'p' ), false ), + 'Inner P no wildcard (closer)' => array( '

', array( 'span', 'p' ), false ), + 'Full specification (P closer)' => array( '

', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), false ), + 'Invalid Full specification (P closer)' => array( '

', array( 'html', 'div', 'span', 'figure', 'p' ), false ), + + // Test wildcard behaviors. + 'Single wildcard element' => array( '

', array( '*' ), true ), + 'Child of wildcard element' => array( '

', array( 'SPAN', '*' ), true ), + ); + } + /** * Ensures that the ability to set attributes isn't broken by the HTML Processor. *