Skip to content

Commit

Permalink
HTML API: Add matches_breadcrumbs() method for better querying.
Browse files Browse the repository at this point in the history
Inside a `next_tag()` loop it can be challenging to use breadcrumbs because they are only exposed inside the call to `next_tag()` via the `$query` arg.

In this patch a new method, `matches_breadcrumbs()`, is exposed which allows for querying within the `next_tag()` loop for more complicated queries.

This method exposes a wildcard `*` operator to allow matching any HTML tag that the currently-matched tag is a child or descendant of.

Props dmsnell, westonruter, mukesh27.
Fixes #59400.

git-svn-id: https://develop.svn.wordpress.org/trunk@56702 602fd350-edb4-49c9-b593-d223f7449a82
  • Loading branch information
ockham committed Sep 26, 2023
1 parent 77de774 commit 086010a
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 14 deletions.
70 changes: 56 additions & 14 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ public function get_last_error() {
* Defaults to first tag.
* @type string|null $class_name Tag must contain this whole class name to match.
* @type string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`.
* May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`.
* }
* @return bool Whether a tag was matched.
*/
Expand Down Expand Up @@ -406,26 +407,67 @@ public function next_tag( $query = null ) {
$breadcrumbs = $query['breadcrumbs'];
$match_offset = isset( $query['match_offset'] ) ? (int) $query['match_offset'] : 1;

$crumb = end( $breadcrumbs );
$target = strtoupper( $crumb );
while ( $match_offset > 0 && $this->step() ) {
if ( $target !== $this->get_tag() ) {
continue;
if ( $this->matches_breadcrumbs( $breadcrumbs ) && 0 === --$match_offset ) {
return true;
}
}

// Look up the stack to see if the breadcrumbs match.
foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
if ( strtoupper( $crumb ) !== $node->node_name ) {
break;
}
return false;
}

$crumb = prev( $breadcrumbs );
if ( false === $crumb && 0 === --$match_offset && ! $this->is_tag_closer() ) {
return true;
}
/**
 * Indicates if the currently-matched tag matches the given breadcrumbs.
 *
 * A "*" represents a single tag wildcard: it matches exactly one tag of any
 * name, but it never matches the absence of a tag.
 *
 * At some point this function _may_ support a `**` syntax for matching any number
 * of unspecified tags in the breadcrumb stack. This has been intentionally left
 * out, however, to keep this function simple and to avoid introducing backtracking,
 * which could open up surprising performance breakdowns.
 *
 * Example:
 *
 *     $processor = WP_HTML_Processor::createFragment( '<div><span><figure><img></figure></span></div>' );
 *     $processor->next_tag( 'img' );
 *     true  === $processor->matches_breadcrumbs( array( 'figure', 'img' ) );
 *     true  === $processor->matches_breadcrumbs( array( 'span', 'figure', 'img' ) );
 *     false === $processor->matches_breadcrumbs( array( 'span', 'img' ) );
 *     true  === $processor->matches_breadcrumbs( array( 'span', '*', 'img' ) );
 *
 * @since 6.4.0
 *
 * @param string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`.
 *                              May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`.
 * @return bool Whether the currently-matched tag is found at the given nested structure.
 */
public function matches_breadcrumbs( $breadcrumbs ) {
	// Without a currently-matched tag there is nothing to compare against.
	if ( ! $this->get_tag() ) {
		return false;
	}

	// Everything matches when there are zero constraints.
	if ( 0 === count( $breadcrumbs ) ) {
		return true;
	}

	// Start at the last crumb; this also parks the array's internal pointer there.
	$crumb = end( $breadcrumbs );

	if ( '*' !== $crumb && $this->get_tag() !== strtoupper( $crumb ) ) {
		return false;
	}

	/*
	 * Compare the open elements and the breadcrumbs in lockstep, last crumb first.
	 * NOTE(review): relies on walk_up() yielding the innermost open element first.
	 *
	 * The internal pointer of `$breadcrumbs` may only move backwards inside this
	 * loop. Resetting it (e.g. by calling end() again) would pin the comparison to
	 * the final two crumbs and make every longer path fail to match.
	 */
	foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
		$crumb = strtoupper( current( $breadcrumbs ) );

		if ( '*' !== $crumb && $node->node_name !== $crumb ) {
			return false;
		}

		// Step to the preceding crumb; running out of crumbs means all of them matched.
		if ( false === prev( $breadcrumbs ) ) {
			return true;
		}
	}

	// The open elements were exhausted before every crumb was matched.
	return false;
Expand Down
58 changes: 58 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,64 @@ public function data_html_target_with_breadcrumbs() {
);
}

/**
 * Ensures that `matches_breadcrumbs()` gives the expected answer for the node
 * carrying a "target" attribute, across breadcrumbs of differing specificity.
 *
 * @ticket 59400
 *
 * @dataProvider data_html_with_breadcrumbs_of_various_specificity
 *
 * @param string   $html_with_target_node HTML with a node containing a "target" attribute.
 * @param string[] $breadcrumbs           Breadcrumbs to test at the target node.
 * @param bool     $should_match          Whether the target node should match the breadcrumbs.
 */
public function test_reports_if_tag_matches_breadcrumbs_of_various_specificity( $html_with_target_node, $breadcrumbs, $should_match ) {
	$processor = WP_HTML_Processor::createFragment( $html_with_target_node );

	// Advance until a tag with the "target" attribute is reached, or no tags remain.
	do {
		$has_tag = $processor->next_tag();
	} while ( $has_tag && null === $processor->get_attribute( 'target' ) );

	$is_match = $processor->matches_breadcrumbs( $breadcrumbs );
	$path     = implode( ', ', $breadcrumbs );

	if ( ! $should_match ) {
		$this->assertFalse( $is_match, "HTML tag {$processor->get_tag()} should not have matched breadcrumbs but did: {$path}." );
		return;
	}

	$this->assertTrue( $is_match, "HTML tag {$processor->get_tag()} should have matched breadcrumbs but didn't: {$path}." );
}

/**
 * Data provider.
 *
 * Each data set is keyed by a unique name. PHP keeps only the last value for a
 * repeated array key, so reusing a name would silently drop the earlier data set.
 *
 * @return array[] {
 *     @type string   $0 HTML with a node containing a "target" attribute.
 *     @type string[] $1 Breadcrumbs to test at the target node.
 *     @type bool     $2 Whether the target node should match the breadcrumbs.
 * }
 */
public function data_html_with_breadcrumbs_of_various_specificity() {
	return array(
		// Test with void elements.
		'Inner IMG'                              => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'figure', 'img' ), true ),
		'Inner IMG wildcard'                     => array( '<div><span><figure><img target></figure></span></div>', array( 'span', '*', 'img' ), true ),
		'Inner IMG no wildcard'                  => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'img' ), false ),
		'Full specification'                     => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'img' ), true ),
		'Invalid Full specification'             => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'img' ), false ),

		// Test also with non-void elements that open and close.
		'Inner P'                                => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'figure', 'p' ), true ),
		'Inner P wildcard'                       => array( '<div><span><figure><p target></figure></span></div>', array( 'span', '*', 'p' ), true ),
		'Inner P no wildcard'                    => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'p' ), false ),
		'Full specification (P)'                 => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), true ),
		'Invalid Full specification (P)'         => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),

		// Ensure that matches aren't on tag closers.
		'Closer P'                               => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'figure', 'p' ), false ),
		'Closer P wildcard'                      => array( '<div><span><figure></p target></figure></span></div>', array( 'span', '*', 'p' ), false ),
		'Closer P no wildcard'                   => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'p' ), false ),
		'Full specification (closer P)'          => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), false ),
		'Invalid Full specification (closer P)'  => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),

		// Test wildcard behaviors.
		'Single wildcard element'                => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( '*' ), true ),
		'Child of wildcard element'              => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( 'SPAN', '*' ), true ),
	);
}

/**
* Ensures that the ability to set attributes isn't broken by the HTML Processor.
*
Expand Down

0 comments on commit 086010a

Please sign in to comment.