From cecc810f9170d297fbb1efe7e8439112359af8d8 Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Tue, 26 Sep 2023 09:15:21 +0000 Subject: [PATCH] HTML API: Add class name utilities `has_class()` and `class_list()`. This patch adds two new public methods to the HTML Tag Processor: - `has_class()` indicates if a matched tag contains a given CSS class name. - `class_list()` returns a generator to iterate over all the class names in a matched tag. Included in this patch is a refactoring of the internal logic when matching a tag to reuse the new `has_class()` function. Previously it was relying on optimized code in the `matches()` function which performed byte-for-byte class name comparison. With the change in this patch it will perform class name matching on the decoded value, which might differ if a class attribute contains character references. These methods may be useful for running more complicated queries based on the presence or absence of CSS class names. The use of these methods avoids the need to manually decode the class attribute as reported by `$process->get_attribute( 'class' )`. Props dmsnell. Fixes #59209. git-svn-id: https://develop.svn.wordpress.org/trunk@56703 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-tag-processor.php | 147 ++++++++++------- .../tests/html-api/wpHtmlTagProcessor.php | 155 ++++++++++++++++++ 2 files changed, 244 insertions(+), 58 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 7aea3d40cda72..818dca6a0f875 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -626,6 +626,94 @@ public function next_tag( $query = null ) { } + /** + * Generator for a foreach loop to step through each class name for the matched tag. + * + * This generator function is designed to be used inside a "foreach" loop. + * + * Example: + * + * $p = new WP_HTML_Tag_Processor( "
" ); + * $p->next_tag(); + * foreach ( $p->class_list() as $class_name ) { + * echo "{$class_name} "; + * } + * // Outputs: "free lang-en " + * + * @since 6.4.0 + */ + public function class_list() { + /** @var string $class contains the string value of the class attribute, with character references decoded. */ + $class = $this->get_attribute( 'class' ); + + if ( ! is_string( $class ) ) { + return; + } + + $seen = array(); + + $at = 0; + while ( $at < strlen( $class ) ) { + // Skip past any initial boundary characters. + $at += strspn( $class, " \t\f\r\n", $at ); + if ( $at >= strlen( $class ) ) { + return; + } + + // Find the byte length until the next boundary. + $length = strcspn( $class, " \t\f\r\n", $at ); + if ( 0 === $length ) { + return; + } + + /* + * CSS class names are case-insensitive in the ASCII range. + * + * @see https://www.w3.org/TR/CSS2/syndata.html#x1 + */ + $name = strtolower( substr( $class, $at, $length ) ); + $at += $length; + + /* + * It's expected that the number of class names for a given tag is relatively small. + * Given this, it is probably faster overall to scan an array for a value rather + * than to use the class name as a key and check if it's a key of $seen. + */ + if ( in_array( $name, $seen, true ) ) { + continue; + } + + $seen[] = $name; + yield $name; + } + } + + + /** + * Returns if a matched tag contains the given ASCII case-insensitive class name. + * + * @since 6.4.0 + * + * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive. + * @return bool|null Whether the matched tag contains the given class name, or null if not matched. + */ + public function has_class( $wanted_class ) { + if ( ! $this->tag_name_starts_at ) { + return null; + } + + $wanted_class = strtolower( $wanted_class ); + + foreach ( $this->class_list() as $class_name ) { + if ( $class_name === $wanted_class ) { + return true; + } + } + + return false; + } + + /** * Sets a bookmark in the HTML document. * @@ -2347,64 +2435,7 @@ private function matches() { } } - $needs_class_name = null !== $this->sought_class_name; - - if ( $needs_class_name && ! isset( $this->attributes['class'] ) ) { - return false; - } - - /* - * Match byte-for-byte (case-sensitive and encoding-form-sensitive) on the class name. - * - * This will overlook certain classes that exist in other lexical variations - * than was supplied to the search query, but requires more complicated searching. - */ - if ( $needs_class_name ) { - $class_start = $this->attributes['class']->value_starts_at; - $class_end = $class_start + $this->attributes['class']->value_length; - $class_at = $class_start; - - /* - * Ensure that boundaries surround the class name to avoid matching on - * substrings of a longer name. For example, the sequence "not-odd" - * should not match for the class "odd" even though "odd" is found - * within the class attribute text. - * - * See https://html.spec.whatwg.org/#attributes-3 - * See https://html.spec.whatwg.org/#space-separated-tokens - */ - while ( - // phpcs:ignore WordPress.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition - false !== ( $class_at = strpos( $this->html, $this->sought_class_name, $class_at ) ) && - $class_at < $class_end - ) { - /* - * Verify this class starts at a boundary. - */ - if ( $class_at > $class_start ) { - $character = $this->html[ $class_at - 1 ]; - - if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) { - $class_at += strlen( $this->sought_class_name ); - continue; - } - } - - /* - * Verify this class ends at a boundary as well. - */ - if ( $class_at + strlen( $this->sought_class_name ) < $class_end ) { - $character = $this->html[ $class_at + strlen( $this->sought_class_name ) ]; - - if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) { - $class_at += strlen( $this->sought_class_name ); - continue; - } - } - - return true; - } - + if ( null !== $this->sought_class_name && ! $this->has_class( $this->sought_class_name ) ) { return false; } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 7111571b1f113..4469f90c4f276 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -498,6 +498,17 @@ public function test_next_tag_should_return_false_for_a_non_existing_tag() { $this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); } + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::next_tag + */ + public function test_next_tag_matches_decoded_class_names() { + $p = new WP_HTML_Tag_Processor( '
' ); + + $this->assertTrue( $p->next_tag( array( 'class_name' => '' ) ), 'Failed to find tag with HTML-encoded class name.' ); + } + /** * @ticket 56299 * @ticket 57852 @@ -1957,6 +1968,150 @@ public function data_next_tag_ignores_contents_of_rawtext_tags() { ); } + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_empty_when_missing_class() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = false; + foreach ( $p->class_list() as $class ) { + $found_classes = true; + } + + $this->assertFalse( $found_classes, 'Found classes when none exist.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_empty_when_class_is_boolean() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = false; + foreach ( $p->class_list() as $class ) { + $found_classes = true; + } + + $this->assertFalse( $found_classes, 'Found classes when none exist.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_empty_when_class_is_empty() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = false; + foreach ( $p->class_list() as $class ) { + $found_classes = true; + } + + $this->assertFalse( $found_classes, 'Found classes when none exist.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_visits_each_class_in_order() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = array(); + foreach ( $p->class_list() as $class ) { + $found_classes[] = $class; + } + + $this->assertSame( array( 'one', 'two', 'three' ), $found_classes, 'Failed to visit the class names in their original order.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_decodes_class_names() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = array(); + foreach ( $p->class_list() as $class ) { + $found_classes[] = $class; + } + + $this->assertSame( array( '∉-class', '', "\u{ff03}" ), $found_classes, 'Failed to report class names in their decoded form.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_visits_unique_class_names_only_once() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = array(); + foreach ( $p->class_list() as $class ) { + $found_classes[] = $class; + } + + $this->assertSame( array( 'one' ), $found_classes, 'Visited multiple copies of the same class name when it should have skipped the duplicates.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::has_class + * + * @dataProvider data_html_with_variations_of_class_values_and_sought_class_names + * + * @param string $html Contains a tag optionally containing a `class` attribute. + * @param string $sought_class Name of class to find in the input tag's `class`. + * @param bool $has_class Whether the sought class exists in the given HTML. + */ + public function test_has_class_handles_expected_class_name_variations( $html, $sought_class, $has_class ) { + $p = new WP_HTML_Tag_Processor( $html ); + $p->next_tag(); + + if ( $has_class ) { + $this->assertTrue( $p->has_class( $sought_class ), "Failed to find expected class {$sought_class}." ); + } else { + $this->assertFalse( $p->has_class( $sought_class ), "Found class {$sought_class} when it doesn't exist." ); + } + } + + /** + * Data provider. + * + * @return array[] + */ + public function data_html_with_variations_of_class_values_and_sought_class_names() { + return array( + 'Tag without any classes' => array( '
', 'foo', false ), + 'Tag with boolean class' => array( '', 'foo', false ), + 'Tag with empty class' => array( '

', 'foo', false ), + 'Tag with exact match' => array( '