From 4a2aa99d517bb5be7468024c13cecc8f73c686d0 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 1 Feb 2024 00:10:19 +0000 Subject: [PATCH] HTML API: Fix CDATA lookalike matching invalid CDATA When `next_token()` was introduced to the HTML Tag Processor, it started classifying comments that look like they were intended to be CDATA sections. In one of the changes made during development, however, a typo slipped through code review that treated comments as CDATA even if they only ended in `]>` and not the required `]]>`. The consequences of this defect were minor because in all cases these are treated as HTML comments from invalid syntax, but this patch adds the missing check to ensure the proper reporting of CDATA-lookalikes. Follow-up to [57348] Props jonsurrell Fixes #60406 git-svn-id: https://develop.svn.wordpress.org/trunk@57506 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-tag-processor.php | 3 +- .../wpHtmlTagProcessor-token-scanning.php | 38 +++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 169fabe750fcf..1b4db41bcee12 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1762,7 +1762,8 @@ private function parse_next_tag() { 'T' === $html[ $this->token_starts_at + 6 ] && 'A' === $html[ $this->token_starts_at + 7 ] && '[' === $html[ $this->token_starts_at + 8 ] && - ']' === $html[ $closer_at - 1 ] + ']' === $html[ $closer_at - 1 ] && + ']' === $html[ $closer_at - 2 ] ) { $this->parser_state = self::STATE_COMMENT; $this->comment_type = self::COMMENT_AS_CDATA_LOOKALIKE; diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php index 538144910550f..295063a04c2ff 100644 --- 
a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php @@ -347,6 +347,38 @@ public function test_basic_assertion_cdata_section() { ); } + /** + * Ensures that CDATA-lookalike comments closed by `]>` instead of `]]>` are reported as invalid HTML comments. + * + * @ticket 60406 + * + * @since 6.5.0 + * + * @covers WP_HTML_Tag_Processor::next_token + */ + public function test_cdata_comment_with_incorrect_closer() { + $processor = new WP_HTML_Tag_Processor( '<![CDATA[this is missing a closing square bracket]>' ); + $processor->next_token(); + + $this->assertSame( + '#comment', + $processor->get_token_name(), + "Should have found comment token but found {$processor->get_token_name()} instead." + ); + + $this->assertSame( + WP_HTML_Processor::COMMENT_AS_INVALID_HTML, + $processor->get_comment_type(), + 'Should have detected invalid HTML comment.' + ); + + $this->assertSame( + '[CDATA[this is missing a closing square bracket]', + $processor->get_modifiable_text(), + 'Found incorrect modifiable text.' + ); + } + /** * Ensures that abruptly-closed CDATA sections are properly parsed as comments. * @@ -366,6 +398,12 @@ public function test_basic_assertion_abruptly_closed_cdata_section() { "Should have found a bogus comment but found {$processor->get_token_name()} instead." ); + $this->assertSame( + WP_HTML_Processor::COMMENT_AS_INVALID_HTML, + $processor->get_comment_type(), + 'Should have detected invalid HTML comment.' + ); + $this->assertNull( $processor->get_tag(), 'Should not have been able to query tag name on non-element token.'