diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index ee6209c69e0ae..289347840d002 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -247,6 +247,86 @@
* }
* }
*
+ * ## Tokens and finer-grained processing.
+ *
+ * >>> Stub documentation.
+ *
+ * It's also possible to scan through every lexical token in
+ * the HTML document using the `next_token()` function. This
+ * alternative form takes no argument and provides no built-in
+ * query syntax.
+ *
+ * Example:
+ *
+ * $title = '(untitled)';
+ * $text_content = '';
+ * while ( $processor->next_token() ) {
+ * switch ( $processor->get_node_name() ) {
+ * case '#text':
+ * $text_content .= $processor->get_node_text();
+ * break;
+ *
+ * case 'HR':
+ * $text_content .= "\n";
+ * break;
+ *
+ * case 'TITLE':
+ * $title = $processor->get_node_text();
+ * break;
+ * }
+ * }
+ * return trim( "# {$title}\n\n{$text_content}\n" );
+ *
+ * ### Tokens and _modifiable text_.
+ *
+ * #### Special "atomic" HTML elements.
+ *
+ * Not all HTML elements are able to contain other elements inside of them.
+ * For instance, the contents inside a TITLE element are plaintext (except
+ * that character references like `&amp;` will be decoded). This means that
+ * if the string `<img>` appears inside a TITLE element, then it's not an
+ * image tag, but rather it's text describing an image tag. Likewise, the
+ * contents of a SCRIPT or STYLE element are handled entirely separately in
+ * a browser than the contents of other elements because they represent a
+ * different language than HTML.
+ *
+ * For these elements the Tag Processor treats the entire sequence as one,
+ * from the opening tag, including its contents, through its closing tag.
+ * This means that it's not possible to match the closing tag for a
+ * SCRIPT element unless it's unexpected; the Tag Processor already matched
+ * it when it found the opening tag.
+ *
+ * The inner contents of these elements are that element's _modifiable text_.
+ *
+ * The special elements are:
+ * - `SCRIPT` whose contents are treated as raw plaintext but supports a legacy
+ * style of including JavaScript inside of HTML comments to avoid accidentally
+ * closing the SCRIPT from inside a JavaScript string. E.g. `console.log( '</script>' )`.
+ * - `TITLE` and `TEXTAREA` whose contents are treated as plaintext and then any
+ * character references are decoded. E.g. "1 &lt; 2 &lt; 3" becomes "1 < 2 < 3".
+ * - `IFRAME`, `NOSCRIPT`, `NOEMBED`, `NOFRAMES`, `STYLE`, `XMP` whose contents are treated as
+ * raw plaintext and left as-is. E.g. "1 &lt; 2 &lt; 3" remains "1 &lt; 2 &lt; 3".
+ *
+ * #### Other tokens with modifiable text.
+ *
+ * There are also non-elements which are atomic in nature and contain modifiable text.
+ *
+ * - `#text` nodes, whose entire token _is_ the modifiable text.
+ * - Comment nodes and nodes that became comments because of some syntax error. The
+ * text for these nodes is the portion of the comment inside of the syntax. E.g. for
+ * "<!-- comment -->" the text is " comment " (note that the spaces are part of it).
+ * - `CDATA` sections, whose text is the content inside of the section itself. E.g. for
+ * "<![CDATA[some content]]>" the text is "some content".
+ * - "Funky comments," which are a special case of invalid closing tags whose name is
+ * invalid. The text for these nodes is the text that a browser would transform into
+ * an HTML comment when parsing. E.g. for "</%post_author>" the text is "%post_author".
+ *
+ * And there are non-elements which are atomic in nature but have no modifiable text.
+ * - `DOCTYPE` nodes like "<!DOCTYPE html>" which have no closing tag.
+ * - XML Processing instruction nodes like "<?xml version='1.0'?>".
+ * - The empty end tag "</>" which is ignored in the browser and DOM but exposed
+ * to the HTML API.
+ *
* ## Design and limitations
*
* The Tag Processor is designed to linearly scan HTML documents and tokenize
@@ -320,7 +400,8 @@
* @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive.
* @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE.
* @since 6.5.0 Pauses processor when input ends in an incomplete syntax token.
- * Introduces "special" elements which act like void elements, e.g. STYLE.
+ * Introduces "special" elements which act like void elements, e.g. TITLE, STYLE.
+ * Allows scanning through all tokens and processing modifiable text, where applicable.
*/
class WP_HTML_Tag_Processor {
/**
@@ -396,12 +477,18 @@ class WP_HTML_Tag_Processor {
/**
* Specifies mode of operation of the parser at any given time.
*
- * | State | Meaning |
- * | --------------|----------------------------------------------------------------------|
- * | *Ready* | The parser is ready to run. |
- * | *Complete* | There is nothing left to parse. |
- * | *Incomplete* | The HTML ended in the middle of a token; nothing more can be parsed. |
- * | *Matched tag* | Found an HTML tag; it's possible to modify its attributes. |
+ * | State | Meaning |
+ * | ----------------|----------------------------------------------------------------------|
+ * | *Ready* | The parser is ready to run. |
+ * | *Complete* | There is nothing left to parse. |
+ * | *Incomplete* | The HTML ended in the middle of a token; nothing more can be parsed. |
+ * | *Matched tag* | Found an HTML tag; it's possible to modify its attributes. |
+ * | *Text node* | Found a #text node; this is plaintext and modifiable. |
+ * | *CDATA node* | Found a CDATA section; this is modifiable. |
+ * | *PI node* | Found a Processing Instruction; this is modifiable. |
+ * | *Comment* | Found a comment or bogus comment; this is modifiable. |
+ * | *Presumptuous* | Found an empty tag closer: `</>`. |
+ * | *Funky comment* | Found a tag closer with an invalid tag name; this is modifiable. |
*
* @since 6.5.0
*
@@ -409,6 +496,13 @@ class WP_HTML_Tag_Processor {
* @see WP_HTML_Tag_Processor::STATE_COMPLETE
* @see WP_HTML_Tag_Processor::STATE_INCOMPLETE
* @see WP_HTML_Tag_Processor::STATE_MATCHED_TAG
+ * @see WP_HTML_Tag_Processor::STATE_TEXT_NODE
+ * @see WP_HTML_Tag_Processor::STATE_CDATA_NODE
+ * @see WP_HTML_Tag_Processor::STATE_PI_NODE
+ * @see WP_HTML_Tag_Processor::STATE_COMMENT
+ * @see WP_HTML_Tag_Processor::STATE_DOCTYPE
+ * @see WP_HTML_Tag_Processor::STATE_PRESUMPTUOUS_TAG
+ * @see WP_HTML_Tag_Processor::STATE_FUNKY_COMMENT
*
* @var string
*/
@@ -490,6 +584,24 @@ class WP_HTML_Tag_Processor {
*/
private $tag_name_length;
+ /**
+ * Byte offset into input document where current modifiable text starts.
+ *
+ * @since 6.5.0
+ *
+ * @var int
+ */
+ private $text_starts_at;
+
+ /**
+ * Byte length of modifiable text.
+ *
+ * @since 6.5.0
+ *
+ * @var int
+ */
+ private $text_length;
+
/**
* Whether the current tag is an opening tag, e.g. <div>, or a closing tag, e.g. </div>.
*
@@ -705,8 +817,8 @@ public function next_tag( $query = null ) {
* @return bool Whether a token was parsed.
*/
public function next_token() {
- $this->get_updated_html();
$was_at = $this->bytes_already_parsed;
+ $this->get_updated_html();
// Don't proceed if there's nothing more to scan.
if (
@@ -736,6 +848,19 @@ public function next_token() {
return false;
}
+ /*
+ * For legacy reasons the rest of this function handles tags and their
+ * attributes. If the processor has reached the end of the document
+ * or if it matched any other token then it should return here.
+ */
+ if (
+ self::STATE_INCOMPLETE !== $this->parser_state &&
+ self::STATE_COMPLETE !== $this->parser_state &&
+ self::STATE_MATCHED_TAG !== $this->parser_state
+ ) {
+ return true;
+ }
+
// Parse all of its attributes.
while ( $this->parse_next_attribute() ) {
continue;
@@ -762,7 +887,7 @@ public function next_token() {
}
$this->parser_state = self::STATE_MATCHED_TAG;
$this->token_length = $tag_ends_at - $this->token_starts_at;
- $this->bytes_already_parsed = $tag_ends_at;
+ $this->bytes_already_parsed = $tag_ends_at + 1;
/*
* For non-DATA sections which might contain text that looks like HTML tags but
@@ -771,8 +896,8 @@ public function next_token() {
*/
$t = $this->html[ $this->tag_name_starts_at ];
if (
- ! $this->is_closing_tag &&
- (
+ $this->is_closing_tag ||
+ ! (
'i' === $t || 'I' === $t ||
'n' === $t || 'N' === $t ||
's' === $t || 'S' === $t ||
@@ -780,38 +905,69 @@ public function next_token() {
'x' === $t || 'X' === $t
)
) {
- $tag_name = $this->get_tag();
+ return true;
+ }
- if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) {
- $this->parser_state = self::STATE_INCOMPLETE;
- $this->bytes_already_parsed = $was_at;
+ $tag_name = $this->get_tag();
+ if (
+ 'SCRIPT' !== $tag_name &&
+ 'TEXTAREA' !== $tag_name &&
+ 'TITLE' !== $tag_name &&
+ 'IFRAME' !== $tag_name &&
+ 'NOEMBED' !== $tag_name &&
+ 'NOFRAMES' !== $tag_name &&
+ 'STYLE' !== $tag_name &&
+ 'XMP' !== $tag_name
+ ) {
+ return true;
+ }
- return false;
- } elseif (
- ( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) &&
- ! $this->skip_rcdata( $tag_name )
- ) {
- $this->parser_state = self::STATE_INCOMPLETE;
- $this->bytes_already_parsed = $was_at;
+ // Preserve the opening tag pointers.
+ $tag_name_starts_at = $this->tag_name_starts_at;
+ $tag_name_length = $this->tag_name_length;
+ $tag_ends_at = $this->token_starts_at + $this->token_length;
- return false;
- } elseif (
- (
- 'IFRAME' === $tag_name ||
- 'NOEMBED' === $tag_name ||
- 'NOFRAMES' === $tag_name ||
- 'STYLE' === $tag_name ||
- 'XMP' === $tag_name
- ) &&
- ! $this->skip_rawtext( $tag_name )
- ) {
- $this->parser_state = self::STATE_INCOMPLETE;
- $this->bytes_already_parsed = $was_at;
+ // Find the closing tag.
+ $found_closer = false;
+ switch ( $tag_name ) {
+ case 'SCRIPT':
+ $found_closer = $this->skip_script_data();
+ break;
- return false;
- }
+ case 'TEXTAREA':
+ case 'TITLE':
+ $found_closer = $this->skip_rcdata( $tag_name );
+ break;
+
+ case 'IFRAME':
+ case 'NOEMBED':
+ case 'NOFRAMES':
+ case 'STYLE':
+ case 'XMP':
+ $found_closer = $this->skip_rawtext( $tag_name );
+ break;
}
+ if ( ! $found_closer ) {
+ $this->parser_state = self::STATE_INCOMPLETE;
+ $this->bytes_already_parsed = $was_at;
+ return false;
+ }
+
+ /*
+ * The values here look like they reference the opening tag but they reference
+ * the closing tag instead. This is why the opening tag values were stored
+ * above in a variable. It reads confusingly here, but that's because the
+ * functions that skip the contents have moved all the internal cursors past
+ * the inner content of the tag.
+ */
+ $this->token_starts_at = $was_at;
+ $this->token_length = $this->bytes_already_parsed - $this->token_starts_at;
+ $this->text_starts_at = $tag_ends_at + 1;
+ $this->text_length = $this->tag_name_starts_at - $this->text_starts_at;
+ $this->tag_name_starts_at = $tag_name_starts_at;
+ $this->tag_name_length = $tag_name_length;
+
return true;
}
@@ -1007,7 +1163,7 @@ public function has_class( $wanted_class ) {
*/
public function set_bookmark( $name ) {
// It only makes sense to set a bookmark if the parser has paused on a concrete token.
- if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
+ if ( self::STATE_INCOMPLETE === $this->parser_state ) {
return false;
}
@@ -1082,15 +1238,15 @@ private function skip_rcdata( $tag_name ) {
$at = $this->bytes_already_parsed;
while ( false !== $at && $at < $doc_length ) {
- $at = strpos( $this->html, '', $at );
+ $at = strpos( $this->html, '', $at );
+ $this->tag_name_starts_at = $at;
// Fail if there is no possible tag closer.
if ( false === $at || ( $at + $tag_length ) >= $doc_length ) {
return false;
}
- $closer_potentially_starts_at = $at;
- $at += 2;
+ $at += 2;
/*
* Find a case-insensitive match to the tag name.
@@ -1131,13 +1287,23 @@ private function skip_rcdata( $tag_name ) {
while ( $this->parse_next_attribute() ) {
continue;
}
+
$at = $this->bytes_already_parsed;
if ( $at >= strlen( $this->html ) ) {
return false;
}
- if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) {
- $this->bytes_already_parsed = $closer_potentially_starts_at;
+ if ( '>' === $html[ $at ] ) {
+ $this->bytes_already_parsed = $at + 1;
+ return true;
+ }
+
+ if ( $at + 1 >= strlen( $this->html ) ) {
+ return false;
+ }
+
+ if ( '/' === $html[ $at ] && '>' === $html[ $at + 1 ] ) {
+ $this->bytes_already_parsed = $at + 2;
return true;
}
}
@@ -1259,6 +1425,7 @@ private function skip_script_data() {
if ( $is_closing ) {
$this->bytes_already_parsed = $closer_potentially_starts_at;
+ $this->tag_name_starts_at = $closer_potentially_starts_at;
if ( $this->bytes_already_parsed >= $doc_length ) {
return false;
}
@@ -1274,7 +1441,7 @@ private function skip_script_data() {
}
if ( '>' === $html[ $this->bytes_already_parsed ] ) {
- $this->bytes_already_parsed = $closer_potentially_starts_at;
+ ++$this->bytes_already_parsed;
return true;
}
}
@@ -1303,17 +1470,34 @@ private function parse_next_tag() {
$html = $this->html;
$doc_length = strlen( $html );
- $at = $this->bytes_already_parsed;
+ $was_at = $this->bytes_already_parsed;
+ $at = $was_at;
- while ( false !== $at && $at < $doc_length ) {
+ while ( false !== $at && $at <= $doc_length ) {
$at = strpos( $html, '<', $at );
+ if ( $at > $was_at ) {
+ $this->parser_state = self::STATE_TEXT_NODE;
+ $this->token_starts_at = $was_at;
+ $this->token_length = $at - $was_at;
+ $this->text_starts_at = $was_at;
+ $this->text_length = $this->token_length;
+ $this->bytes_already_parsed = $at;
+ return true;
+ }
+
/*
* This does not imply an incomplete parse; it indicates that there
* can be nothing left in the document other than a #text node.
*/
if ( false === $at ) {
- return false;
+ $this->parser_state = self::STATE_TEXT_NODE;
+ $this->token_starts_at = $was_at;
+ $this->token_length = strlen( $html ) - $was_at;
+ $this->text_starts_at = $was_at;
+ $this->text_length = $this->token_length;
+ $this->bytes_already_parsed = strlen( $html );
+ return true;
}
$this->token_starts_at = $at;
@@ -1342,8 +1526,9 @@ private function parse_next_tag() {
$tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 );
if ( $tag_name_prefix_length > 0 ) {
++$at;
- $this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
+ $this->parser_state = self::STATE_MATCHED_TAG;
$this->tag_name_starts_at = $at;
+ $this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
$this->bytes_already_parsed = $at + $this->tag_name_length;
return true;
}
@@ -1383,8 +1568,13 @@ private function parse_next_tag() {
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
$span_of_dashes = strspn( $html, '-', $closer_at );
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
- $at = $closer_at + $span_of_dashes + 1;
- continue;
+ // @todo This could go wrong if the closer is shorter than `<!---->` because there's no inside.
+ $this->parser_state = self::STATE_COMMENT;
+ $this->token_length = $closer_at + $span_of_dashes + 1 - $this->token_starts_at;
+ $this->text_starts_at = $this->token_starts_at + 4;
+ $this->text_length = max( 0, $closer_at - $this->text_starts_at );
+ $this->bytes_already_parsed = $closer_at + $span_of_dashes + 1;
+ return true;
}
/*
@@ -1403,13 +1593,25 @@ private function parse_next_tag() {
}
if ( $closer_at + 2 < $doc_length && '>' === $html[ $closer_at + 2 ] ) {
- $at = $closer_at + 3;
- continue 2;
+ $this->parser_state = self::STATE_COMMENT;
+ $this->token_length = $closer_at + 3 - $this->token_starts_at;
+ $this->text_starts_at = $this->token_starts_at + 4;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->bytes_already_parsed = $closer_at + 3;
+ return true;
}
- if ( $closer_at + 3 < $doc_length && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
- $at = $closer_at + 4;
- continue 2;
+ if (
+ $closer_at + 3 < $doc_length &&
+ '!' === $html[ $closer_at + 2 ] &&
+ '>' === $html[ $closer_at + 3 ]
+ ) {
+ $this->parser_state = self::STATE_COMMENT;
+ $this->token_length = $closer_at + 4 - $this->token_starts_at;
+ $this->text_starts_at = $this->token_starts_at + 4;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->bytes_already_parsed = $closer_at + 4;
+ return true;
}
}
}
@@ -1436,8 +1638,12 @@ private function parse_next_tag() {
return false;
}
- $at = $closer_at + 3;
- continue;
+ $this->parser_state = self::STATE_CDATA_NODE;
+ $this->token_length = $closer_at + 4 - $this->token_starts_at;
+ $this->text_starts_at = $this->token_starts_at + 9;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->bytes_already_parsed = $closer_at + 3;
+ return true;
}
/*
@@ -1462,8 +1668,12 @@ private function parse_next_tag() {
return false;
}
- $at = $closer_at + 1;
- continue;
+ $this->parser_state = self::STATE_DOCTYPE;
+ $this->token_length = $closer_at + 1 - $this->token_starts_at;
+ $this->text_starts_at = $this->token_starts_at + 9;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->bytes_already_parsed = $closer_at + 1;
+ return true;
}
/*
@@ -1477,8 +1687,6 @@ private function parse_next_tag() {
return false;
}
-
- continue;
}
/*
@@ -1491,8 +1699,10 @@ private function parse_next_tag() {
* See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
*/
if ( '>' === $html[ $at + 1 ] ) {
- ++$at;
- continue;
+ $this->parser_state = self::STATE_PRESUMPTUOUS_TAG;
+ $this->token_length = $at + 2 - $this->token_starts_at;
+ $this->bytes_already_parsed = $at + 2;
+ return true;
}
/*
@@ -1507,8 +1717,12 @@ private function parse_next_tag() {
return false;
}
- $at = $closer_at + 1;
- continue;
+ $this->parser_state = self::STATE_DOCTYPE;
+ $this->token_length = $closer_at + 1 - $this->token_starts_at;
+ $this->text_starts_at = $this->token_starts_at + 2;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->bytes_already_parsed = $closer_at + 1;
+ return true;
}
/*
@@ -1530,8 +1744,12 @@ private function parse_next_tag() {
return false;
}
- $at = $closer_at + 1;
- continue;
+ $this->parser_state = self::STATE_FUNKY_COMMENT;
+ $this->token_length = $closer_at + 1 - $this->token_starts_at;
+ $this->text_starts_at = $this->token_starts_at + 2;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->bytes_already_parsed = $closer_at + 1;
+ return true;
}
++$at;
@@ -1692,6 +1910,8 @@ private function after_tag() {
$this->token_length = null;
$this->tag_name_starts_at = null;
$this->tag_name_length = null;
+ $this->text_starts_at = 0;
+ $this->text_length = 0;
$this->is_closing_tag = null;
$this->attributes = array();
$this->duplicate_attributes = null;
@@ -2281,6 +2501,70 @@ public function is_tag_closer() {
);
}
+	/**
+	 * Reports the kind of node the processor is currently paused on.
+	 *
+	 * Tags, DOCTYPE declarations, and processing instructions each report a
+	 * fixed type string. Every other parser state reports whatever
+	 * `get_node_name()` returns for that state.
+	 *
+	 * @since 6.5.0
+	 *
+	 * @return string|null Node type, or the result of `get_node_name()` for
+	 *                     states without a dedicated type string.
+	 */
+	public function get_node_type() {
+		$state = $this->parser_state;
+
+		if ( self::STATE_MATCHED_TAG === $state ) {
+			return '#tag';
+		}
+
+		if ( self::STATE_DOCTYPE === $state ) {
+			return '#doctype';
+		}
+
+		if ( self::STATE_PI_NODE === $state ) {
+			return '#processing-instruction';
+		}
+
+		// No dedicated type: the node name doubles as the node type.
+		return $this->get_node_name();
+	}
+
+	/**
+	 * Reports the name of the node the processor is currently paused on.
+	 *
+	 * For a matched tag this is the value of `get_tag()`. Every other token
+	 * kind maps to a fixed name. Parser states that are not listed here
+	 * produce null.
+	 *
+	 * @since 6.5.0
+	 *
+	 * @return string|null Node name, or null when the parser state has no name.
+	 */
+	public function get_node_name() {
+		// Only tags carry a name taken from the document itself.
+		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
+			return $this->get_tag();
+		}
+
+		$fixed_names = array(
+			self::STATE_TEXT_NODE        => '#text',
+			self::STATE_CDATA_NODE       => '#cdata-section',
+			// @todo add the PI tag.
+			self::STATE_PI_NODE          => '?',
+			self::STATE_COMMENT          => '#comment',
+			self::STATE_DOCTYPE          => 'html',
+			self::STATE_PRESUMPTUOUS_TAG => '#presumptuous-tag',
+			self::STATE_FUNKY_COMMENT    => '#funky-comment',
+		);
+
+		if ( isset( $fixed_names[ $this->parser_state ] ) ) {
+			return $fixed_names[ $this->parser_state ];
+		}
+
+		return null;
+	}
+
+	/**
+	 * Returns the modifiable text of the currently-matched token.
+	 *
+	 * The text is the byte span described by `$text_starts_at` and
+	 * `$text_length`. For a matched PRE or TEXTAREA tag, a single leading
+	 * newline is dropped from the returned text.
+	 *
+	 * @since 6.5.0
+	 *
+	 * @return string Modifiable text, or an empty string when the current
+	 *                token has none.
+	 */
+	public function get_node_text() {
+		$at     = $this->text_starts_at;
+		$length = $this->text_length;
+
+		/*
+		 * Tokens without modifiable text have a zero length. Bail out before
+		 * inspecting the document: otherwise the leading-newline check below
+		 * reads an unrelated byte, and when that byte is a newline the length
+		 * goes negative, which makes substr() return everything up to the
+		 * last byte of the document instead of an empty string.
+		 */
+		if ( null === $at || $length <= 0 ) {
+			return '';
+		}
+
+		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
+			switch ( $this->get_tag() ) {
+				case 'PRE':
+				case 'TEXTAREA':
+					// A newline directly after the opening tag is not part of the content.
+					if ( "\n" === $this->html[ $at ] ) {
+						++$at;
+						--$length;
+					}
+					break;
+			}
+		}
+
+		return substr( $this->html, $at, $length );
+	}
+
/**
* Updates or creates a new attribute on the currently matched tag with the passed value.
*
@@ -2797,4 +3081,12 @@ private function matches() {
* @access private
*/
const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG';
+
+ /*
+ * Parser states for tokens other than tags. The parser stores one of these
+ * values in `$parser_state` when it pauses on the matching kind of token.
+ *
+ * - STATE_TEXT_NODE: a #text node.
+ * - STATE_CDATA_NODE: a CDATA section.
+ * - STATE_PI_NODE: a processing instruction.
+ *   NOTE(review): no assignment of this state is visible in this change; confirm where it is set.
+ * - STATE_COMMENT: a comment or bogus comment.
+ * - STATE_DOCTYPE: a DOCTYPE declaration.
+ * - STATE_PRESUMPTUOUS_TAG: an empty tag closer.
+ * - STATE_FUNKY_COMMENT: a tag closer with an invalid tag name. Its string
+ *   value is 'STATE_WP_FUNKY', which differs from the constant's own name.
+ */
+ const STATE_TEXT_NODE = 'STATE_TEXT_NODE';
+ const STATE_CDATA_NODE = 'STATE_CDATA_NODE';
+ const STATE_PI_NODE = 'STATE_PI_NODE';
+ const STATE_COMMENT = 'STATE_COMMENT';
+ const STATE_DOCTYPE = 'STATE_DOCTYPE';
+ const STATE_PRESUMPTUOUS_TAG = 'STATE_PRESUMPTUOUS_TAG';
+ const STATE_FUNKY_COMMENT = 'STATE_WP_FUNKY';
}
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
index 8a681d2cb0042..355c03d941183 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
@@ -557,8 +557,10 @@ public function test_next_tag_should_stop_on_rcdata_and_script_tag_closers_when_
$p = new WP_HTML_Tag_Processor( '' );
$p->next_tag();
- $this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the tag closer' );
- $this->assertTrue( $p->is_tag_closer(), 'Indicated a ' );
$this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the tag closer when there was no tag opener' );
@@ -566,8 +568,10 @@ public function test_next_tag_should_stop_on_rcdata_and_script_tag_closers_when_
$p = new WP_HTML_Tag_Processor( '' );
$p->next_tag();
- $this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the tag closer' );
- $this->assertTrue( $p->is_tag_closer(), 'Indicated a ' );
$this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the tag closer when there was no tag opener' );
@@ -575,8 +579,10 @@ public function test_next_tag_should_stop_on_rcdata_and_script_tag_closers_when_
$p = new WP_HTML_Tag_Processor( 'abc' );
$p->next_tag();
- $this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the tag closer' );
- $this->assertTrue( $p->is_tag_closer(), 'Indicated a tag opener is a tag closer' );
+ $this->assertFalse(
+ $p->next_tag( array( 'tag_closers' => 'visit' ) ),
+ 'Should not have found closing TITLE when closing an opener.'
+ );
$p = new WP_HTML_Tag_Processor( 'abc' );
$this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the tag closer when there was no tag opener' );