diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index ee6209c69e0ae..289347840d002 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -247,6 +247,86 @@
  *         }
  *     }
  *
+ * ## Tokens and finer-grained processing.
+ *
+ * >>> Stub documentation.
+ *
+ * It's also possible to scan through every lexical token in
+ * the HTML document using the `next_token()` function. This
+ * alternative form takes no argument and provides no built-in
+ * query syntax.
+ *
+ * Example:
+ *
+ *      $title        = '(untitled)';
+ *      $text_content = '';
+ *      while ( $processor->next_token() ) {
+ *          switch ( $processor->get_node_name() ) {
+ *              case '#text':
+ *                  $text_content .= $processor->get_node_text();
+ *                  break;
+ *
+ *              case 'HR':
+ *                  $text_content .= "\n";
+ *                  break;
+ *
+ *              case 'TITLE':
+ *                  $title = $processor->get_node_text();
+ *                  break;
+ *          }
+ *      }
+ *      return trim( "# {$title}\n\n{$text_content}\n" );
+ *
+ * ### Tokens and _modifiable text_.
+ *
+ * #### Special "atomic" HTML elements.
+ *
+ * Not all HTML elements are able to contain other elements inside of them.
+ * For instance, the contents inside a TITLE element are plaintext (except
+ * that character references like &amp; will be decoded). This means that
+ * if the string `<img>` appears inside a TITLE element, then it's not an
+ * image tag, but rather it's text describing an image tag. Likewise, the
+ * contents of a SCRIPT or STYLE element are handled entirely differently in
+ * a browser than the contents of other elements because they represent a
+ * different language than HTML.
+ *
+ * For these elements the Tag Processor treats the entire sequence as one,
+ * from the opening tag, including its contents, through its closing tag.
+ * This means that it's not possible to match the closing tag for a
+ * SCRIPT element unless it's unexpected; the Tag Processor already matched
+ * it when it found the opening tag.
+ *
+ * The inner contents of these elements are that element's _modifiable text_.
+ *
+ * The special elements are:
+ *  - `SCRIPT` whose contents are treated as raw plaintext but supports a legacy
+ *    style of including Javascript inside of HTML comments to avoid accidentally
+ *    closing the SCRIPT from inside a Javascript string. E.g. `console.log( '</script>' )`.
+ *  - `TITLE` and `TEXTAREA` whose contents are treated as plaintext and then any
+ *    character references are decoded. E.g. "1 &amp;lt; 2 < 3" becomes "1 < 2 < 3".
+ *  - `IFRAME`, `NOSCRIPT`, `NOEMBED`, `NOFRAMES`, `STYLE`, `XMP` whose contents are treated as
+ *    raw plaintext and left as-is. E.g. "1 &amp;lt; 2 < 3" remains "1 &amp;lt; 2 < 3".
+ *
+ * #### Other tokens with modifiable text.
+ *
+ * There are also non-elements which are atomic in nature and contain modifiable text.
+ *
+ *  - `#text` nodes, whose entire token _is_ the modifiable text.
+ *  - Comment nodes and nodes that became comments because of some syntax error. The
+ *    text for these nodes is the portion of the comment inside of the syntax. E.g. for
+ *    "&lt;!-- comment -->" the text is " comment " (note that the spaces are part of it).
+ *  - `CDATA` sections, whose text is the content inside of the section itself. E.g. for
+ *    "&lt;![CDATA[some content]]>" the text is "some content".
+ *  - "Funky comments," which are a special case of invalid closing tags whose name is
+ *    invalid. The text for these nodes is the text that a browser would transform into
+ *    an HTML comment when parsing. E.g. for "&lt;/%post_author>" the text is "%post_author".
+ *
+ * And there are non-elements which are atomic in nature but have no modifiable text.
+ *  - `DOCTYPE` nodes like "&lt;!DOCTYPE html>" which have no closing tag.
+ *  - XML Processing instruction nodes like "&lt;?xml charset="utf8"?>".
+ *  - The empty end tag "&lt;/>" which is ignored in the browser and DOM but exposed
+ *    to the HTML API.
+ *
  * ## Design and limitations
  *
  * The Tag Processor is designed to linearly scan HTML documents and tokenize
@@ -320,7 +400,8 @@
  * @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive.
  * @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE.
  * @since 6.5.0 Pauses processor when input ends in an incomplete syntax token.
- *              Introduces "special" elements which act like void elements, e.g. STYLE.
+ *              Introduces "special" elements which act like void elements, e.g. TITLE, STYLE.
+ *              Allows scanning through all tokens and processing modifiable text, where applicable.
  */
 class WP_HTML_Tag_Processor {
 	/**
@@ -396,12 +477,18 @@ class WP_HTML_Tag_Processor {
 	/**
 	 * Specifies mode of operation of the parser at any given time.
 	 *
-	 * | State         | Meaning                                                              |
-	 * | --------------|----------------------------------------------------------------------|
-	 * | *Ready*       | The parser is ready to run.                                          |
-	 * | *Complete*    | There is nothing left to parse.                                      |
-	 * | *Incomplete*  | The HTML ended in the middle of a token; nothing more can be parsed. |
-	 * | *Matched tag* | Found an HTML tag; it's possible to modify its attributes.           |
+	 * | State           | Meaning                                                              |
+	 * | ----------------|----------------------------------------------------------------------|
+	 * | *Ready*         | The parser is ready to run.                                          |
+	 * | *Complete*      | There is nothing left to parse.                                      |
+	 * | *Incomplete*    | The HTML ended in the middle of a token; nothing more can be parsed. |
+	 * | *Matched tag*   | Found an HTML tag; it's possible to modify its attributes.           |
+	 * | *Text node*     | Found a #text node; this is plaintext and modifiable.                |
+	 * | *CDATA node*    | Found a CDATA section; this is modifiable.                           |
+	 * | *PI node*       | Found a Processing Instruction; this is modifiable.                  |
+	 * | *Comment*       | Found a comment or bogus comment; this is modifiable.                |
+	 * | *Presumptuous*  | Found an empty tag closer: `</>`.                                    |
+	 * | *Funky comment* | Found a tag closer with an invalid tag name; this is modifiable.     |
 	 *
 	 * @since 6.5.0
 	 *
@@ -409,6 +496,13 @@ class WP_HTML_Tag_Processor {
 	 * @see WP_HTML_Tag_Processor::STATE_COMPLETE
 	 * @see WP_HTML_Tag_Processor::STATE_INCOMPLETE
 	 * @see WP_HTML_Tag_Processor::STATE_MATCHED_TAG
+	 * @see WP_HTML_Tag_Processor::STATE_TEXT_NODE
+	 * @see WP_HTML_Tag_Processor::STATE_CDATA_NODE
+	 * @see WP_HTML_Tag_Processor::STATE_PI_NODE
+	 * @see WP_HTML_Tag_Processor::STATE_COMMENT
+	 * @see WP_HTML_Tag_Processor::STATE_DOCTYPE
+	 * @see WP_HTML_Tag_Processor::STATE_PRESUMPTUOUS_TAG
+	 * @see WP_HTML_Tag_Processor::STATE_FUNKY_COMMENT
 	 *
 	 * @var string
 	 */
@@ -490,6 +584,24 @@ class WP_HTML_Tag_Processor {
 	 */
 	private $tag_name_length;
 
+	/**
+	 * Byte offset into input document where current modifiable text starts.
+	 *
+	 * @since 6.5.0
+	 *
+	 * @var int
+	 */
+	private $text_starts_at;
+
+	/**
+	 * Byte length of modifiable text.
+	 *
+	 * @since 6.5.0
+	 *
+	 * @var int
+	 */
+	private $text_length;
+
 	/**
 	 * Whether the current tag is an opening tag, e.g. <div>, or a closing tag, e.g. </div>.
 	 *
@@ -705,8 +817,8 @@ public function next_tag( $query = null ) {
 	 * @return bool Whether a token was parsed.
 	 */
 	public function next_token() {
-		$this->get_updated_html();
 		$was_at = $this->bytes_already_parsed;
+		$this->get_updated_html();
 
 		// Don't proceed if there's nothing more to scan.
 		if (
@@ -736,6 +848,19 @@ public function next_token() {
 			return false;
 		}
 
+		/*
+		 * For legacy reasons the rest of this function handles tags and their
+		 * attributes. If the processor has reached the end of the document
+		 * or if it matched any other token then it should return here.
+		 */
+		if (
+			self::STATE_INCOMPLETE !== $this->parser_state &&
+			self::STATE_COMPLETE !== $this->parser_state &&
+			self::STATE_MATCHED_TAG !== $this->parser_state
+		) {
+			return true;
+		}
+
 		// Parse all of its attributes.
 		while ( $this->parse_next_attribute() ) {
 			continue;
@@ -762,7 +887,7 @@ public function next_token() {
 		}
 		$this->parser_state         = self::STATE_MATCHED_TAG;
 		$this->token_length         = $tag_ends_at - $this->token_starts_at;
-		$this->bytes_already_parsed = $tag_ends_at;
+		$this->bytes_already_parsed = $tag_ends_at + 1;
 
 		/*
 		 * For non-DATA sections which might contain text that looks like HTML tags but
@@ -771,8 +896,8 @@ public function next_token() {
 		 */
 		$t = $this->html[ $this->tag_name_starts_at ];
 		if (
-			! $this->is_closing_tag &&
-			(
+			$this->is_closing_tag ||
+			! (
 				'i' === $t || 'I' === $t ||
 				'n' === $t || 'N' === $t ||
 				's' === $t || 'S' === $t ||
@@ -780,38 +905,69 @@ public function next_token() {
 				'x' === $t || 'X' === $t
 			)
 		) {
-			$tag_name = $this->get_tag();
+			return true;
+		}
 
-			if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) {
-				$this->parser_state         = self::STATE_INCOMPLETE;
-				$this->bytes_already_parsed = $was_at;
+		$tag_name = $this->get_tag();
+		if (
+			'SCRIPT' !== $tag_name &&
+			'TEXTAREA' !== $tag_name &&
+			'TITLE' !== $tag_name &&
+			'IFRAME' !== $tag_name &&
+			'NOEMBED' !== $tag_name &&
+			'NOFRAMES' !== $tag_name &&
+			'STYLE' !== $tag_name &&
+			'XMP' !== $tag_name
+		) {
+			return true;
+		}
 
-				return false;
-			} elseif (
-				( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) &&
-				! $this->skip_rcdata( $tag_name )
-			) {
-				$this->parser_state         = self::STATE_INCOMPLETE;
-				$this->bytes_already_parsed = $was_at;
+		// Preserve the opening tag pointers.
+		$tag_name_starts_at = $this->tag_name_starts_at;
+		$tag_name_length    = $this->tag_name_length;
+		$tag_ends_at        = $this->token_starts_at + $this->token_length;
 
-				return false;
-			} elseif (
-				(
-					'IFRAME' === $tag_name ||
-					'NOEMBED' === $tag_name ||
-					'NOFRAMES' === $tag_name ||
-					'STYLE' === $tag_name ||
-					'XMP' === $tag_name
-				) &&
-				! $this->skip_rawtext( $tag_name )
-			) {
-				$this->parser_state         = self::STATE_INCOMPLETE;
-				$this->bytes_already_parsed = $was_at;
+		// Find the closing tag.
+		$found_closer = false;
+		switch ( $tag_name ) {
+			case 'SCRIPT':
+				$found_closer = $this->skip_script_data();
+				break;
 
-				return false;
-			}
+			case 'TEXTAREA':
+			case 'TITLE':
+				$found_closer = $this->skip_rcdata( $tag_name );
+				break;
+
+			case 'IFRAME':
+			case 'NOEMBED':
+			case 'NOFRAMES':
+			case 'STYLE':
+			case 'XMP':
+				$found_closer = $this->skip_rawtext( $tag_name );
+				break;
 		}
 
+		if ( ! $found_closer ) {
+			$this->parser_state         = self::STATE_INCOMPLETE;
+			$this->bytes_already_parsed = $was_at;
+			return false;
+		}
+
+		/*
+		 * The values here look like they reference the opening tag but they reference
+		 * the closing tag instead. This is why the opening tag values were stored
+		 * above in a variable. It reads confusingly here, but that's because the
+		 * functions that skip the contents have moved all the internal cursors past
+		 * the inner content of the tag.
+		 */
+		$this->token_starts_at    = $was_at;
+		$this->token_length       = $this->bytes_already_parsed - $this->token_starts_at;
+		$this->text_starts_at     = $tag_ends_at + 1;
+		$this->text_length        = $this->tag_name_starts_at - $this->text_starts_at;
+		$this->tag_name_starts_at = $tag_name_starts_at;
+		$this->tag_name_length    = $tag_name_length;
+
 		return true;
 	}
 
@@ -1007,7 +1163,7 @@ public function has_class( $wanted_class ) {
 	 */
 	public function set_bookmark( $name ) {
 		// It only makes sense to set a bookmark if the parser has paused on a concrete token.
-		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
+		if ( self::STATE_INCOMPLETE === $this->parser_state ) {
 			return false;
 		}
 
@@ -1082,15 +1238,15 @@ private function skip_rcdata( $tag_name ) {
 		$at = $this->bytes_already_parsed;
 
 		while ( false !== $at && $at < $doc_length ) {
-			$at = strpos( $this->html, '</', $at );
+			$at                       = strpos( $this->html, '</', $at );
+			$this->tag_name_starts_at = $at;
 
 			// Fail if there is no possible tag closer.
 			if ( false === $at || ( $at + $tag_length ) >= $doc_length ) {
 				return false;
 			}
 
-			$closer_potentially_starts_at = $at;
-			$at                          += 2;
+			$at += 2;
 
 			/*
 			 * Find a case-insensitive match to the tag name.
@@ -1131,13 +1287,23 @@ private function skip_rcdata( $tag_name ) {
 			while ( $this->parse_next_attribute() ) {
 				continue;
 			}
+
 			$at = $this->bytes_already_parsed;
 			if ( $at >= strlen( $this->html ) ) {
 				return false;
 			}
 
-			if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) {
-				$this->bytes_already_parsed = $closer_potentially_starts_at;
+			if ( '>' === $html[ $at ] ) {
+				$this->bytes_already_parsed = $at + 1;
+				return true;
+			}
+
+			if ( $at + 1 >= strlen( $this->html ) ) {
+				return false;
+			}
+
+			if ( '/' === $html[ $at ] && '>' === $html[ $at + 1 ] ) {
+				$this->bytes_already_parsed = $at + 2;
 				return true;
 			}
 		}
@@ -1259,6 +1425,7 @@ private function skip_script_data() {
 
 			if ( $is_closing ) {
 				$this->bytes_already_parsed = $closer_potentially_starts_at;
+				$this->tag_name_starts_at   = $closer_potentially_starts_at;
 				if ( $this->bytes_already_parsed >= $doc_length ) {
 					return false;
 				}
@@ -1274,7 +1441,7 @@ private function skip_script_data() {
 				}
 
 				if ( '>' === $html[ $this->bytes_already_parsed ] ) {
-					$this->bytes_already_parsed = $closer_potentially_starts_at;
+					++$this->bytes_already_parsed;
 					return true;
 				}
 			}
@@ -1303,17 +1470,34 @@ private function parse_next_tag() {
 
 		$html       = $this->html;
 		$doc_length = strlen( $html );
-		$at         = $this->bytes_already_parsed;
+		$was_at     = $this->bytes_already_parsed;
+		$at         = $was_at;
 
-		while ( false !== $at && $at < $doc_length ) {
+		while ( false !== $at && $at <= $doc_length ) {
 			$at = strpos( $html, '<', $at );
 
+			if ( $at > $was_at ) {
+				$this->parser_state         = self::STATE_TEXT_NODE;
+				$this->token_starts_at      = $was_at;
+				$this->token_length         = $at - $was_at;
+				$this->text_starts_at       = $was_at;
+				$this->text_length          = $this->token_length;
+				$this->bytes_already_parsed = $at;
+				return true;
+			}
+
 			/*
 			 * This does not imply an incomplete parse; it indicates that there
 			 * can be nothing left in the document other than a #text node.
 			 */
 			if ( false === $at ) {
-				return false;
+				$this->parser_state         = self::STATE_TEXT_NODE;
+				$this->token_starts_at      = $was_at;
+				$this->token_length         = strlen( $html ) - $was_at;
+				$this->text_starts_at       = $was_at;
+				$this->text_length          = $this->token_length;
+				$this->bytes_already_parsed = strlen( $html );
+				return true;
 			}
 
 			$this->token_starts_at = $at;
@@ -1342,8 +1526,9 @@ private function parse_next_tag() {
 			$tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 );
 			if ( $tag_name_prefix_length > 0 ) {
 				++$at;
-				$this->tag_name_length      = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
+				$this->parser_state         = self::STATE_MATCHED_TAG;
 				$this->tag_name_starts_at   = $at;
+				$this->tag_name_length      = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
 				$this->bytes_already_parsed = $at + $this->tag_name_length;
 				return true;
 			}
@@ -1383,8 +1568,13 @@ private function parse_next_tag() {
 					// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
 					$span_of_dashes = strspn( $html, '-', $closer_at );
 					if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
-						$at = $closer_at + $span_of_dashes + 1;
-						continue;
+						// @todo This could go wrong if the closer is shorter than `<!---->` because there's no inside.
+						$this->parser_state         = self::STATE_COMMENT;
+						$this->token_length         = $closer_at + $span_of_dashes + 1 - $this->token_starts_at;
+						$this->text_starts_at       = $this->token_starts_at + 4;
+						$this->text_length          = max( 0, $closer_at - $this->text_starts_at );
+						$this->bytes_already_parsed = $closer_at + $span_of_dashes + 1;
+						return true;
 					}
 
 					/*
@@ -1403,13 +1593,25 @@ private function parse_next_tag() {
 						}
 
 						if ( $closer_at + 2 < $doc_length && '>' === $html[ $closer_at + 2 ] ) {
-							$at = $closer_at + 3;
-							continue 2;
+							$this->parser_state         = self::STATE_COMMENT;
+							$this->token_length         = $closer_at + 3 - $this->token_starts_at;
+							$this->text_starts_at       = $this->token_starts_at + 4;
+							$this->text_length          = $closer_at - $this->text_starts_at;
+							$this->bytes_already_parsed = $closer_at + 3;
+							return true;
 						}
 
-						if ( $closer_at + 3 < $doc_length && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
-							$at = $closer_at + 4;
-							continue 2;
+						if (
+							$closer_at + 3 < $doc_length &&
+							'!' === $html[ $closer_at + 2 ] &&
+							'>' === $html[ $closer_at + 3 ]
+						) {
+							$this->parser_state         = self::STATE_COMMENT;
+							$this->token_length         = $closer_at + 4 - $this->token_starts_at;
+							$this->text_starts_at       = $this->token_starts_at + 4;
+							$this->text_length          = $closer_at - $this->text_starts_at;
+							$this->bytes_already_parsed = $closer_at + 4;
+							return true;
 						}
 					}
 				}
@@ -1436,8 +1638,12 @@ private function parse_next_tag() {
 						return false;
 					}
 
-					$at = $closer_at + 3;
-					continue;
+					$this->parser_state         = self::STATE_CDATA_NODE;
+					$this->token_length         = $closer_at + 3 - $this->token_starts_at; // Must end where bytes_already_parsed ends.
+					$this->text_starts_at       = $this->token_starts_at + 9;
+					$this->text_length          = $closer_at - $this->text_starts_at;
+					$this->bytes_already_parsed = $closer_at + 3;
+					return true;
 				}
 
 				/*
@@ -1462,8 +1668,12 @@ private function parse_next_tag() {
 						return false;
 					}
 
-					$at = $closer_at + 1;
-					continue;
+					$this->parser_state         = self::STATE_DOCTYPE;
+					$this->token_length         = $closer_at + 1 - $this->token_starts_at;
+					$this->text_starts_at       = $this->token_starts_at + 9;
+					$this->text_length          = $closer_at - $this->text_starts_at;
+					$this->bytes_already_parsed = $closer_at + 1;
+					return true;
 				}
 
 				/*
@@ -1477,8 +1687,6 @@ private function parse_next_tag() {
 
 					return false;
 				}
-
-				continue;
 			}
 
 			/*
@@ -1491,8 +1699,10 @@ private function parse_next_tag() {
 			 * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
 			 */
 			if ( '>' === $html[ $at + 1 ] ) {
-				++$at;
-				continue;
+				$this->parser_state         = self::STATE_PRESUMPTUOUS_TAG;
+				$this->token_length         = $at + 2 - $this->token_starts_at;
+				$this->bytes_already_parsed = $at + 2;
+				return true;
 			}
 
 			/*
@@ -1507,8 +1717,12 @@ private function parse_next_tag() {
 					return false;
 				}
 
-				$at = $closer_at + 1;
-				continue;
+				$this->parser_state         = self::STATE_PI_NODE; // NOTE(review): assumes this is the `<?` branch; confirm.
+				$this->token_length         = $closer_at + 1 - $this->token_starts_at;
+				$this->text_starts_at       = $this->token_starts_at + 2;
+				$this->text_length          = $closer_at - $this->text_starts_at;
+				$this->bytes_already_parsed = $closer_at + 1;
+				return true;
 			}
 
 			/*
@@ -1530,8 +1744,12 @@ private function parse_next_tag() {
 					return false;
 				}
 
-				$at = $closer_at + 1;
-				continue;
+				$this->parser_state         = self::STATE_FUNKY_COMMENT;
+				$this->token_length         = $closer_at + 1 - $this->token_starts_at;
+				$this->text_starts_at       = $this->token_starts_at + 2;
+				$this->text_length          = $closer_at - $this->text_starts_at;
+				$this->bytes_already_parsed = $closer_at + 1;
+				return true;
 			}
 
 			++$at;
@@ -1692,6 +1910,8 @@ private function after_tag() {
 		$this->token_length         = null;
 		$this->tag_name_starts_at   = null;
 		$this->tag_name_length      = null;
+		$this->text_starts_at       = 0;
+		$this->text_length          = 0;
 		$this->is_closing_tag       = null;
 		$this->attributes           = array();
 		$this->duplicate_attributes = null;
@@ -2281,6 +2501,70 @@ public function is_tag_closer() {
 		);
 	}
 
+	/**
+	 * Names the kind of token the processor is currently paused on.
+	 *
+	 * @return string|null '#tag', '#doctype', '#processing-instruction', else the node name.
+	 */
+	public function get_node_type() {
+		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
+			return '#tag';
+		} elseif ( self::STATE_DOCTYPE === $this->parser_state ) {
+			return '#doctype';
+		} elseif ( self::STATE_PI_NODE === $this->parser_state ) {
+			return '#processing-instruction';
+		}
+		return $this->get_node_name();
+	}
+
+	/**
+	 * Returns the name of the token the processor is currently paused on.
+	 *
+	 * A matched tag defers to `get_tag()`; every other known parser state
+	 * maps to a fixed name. States without an entry yield null.
+	 *
+	 * @return string|null Node name, or null when the state has no name here.
+	 */
+	public function get_node_name() {
+		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
+			return $this->get_tag();
+		}
+
+		$fixed_names = array(
+			self::STATE_TEXT_NODE        => '#text',
+			self::STATE_CDATA_NODE       => '#cdata-section',
+			// @todo add the PI tag.
+			self::STATE_PI_NODE          => '?',
+			self::STATE_COMMENT          => '#comment',
+			self::STATE_DOCTYPE          => 'html',
+			self::STATE_PRESUMPTUOUS_TAG => '#presumptuous-tag',
+			self::STATE_FUNKY_COMMENT    => '#funky-comment',
+		);
+
+		$state = $this->parser_state;
+
+		return isset( $fixed_names[ $state ] ) ? $fixed_names[ $state ] : null;
+	}
+
+	/**
+	 * Returns the text span recorded for the current token.
+	 *
+	 * @return string Span text; PRE and TEXTAREA tags drop one leading newline.
+	 */
+	public function get_node_text() {
+		$at     = $this->text_starts_at;
+		$length = $this->text_length;
+		$tag    = self::STATE_MATCHED_TAG === $this->parser_state ? $this->get_tag() : null;
+		// Require real text first: after_tag() resets the span to offset 0, length 0, so
+		// an empty span would test html[0] and could hand substr() a length of -1.
+		if ( $length > 0 && ( 'PRE' === $tag || 'TEXTAREA' === $tag ) && "\n" === $this->html[ $at ] ) {
+			++$at;
+			--$length;
+		}
+
+		return substr( $this->html, $at, $length );
+	}
+
 	/**
 	 * Updates or creates a new attribute on the currently matched tag with the passed value.
 	 *
@@ -2797,4 +3081,12 @@ private function matches() {
 	 * @access private
 	 */
 	const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG';
+
+	const STATE_TEXT_NODE        = 'STATE_TEXT_NODE';
+	const STATE_CDATA_NODE       = 'STATE_CDATA_NODE';
+	const STATE_PI_NODE          = 'STATE_PI_NODE';
+	const STATE_COMMENT          = 'STATE_COMMENT';
+	const STATE_DOCTYPE          = 'STATE_DOCTYPE';
+	const STATE_PRESUMPTUOUS_TAG = 'STATE_PRESUMPTUOUS_TAG';
+	const STATE_FUNKY_COMMENT    = 'STATE_WP_FUNKY';
 }
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
index 8a681d2cb0042..355c03d941183 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
@@ -557,8 +557,10 @@ public function test_next_tag_should_stop_on_rcdata_and_script_tag_closers_when_
 		$p = new WP_HTML_Tag_Processor( '<script>abc</script>' );
 
 		$p->next_tag();
-		$this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the </script> tag closer' );
-		$this->assertTrue( $p->is_tag_closer(), 'Indicated a <script> tag opener is a tag closer' );
+		$this->assertFalse(
+			$p->next_tag( array( 'tag_closers' => 'visit' ) ),
+			'Should not have found closing SCRIPT tag when closing an opener.'
+		);
 
 		$p = new WP_HTML_Tag_Processor( 'abc</script>' );
 		$this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the </script> tag closer when there was no tag opener' );
@@ -566,8 +568,10 @@ public function test_next_tag_should_stop_on_rcdata_and_script_tag_closers_when_
 		$p = new WP_HTML_Tag_Processor( '<textarea>abc</textarea>' );
 
 		$p->next_tag();
-		$this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the </textarea> tag closer' );
-		$this->assertTrue( $p->is_tag_closer(), 'Indicated a <textarea> tag opener is a tag closer' );
+		$this->assertFalse(
+			$p->next_tag( array( 'tag_closers' => 'visit' ) ),
+			'Should not have found closing TEXTAREA when closing an opener.'
+		);
 
 		$p = new WP_HTML_Tag_Processor( 'abc</textarea>' );
 		$this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the </textarea> tag closer when there was no tag opener' );
@@ -575,8 +579,10 @@ public function test_next_tag_should_stop_on_rcdata_and_script_tag_closers_when_
 		$p = new WP_HTML_Tag_Processor( '<title>abc</title>' );
 
 		$p->next_tag();
-		$this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the </title> tag closer' );
-		$this->assertTrue( $p->is_tag_closer(), 'Indicated a <title> tag opener is a tag closer' );
+		$this->assertFalse(
+			$p->next_tag( array( 'tag_closers' => 'visit' ) ),
+			'Should not have found closing TITLE when closing an opener.'
+		);
 
 		$p = new WP_HTML_Tag_Processor( 'abc</title>' );
 		$this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Did not find the </title> tag closer when there was no tag opener' );