Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Implement set_inner_html #9

Draft
wants to merge 16 commits into
base: trunk
Choose a base branch
from
Draft
167 changes: 166 additions & 1 deletion src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,115 @@ public static function create_full_parser( $html, $known_definite_encoding = 'UT
return $processor;
}

/**
 * Replaces the inner HTML of the currently-matched element.
 *
 * A non-empty replacement is first run through a fragment parser spawned at
 * the current element and re-serialized. The result is then queued as a
 * lexical update covering everything between the end of the opening tag and
 * the start of its matching closing tag.
 *
 * The call is refused (returning `false`) when the current token is virtual,
 * is not a tag, is a tag closer, does not expect a closer, or is a
 * self-closing element outside the HTML namespace.
 *
 * Whenever the opener bookmark could be set, the processor is moved back to
 * the opening tag before returning and the temporary bookmarks are released,
 * whether or not the replacement succeeded.
 *
 * @param string|null $html New inner HTML; `null` is treated as an empty string.
 * @return bool Whether the replacement was enqueued.
 */
public function set_inner_html( ?string $html ) {
	if ( $this->is_virtual() ) {
		return false;
	}

	if ( $this->get_token_type() !== '#tag' ) {
		return false;
	}

	if ( $this->is_tag_closer() ) {
		return false;
	}

	if ( ! $this->expects_closer() ) {
		return false;
	}

	if (
		'html' !== $this->state->current_token->namespace &&
		$this->state->current_token->has_self_closing_flag
	) {
		return false;
	}

	if ( null === $html ) {
		$html = '';
	}

	if ( '' !== $html ) {
		$fragment_parser = $this->spawn_fragment_parser( $html );
		if ( null === $fragment_parser ) {
			return false;
		}

		try {
			$html = $fragment_parser->serialize();
		} catch ( Exception $e ) {
			return false;
		}

		// A serializer that gives up yields `null`; never enqueue that as content.
		if ( null === $html ) {
			return false;
		}
	}

	// @todo apply modifications if there are any???

	if ( ! parent::set_bookmark( 'SET_INNER_HTML: opener' ) ) {
		return false;
	}

	/*
	 * NOTE(review): parent::seek() is used to return to the opener; confirm
	 * that this processor's own parsing state does not also need rewinding.
	 */
	if ( ! $this->seek_to_matching_closer() ) {
		parent::seek( 'SET_INNER_HTML: opener' );
		parent::release_bookmark( 'SET_INNER_HTML: opener' );
		return false;
	}

	if ( ! parent::set_bookmark( 'SET_INNER_HTML: closer' ) ) {
		// Undo the cursor movement and drop the opener bookmark so nothing leaks.
		parent::seek( 'SET_INNER_HTML: opener' );
		parent::release_bookmark( 'SET_INNER_HTML: opener' );
		return false;
	}

	// The inner span runs from the end of the opener to the start of the closer.
	$opener            = $this->bookmarks['SET_INNER_HTML: opener'];
	$closer            = $this->bookmarks['SET_INNER_HTML: closer'];
	$inner_html_start  = $opener->start + $opener->length;
	$inner_html_length = $closer->start - $inner_html_start;

	$this->lexical_updates['innerHTML'] = new WP_HTML_Text_Replacement(
		$inner_html_start,
		$inner_html_length,
		$html
	);

	parent::seek( 'SET_INNER_HTML: opener' );
	parent::release_bookmark( 'SET_INNER_HTML: opener' );
	parent::release_bookmark( 'SET_INNER_HTML: closer' );

	// @todo check for whether that html will make a mess!
	// Will it break out of tags?

	return true;
}

/**
 * Advances the processor to the closing tag that matches the current opener.
 *
 * The match is detected by scanning forward over tags with the opener's tag
 * name (closers included) until the breadcrumbs equal those of the opener's
 * parent, i.e. the opener's breadcrumbs with the last entry removed.
 *
 * This method does not rewind on failure: when no matching closer is found
 * the processor is left wherever the scan stopped, so callers that need to
 * keep their place must bookmark it beforehand.
 *
 * @return bool Whether the processor now rests on the matching closer.
 */
public function seek_to_matching_closer(): bool {
	$tag_name = $this->get_tag();

	if ( null === $tag_name ) {
		return false;
	}

	if ( $this->is_tag_closer() ) {
		return false;
	}

	if ( ! $this->expects_closer() ) {
		return false;
	}

	$breadcrumbs = $this->breadcrumbs;
	array_pop( $breadcrumbs );

	/*
	 * Pin the query to the opener's tag name. Re-reading the tag name on
	 * every iteration would follow the cursor if it ever stopped on a
	 * different tag.
	 */
	$query = array(
		'tag_name'    => $tag_name,
		'tag_closers' => 'visit',
	);

	// @todo Can't use these queries together
	while ( $this->next_tag( $query ) ) {
		if ( $this->get_breadcrumbs() === $breadcrumbs ) {
			return true;
		}
	}

	return false;
}


/**
* Constructor.
*
Expand Down Expand Up @@ -424,6 +533,61 @@ function ( WP_HTML_Token $token ): void {
};
}

/**
 * Creates a fragment processor with the current node as its context element.
 *
 * The current token is cloned to serve as the new processor's context node,
 * the compat mode is carried over, and the insertion mode is reset
 * according to that context.
 *
 * No processor is created when the current token is not an opening tag, or
 * when it is one of the HTML elements listed below as "self-contained".
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm
 *
 * @param string $html Input HTML fragment to process.
 * @return static|null The created processor if successful, otherwise null.
 */
public function spawn_fragment_parser( string $html ): ?self {
	if ( $this->get_token_type() !== '#tag' ) {
		return null;
	}

	// A closing tag is not an element and cannot serve as a context node.
	if ( $this->is_tag_closer() ) {
		return null;
	}

	$namespace = $this->get_namespace();

	/*
	 * Prevent creating fragments at "self-contained" nodes.
	 *
	 * @see https://github.com/WordPress/wordpress-develop/pull/7141
	 * @see https://github.com/WordPress/wordpress-develop/pull/7198
	 */
	if (
		'html' === $namespace &&
		in_array( $this->get_tag(), array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true )
	) {
		return null;
	}

	$fragment_processor = self::create_fragment( $html );
	if ( null === $fragment_processor ) {
		return null;
	}

	$fragment_processor->compat_mode = $this->compat_mode;

	$fragment_processor->context_node                = clone $this->state->current_token;
	$fragment_processor->context_node->bookmark_name = 'context-node';
	$fragment_processor->context_node->on_destroy    = null;

	$fragment_processor->breadcrumbs = array();

	if ( 'TEMPLATE' === $fragment_processor->context_node->node_name ) {
		$fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
	}

	$fragment_processor->reset_insertion_mode_appropriately();

	// @todo Set the parser's form element pointer.

	$fragment_processor->state->encoding_confidence = 'irrelevant';

	return $fragment_processor;
}

/**
* Stops the parser and terminates its execution when encountering unsupported markup.
*
Expand Down Expand Up @@ -522,6 +686,7 @@ public function get_unsupported_exception() {
* 1 for "first" tag, 3 for "third," etc.
* Defaults to first tag.
* @type string|null $class_name Tag must contain this whole class name to match.
* @type string $tag_name Tag name to match.
* @type string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`.
* May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`.
* }
Expand All @@ -545,7 +710,7 @@ public function next_tag( $query = null ): bool {
}

if ( is_string( $query ) ) {
$query = array( 'breadcrumbs' => array( $query ) );
$query = array( 'tag_name' => $query );
}

if ( ! is_array( $query ) ) {
Expand Down
97 changes: 97 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -863,4 +863,101 @@
'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
);
}

/**
 * Ensures that replacing the inner HTML of the element carrying the
 * `target` attribute produces the expected updated document.
 *
 * @ticket TBD
 *
 * @dataProvider data_set_inner_html
 */
public function test_set_inner_html( string $html, ?string $replacement, string $expected ) {
	$processor = WP_HTML_Processor::create_fragment( $html );

	// Walk the tags until the cursor rests on the one flagged with `target`.
	$at_target = false;
	while ( ! $at_target && $processor->next_tag() ) {
		$at_target = (bool) $processor->get_attribute( 'target' );
	}

	$was_applied = $processor->set_inner_html( $replacement );
	$this->assertTrue( $was_applied );

	$updated_html = $processor->get_updated_html();
	$this->assertSame( $expected, $updated_html );
}

/**
 * Data provider.
 *
 * Each data set holds the input HTML (with the element to modify marked by
 * a `target` attribute), the replacement inner HTML, and the expected
 * updated HTML.
 *
 * @return array[]
 */
public static function data_set_inner_html(): array {
	return array(
		'replaces plain text'        => array(
			'<div target>replace me</div>',
			'with me!',
			'<div target>with me!</div>',
		),
		'replaces nested elements'   => array(
			'<div target><div><p><a>replace me</div></div>',
			'with me!',
			'<div target>with me!</div>',
		),
		'normalizes table fragments' => array(
			'<table target><td>replace me</table>',
			'<td>with me!',
			'<table target><tbody><tr><td>with me!</td></tr></tbody></table>',
		),
	);
}

/**
 * Ensures that a rejected inner HTML replacement reports failure and
 * leaves the document untouched.
 *
 * @ticket TBD
 *
 * @dataProvider data_set_inner_html_not_allowed
 */
public function test_set_inner_html_not_allowed( string $html, string $replacement ) {
	$processor = WP_HTML_Processor::create_fragment( $html );

	// Walk the tags until the cursor rests on the one flagged with `target`.
	$at_target = false;
	while ( ! $at_target && $processor->next_tag() ) {
		$at_target = (bool) $processor->get_attribute( 'target' );
	}

	$was_applied = $processor->set_inner_html( $replacement );
	$this->assertFalse( $was_applied, "Should have failed but produced: {$processor->get_updated_html()}" );

	$this->assertSame( $html, $processor->get_updated_html() );
}

/**
 * Data provider.
 *
 * Each data set holds the input HTML (with the element to modify marked by
 * a `target` attribute) and a replacement that is expected to be rejected.
 *
 * @return array[]
 */
public static function data_set_inner_html_not_allowed(): array {
	return array(
		'not allowed in void tags'         => array(
			'<br target>',
			'anything',
		),
		'not allowed in self-closing tags' => array(
			'<svg><text target />',
			'anything',
		),
		'must have closing tag'            => array(
			'<body><div target></body>',
			'anything',
		),

		'a in a'                           => array(
			'<a target></a>',
			'<a>',
		),
		'a nested in a'                    => array(
			'<a><i><em><strong target></a>',
			'<a>A cannot nest inside a',
		),

		'text in table'                    => array(
			'<table target><td>hello</table>',
			'text triggers forstering - not allowed',
		),
		'text in thead'                    => array(
			'<table><thead target><td>hello</thead>',
			'text triggers forstering - not allowed',
		),
		'text in tr'                       => array(
			'<table><tr target>hello</tr>',
			'text triggers forstering - not allowed',
		),
	);
}
}
80 changes: 71 additions & 9 deletions tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,6 @@ public function data_external_html5lib_tests() {
* @return bool True if the test case should be skipped. False otherwise.
*/
private static function should_skip_test( ?string $test_context_element, string $test_name ): bool {
if ( null !== $test_context_element && 'body' !== $test_context_element ) {
return true;
}

if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) {
return true;
}
Expand All @@ -157,11 +153,77 @@ private static function should_skip_test( ?string $test_context_element, string
* @return string|null Tree structure of parsed HTML, if supported, else null.
*/
private static function build_tree_representation( ?string $fragment_context, string $html ) {
$processor = $fragment_context
? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" )
: WP_HTML_Processor::create_full_parser( $html );
if ( null === $processor ) {
throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() );
$processor = null;
if ( $fragment_context ) {
if ( 'body' === $fragment_context ) {
$processor = WP_HTML_Processor::create_fragment( $html );
} else {

/*
* If the string of characters starts with "svg ", the context
* element is in the SVG namespace and the substring after
* "svg " is the local name. If the string of characters starts
* with "math ", the context element is in the MathML namespace
* and the substring after "math " is the local name.
* Otherwise, the context element is in the HTML namespace and
* the string is the local name.
*/
if ( str_starts_with( $fragment_context, 'svg ' ) ) {
$tag_name = substr( $fragment_context, 4 );
if ( 'svg' === $tag_name ) {
$parent_processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><svg>' );
} else {
$parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><svg><{$tag_name}>" );
}
$parent_processor->next_tag( $tag_name );
} elseif ( str_starts_with( $fragment_context, 'math ' ) ) {
$tag_name = substr( $fragment_context, 5 );
if ( 'math' === $tag_name ) {
$parent_processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><math>' );
} else {
$parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><math><{$tag_name}>" );
}
$parent_processor->next_tag( $tag_name );
} else {
if ( in_array(
$fragment_context,
array(
'caption',
'col',
'colgroup',
'tbody',
'td',
'tfoot',
'th',
'thead',
'tr',
),
true
) ) {
$parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><table><{$fragment_context}>" );
$parent_processor->next_tag();
} else {
$parent_processor = WP_HTML_Processor::create_full_parser( "<!DOCTYPE html><{$fragment_context}>" );
}
$parent_processor->next_tag( $fragment_context );
}
if ( null !== $parent_processor->get_unsupported_exception() ) {
throw $parent_processor->get_unsupported_exception();
}
if ( null !== $parent_processor->get_last_error() ) {
throw new Exception( $parent_processor->get_last_error() );
}
$processor = $parent_processor->spawn_fragment_parser( $html );
}

if ( null === $processor ) {
throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() );
}
} else {
$processor = WP_HTML_Processor::create_full_parser( $html );
if ( null === $processor ) {
throw new Exception( 'Could not create a full parser.' );
}
}

/*
Expand Down
Loading