diff --git a/.eslintignore b/.eslintignore index b3ac4e5866..2f55011606 100644 --- a/.eslintignore +++ b/.eslintignore @@ -7,6 +7,7 @@ __pycache__ packages/playground/wordpress-builds/src/wordpress packages/playground/wordpress-builds/public packages/playground/sync/src/test/wp-* +packages/playground/data-liberation/tests/fixtures packages/php-wasm/node/src/test/__test* *.timestamp-1678999213403.mjs .local diff --git a/.prettierignore b/.prettierignore index 9162807152..de4d6784be 100644 --- a/.prettierignore +++ b/.prettierignore @@ -8,6 +8,7 @@ /packages/playground/wordpress-builds/build/build-assets /packages/playground/wordpress-builds/src/wordpress /packages/playground/wordpress-builds/public/ +/packages/playground/data-liberation/tests/fixtures /packages/php-wasm/node/src/test/__test* __pycache__ *.timestamp-1678999213403.mjs diff --git a/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php b/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php index f63fb20c52..78918f5b4f 100644 --- a/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php +++ b/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php @@ -52,7 +52,7 @@ public function get_all_metadata() { return $this->frontmatter; } - public function get_meta_value( $key ) { + public function get_first_meta_value( $key ) { if ( ! array_key_exists( $key, $this->frontmatter ) ) { return null; } diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index 56d10ac191..91038c1ae3 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -48,11 +48,13 @@ require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Processor.php'; require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Url_Processor.php'; require_once __DIR__ . '/src/block-markup/WP_URL_In_Text_Processor.php'; +require_once __DIR__ . '/src/block-markup/WP_HTML_To_Blocks.php'; require_once __DIR__ . '/src/block-markup/WP_URL.php'; require_once __DIR__ . '/src/xml-api/WP_XML_Decoder.php'; require_once __DIR__ . '/src/xml-api/WP_XML_Processor.php'; require_once __DIR__ . '/src/wxr/WP_WXR_Reader.php'; +require_once __DIR__ . '/src/import/WP_Import_Utils.php'; require_once __DIR__ . '/src/import/WP_Block_Object.php'; require_once __DIR__ . '/src/import/WP_Entity_Importer.php'; require_once __DIR__ . '/src/import/WP_File_Visitor.php'; @@ -64,6 +66,8 @@ require_once __DIR__ . '/src/import/WP_Stream_Importer.php'; require_once __DIR__ . '/src/import/WP_Entity_Iterator_Chain.php'; require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php'; +require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader.php'; +require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php'; require_once __DIR__ . '/src/utf8_decoder.php'; diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml index 800b55f189..9646f33205 100644 --- a/packages/playground/data-liberation/phpunit.xml +++ b/packages/playground/data-liberation/phpunit.xml @@ -2,6 +2,8 @@ + tests/WPHTMLEntityReaderTests.php + tests/WPHTMLToBlocksTests.php tests/WPWXRReaderTests.php tests/WPRewriteUrlsTests.php tests/WPURLInTextProcessorTests.php diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Converter.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Converter.php index e3cd04b6de..1133293296 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Converter.php +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Converter.php @@ -1,8 +1,57 @@ Block Markup + Metadata converter. + * + * Used by the Data Liberation importers to accept data formatted as HTML, Markdown, etc. + * and convert them to WordPress posts. + */ interface WP_Block_Markup_Converter { + /** + * Converts the input document specified in the constructor to block markup. + * + * @return bool Whether the conversion was successful. + */ public function convert(); + + /** + * Gets the block markup generated by the convert() method. + * + * @return string The block markup. + */ public function get_block_markup(); + + /** + * Gets all the metadata sourced from the input document by the convert() method. + * The data format is: + * + * array( + * 'post_title' => array( 'The Name of the Wind' ), + * 'post_author' => array( 'Patrick Rothfuss', 'Betsy Wollheim' ) + * ) + * + * Note each meta key may have multiple values. The consumer of this interface + * must account for this. + * + * @return array The metadata sourced from the input document. + */ public function get_all_metadata(); - public function get_meta_value( $key ); + + /** + * Gets the first metadata value for a given key. + * + * Example: + * + * Metadata: + * array( + * 'post_title' => array( 'The Name of the Wind' ), + * 'post_author' => array( 'Patrick Rothfuss', 'Betsy Wollheim' ) + * ) + * + * get_first_meta_value( 'post_author' ) returns 'Patrick Rothfuss'. + * + * @param string $key The metadata key. + * @return mixed The metadata value. + */ + public function get_first_meta_value( $key ); } diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php index 101cc63484..04785fe138 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php @@ -58,6 +58,25 @@ public function get_block_attributes() { return $this->block_attributes; } + /** + * Overwrites all the block attributes of the currently matched block + * opener. + * + * @param array $attributes The attributes to set. + * @return bool Whether the attributes were set. + */ + public function set_block_attributes( $attributes ) { + if ( '#block-comment' !== $this->get_token_type() ) { + return false; + } + if ( $this->is_block_closer() ) { + return false; + } + $this->block_attributes = $attributes; + $this->block_attributes_updated = true; + return true; + } + public function is_block_closer() { return $this->block_name !== null && $this->block_closer === true; } @@ -165,17 +184,23 @@ private function block_attribute_updates_to_modifiable_text_updates() { if ( ! $this->block_attributes_updated ) { return false; } + $encoded_attributes = json_encode( + $this->block_attributes_iterator + ? $this->block_attributes_iterator->getSubIterator( 0 )->getArrayCopy() + : $this->block_attributes, + JSON_HEX_TAG | // Convert < and > to \u003C and \u003E + JSON_HEX_AMP // Convert & to \u0026 + ); + if ( $encoded_attributes === '[]' ) { + $encoded_attributes = ''; + } else { + $encoded_attributes .= ' '; + } $this->set_modifiable_text( ' ' . - $this->block_name . ' ' . - json_encode( - $this->block_attributes_iterator - ? $this->block_attributes_iterator->getSubIterator( 0 )->getArrayCopy() - : $this->block_attributes, - JSON_HEX_TAG | // Convert < and > to \u003C and \u003E - JSON_HEX_AMP // Convert & to \u0026 - ) - . ' ' + $this->block_name . + ' ' . + $encoded_attributes ); return true; diff --git a/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php b/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php new file mode 100644 index 0000000000..329e75bc39 --- /dev/null +++ b/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php @@ -0,0 +1,428 @@ + + *

Hello world!

+ * + * Becomes: + * + * + *

Hello world!

+ * + * + * With the following metadata: + * + * array( + * 'post_title' => array( 'My first post' ), + * ) + */ +class WP_HTML_To_Blocks implements WP_Block_Markup_Converter { + const STATE_READY = 'STATE_READY'; + const STATE_COMPLETE = 'STATE_COMPLETE'; + + private $state = self::STATE_READY; + private $block_stack = array(); + private $html; + private $ignore_text = false; + private $in_ephemeral_paragraph = false; + private $block_markup = ''; + private $metadata = array(); + + public function __construct( $html ) { + $this->html = WP_HTML_Processor::create_fragment( $html ); + } + + /** + * @inheritDoc + */ + public function convert() { + if ( self::STATE_READY !== $this->state ) { + return false; + } + + while ( $this->html->next_token() ) { + switch ( $this->html->get_token_type() ) { + case '#text': + if ( $this->ignore_text ) { + break; + } + $this->append_html( htmlspecialchars( $this->html->get_modifiable_text() ) ); + break; + case '#tag': + $this->handle_tag(); + break; + } + } + + $this->close_ephemeral_paragraph(); + return true; + } + + /** + * @inheritDoc + */ + public function get_first_meta_value( $key ) { + if ( ! array_key_exists( $key, $this->metadata ) ) { + return null; + } + return $this->metadata[ $key ][0]; + } + + /** + * @inheritDoc + */ + public function get_all_metadata() { + return $this->metadata; + } + + /** + * @inheritDoc + */ + public function get_block_markup() { + return $this->block_markup; + } + + /** + * Converts the currently matched HTML tag to block markup + * or metadata. + */ + private function handle_tag() { + $html = $this->html; + $tag = $html->get_tag(); + $tag_lowercase = strtolower( $tag ); + + $is_opener = ! $html->is_tag_closer() && $html->expects_closer(); + $is_closer = $html->is_tag_closer(); + $is_void_tag = ! $html->expects_closer(); + $prefix = ( + $is_void_tag ? '' : ( + $is_closer ? '-' : '+' + ) + ); + $event = $prefix . $tag; + switch ( $event ) { + case 'META': + $key = $html->get_attribute( 'name' ); + $value = $html->get_attribute( 'content' ); + if ( ! array_key_exists( $key, $this->metadata ) ) { + $this->metadata[ $key ] = array(); + } + $this->metadata[ $key ][] = $value; + break; + case 'IMG': + $template = new \WP_HTML_Tag_Processor( '' ); + $template->next_tag(); + foreach ( array( 'alt', 'title', 'src' ) as $attr ) { + if ( $html->get_attribute( $attr ) ) { + $template->set_attribute( $attr, $html->get_attribute( $attr ) ); + } + } + $this->append_html( $template->get_updated_html() ); + break; + case 'INPUT': + // Insert the input tag as HTML blocks. + $this->push_block( 'html' ); + $template = new \WP_HTML_Tag_Processor( '' ); + $template->next_tag(); + $attrs = $this->html->get_attribute_names_with_prefix( '' ); + foreach ( $attrs as $attr ) { + $template->set_attribute( $attr, $this->html->get_attribute( $attr ) ); + } + $this->append_html( htmlspecialchars( $template->get_updated_html() ) ); + $this->pop_block(); + break; + case 'HR': + $this->push_block( 'separator' ); + $this->block_markup .= '
'; + $this->pop_block(); + break; + + // Block elements + case '+SCRIPT': + $this->ignore_text = true; + break; + case '-SCRIPT': + $this->ignore_text = false; + break; + + case '+UL': + case '+OL': + $this->push_block( 'list', array( 'ordered' => $tag === 'ol' ) ); + $this->block_markup .= '
    '; + break; + case '-UL': + case '-OL': + $this->block_markup .= '
'; + $this->pop_block(); + break; + + case '+LI': + $this->push_block( 'list-item' ); + $this->block_markup .= '<' . $tag_lowercase . '>'; + break; + case '-LI': + $this->block_markup .= ''; + $this->pop_block(); + break; + + case '+TABLE': + $this->push_block( 'table' ); + $this->block_markup .= '
'; + $this->block_markup .= ''; + break; + case '-TABLE': + $this->block_markup .= '
'; + $this->block_markup .= '
'; + $this->pop_block(); + break; + + case '+THEAD': + case '+TBODY': + case '+TFOOT': + case '+TR': + case '+TD': + case '+TH': + $this->block_markup .= '<' . $tag_lowercase . '>'; + break; + case '-THEAD': + case '-TBODY': + case '-TFOOT': + case '-TR': + case '-TD': + case '-TH': + $this->block_markup .= ''; + break; + + case '+BLOCKQUOTE': + $this->push_block( 'quote' ); + $this->block_markup .= '<' . $tag_lowercase . '>'; + break; + case '-BLOCKQUOTE': + $this->block_markup .= ''; + $this->pop_block(); + break; + + case '+PRE': + $this->push_block( 'code' ); + $this->block_markup .= '<' . $tag_lowercase . ' class="wp-block-code">'; + break; + case '-PRE': + $this->block_markup .= ''; + $this->pop_block(); + break; + + case '+CODE': + /* + * Guess whether this is: + * - An inline element? Let's convert it into a formatting element. + * - A block element? Let's convert it into a block. + */ + if ( $this->is_at_inline_code_element() ) { + $this->append_html( '<' . $tag_lowercase . '>' ); + } else { + $this->push_block( 'code' ); + $this->block_markup .= '<' . $tag_lowercase . ' class="wp-block-code">'; + } + break; + case '-CODE': + $this->block_markup .= ''; + if ( ! $this->is_at_inline_code_element() ) { + $this->pop_block(); + } + break; + + case '+P': + $this->push_block( 'paragraph' ); + $this->block_markup .= '

'; + break; + case '-P': + $this->block_markup .= '

'; + $this->pop_block(); + break; + + case '+H1': + case '+H2': + case '+H3': + case '+H4': + case '+H5': + case '+H6': + $this->push_block( + 'heading', + array( + 'level' => (int) $tag[1] ? (int) $tag[1] : 1, + ) + ); + $this->block_markup .= ''; + break; + case '-H1': + case '-H2': + case '-H3': + case '-H4': + case '-H5': + case '-H6': + $this->block_markup .= ''; + $this->pop_block(); + break; + + // Inline elements + case '+A': + $template = new \WP_HTML_Tag_Processor( '' ); + $template->next_tag(); + if ( $html->get_attribute( 'href' ) ) { + $template->set_attribute( 'href', $html->get_attribute( 'href' ) ); + } + $this->append_html( $template->get_updated_html() ); + break; + case '-A': + $this->block_markup .= ''; + break; + + // Formats – just pass through (minus the HTML attributes) + default: + if ( $this->should_preserve_tag_in_rich_text( $tag ) ) { + if ( $is_opener ) { + $this->append_html( '<' . $tag_lowercase . '>' ); + } elseif ( $is_closer ) { + $this->append_html( '' ); + } + } else { + /* + * Ignore all the other tags. We've included all the meaningful + * handlers in the switch statement above and there's not much + * we can do with generic tags such as
, ,
, etc. + */ + } + break; + } + } + + /** + * Checks whether the given tag is an inline formatting element + * that we want to preserve when parsing rich text. For example, + * tags are meaningful from the rich text perspective, but + *
tags are not. + * + * @param string $tag The tag to check. + * @return bool Whether the tag should be preserved in rich text. + */ + private function should_preserve_tag_in_rich_text( $tag ) { + return in_array( + $tag, + array( + 'B', + 'STRONG', + 'I', + 'U', + 'S', + 'SMALL', + 'SUP', + 'SUB', + 'MARK', + 'EM', + 'CITE', + 'DFN', + 'CODE', + 'KBD', + 'SAMP', + 'VAR', + ), + true + ); + } + + private function is_at_inline_code_element() { + $breadcrumbs = $this->html->get_breadcrumbs(); + foreach ( $breadcrumbs as $tag ) { + switch ( $tag ) { + case 'A': + case 'P': + case 'LI': + case 'TABLE': + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + return true; + } + } + return false; + } + + /** + * Appends a snippet of HTML to the block markup. + * Ensures given $html is a part of a block. If no block is + * currently open, it appends a new paragraph block. + * + * @param string $html The HTML snippet to append. + */ + private function append_html( $html ) { + $html = trim( $html ); + if ( empty( $html ) ) { + return; + } + // Make sure two subsequent append_html() calls don't merge the text. + $html .= ' '; + $this->ensure_open_block(); + $this->block_markup .= $html; + } + + /** + * Pushes a new block onto the stack of open blocks and appends the block + * opener to the block markup. + * + * @param string $name The name of the block to push. + * @param array $attributes The attributes of the block to push. + */ + private function push_block( $name, $attributes = array() ) { + $this->close_ephemeral_paragraph(); + $block = new \WP_Block_Object( $name, $attributes ); + array_push( $this->block_stack, $block ); + $this->block_markup .= WP_Import_Utils::block_opener( $block->block_name, $block->attrs ) . "\n"; + } + + /** + * Pops the last block from the stack of open blocks and appends the block + * closer to the block markup. + * + * @return \WP_Block_Object The last block that was popped. + */ + private function pop_block() { + if ( ! empty( $this->block_stack ) ) { + $popped = array_pop( $this->block_stack ); + $this->block_markup .= WP_Import_Utils::block_closer( $popped->block_name ) . "\n"; + return $popped; + } + } + + /** + * Ensures that a block is open. If no block is currently open, it appends + * a new, ephemeral paragraph block that will be automatically closed + * when the next block opens OR when the HTML ends. + */ + private function ensure_open_block() { + if ( empty( $this->block_stack ) && ! $this->in_ephemeral_paragraph ) { + $this->block_markup .= WP_Import_Utils::block_opener( 'paragraph' ) . "\n"; + $this->block_markup .= '

'; + $this->in_ephemeral_paragraph = true; + } + } + + /** + * Closes the ephemeral paragraph if it is currently open. + */ + private function close_ephemeral_paragraph() { + if ( $this->in_ephemeral_paragraph ) { + $this->block_markup .= '

'; + $this->block_markup .= WP_Import_Utils::block_closer( 'paragraph' ); + $this->in_ephemeral_paragraph = false; + } + } +} diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php new file mode 100644 index 0000000000..a45017fd0f --- /dev/null +++ b/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php @@ -0,0 +1,95 @@ +get_entity() && ! $this->is_finished() && ! $this->get_last_error() ) { + $this->next(); + } + return $this->get_entity(); + } + + private $last_next_result = null; + public function next(): void { + // @TODO: Don't keep track of this. Just make sure the next_entity() + // call will make the is_finished() true. + $this->last_next_result = $this->next_entity(); + } + + public function key(): string { + return $this->get_reentrancy_cursor(); + } + + public function valid(): bool { + return false !== $this->last_next_result && ! $this->is_finished() && ! $this->get_last_error(); + } + + public function rewind(): void { + // Haven't started yet. + if ( null === $this->last_next_result ) { + return; + } + _doing_it_wrong( + __METHOD__, + 'WP_WXR_Entity_Reader does not support rewinding.', + null + ); + } +} diff --git a/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php new file mode 100644 index 0000000000..95923ef390 --- /dev/null +++ b/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php @@ -0,0 +1,140 @@ +html = $html; + $this->post_id = $post_id; + } + + /** + * Advances to the next entity. + * + * @return bool Whether the next entity was found. + */ + public function next_entity() { + // If we're finished, we're finished. + if ( $this->finished ) { + return false; + } + + // If we've already read some entities, skip to the next one. + if ( null !== $this->entities ) { + if ( count( $this->entities ) <= 1 ) { + $this->finished = true; + return false; + } + array_shift( $this->entities ); + return true; + } + + // We did not read any entities yet. Let's convert the HTML document into entities. + $converter = new WP_HTML_To_Blocks( $this->html ); + if ( false === $converter->convert() ) { + return false; + } + + $all_metadata = $converter->get_all_metadata(); + $post_fields = array(); + $other_metadata = array(); + foreach ( $all_metadata as $key => $values ) { + if ( in_array( $key, WP_Imported_Entity::POST_FIELDS, true ) ) { + $post_fields[ $key ] = $values[0]; + } else { + $other_metadata[ $key ] = $values[0]; + } + } + + // Emit the post entity. + $this->entities[] = new WP_Imported_Entity( + 'post', + array_merge( + $post_fields, + array( + 'post_id' => $this->post_id, + 'content' => $converter->get_block_markup(), + ) + ) + ); + + // Emit all the metadata that don't belong to the post entity. + foreach ( $other_metadata as $key => $value ) { + $this->entities[] = new WP_Imported_Entity( + 'post_meta', + array( + 'post_id' => $this->post_id, + 'meta_key' => $key, + 'meta_value' => $value, + ) + ); + } + return true; + } + + /** + * Returns the current entity. + * + * @return WP_Imported_Entity|false The current entity, or false if there are no entities left. + */ + public function get_entity() { + if ( $this->is_finished() ) { + return false; + } + return $this->entities[0]; + } + + /** + * Checks if this reader has finished yet. + * + * @return bool Whether the reader has finished. + */ + public function is_finished(): bool { + return $this->finished; + } + + /** + * Returns the last error that occurred when processing the HTML. + * + * @return string|null The last error, or null if there was no error. + */ + public function get_last_error(): ?string { + return null; + } +} diff --git a/packages/playground/data-liberation/src/import/WP_Import_Utils.php b/packages/playground/data-liberation/src/import/WP_Import_Utils.php new file mode 100644 index 0000000000..61dbf97a82 --- /dev/null +++ b/packages/playground/data-liberation/src/import/WP_Import_Utils.php @@ -0,0 +1,60 @@ +"; + $processor = new WP_Block_Markup_Processor( $template ); + $processor->next_token(); + $processor->set_block_attributes( $attrs ); + return $processor->get_updated_html(); + } + + /** + * Generates a block closer comment. + * + * @param string $block_name The name of the block. + * @return string The block closer. + */ + public static function block_closer( $block_name ) { + return ""; + } + + /** + * Convert an array of WP_Block_Object objects to HTML markup. + * + * @param array $blocks The blocks to convert to markup. + * @return string The HTML markup. + */ + public static function convert_blocks_to_markup( $blocks ) { + $block_markup = ''; + + foreach ( $blocks as $block ) { + // Allow mixing of inner blocks and content strings. + if ( is_string( $block ) ) { + $block_markup .= $block; + continue; + } + // Start of block comment + $block_markup .= self::block_opener( $block->block_name, $block->attrs ); + $block_markup .= $block->attrs['content'] ?? ''; + $block_markup .= self::convert_blocks_to_markup( $block->inner_blocks ); + $block_markup .= self::block_closer( $block->block_name ); + } + + return $block_markup; + } +} diff --git a/packages/playground/data-liberation/src/import/WP_Imported_Entity.php b/packages/playground/data-liberation/src/import/WP_Imported_Entity.php index 96c3dd3dd2..41a11e8491 100644 --- a/packages/playground/data-liberation/src/import/WP_Imported_Entity.php +++ b/packages/playground/data-liberation/src/import/WP_Imported_Entity.php @@ -1,5 +1,9 @@ + + + +

It is our pleasure to announce that WordPress 6.8 was released

+

Last week, WordPress 6.8 was released.

+HTML; + $reader = new WP_HTML_Entity_Reader( $html, 1 ); + $entities = []; + while ( $reader->next_entity() ) { + $data = $reader->get_entity()->get_data(); + if(isset($data['content'])) { + $data['content'] = $this->normalize_markup( $data['content'] ); + } + $entities[] = [ + 'type' => $reader->get_entity()->get_type(), + 'data' => $data, + ]; + } + $expected_entities = [ + [ + 'type' => 'post', + 'data' => [ + 'post_title' => 'WordPress 6.8 was released', + 'post_date' => '2024-12-16', + 'post_id' => 1, + 'content' => $this->normalize_markup(<< +

It is our pleasure to announce that WordPress 6.8 was released

+ + + +

Last week, WordPress 6.8 was released.

+ +HTML) + ] + ], + [ + 'type' => 'post_meta', + 'data' => [ + 'post_id' => 1, + 'meta_key' => 'custom_post_meta', + 'meta_value' => 'custom_post_meta_value', + ] + ], + [ + 'type' => 'post_meta', + 'data' => [ + 'post_id' => 1, + 'meta_key' => 'color_palette', + 'meta_value' => 'use_that_pretty_one', + ] + ], + ]; + $this->assertEquals( $expected_entities, $entities ); + } + + private function normalize_markup( $markup ) { + return str_replace( "\n", '', WP_HTML_Processor::create_fragment( $markup )->serialize() ); + } + +} diff --git a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php new file mode 100644 index 0000000000..41d6ba8ae8 --- /dev/null +++ b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php @@ -0,0 +1,151 @@ + + + + + + + +

WordPress 6.8 was released

+

Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. It also includes a new block editor experience, and a new block library.

+HTML; + $converter = new WP_HTML_To_Blocks( $html ); + $converter->convert( $html ); + $metadata = $converter->get_all_metadata(); + $expected_metadata = [ + 'post_title' => ['WordPress 6.8 was released'], + 'post_date' => ['2024-12-16'], + 'post_modified' => ['2024-12-16'], + 'post_author' => ['1'], + 'post_author_name' => ['The WordPress Team'], + 'post_author_url' => ['https://wordpress.org'], + 'post_author_avatar' => ['https://wordpress.org/wp-content/uploads/2024/04/wordpress-logo-2024.png'], + ]; + $this->assertEquals( $expected_metadata, $metadata ); + } + + /** + * @dataProvider provider_test_conversion + */ + public function test_html_to_blocks_conversion( $html, $expected ) { + $converter = new WP_HTML_To_Blocks( $html ); + $converter->convert( $html ); + $blocks = $converter->get_block_markup(); + + $this->assertEquals( $this->normalize_markup($expected), $this->normalize_markup($blocks) ); + } + + private function normalize_markup( $markup ) { + $processor = WP_HTML_Processor::create_fragment( $markup ); + $serialized = $processor->serialize(); + $serialized = trim( + str_replace( + // Naively remove all the newlines to prevent minor formatting differences + // from causing false negatives in $expected === $actual. + "\n", + '', + $serialized + ) + ); + return $serialized; + } + + public function provider_test_conversion() { + return [ + 'A simple paragraph' => [ + 'html' => '

A simple paragraph

', + 'expected' => "

A simple paragraph

" + ], + 'A simple list' => [ + 'html' => '
  • Item 1
  • Item 2
', + 'expected' => <<
    \n
  • Item 1
  • Item 2
+HTML + ], + 'A non-normative list' => [ + 'html' => '