diff --git a/CHANGELOG.md b/CHANGELOG.md index 745fa7c..b4a3dbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,24 @@ + # Changelog All notable changes to `WP Block Converter` will be documented in this file. +## 1.5.0 + +### Added + +- Add collection of attachments that are created during the conversion process. + Two methods now exist on the converter to help: + - `get_created_attachment_ids()` - Returns the attachment IDs that were created. + - `assign_parent_to_attachments()` - Assigns a parent post to all attachments. + +### Changed + +- Nested images are now properly sideloaded and converted to image blocks where + appropriate. For example, an image inside a figure tag will now be converted + to an image block. An image within a paragraph tag will be sideloaded but + won't be converted to an image block. + ## 1.4.0 - Drops support for PHP 8.0. diff --git a/README.md b/README.md index eaba953..dd28bd4 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # WP Block Converter -[![Coding Standards](https://github.com/alleyinteractive/wp-block-converter/actions/workflows/coding-standards.yml/badge.svg)](https://github.com/alleyinteractive/wp-block-converter/actions/workflows/coding-standards.yml) -[![Testing Suite](https://github.com/alleyinteractive/wp-block-converter/actions/workflows/unit-test.yml/badge.svg)](https://github.com/alleyinteractive/wp-block-converter/actions/workflows/unit-test.yml) +[![Testing Suite](https://github.com/alleyinteractive/wp-block-converter/actions/workflows/all-pr-tests.yml/badge.svg)](https://github.com/alleyinteractive/wp-block-converter/actions/workflows/all-pr-tests.yml) Convert HTML into Gutenberg Blocks with PHP @@ -13,9 +12,9 @@ You can install the package via Composer: composer require alleyinteractive/wp-block-converter ``` -This project is built to be used in a WordPress environment, so it is -recommended to use this package in a WordPress plugin or theme. Using it in -isolation is not supported at this time. 
+This project is built to be used in a WordPress environment, so it is recommended to use this +package in a WordPress plugin or theme. Using it in isolation is not supported at this time. This +package does not use any NPM library such as `@wordpress/blocks` to convert HTML to blocks. ## Usage @@ -26,7 +25,7 @@ use Alley\WP\Block_Converter\Block_Converter; $converter = new Block_Converter( '

<p>Some HTML</p>

' ); -$blocks = $converter->convert(); +$blocks = $converter->convert(); // Returns a string of converted blocks. ``` ### Filtering the Blocks @@ -61,6 +60,24 @@ add_filter( 'wp_block_converter_document_html', function( string $blocks, \DOMNo }, 10, 2 ); ``` +### Attachment Parents + +When converting HTML to blocks, you may need to attach the images that were +sideloaded to a post parent. After the HTML is converted to blocks, you can get +the attachment IDs that were created or simply attach them to a post. + +```php +$converter = new Block_Converter( '

<p>Some HTML</p>

' ); +$blocks = $converter->convert(); + +// Get the attachment IDs that were created. +$attachment_ids = $converter->get_created_attachment_ids(); + +// Attach the images to a post. +$parent_id = 123; +$converter->assign_parent_to_attachments( $parent_id ); +``` + ### Extending the Converter with Macros You can extend the converter with macros to add custom tags that are not yet diff --git a/src/class-block-converter.php b/src/class-block-converter.php index 37ad774..bb2ca84 100644 --- a/src/class-block-converter.php +++ b/src/class-block-converter.php @@ -13,21 +13,32 @@ use DOMNode; use Exception; use Mantle\Support\Traits\Macroable; +use RuntimeException; +use Throwable; /** * Converts a DOMDocument to Gutenberg block HTML. + * + * Mirrors the `htmlToBlocks()`/`rawHandler()` from the `@wordpress/blocks` package. + * + * @todo Improve logging to not silently fail when importing images. */ class Block_Converter { - use Macroable { + use Concerns\Listens_For_Attachments, Macroable { __call as macro_call; } /** * Setup the class. * + * @throws RuntimeException If WordPress is not loaded. + * * @param string $html The HTML to parse. */ public function __construct( public string $html ) { + if ( ! function_exists( 'do_action' ) ) { + throw new RuntimeException( 'WordPress must be loaded to use the Block_Converter class.' ); + } } /** @@ -36,6 +47,8 @@ public function __construct( public string $html ) { * @return string The HTML. */ public function convert(): string { + $this->listen_for_attachment_creation(); + // Get tags from the html. $content = static::get_node_tag_from_html( $this->html ); @@ -51,24 +64,8 @@ public function convert(): string { continue; } - /** - * Hook to allow output customizations. - * - * @since 1.0.0 - * - * @param Block|null $block The generated block object. - * @param DOMNode $node The node being converted. 
- */ - $tag_block = apply_filters( 'wp_block_converter_block', $this->{$node->nodeName}( $node ), $node ); - - // Bail early if is empty. - if ( empty( $tag_block ) ) { - continue; - } - // Merge the block into the HTML collection. - - $html[] = $this->minify_block( (string) $tag_block ); + $html[] = $this->minify_block( (string) $this->convert_node( $node ) ); } $html = implode( "\n\n", $html ); @@ -84,7 +81,103 @@ public function convert(): string { * @param string $html HTML converted into Gutenberg blocks. * @param DOMNodeList $content The original DOMNodeList. */ - return trim( (string) apply_filters( 'wp_block_converter_document_html', $html, $content ) ); + $html = trim( (string) apply_filters( 'wp_block_converter_document_html', $html, $content ) ); + + $this->detach_attachment_creation_listener(); + + return $html; + } + + /** + * Convert a node to a block. + * + * @param DOMNode $node The node to convert. + * @return Block|null + */ + protected function convert_node( DOMNode $node ): ?Block { + if ( '#text' === $node->nodeName ) { + return null; + } + + /** + * Hook to allow output customizations. + * + * @since 1.0.0 + * + * @param Block|null $block The generated block object. + * @param DOMNode $node The node being converted. + */ + $block = apply_filters( 'wp_block_converter_block', $this->{$node->nodeName}( $node ), $node ); + + if ( ! $block || ! $block instanceof Block ) { + return null; + } + + return $block; + } + + /** + * Sideload any child images of a DOMNode and replace the src with the new URL. + * + * @param DOMNode $node The node. + * @return void + */ + protected function sideload_child_images( DOMNode $node ): void { + $children = $node->childNodes; + + if ( ! $children->length ) { + return; + } + + foreach ( $children as $child_node ) { + // Skip if the node is not an image or is not an instance of DOMElement. + if ( 'img' !== $child_node->nodeName || ! 
$child_node instanceof DOMElement ) { + // Recursively sideload images in child nodes. + if ( $child_node->hasChildNodes() ) { + $this->sideload_child_images( $child_node ); + } + + continue; + } + + $src = $child_node->getAttribute( 'src' ); + + if ( empty( $src ) ) { + continue; + } + + try { + $previous_src = $src; + $src = $this->upload_image( $src, $child_node->getAttribute( 'alt' ) ); + + if ( $src ) { + $child_node->setAttribute( 'src', $src ); + + // Remove any srcset attributes. + if ( $child_node->hasAttribute( 'srcset' ) ) { + $child_node->removeAttribute( 'srcset' ); + } + + // Update the parent node with the new link if the parent + // node is an anchor. + if ( 'a' === $node->nodeName && $previous_src === $node->getAttribute( 'href' ) ) { + $node->setAttribute( 'href', $src ); + } + + /** + * Fires after a child image has been sideloaded. + * + * @since 1.5.0 + * + * @param string $src The image source URL. + * @param DOMNode $child_node The child node. + */ + do_action( 'wp_block_converter_sideloaded_image', $src, $child_node ); + } + } catch ( Throwable ) { // phpcs:ignore Squiz.Commenting.EmptyCatchComment.Missing, Generic.CodeAnalysis.EmptyStatement.DetectedCatch + // Do nothing. 
+ } + } } /** @@ -106,6 +199,7 @@ public function __call( $name, $arguments ): ?Block { 'blockquote' => $this->blockquote( $arguments[0] ), 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' => $this->h( $arguments[0] ), 'p', 'a', 'abbr', 'b', 'code', 'em', 'i', 'strong', 'sub', 'sup', 'span', 'u' => $this->p( $arguments[0] ), + 'figure' => $this->figure( $arguments[0] ), 'br', 'cite', 'source' => null, 'hr' => $this->separator(), default => $this->html( $arguments[0] ), @@ -201,8 +295,16 @@ protected function blockquote( DOMNode $node ): ?Block { * @return Block|null */ protected function p( DOMNode $node ): ?Block { + if ( $this->is_anchor_wrapped_image( $node ) ) { + return $this->img( $node ); + } + + $this->sideload_child_images( $node ); + $content = static::get_node_html( $node ); + // TODO: Account for Twitter/Facebook embeds being inline links in + // content and not full embeds. if ( ! empty( filter_var( $node->textContent, FILTER_VALIDATE_URL ) ) ) { if ( \str_contains( $node->textContent, '//x.com' ) || \str_contains( $node->textContent, '//www.x.com' ) ) { $node->textContent = str_replace( 'x.com', 'twitter.com', $node->textContent ); @@ -234,6 +336,85 @@ protected function p( DOMNode $node ): ?Block { ); } + /** + * Create figure blocks. + * + * This method only supports converting a
<figure> block that has either an + * <img>, <a><img></a> or <figcaption>
child. If the <figure>
block has other children + * the block will be converted to an HTML block. + * + * @param DOMNode $node The node. + * @return Block|null + */ + public function figure( DOMNode $node ): ?Block { + if ( $this->is_supported_figure( $node ) ) { + $this->sideload_child_images( $node ); + + // Ensure it has the "wp-block-image" class. + if ( $node instanceof DOMElement ) { + $node->setAttribute( 'class', 'wp-block-image' ); + } + + return new Block( + block_name: 'image', + content: static::get_node_html( $node ), + ); + } + + return $this->html( $node ); + } + + /** + * Check if the figure node is supported for conversion. + * + * @param DOMNode $node The node. + * @return bool + */ + protected function is_supported_figure( DOMNode $node ): bool { + $children = $node->childNodes; + + if ( ! $children->length ) { + return false; + } + + if ( $children->length > 2 ) { + return false; + } + + if ( 2 === $children->length ) { + if ( 'figcaption' !== $children->item( 1 )->nodeName ) { + return false; + } + } + + // Check if the first child is an <img> or an <a> with an <img> child. + if ( 'img' === $children->item( 0 )->nodeName || $this->is_anchor_wrapped_image( $children->item( 0 ) ) ) { + return true; + } + + return false; + } + + /** + * Check if the node wraps a single image (e.g. an anchor wrapped image). + * + * @param DOMNode $node The node. + * @return bool + */ + protected function is_anchor_wrapped_image( DOMNode $node ): bool { + $children = $node->childNodes; + + if ( ! $children->length ) { + return false; + } + + if ( 1 === $children->length && 'img' === $children->item( 0 )->nodeName ) { + return true; + } + + return false; + } + /** * Create ul blocks. 
* @@ -241,6 +422,8 @@ protected function p( DOMNode $node ): ?Block { * @return Block */ protected function ul( DOMNode $node ): Block { + $this->sideload_child_images( $node ); + return new Block( block_name: 'list', content: static::get_node_html( $node ), @@ -248,7 +431,12 @@ protected function ul( DOMNode $node ): Block { } /** - * Create img blocks. + * Create img block. + * + * Supports being passed a element that is a or a parent element that + * contains an . If it is passed a parent element that contains an + * tag, the resulting block will preserve the parent element and wrap + * it in a
tag. * * @param DOMElement|DOMNode $element The node. * @return Block|null @@ -258,16 +446,36 @@ protected function img( DOMElement|DOMNode $element ): ?Block { return null; } - $image_src = $element->getAttribute( 'data-srcset' ); - $alt = $element->getAttribute( 'alt' ); + // If the element passed isn't an attempt to find it from the children. + if ( 'img' !== $element->nodeName ) { + $image_node = $element->getElementsByTagName( 'img' )->item( 0 ); - if ( empty( $image_src ) && ! empty( $element->getAttribute( 'src' ) ) ) { - $image_src = $element->getAttribute( 'src' ); + // Bail early if the image node is not found. + if ( ! $image_node || ! $image_node instanceof DOMElement ) { + return null; + } + } else { + $image_node = $element; + } + + $image_src = $image_node->getAttribute( 'data-srcset' ); + $alt = $image_node->getAttribute( 'alt' ); + + if ( empty( $image_src ) && ! empty( $image_node->getAttribute( 'src' ) ) ) { + $image_src = $image_node->getAttribute( 'src' ); } try { $image_src = $this->upload_image( $image_src, $alt ); - } catch ( Exception $e ) { + + // Update the image src attribute. + $image_node->setAttribute( 'src', $image_src ); + + // Remove any srcset attributes. + if ( $image_node->hasAttribute( 'srcset' ) ) { + $image_node->removeAttribute( 'srcset' ); + } + } catch ( Exception ) { return null; } @@ -278,9 +486,8 @@ protected function img( DOMElement|DOMNode $element ): ?Block { return new Block( block_name: 'image', content: sprintf( - '
%s
', - esc_url( $image_src ), - esc_attr( $alt ), + '
%s
', + static::get_node_html( $element ), ), ); } @@ -292,6 +499,8 @@ protected function img( DOMElement|DOMNode $element ): ?Block { * @return Block */ protected function ol( DOMNode $node ): Block { + $this->sideload_child_images( $node ); + return new Block( block_name: 'list', attributes: [ @@ -379,7 +588,7 @@ protected function instagram_embed( string $url ): Block { * Create Instagram embed blocks. * * @param string $url The URL. - * @return Blockx + * @return Block */ protected function facebook_embed( string $url ): Block { $atts = [ @@ -421,6 +630,8 @@ protected function separator(): Block { * @return Block|null */ protected function html( DOMNode $node ): ?Block { + $this->sideload_child_images( $node ); + // Get the raw HTML. $html = static::get_node_html( $node ); @@ -437,6 +648,11 @@ protected function html( DOMNode $node ): ?Block { /** * Get nodes from a specific tag. * + * **Note:** This method converts the node to HTML and then gets the nodes. + * It cannot be used for DOMNode object modification. + * + * @deprecated Not used by the library. Will be removed in a future release. + * * @param DOMNode $node The current DOMNode. * @param string $tag The tag to search for. * @return DOMNodeList The raw HTML. diff --git a/src/concerns/trait-listens-for-attachments.php b/src/concerns/trait-listens-for-attachments.php new file mode 100644 index 0000000..88f1e5a --- /dev/null +++ b/src/concerns/trait-listens-for-attachments.php @@ -0,0 +1,68 @@ + + */ + protected array $created_attachment_ids = []; + + /** + * Retrieve the attachment IDs created during the conversion. + * + * @return array + */ + public function get_created_attachment_ids(): array { + return $this->created_attachment_ids; + } + + /** + * Assign a parent post ID to the created attachments. + * + * @param int $parent_post_id Parent post ID. 
+ */ + public function assign_parent_to_attachments( int $parent_post_id ): void { + foreach ( $this->get_created_attachment_ids() as $attachment_id ) { + wp_update_post( [ + 'ID' => $attachment_id, + 'post_parent' => $parent_post_id, + ] ); + } + } + + /** + * Listen for the creation of attachments. + */ + public function listen_for_attachment_creation(): void { + $this->created_attachment_ids = []; + + add_action( 'add_attachment', [ $this, 'track_attachment_creation' ] ); + } + + /** + * Detach the attachment creation listener. + */ + public function detach_attachment_creation_listener(): void { + remove_action( 'add_attachment', [ $this, 'track_attachment_creation' ] ); + } + + /** + * Track the creation of an attachment. + * + * @param int $attachment_id Attachment ID. + */ + public function track_attachment_creation( int $attachment_id ): void { + $this->created_attachment_ids[] = $attachment_id; + } +} diff --git a/src/helpers.php b/src/helpers.php index 448ac84..5d0cba3 100644 --- a/src/helpers.php +++ b/src/helpers.php @@ -71,14 +71,16 @@ function create_or_get_attachment_from_url( string $src, array $args = [], strin update_post_meta( $attachment_id, $meta_key, $src ); $postarr = [ - 'post_content' => $args['description'] ?? null, - 'post_excerpt' => $args['caption'] ?? null, - 'post_title' => $args['title'] ?? null, - 'meta_input' => array_merge( - (array) ( $args['meta'] ?? [] ), - [ - '_wp_attachment_image_alt' => $args['alt'] ?? null, - ], + 'post_content' => $args['description'] ?? '', + 'post_excerpt' => $args['caption'] ?? '', + 'post_title' => $args['title'] ?? '', + 'meta_input' => array_filter( + array_merge( + (array) ( $args['meta'] ?? [] ), + [ + '_wp_attachment_image_alt' => $args['alt'] ?? 
null, + ], + ), ), ]; diff --git a/tests/feature/BlockConverterTest.php b/tests/feature/BlockConverterTest.php index 14ad854..d3b38e5 100644 --- a/tests/feature/BlockConverterTest.php +++ b/tests/feature/BlockConverterTest.php @@ -12,17 +12,17 @@ use DOMNode; use Mantle\Testing\Concerns\Prevent_Remote_Requests; use Mantle\Testkit\Test_Case; +use Mantle\Support\Str; +use Mantle\Testing\Concerns\Refresh_Database; use PHPUnit\Framework\Attributes\DataProvider; -use PHPUnit\Framework\Attributes\Group; use function Mantle\Testing\mock_http_response; /** * Test case for Block Block_Converter Module. */ -#[Group( 'block' )] class BlockConverterTest extends Test_Case { - use Prevent_Remote_Requests; + use Prevent_Remote_Requests, Refresh_Database; protected function setUp(): void { parent::setUp(); @@ -31,6 +31,11 @@ protected function setUp(): void { 'https://publish.twitter.com/oembed?maxwidth=500&maxheight=750&url=https%3A%2F%2Ftwitter.com%2Falleyco%2Fstatus%2F1679189879086018562&dnt=1&format=json' => mock_http_response()->with_json( '{"url":"https:\/\/twitter.com\/alleyco\/status\/1679189879086018562","author_name":"Alley","author_url":"https:\/\/twitter.com\/alleyco","html":"\u003Cblockquote class=\"twitter-tweet\" data-width=\"500\" data-dnt=\"true\"\u003E\u003Cp lang=\"en\" dir=\"ltr\"\u003EWe’re a full-service digital agency with the foresight, perspective, and grit to power your brightest ideas and build solutions for your most evasive problems. 
Learn more about our services here:\u003Ca href=\"https:\/\/t.co\/8zZ5zP1Oyc\"\u003Ehttps:\/\/t.co\/8zZ5zP1Oyc\u003C\/a\u003E\u003C\/p\u003E— Alley (@alleyco) \u003Ca href=\"https:\/\/twitter.com\/alleyco\/status\/1679189879086018562?ref_src=twsrc%5Etfw\"\u003EJuly 12, 2023\u003C\/a\u003E\u003C\/blockquote\u003E\n\u003Cscript async src=\"https:\/\/platform.twitter.com\/widgets.js\" charset=\"utf-8\"\u003E\u003C\/script\u003E\n\n","width":500,"height":null,"type":"rich","cache_age":"3153600000","provider_name":"Twitter","provider_url":"https:\/\/twitter.com","version":"1.0"}' ), 'https://www.tiktok.com/oembed?maxwidth=500&maxheight=750&url=https%3A%2F%2Fwww.tiktok.com%2F%40atribecalledval%2Fvideo%2F7348705314746699054&dnt=1&format=json' => mock_http_response()->with_json( '{"version":"1.0","type":"video","title":"Andre 3000 performing at Luna Luna was such an incredible night. I will never forget this night. #losangeles #andre3000 #fyp #foryou #foryoupage ","author_url":"https://www.tiktok.com/@atribecalledval","author_name":"Valeria Cardona","width":"100%","height":"100%","html":"
@atribecalledval

Andre 3000 performing at Luna Luna was such an incredible night. I will never forget this night. #losangeles #andre3000 #fyp #foryou #foryoupage

♬ I swear, I Really Wanted To Make A \"Rap\" Album But This Is Literally The Way The Wind Blew Me This Time - André 3000
","thumbnail_width":576,"thumbnail_height":1024,"thumbnail_url":"https://p19-pu-sign-useast8.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/afac3ae6ea3343c890e12e3cbbca1218_1711003872?lk3s=b59d6b55&nonce=81617&refresh_token=bf81ce66fb4d648cbd499791f37a6354&x-expires=1722110400&x-signature=tpTiBYwvSXjjAEgNRU2F%2BUAz7jo%3D&shp=b59d6b55&shcp=-","provider_url":"https://www.tiktok.com","provider_name":"TikTok","author_unique_id":"atribecalledval","embed_product_id":"7348705314746699054","embed_type":"video"}' ), ] ); + + // Delete all uploaded files between tests. + $dir = wp_upload_dir(); + + shell_exec( "rm -rf {$dir['path']}/*" ); } #[DataProvider( 'converter_data_provider' )] @@ -93,6 +98,51 @@ public static function converter_data_provider() { ]; } + #[DataProvider( 'image_dataprovider' )] + public function test_image( string $html, string $expected ) { + $this->fake_request( 'https://alley.com/wp-content/uploads/2022/01/Screen-Shot-2022-01-19-at-2.51.37-PM.png' ) + ->with_file( __DIR__ . '/../fixtures/image.png' ); + + $converter = new Block_Converter( $html ); + $block = $converter->convert(); + + $this->assertEquals( $expected, $block ); + + $this->assertCount( 1, $converter->get_created_attachment_ids() ); + $this->assertRequestSent( 'https://alley.com/wp-content/uploads/2022/01/Screen-Shot-2022-01-19-at-2.51.37-PM.png', 1 ); + } + + public static function image_dataprovider(): array { + $url = wp_upload_dir()['url']; + + return [ + 'image wrapped with figure/a' => [ + '
', + '
' + ], + 'image wrapped with figure/a with caption' => [ + '
Image caption
', + '
Image caption
' + ], + 'image wrapped with anchor' => [ + '', + '
', + ], + 'image wrapped with paragraph' => [ + '

Content to migrate

', + '

Content to migrate

' + ], + 'image wrapped with paragraph and anchor' => [ + '

Content to migrate

', + '

Content to migrate

' + ], + 'image not wrapped' => [ + '', + '
', + ], + ]; + } + public function test_convert_with_empty_paragraphs_of_arbitrary_length_to_block() { $arbitraryNewLines = str_repeat( "\n\r", mt_rand( 1, 1000) ); $arbitrarySpaces = str_repeat( " ", mt_rand( 1, 1000 ) ); diff --git a/tests/fixtures/image.png b/tests/fixtures/image.png new file mode 100644 index 0000000..99eb4a8 Binary files /dev/null and b/tests/fixtures/image.png differ