diff --git a/packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php b/packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php new file mode 100644 index 0000000000..ceda2728c9 --- /dev/null +++ b/packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php @@ -0,0 +1,91 @@ + $markdown_directory, + 'first_post_id' => 1, + 'allowed_extensions' => array( 'md' ), + 'index_file_patterns' => array( '#^index\.md$#' ), + 'markup_converter_factory' => function( $content ) { + return new WP_Markdown_To_Blocks( $content ); + }, + ) + ); + }, + $options, + $cursor + ); + } + + protected static function parse_options( $options ) { + if ( ! isset( $options['source_site_url'] ) ) { + _doing_it_wrong( __METHOD__, 'The source_site_url option is required.', '__WP_VERSION__' ); + return false; + } + $options['default_source_site_url'] = $options['source_site_url']; + + if ( ! isset( $options['local_markdown_assets_root'] ) ) { + _doing_it_wrong( __METHOD__, 'The local_markdown_assets_root option is required.', '__WP_VERSION__' ); + return false; + } + if ( ! is_dir( $options['local_markdown_assets_root'] ) ) { + _doing_it_wrong( __METHOD__, 'The local_markdown_assets_root option must point to a directory.', '__WP_VERSION__' ); + return false; + } + $options['local_markdown_assets_root'] = rtrim( $options['local_markdown_assets_root'], '/' ); + + return parent::parse_options( $options ); + } + + protected function rewrite_attachment_url( string $raw_url, $context_path = null ) { + /** + * For Docusaurus docs, URLs starting with `@site` are referring + * to local files. Let's convert them to file:// URLs. + */ + if ( + isset( $this->options['local_markdown_assets_url_prefix'] ) && + str_starts_with( $raw_url, $this->options['local_markdown_assets_url_prefix'] ) + ) { + // @TODO: Source the file from the current input stream if we can. + // This would allow stream-importing zipped Markdown and WXR directory + // structures. 
+ // Maybe for v1 we could just support importing them from ZIP files + // that are already downloaded and available in a local directory just + // to avoid additional data transfer and the hurdle with implementing + // multiple range requests. + $relative_asset_path = substr( $raw_url, strlen( $this->options['local_markdown_assets_url_prefix'] ) ); + $relative_asset_path = '/' . ltrim( $relative_asset_path, '/' ); + $raw_url = ( + 'file://' . + $this->options['local_markdown_assets_root'] . + $relative_asset_path + ); + } + + return parent::rewrite_attachment_url( $raw_url, $context_path ); + } + + /** + * When processing Markdown, we'll download all the images + * referenced in the image tags. + * + * @TODO: Actually, should we? + * @TODO: How can we process the videos? + * @TODO: What other asset types are there? + */ + protected function url_processor_matched_asset_url( WP_Block_Markup_Url_Processor $p ) { + return ( + $p->get_tag() === 'IMG' && + $p->get_inspected_attribute_name() === 'src' + ); + } +} diff --git a/packages/playground/data-liberation/src/markdown-api/WP_Markdown_To_Blocks.php b/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php similarity index 90% rename from packages/playground/data-liberation/src/markdown-api/WP_Markdown_To_Blocks.php rename to packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php index 424ebd5b8d..2316dcee61 100644 --- a/packages/playground/data-liberation/src/markdown-api/WP_Markdown_To_Blocks.php +++ b/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php @@ -21,8 +21,7 @@ use League\CommonMark\Extension\Table\TableRow; use League\CommonMark\Extension\Table\TableSection; - -class WP_Markdown_To_Blocks { +class WP_Markdown_To_Blocks implements WP_Block_Markup_Converter { const STATE_READY = 'STATE_READY'; const STATE_COMPLETE = 'STATE_COMPLETE'; @@ -40,19 +39,26 @@ public function __construct( $markdown ) { $this->markdown = $markdown; } - public function 
parse() { + public function convert() { if ( self::STATE_READY !== $this->state ) { return false; } $this->convert_markdown_to_blocks(); - $this->block_markup = self::convert_blocks_to_markup( $this->parsed_blocks ); + $this->block_markup = WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks ); return true; } - public function get_frontmatter() { + public function get_all_metadata() { return $this->frontmatter; } + public function get_meta_value( $key ) { + if ( ! array_key_exists( $key, $this->frontmatter ) ) { + return null; + } + return $this->frontmatter[ $key ][0]; + } + public function get_block_markup() { return $this->block_markup; } @@ -74,7 +80,11 @@ private function convert_markdown_to_blocks() { $parser = new MarkdownParser( $environment ); $document = $parser->parse( $this->markdown ); - $this->frontmatter = $document->data; + $this->frontmatter = []; + foreach( $document->data as $key => $value ) { + // Use an array as a value to comply with the WP_Block_Markup_Converter interface. + $this->frontmatter[ $key ] = [$value]; + } $walker = $document->walker(); while ( true ) { @@ -163,7 +173,7 @@ private function convert_markdown_to_blocks() { 'content' => '
' . trim( str_replace( "\n", '
', htmlspecialchars( $node->getLiteral() ) ) ) . '
',
)
);
- if ( $node->getInfo() ) {
+ if ( method_exists( $node, 'getInfo' ) && $node->getInfo() ) {
$this->current_block->attrs['language'] = preg_replace( '/[ \t\r\n\f].*/', '', $node->getInfo() );
}
break;
@@ -339,35 +349,6 @@ private function convert_markdown_to_blocks() {
$this->parsed_blocks = $this->root_block->inner_blocks;
}
- private static function convert_blocks_to_markup( $blocks ) {
- $block_markup = '';
-
- foreach ( $blocks as $block ) {
- // Start of block comment
- $comment = '';
- $p = new WP_HTML_Tag_Processor( $comment );
- $p->next_token();
- $attrs = $block->attrs;
- $content = $block->attrs['content'] ?? '';
- unset( $attrs['content'] );
- $encoded_attrs = json_encode( $attrs );
- if ( $encoded_attrs === '[]' ) {
- $encoded_attrs = '';
- }
- $p->set_modifiable_text( " wp:{$block->block_name} " . $encoded_attrs . ' ' );
- $open_comment = $p->get_updated_html();
-
- $block_markup .= $open_comment . "\n";
- $block_markup .= $content . "\n";
- $block_markup .= self::convert_blocks_to_markup( $block->inner_blocks );
-
- // End of block comment
- $block_markup .= "\n";
- }
-
- return $block_markup;
- }
-
private function append_content( $content ) {
if ( ! isset( $this->current_block->attrs['content'] ) ) {
$this->current_block->attrs['content'] = '';
diff --git a/packages/playground/data-liberation-markdown/src/bootstrap.php b/packages/playground/data-liberation-markdown/src/bootstrap.php
new file mode 100644
index 0000000000..f815de02b7
--- /dev/null
+++ b/packages/playground/data-liberation-markdown/src/bootstrap.php
@@ -0,0 +1,6 @@
+file_visitor = new WP_File_Visitor( realpath( $root_dir ) );
- $this->next_post_id = $first_post_id;
- }
-
- public function next_entity() {
- while ( true ) {
- if ( null !== $this->pending_directory_index ) {
- $dir = $this->file_visitor->get_event()->dir;
- $parent_id = $this->parent_ids[ $this->file_visitor->get_current_depth() - 1 ] ?? null;
-
- if ( false === $this->pending_directory_index ) {
- // No directory index candidate – let's create a fake page
- // just to have something in the page tree.
- $markdown = '';
- $source_path = $dir->getPathName();
- } else {
- $markdown = file_get_contents( $this->pending_directory_index->getRealPath() );
- $source_path = $this->pending_directory_index->getRealPath();
- }
- $post_id = $this->next_post_id;
- ++$this->next_post_id;
- ++$this->entities_read_so_far;
- $this->entity = $this->markdown_to_post_entity(
- array(
- 'markdown' => $markdown,
- 'source_path' => $source_path,
- 'post_id' => $post_id,
- 'parent_id' => $parent_id,
- 'title_fallback' => $this->slug_to_title( $dir->getFileName() ),
- )
- );
- $this->pending_directory_index = null;
- $depth = $this->file_visitor->get_current_depth();
- $this->parent_ids[ $depth ] = $post_id;
- return true;
- }
-
- while ( count( $this->pending_files ) ) {
- $parent_id = $this->parent_ids[ $this->file_visitor->get_current_depth() ] ?? null;
- $file = array_shift( $this->pending_files );
- $this->entity = $this->markdown_to_post_entity(
- array(
- 'markdown' => file_get_contents( $file->getRealPath() ),
- 'source_path' => $file->getRealPath(),
- 'post_id' => $this->next_post_id,
- 'parent_id' => $parent_id,
- 'title_fallback' => $this->slug_to_title( $file->getFileName() ),
- )
- );
- ++$this->next_post_id;
- ++$this->entities_read_so_far;
- return true;
- }
-
- if ( false === $this->next_file() ) {
- break;
- }
- }
- $this->is_finished = true;
- return false;
- }
-
- public function get_entity(): WP_Imported_Entity {
- return $this->entity;
- }
-
- protected function markdown_to_post_entity( $options ) {
- $converter = new WP_Markdown_To_Blocks( $options['markdown'] );
- $converter->parse();
- $block_markup = $converter->get_block_markup();
- $frontmatter = $converter->get_frontmatter();
-
- $removed_title = $this->remove_first_h1_block_from_block_markup( $block_markup );
- if ( false !== $removed_title ) {
- $block_markup = $removed_title['remaining_html'];
- }
-
- $post_title = '';
- if ( ! $post_title && ! empty( $removed_title['content'] ) ) {
- $post_title = $removed_title['content'];
- }
- if ( ! $post_title && ! empty( $frontmatter['title'] ) ) {
- // In WordPress Playground docs, the frontmatter title
- // is actually a worse candidate than the first H1 block
- //
- // There will, inevitably, be 10,000 ways people will want
- // to use this importer with different projects. Let's just
- // enable plugins to customize the title resolution.
- $post_title = $frontmatter['title'];
- }
- if ( ! $post_title ) {
- $post_title = $options['title_fallback'];
- }
-
- $entity_data = array(
- 'post_id' => $options['post_id'],
- 'post_type' => 'page',
- 'guid' => $options['source_path'],
- 'post_title' => $post_title,
- 'post_content' => $block_markup,
- 'post_excerpt' => $frontmatter['description'] ?? '',
- 'post_status' => 'publish',
- );
-
- /**
- * Technically `source_path` isn't a part of the WordPress post object,
- * but we need it to resolve relative URLs in the imported content.
- *
- * This path is relative to the root directory traversed by this class.
- */
- if ( ! empty( $options['source_path'] ) ) {
- $source_path = $options['source_path'];
- $root_dir = $this->file_visitor->get_root_dir();
- if ( str_starts_with( $source_path, $root_dir ) ) {
- $source_path = substr( $source_path, strlen( $root_dir ) );
- }
- $source_path = ltrim( $source_path, '/' );
- $entity_data['source_path'] = $source_path;
- }
-
- if ( ! empty( $frontmatter['slug'] ) ) {
- $slug = $frontmatter['slug'];
- $last_segment = substr( $slug, strrpos( $slug, '/' ) + 1 );
- $entity_data['post_name'] = $last_segment;
- }
-
- if ( isset( $frontmatter['sidebar_position'] ) ) {
- $entity_data['post_order'] = $frontmatter['sidebar_position'];
- }
-
- if ( $options['parent_id'] ) {
- $entity_data['post_parent'] = $options['parent_id'];
- }
- return new WP_Imported_Entity( 'post', $entity_data );
- }
-
- private function next_file() {
- $this->pending_files = array();
- $this->entity = null;
- while ( $this->file_visitor->next() ) {
- $event = $this->file_visitor->get_event();
-
- $is_root = $event->dir->getPathName() === $this->file_visitor->get_root_dir();
- if ( $is_root ) {
- continue;
- }
- if ( $event->is_exiting() ) {
- // Clean up stale IDs to save some memory when processing
- // large directory trees.
- unset( $this->parent_ids[ $event->dir->getRealPath() ] );
- continue;
- }
-
- $this->pending_files = $this->choose_relevant_files( $event->files );
- $directory_index_idx = $this->choose_directory_index( $this->pending_files );
- if ( -1 !== $directory_index_idx ) {
- $this->pending_directory_index = $this->pending_files[ $directory_index_idx ];
- unset( $this->pending_files[ $directory_index_idx ] );
- } else {
- $this->pending_directory_index = false;
- }
- return true;
- }
- return false;
- }
-
- protected function choose_directory_index( $files ) {
- foreach ( $files as $idx => $file ) {
- if ( $this->looks_like_directory_index( $file ) ) {
- return $idx;
- }
- }
- return -1;
- }
-
- protected function looks_like_directory_index( $file ) {
- return str_contains( $file->getFilename(), 'index' );
- }
-
- protected function choose_relevant_files( $files ) {
- return array_filter( $files, array( $this, 'is_valid_file' ) );
- }
-
- protected function is_valid_file( $file ) {
- return 'md' === $file->getExtension();
- }
-
- protected function slug_to_title( $filename ) {
- $name = pathinfo( $filename, PATHINFO_FILENAME );
- $name = preg_replace( '/^\d+/', '', $name );
- $name = str_replace(
- array( '-', '_' ),
- ' ',
- $name
- );
- $name = ucwords( $name );
- return $name;
- }
-
- private function remove_first_h1_block_from_block_markup( $html ) {
- $p = WP_Markdown_HTML_Processor::create_fragment( $html );
- if ( false === $p->next_tag() ) {
- return false;
- }
- if ( $p->get_tag() !== 'H1' ) {
- return false;
- }
- $depth = $p->get_current_depth();
- $title = '';
- do {
- if ( false === $p->next_token() ) {
- break;
- }
- if ( $p->get_token_type() === '#text' ) {
- $title .= $p->get_modifiable_text() . ' ';
- }
- } while ( $p->get_current_depth() > $depth );
-
- if ( ! $title ) {
- return false;
- }
-
- // Move past the closing comment
- $p->next_token();
- if ( $p->get_token_type() === '#text' ) {
- $p->next_token();
- }
- if ( $p->get_token_type() !== '#comment' ) {
- return false;
- }
-
- return array(
- 'content' => trim( $title ),
- 'remaining_html' => substr(
- $html,
- $p->get_string_index_after_current_token()
- ),
- );
- }
-
- public function current(): object {
- return $this->get_entity();
- }
-
- public function next(): void {
- $this->next_entity();
- }
-
- public function key(): int {
- return $this->entities_read_so_far - 1;
- }
-
- public function valid(): bool {
- return ! $this->is_finished;
- }
-
- public function rewind(): void {
- // noop
- }
-}
diff --git a/packages/playground/data-liberation/src/markdown-api/WP_Markdown_HTML_Processor.php b/packages/playground/data-liberation/src/markdown-api/WP_Markdown_HTML_Processor.php
deleted file mode 100644
index afb6ec9c2a..0000000000
--- a/packages/playground/data-liberation/src/markdown-api/WP_Markdown_HTML_Processor.php
+++ /dev/null
@@ -1,19 +0,0 @@
-set_bookmark( $name );
- $bookmark = $this->bookmarks[ '_' . $name ];
- $this->release_bookmark( $name );
- return $bookmark->start + $bookmark->length;
- }
-}