From cba62418a92bc1d5640bda8f6c1ef5e0ab9da37f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Zieli=C5=84ski?=
Date: Tue, 17 Dec 2024 12:54:43 +0100
Subject: [PATCH 1/2] [Data Liberation] Move Markdown importer to a separate
package
Moves the Markdown importer to a `data-liberation-markdown` package so
that it can be shipped as a separate `.phar` file and downloaded only
when needed.
## Testing instructions
This only moves code around. To test, confirm the CI PHP unit tests keep
working.
---
.../src/WP_Markdown_Importer.php | 91 +++++
.../src/WP_Markdown_To_Blocks.php | 381 ++++++++++++++++++
.../src/bootstrap.php | 6 +
3 files changed, 478 insertions(+)
create mode 100644 packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php
create mode 100644 packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php
create mode 100644 packages/playground/data-liberation-markdown/src/bootstrap.php
diff --git a/packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php b/packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php
new file mode 100644
index 0000000000..ceda2728c9
--- /dev/null
+++ b/packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php
@@ -0,0 +1,91 @@
+ $markdown_directory,
+ 'first_post_id' => 1,
+ 'allowed_extensions' => array( 'md' ),
+ 'index_file_patterns' => array( '#^index\.md$#' ),
+ 'markup_converter_factory' => function( $content ) {
+ return new WP_Markdown_To_Blocks( $content );
+ },
+ )
+ );
+ },
+ $options,
+ $cursor
+ );
+ }
+
+ /**
+  * Validates the Markdown-specific importer options, then delegates to the parent.
+  *
+  * Requires `source_site_url` and `local_markdown_assets_root`; the latter must
+  * point to an existing directory. Trailing slashes are trimmed from
+  * `local_markdown_assets_root` so paths can later be appended with a single `/`.
+  *
+  * @param array $options Importer options supplied by the caller.
+  * @return mixed False when a required option is missing or invalid, otherwise
+  *               whatever parent::parse_options() returns.
+  */
+ protected static function parse_options( $options ) {
+ 	if ( ! isset( $options['source_site_url'] ) ) {
+ 		_doing_it_wrong( __METHOD__, 'The source_site_url option is required.', '__WP_VERSION__' );
+ 		return false;
+ 	}
+ 	// Mirror the site URL under the `default_source_site_url` key before
+ 	// delegating (presumably the key the parent importer reads – confirm).
+ 	$options['default_source_site_url'] = $options['source_site_url'];
+
+ 	// The messages below name the option exactly as it is spelled in $options,
+ 	// so a developer can act on the notice without reading this method.
+ 	if ( ! isset( $options['local_markdown_assets_root'] ) ) {
+ 		_doing_it_wrong( __METHOD__, 'The local_markdown_assets_root option is required.', '__WP_VERSION__' );
+ 		return false;
+ 	}
+ 	if ( ! is_dir( $options['local_markdown_assets_root'] ) ) {
+ 		_doing_it_wrong( __METHOD__, 'The local_markdown_assets_root option must point to a directory.', '__WP_VERSION__' );
+ 		return false;
+ 	}
+ 	$options['local_markdown_assets_root'] = rtrim( $options['local_markdown_assets_root'], '/' );
+
+ 	return parent::parse_options( $options );
+ }
+
+ /**
+  * Maps asset URLs that start with the configured local-assets prefix to
+  * `file://` URLs inside `local_markdown_assets_root`, then lets the parent
+  * class finish the rewrite.
+  *
+  * In Docusaurus docs, for example, URLs starting with `@site` refer to
+  * local files.
+  *
+  * @param string $raw_url      The URL as found in the imported content.
+  * @param mixed  $context_path Passed through to the parent unchanged.
+  * @return mixed Whatever parent::rewrite_attachment_url() returns.
+  */
+ protected function rewrite_attachment_url( string $raw_url, $context_path = null ) {
+ 	$local_prefix = $this->options['local_markdown_assets_url_prefix'] ?? null;
+
+ 	// Not a local asset reference – hand the URL to the parent untouched.
+ 	if ( null === $local_prefix || ! str_starts_with( $raw_url, $local_prefix ) ) {
+ 		return parent::rewrite_attachment_url( $raw_url, $context_path );
+ 	}
+
+ 	// @TODO: Source the file from the current input stream if we can.
+ 	// This would allow stream-importing zipped Markdown and WXR directory
+ 	// structures.
+ 	// Maybe for v1 we could just support importing them from ZIP files
+ 	// that are already downloaded and available in a local directory just
+ 	// to avoid additional data transfer and the hurdle with implementing
+ 	// multiple range requests.
+ 	//
+ 	// NOTE(review): `..` segments in the remainder are not normalized in this
+ 	// method – confirm the parent or the downloader guards against escaping
+ 	// the assets root.
+ 	$path_after_prefix = ltrim( substr( $raw_url, strlen( $local_prefix ) ), '/' );
+ 	$local_file_url    = 'file://' . $this->options['local_markdown_assets_root'] . '/' . $path_after_prefix;
+
+ 	return parent::rewrite_attachment_url( $local_file_url, $context_path );
+ }
+
+ /**
+  * Tells whether the URL currently matched by the URL processor should be
+  * treated as an asset URL.
+  *
+  * For Markdown imports only the `src` attribute of `IMG` tags qualifies.
+  *
+  * @TODO: Actually, should we?
+  * @TODO: How can we process the videos?
+  * @TODO: What other asset types are there?
+  *
+  * @param WP_Block_Markup_Url_Processor $p Processor positioned on a matched URL.
+  * @return bool True for an `IMG` tag's `src` attribute, false otherwise.
+  */
+ protected function url_processor_matched_asset_url( WP_Block_Markup_Url_Processor $p ) {
+ 	if ( $p->get_tag() !== 'IMG' ) {
+ 		return false;
+ 	}
+ 	return $p->get_inspected_attribute_name() === 'src';
+ }
+}
diff --git a/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php b/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php
new file mode 100644
index 0000000000..2316dcee61
--- /dev/null
+++ b/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php
@@ -0,0 +1,381 @@
+ tags. Otherwise their width
+ * exceeds that of the paragraph block they're in.
+ * * Consider implementing a dedicated Markdown parser – similar to how we have
+ * small, dedicated, and fast XML, HTML, etc. parsers. It would solve for
+ * code complexity, bundle size, performance, PHP compatibility, etc.
+ */
+
+use League\CommonMark\Environment\Environment;
+use League\CommonMark\Extension\CommonMark\CommonMarkCoreExtension;
+use League\CommonMark\Extension\GithubFlavoredMarkdownExtension;
+use League\CommonMark\Parser\MarkdownParser;
+use League\CommonMark\Extension\CommonMark\Node\Block as ExtensionBlock;
+use League\CommonMark\Extension\CommonMark\Node\Inline as ExtensionInline;
+use League\CommonMark\Node\Block;
+use League\CommonMark\Node\Inline;
+use League\CommonMark\Extension\Table\Table;
+use League\CommonMark\Extension\Table\TableCell;
+use League\CommonMark\Extension\Table\TableRow;
+use League\CommonMark\Extension\Table\TableSection;
+
+class WP_Markdown_To_Blocks implements WP_Block_Markup_Converter {
+ const STATE_READY = 'STATE_READY';
+ const STATE_COMPLETE = 'STATE_COMPLETE';
+
+ private $state = self::STATE_READY;
+ private $root_block;
+ private $block_stack = array();
+ private $current_block = null;
+
+ private $frontmatter = array();
+ private $markdown;
+ private $parsed_blocks = array();
+ private $block_markup = '';
+
+ /**
+ * Stores the Markdown source. No parsing happens until convert() is called.
+ *
+ * @param string $markdown Markdown document, optionally with frontmatter.
+ */
+ public function __construct( $markdown ) {
+ $this->markdown = $markdown;
+ }
+
+ /**
+  * Converts the Markdown passed to the constructor into block markup.
+  *
+  * The conversion runs at most once per instance: after the first call the
+  * converter moves to STATE_COMPLETE and later calls return false without
+  * touching the stored results.
+  *
+  * @return bool True when the conversion ran, false when it had already run.
+  */
+ public function convert() {
+ 	if ( self::STATE_READY !== $this->state ) {
+ 		return false;
+ 	}
+ 	$this->convert_markdown_to_blocks();
+ 	$this->block_markup = WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks );
+ 	// Leave the READY state so the guard above takes effect. Without this a
+ 	// second call would re-parse the document and push another root block
+ 	// onto the block stack.
+ 	$this->state = self::STATE_COMPLETE;
+ 	return true;
+ }
+
+ /**
+ * Returns the frontmatter collected by convert().
+ *
+ * @return array Map of frontmatter key => single-element array holding the value.
+ */
+ public function get_all_metadata() {
+ return $this->frontmatter;
+ }
+
+ /**
+  * Looks up a single frontmatter value.
+  *
+  * Frontmatter values are stored wrapped in one-element arrays; this unwraps
+  * the first element.
+  *
+  * @param string $key Frontmatter key.
+  * @return mixed|null The stored value, or null when the key is absent.
+  */
+ public function get_meta_value( $key ) {
+ 	return array_key_exists( $key, $this->frontmatter )
+ 		? $this->frontmatter[ $key ][0]
+ 		: null;
+ }
+
+ /**
+ * Returns the block markup produced by convert().
+ *
+ * @return string Serialized block markup; an empty string before convert() has run.
+ */
+ public function get_block_markup() {
+ return $this->block_markup;
+ }
+
+ private function convert_markdown_to_blocks() {
+ $this->root_block = $this->create_block( 'post-content' );
+ $this->block_stack[] = $this->root_block;
+ $this->current_block = $this->root_block;
+
+ $environment = new Environment( array() );
+ $environment->addExtension( new CommonMarkCoreExtension() );
+ $environment->addExtension( new GithubFlavoredMarkdownExtension() );
+ $environment->addExtension(
+ new \Webuni\FrontMatter\Markdown\FrontMatterLeagueCommonMarkExtension(
+ new \Webuni\FrontMatter\FrontMatter()
+ )
+ );
+
+ $parser = new MarkdownParser( $environment );
+
+ $document = $parser->parse( $this->markdown );
+ $this->frontmatter = [];
+ foreach( $document->data as $key => $value ) {
+ // Use an array as a value to comply with the WP_Block_Markup_Converter interface.
+ $this->frontmatter[ $key ] = [$value];
+ }
+
+ $walker = $document->walker();
+ while ( true ) {
+ $event = $walker->next();
+ if ( ! $event ) {
+ break;
+ }
+ $node = $event->getNode();
+
+ if ( $event->isEntering() ) {
+ switch ( get_class( $node ) ) {
+ case Block\Document::class:
+ // Ignore
+ break;
+
+ case ExtensionBlock\Heading::class:
+ $this->push_block(
+ 'heading',
+ array(
+ 'level' => $node->getLevel(),
+ 'content' => 'getLevel() . '>',
+ )
+ );
+ break;
+
+ case ExtensionBlock\ListBlock::class:
+ $this->push_block(
+ 'list',
+ array(
+ 'ordered' => $node->getListData()->type === 'ordered',
+ 'content' => '',
+ )
+ );
+ if ( $node->getListData()->start && $node->getListData()->start !== 1 ) {
+ $this->current_block->attrs['start'] = $node->getListData()->start;
+ }
+ break;
+
+ case ExtensionBlock\ListItem::class:
+ $this->push_block(
+ 'list-item',
+ array(
+ 'content' => '- ',
+ )
+ );
+ break;
+
+ case Table::class:
+ $this->push_block(
+ 'table',
+ array(
+ 'head' => array(),
+ 'body' => array(),
+ 'foot' => array(),
+ )
+ );
+ break;
+
+ case TableSection::class:
+ $this->push_block(
+ 'table-section',
+ array(
+ 'type' => $node->isHead() ? 'head' : 'body',
+ )
+ );
+ break;
+
+ case TableRow::class:
+ $this->push_block( 'table-row' );
+ break;
+
+ case TableCell::class:
+ /** @var TableCell $node */
+ $this->push_block( 'table-cell' );
+ break;
+
+ case ExtensionBlock\BlockQuote::class:
+ $this->push_block( 'quote' );
+ break;
+
+ case ExtensionBlock\FencedCode::class:
+ case ExtensionBlock\IndentedCode::class:
+ $this->push_block(
+ 'code',
+ array(
+ 'content' => '
' . trim( str_replace( "\n", '
', htmlspecialchars( $node->getLiteral() ) ) ) . '
',
+ )
+ );
+ if ( method_exists( $node, 'getInfo' ) && $node->getInfo() ) {
+ $this->current_block->attrs['language'] = preg_replace( '/[ \t\r\n\f].*/', '', $node->getInfo() );
+ }
+ break;
+
+ case ExtensionBlock\HtmlBlock::class:
+ $this->push_block(
+ 'html',
+ array(
+ 'content' => $node->getLiteral(),
+ )
+ );
+ break;
+
+ case ExtensionBlock\ThematicBreak::class:
+ $this->push_block( 'separator' );
+ break;
+
+ case Block\Paragraph::class:
+ if ( $this->current_block->block_name === 'list-item' ) {
+ break;
+ }
+ $this->push_block(
+ 'paragraph',
+ array(
+ 'content' => '',
+ )
+ );
+ break;
+
+ case Inline\Newline::class:
+ $this->append_content( "\n" );
+ break;
+
+ case Inline\Text::class:
+ $this->append_content( $node->getLiteral() );
+ break;
+
+ case ExtensionInline\Code::class:
+ $this->append_content( '' . htmlspecialchars( $node->getLiteral() ) . '
' );
+ break;
+
+ case ExtensionInline\Strong::class:
+ $this->append_content( '' );
+ break;
+
+ case ExtensionInline\Emphasis::class:
+ $this->append_content( '' );
+ break;
+
+ case ExtensionInline\HtmlInline::class:
+ $this->append_content( htmlspecialchars( $node->getLiteral() ) );
+ break;
+
+ case ExtensionInline\Image::class:
+ $html = new WP_HTML_Tag_Processor( '
' );
+ $html->next_tag();
+ if ( $node->getUrl() ) {
+ $html->set_attribute( 'src', $node->getUrl() );
+ }
+ if ( $node->getTitle() ) {
+ $html->set_attribute( 'title', $node->getTitle() );
+ }
+ $this->append_content( $html->get_updated_html() );
+ break;
+
+ case ExtensionInline\Link::class:
+ $html = new WP_HTML_Tag_Processor( '' );
+ $html->next_tag();
+ if ( $node->getUrl() ) {
+ $html->set_attribute( 'href', $node->getUrl() );
+ }
+ if ( $node->getTitle() ) {
+ $html->set_attribute( 'title', $node->getTitle() );
+ }
+ $this->append_content( $html->get_updated_html() );
+ break;
+
+ default:
+ error_log( 'Unhandled node type: ' . get_class( $node ) );
+ return null;
+ }
+ } else {
+ switch ( get_class( $node ) ) {
+ case ExtensionBlock\ListBlock::class:
+ $this->append_content( '
' );
+ $this->pop_block();
+ break;
+ case ExtensionBlock\ListItem::class:
+ $this->append_content( '' );
+ $this->pop_block();
+ break;
+ case ExtensionBlock\Heading::class:
+ $this->append_content( 'getLevel() . '>' );
+ $this->pop_block();
+ break;
+ case ExtensionInline\Strong::class:
+ $this->append_content( '' );
+ break;
+ case ExtensionInline\Emphasis::class:
+ $this->append_content( '' );
+ break;
+ case ExtensionInline\Link::class:
+ $this->append_content( '' );
+ break;
+ case TableSection::class:
+ $table_section = $this->pop_block();
+ $type = $table_section->attrs['type'];
+ $tag = $type === 'head' ? 'th' : 'td';
+
+ $parsed_rows = array();
+ foreach ( $table_section->inner_blocks as $row ) {
+ $parsed_row = array();
+ foreach ( $row->inner_blocks as $cell ) {
+ $parsed_row[] = array(
+ 'tag' => $tag,
+ 'content' => $cell->attrs['content'] ?? '',
+ );
+ }
+ $parsed_rows[] = $parsed_row;
+ }
+
+ $table = $this->current_block;
+ if ( $type === 'head' ) {
+ $table->attrs[ $type ] = $parsed_rows[0];
+ } else {
+ $table->attrs[ $type ] = $parsed_rows;
+ }
+ $table->inner_blocks = array();
+ break;
+ case Table::class:
+ $table = '';
+ $table .= '';
+ $table .= '';
+ foreach ( $this->current_block->attrs['head'] as $cell ) {
+ $table .= '' . $cell['content'] . ' | ';
+ }
+ $table .= '
';
+ foreach ( $this->current_block->attrs['body'] as $row ) {
+ $table .= '';
+ foreach ( $row as $cell ) {
+ $table .= '' . $cell['content'] . ' | ';
+ }
+ $table .= '
';
+ }
+ $table .= '
';
+ $table .= ' ';
+ $this->current_block->attrs['content'] = $table;
+ $this->pop_block();
+ break;
+
+ case Block\Paragraph::class:
+ if ( $this->current_block->block_name === 'list-item' ) {
+ break;
+ }
+ $this->append_content( '
' );
+ $this->pop_block();
+ break;
+
+ case Inline\Text::class:
+ case Inline\Newline::class:
+ case Block\Document::class:
+ case ExtensionInline\Code::class:
+ case ExtensionInline\HtmlInline::class:
+ case ExtensionInline\Image::class:
+ // Ignore, don't pop any blocks.
+ break;
+ default:
+ $this->pop_block();
+ break;
+ }
+ }
+ }
+ $this->parsed_blocks = $this->root_block->inner_blocks;
+ }
+
+ /**
+  * Appends text to the `content` attribute of the block currently being built,
+  * creating the attribute when it does not exist yet.
+  *
+  * @param string $content Markup or text to append.
+  */
+ private function append_content( $content ) {
+ 	$so_far = $this->current_block->attrs['content'] ?? '';
+
+ 	$this->current_block->attrs['content'] = $so_far . $content;
+ }
+
+ /**
+  * Creates a block, attaches it as a child of the current block, and makes it
+  * the new current block.
+  *
+  * @param string $name         Block name.
+  * @param array  $attributes   Initial block attributes.
+  * @param array  $inner_blocks Initial child blocks.
+  */
+ private function push_block( $name, $attributes = array(), $inner_blocks = array() ) {
+ 	$child = $this->create_block( $name, $attributes, $inner_blocks );
+
+ 	// Register the child with its parent before descending into it.
+ 	$this->current_block->inner_blocks[] = $child;
+
+ 	$this->block_stack[] = $child;
+ 	$this->current_block = $child;
+ }
+
+ /**
+  * Builds a WP_Block_Object without attaching it to the block tree.
+  *
+  * @param string $name         Block name.
+  * @param array  $attributes   Block attributes.
+  * @param array  $inner_blocks Child blocks.
+  * @return WP_Block_Object The newly created block.
+  */
+ private function create_block( $name, $attributes = array(), $inner_blocks = array() ) {
+ 	return new WP_Block_Object( $name, $attributes, $inner_blocks );
+ }
+
+ /**
+  * Removes the innermost block from the stack and makes its parent current.
+  *
+  * When the popped block was the last one on the stack, `end()` returns false
+  * and the current block becomes false.
+  *
+  * @return WP_Block_Object|null The popped block, or null when the stack was
+  *                              already empty.
+  */
+ private function pop_block() {
+ 	if ( empty( $this->block_stack ) ) {
+ 		return null;
+ 	}
+
+ 	$finished_block      = array_pop( $this->block_stack );
+ 	$this->current_block = end( $this->block_stack );
+
+ 	return $finished_block;
+ }
+}
diff --git a/packages/playground/data-liberation-markdown/src/bootstrap.php b/packages/playground/data-liberation-markdown/src/bootstrap.php
new file mode 100644
index 0000000000..f815de02b7
--- /dev/null
+++ b/packages/playground/data-liberation-markdown/src/bootstrap.php
@@ -0,0 +1,6 @@
+
Date: Tue, 17 Dec 2024 12:58:20 +0100
Subject: [PATCH 2/2] Remove the markdown api files from the core
data-liberation library
---
.../playground/data-liberation/bootstrap.php | 14 -
.../WP_Markdown_Directory_Tree_Reader.php | 283 -------------
.../WP_Markdown_HTML_Processor.php | 19 -
.../markdown-api/WP_Markdown_To_Blocks.php | 400 ------------------
4 files changed, 716 deletions(-)
delete mode 100644 packages/playground/data-liberation/src/markdown-api/WP_Markdown_Directory_Tree_Reader.php
delete mode 100644 packages/playground/data-liberation/src/markdown-api/WP_Markdown_HTML_Processor.php
delete mode 100644 packages/playground/data-liberation/src/markdown-api/WP_Markdown_To_Blocks.php
diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php
index 8b5910c217..9c38ff0a6e 100644
--- a/packages/playground/data-liberation/bootstrap.php
+++ b/packages/playground/data-liberation/bootstrap.php
@@ -67,20 +67,6 @@
require_once __DIR__ . '/src/utf8_decoder.php';
-/**
- * Require conditionally – these files are missing from the data-liberation-core.phar
- * to reduce the bundle size (we'd need to include a large markdown parser and its
- * dependencies, too).
- *
- * @TODO: Build a separate "data-liberation-markdown" phar file plugin with the Markdown
- * importing functionality.
- */
-if ( file_exists( __DIR__ . '/src/markdown-api/WP_Markdown_To_Blocks.php' ) ) {
- require_once __DIR__ . '/src/markdown-api/WP_Markdown_To_Blocks.php';
- require_once __DIR__ . '/src/markdown-api/WP_Markdown_Directory_Tree_Reader.php';
- require_once __DIR__ . '/src/markdown-api/WP_Markdown_HTML_Processor.php';
-}
-
// When running in Playground, the composer autoloader script sees CLI SAPI and
// tries to use the STDERR, STDIN, and STDOUT constants.
// @TODO: Don't use the "cli" SAPI string and don't allow composer to run platform checks.
diff --git a/packages/playground/data-liberation/src/markdown-api/WP_Markdown_Directory_Tree_Reader.php b/packages/playground/data-liberation/src/markdown-api/WP_Markdown_Directory_Tree_Reader.php
deleted file mode 100644
index da7514eb89..0000000000
--- a/packages/playground/data-liberation/src/markdown-api/WP_Markdown_Directory_Tree_Reader.php
+++ /dev/null
@@ -1,283 +0,0 @@
-file_visitor = new WP_File_Visitor( realpath( $root_dir ) );
- $this->next_post_id = $first_post_id;
- }
-
- public function next_entity() {
- while ( true ) {
- if ( null !== $this->pending_directory_index ) {
- $dir = $this->file_visitor->get_event()->dir;
- $parent_id = $this->parent_ids[ $this->file_visitor->get_current_depth() - 1 ] ?? null;
-
- if ( false === $this->pending_directory_index ) {
- // No directory index candidate – let's create a fake page
- // just to have something in the page tree.
- $markdown = '';
- $source_path = $dir->getPathName();
- } else {
- $markdown = file_get_contents( $this->pending_directory_index->getRealPath() );
- $source_path = $this->pending_directory_index->getRealPath();
- }
- $post_id = $this->next_post_id;
- ++$this->next_post_id;
- ++$this->entities_read_so_far;
- $this->entity = $this->markdown_to_post_entity(
- array(
- 'markdown' => $markdown,
- 'source_path' => $source_path,
- 'post_id' => $post_id,
- 'parent_id' => $parent_id,
- 'title_fallback' => $this->slug_to_title( $dir->getFileName() ),
- )
- );
- $this->pending_directory_index = null;
- $depth = $this->file_visitor->get_current_depth();
- $this->parent_ids[ $depth ] = $post_id;
- return true;
- }
-
- while ( count( $this->pending_files ) ) {
- $parent_id = $this->parent_ids[ $this->file_visitor->get_current_depth() ] ?? null;
- $file = array_shift( $this->pending_files );
- $this->entity = $this->markdown_to_post_entity(
- array(
- 'markdown' => file_get_contents( $file->getRealPath() ),
- 'source_path' => $file->getRealPath(),
- 'post_id' => $this->next_post_id,
- 'parent_id' => $parent_id,
- 'title_fallback' => $this->slug_to_title( $file->getFileName() ),
- )
- );
- ++$this->next_post_id;
- ++$this->entities_read_so_far;
- return true;
- }
-
- if ( false === $this->next_file() ) {
- break;
- }
- }
- $this->is_finished = true;
- return false;
- }
-
- public function get_entity(): WP_Imported_Entity {
- return $this->entity;
- }
-
- protected function markdown_to_post_entity( $options ) {
- $converter = new WP_Markdown_To_Blocks( $options['markdown'] );
- $converter->parse();
- $block_markup = $converter->get_block_markup();
- $frontmatter = $converter->get_frontmatter();
-
- $removed_title = $this->remove_first_h1_block_from_block_markup( $block_markup );
- if ( false !== $removed_title ) {
- $block_markup = $removed_title['remaining_html'];
- }
-
- $post_title = '';
- if ( ! $post_title && ! empty( $removed_title['content'] ) ) {
- $post_title = $removed_title['content'];
- }
- if ( ! $post_title && ! empty( $frontmatter['title'] ) ) {
- // In WordPress Playground docs, the frontmatter title
- // is actually a worse candidate than the first H1 block
- //
- // There will, inevitably, be 10,000 ways people will want
- // to use this importer with different projects. Let's just
- // enable plugins to customize the title resolution.
- $post_title = $frontmatter['title'];
- }
- if ( ! $post_title ) {
- $post_title = $options['title_fallback'];
- }
-
- $entity_data = array(
- 'post_id' => $options['post_id'],
- 'post_type' => 'page',
- 'guid' => $options['source_path'],
- 'post_title' => $post_title,
- 'post_content' => $block_markup,
- 'post_excerpt' => $frontmatter['description'] ?? '',
- 'post_status' => 'publish',
- );
-
- /**
- * Technically `source_path` isn't a part of the WordPress post object,
- * but we need it to resolve relative URLs in the imported content.
- *
- * This path is relative to the root directory traversed by this class.
- */
- if ( ! empty( $options['source_path'] ) ) {
- $source_path = $options['source_path'];
- $root_dir = $this->file_visitor->get_root_dir();
- if ( str_starts_with( $source_path, $root_dir ) ) {
- $source_path = substr( $source_path, strlen( $root_dir ) );
- }
- $source_path = ltrim( $source_path, '/' );
- $entity_data['source_path'] = $source_path;
- }
-
- if ( ! empty( $frontmatter['slug'] ) ) {
- $slug = $frontmatter['slug'];
- $last_segment = substr( $slug, strrpos( $slug, '/' ) + 1 );
- $entity_data['post_name'] = $last_segment;
- }
-
- if ( isset( $frontmatter['sidebar_position'] ) ) {
- $entity_data['post_order'] = $frontmatter['sidebar_position'];
- }
-
- if ( $options['parent_id'] ) {
- $entity_data['post_parent'] = $options['parent_id'];
- }
- return new WP_Imported_Entity( 'post', $entity_data );
- }
-
- private function next_file() {
- $this->pending_files = array();
- $this->entity = null;
- while ( $this->file_visitor->next() ) {
- $event = $this->file_visitor->get_event();
-
- $is_root = $event->dir->getPathName() === $this->file_visitor->get_root_dir();
- if ( $is_root ) {
- continue;
- }
- if ( $event->is_exiting() ) {
- // Clean up stale IDs to save some memory when processing
- // large directory trees.
- unset( $this->parent_ids[ $event->dir->getRealPath() ] );
- continue;
- }
-
- $this->pending_files = $this->choose_relevant_files( $event->files );
- $directory_index_idx = $this->choose_directory_index( $this->pending_files );
- if ( -1 !== $directory_index_idx ) {
- $this->pending_directory_index = $this->pending_files[ $directory_index_idx ];
- unset( $this->pending_files[ $directory_index_idx ] );
- } else {
- $this->pending_directory_index = false;
- }
- return true;
- }
- return false;
- }
-
- protected function choose_directory_index( $files ) {
- foreach ( $files as $idx => $file ) {
- if ( $this->looks_like_directory_index( $file ) ) {
- return $idx;
- }
- }
- return -1;
- }
-
- protected function looks_like_directory_index( $file ) {
- return str_contains( $file->getFilename(), 'index' );
- }
-
- protected function choose_relevant_files( $files ) {
- return array_filter( $files, array( $this, 'is_valid_file' ) );
- }
-
- protected function is_valid_file( $file ) {
- return 'md' === $file->getExtension();
- }
-
- protected function slug_to_title( $filename ) {
- $name = pathinfo( $filename, PATHINFO_FILENAME );
- $name = preg_replace( '/^\d+/', '', $name );
- $name = str_replace(
- array( '-', '_' ),
- ' ',
- $name
- );
- $name = ucwords( $name );
- return $name;
- }
-
- private function remove_first_h1_block_from_block_markup( $html ) {
- $p = WP_Markdown_HTML_Processor::create_fragment( $html );
- if ( false === $p->next_tag() ) {
- return false;
- }
- if ( $p->get_tag() !== 'H1' ) {
- return false;
- }
- $depth = $p->get_current_depth();
- $title = '';
- do {
- if ( false === $p->next_token() ) {
- break;
- }
- if ( $p->get_token_type() === '#text' ) {
- $title .= $p->get_modifiable_text() . ' ';
- }
- } while ( $p->get_current_depth() > $depth );
-
- if ( ! $title ) {
- return false;
- }
-
- // Move past the closing comment
- $p->next_token();
- if ( $p->get_token_type() === '#text' ) {
- $p->next_token();
- }
- if ( $p->get_token_type() !== '#comment' ) {
- return false;
- }
-
- return array(
- 'content' => trim( $title ),
- 'remaining_html' => substr(
- $html,
- $p->get_string_index_after_current_token()
- ),
- );
- }
-
- public function current(): object {
- return $this->get_entity();
- }
-
- public function next(): void {
- $this->next_entity();
- }
-
- public function key(): int {
- return $this->entities_read_so_far - 1;
- }
-
- public function valid(): bool {
- return ! $this->is_finished;
- }
-
- public function rewind(): void {
- // noop
- }
-}
diff --git a/packages/playground/data-liberation/src/markdown-api/WP_Markdown_HTML_Processor.php b/packages/playground/data-liberation/src/markdown-api/WP_Markdown_HTML_Processor.php
deleted file mode 100644
index afb6ec9c2a..0000000000
--- a/packages/playground/data-liberation/src/markdown-api/WP_Markdown_HTML_Processor.php
+++ /dev/null
@@ -1,19 +0,0 @@
-set_bookmark( $name );
- $bookmark = $this->bookmarks[ '_' . $name ];
- $this->release_bookmark( $name );
- return $bookmark->start + $bookmark->length;
- }
-}
diff --git a/packages/playground/data-liberation/src/markdown-api/WP_Markdown_To_Blocks.php b/packages/playground/data-liberation/src/markdown-api/WP_Markdown_To_Blocks.php
deleted file mode 100644
index 424ebd5b8d..0000000000
--- a/packages/playground/data-liberation/src/markdown-api/WP_Markdown_To_Blocks.php
+++ /dev/null
@@ -1,400 +0,0 @@
- tags. Otherwise their width
- * exceeds that of the paragraph block they're in.
- * * Consider implementing a dedicated markdown parser – similarly how we have
- * a small, dedicated, and fast XML, HTML, etc. parsers. It would solve for
- * code complexity, bundle size, performance, PHP compatibility, etc.
- */
-
-use League\CommonMark\Environment\Environment;
-use League\CommonMark\Extension\CommonMark\CommonMarkCoreExtension;
-use League\CommonMark\Extension\GithubFlavoredMarkdownExtension;
-use League\CommonMark\Parser\MarkdownParser;
-use League\CommonMark\Extension\CommonMark\Node\Block as ExtensionBlock;
-use League\CommonMark\Extension\CommonMark\Node\Inline as ExtensionInline;
-use League\CommonMark\Node\Block;
-use League\CommonMark\Node\Inline;
-use League\CommonMark\Extension\Table\Table;
-use League\CommonMark\Extension\Table\TableCell;
-use League\CommonMark\Extension\Table\TableRow;
-use League\CommonMark\Extension\Table\TableSection;
-
-
-class WP_Markdown_To_Blocks {
- const STATE_READY = 'STATE_READY';
- const STATE_COMPLETE = 'STATE_COMPLETE';
-
- private $state = self::STATE_READY;
- private $root_block;
- private $block_stack = array();
- private $current_block = null;
-
- private $frontmatter = array();
- private $markdown;
- private $parsed_blocks = array();
- private $block_markup = '';
-
- public function __construct( $markdown ) {
- $this->markdown = $markdown;
- }
-
- public function parse() {
- if ( self::STATE_READY !== $this->state ) {
- return false;
- }
- $this->convert_markdown_to_blocks();
- $this->block_markup = self::convert_blocks_to_markup( $this->parsed_blocks );
- return true;
- }
-
- public function get_frontmatter() {
- return $this->frontmatter;
- }
-
- public function get_block_markup() {
- return $this->block_markup;
- }
-
- private function convert_markdown_to_blocks() {
- $this->root_block = $this->create_block( 'post-content' );
- $this->block_stack[] = $this->root_block;
- $this->current_block = $this->root_block;
-
- $environment = new Environment( array() );
- $environment->addExtension( new CommonMarkCoreExtension() );
- $environment->addExtension( new GithubFlavoredMarkdownExtension() );
- $environment->addExtension(
- new \Webuni\FrontMatter\Markdown\FrontMatterLeagueCommonMarkExtension(
- new \Webuni\FrontMatter\FrontMatter()
- )
- );
-
- $parser = new MarkdownParser( $environment );
-
- $document = $parser->parse( $this->markdown );
- $this->frontmatter = $document->data;
-
- $walker = $document->walker();
- while ( true ) {
- $event = $walker->next();
- if ( ! $event ) {
- break;
- }
- $node = $event->getNode();
-
- if ( $event->isEntering() ) {
- switch ( get_class( $node ) ) {
- case Block\Document::class:
- // Ignore
- break;
-
- case ExtensionBlock\Heading::class:
- $this->push_block(
- 'heading',
- array(
- 'level' => $node->getLevel(),
- 'content' => 'getLevel() . '>',
- )
- );
- break;
-
- case ExtensionBlock\ListBlock::class:
- $this->push_block(
- 'list',
- array(
- 'ordered' => $node->getListData()->type === 'ordered',
- 'content' => '',
- )
- );
- if ( $node->getListData()->start && $node->getListData()->start !== 1 ) {
- $this->current_block->attrs['start'] = $node->getListData()->start;
- }
- break;
-
- case ExtensionBlock\ListItem::class:
- $this->push_block(
- 'list-item',
- array(
- 'content' => '- ',
- )
- );
- break;
-
- case Table::class:
- $this->push_block(
- 'table',
- array(
- 'head' => array(),
- 'body' => array(),
- 'foot' => array(),
- )
- );
- break;
-
- case TableSection::class:
- $this->push_block(
- 'table-section',
- array(
- 'type' => $node->isHead() ? 'head' : 'body',
- )
- );
- break;
-
- case TableRow::class:
- $this->push_block( 'table-row' );
- break;
-
- case TableCell::class:
- /** @var TableCell $node */
- $this->push_block( 'table-cell' );
- break;
-
- case ExtensionBlock\BlockQuote::class:
- $this->push_block( 'quote' );
- break;
-
- case ExtensionBlock\FencedCode::class:
- case ExtensionBlock\IndentedCode::class:
- $this->push_block(
- 'code',
- array(
- 'content' => '
' . trim( str_replace( "\n", '
', htmlspecialchars( $node->getLiteral() ) ) ) . '
',
- )
- );
- if ( $node->getInfo() ) {
- $this->current_block->attrs['language'] = preg_replace( '/[ \t\r\n\f].*/', '', $node->getInfo() );
- }
- break;
-
- case ExtensionBlock\HtmlBlock::class:
- $this->push_block(
- 'html',
- array(
- 'content' => $node->getLiteral(),
- )
- );
- break;
-
- case ExtensionBlock\ThematicBreak::class:
- $this->push_block( 'separator' );
- break;
-
- case Block\Paragraph::class:
- if ( $this->current_block->block_name === 'list-item' ) {
- break;
- }
- $this->push_block(
- 'paragraph',
- array(
- 'content' => '',
- )
- );
- break;
-
- case Inline\Newline::class:
- $this->append_content( "\n" );
- break;
-
- case Inline\Text::class:
- $this->append_content( $node->getLiteral() );
- break;
-
- case ExtensionInline\Code::class:
- $this->append_content( '' . htmlspecialchars( $node->getLiteral() ) . '
' );
- break;
-
- case ExtensionInline\Strong::class:
- $this->append_content( '' );
- break;
-
- case ExtensionInline\Emphasis::class:
- $this->append_content( '' );
- break;
-
- case ExtensionInline\HtmlInline::class:
- $this->append_content( htmlspecialchars( $node->getLiteral() ) );
- break;
-
- case ExtensionInline\Image::class:
- $html = new WP_HTML_Tag_Processor( '
' );
- $html->next_tag();
- if ( $node->getUrl() ) {
- $html->set_attribute( 'src', $node->getUrl() );
- }
- if ( $node->getTitle() ) {
- $html->set_attribute( 'title', $node->getTitle() );
- }
- $this->append_content( $html->get_updated_html() );
- break;
-
- case ExtensionInline\Link::class:
- $html = new WP_HTML_Tag_Processor( '' );
- $html->next_tag();
- if ( $node->getUrl() ) {
- $html->set_attribute( 'href', $node->getUrl() );
- }
- if ( $node->getTitle() ) {
- $html->set_attribute( 'title', $node->getTitle() );
- }
- $this->append_content( $html->get_updated_html() );
- break;
-
- default:
- error_log( 'Unhandled node type: ' . get_class( $node ) );
- return null;
- }
- } else {
- switch ( get_class( $node ) ) {
- case ExtensionBlock\ListBlock::class:
- $this->append_content( '
' );
- $this->pop_block();
- break;
- case ExtensionBlock\ListItem::class:
- $this->append_content( '' );
- $this->pop_block();
- break;
- case ExtensionBlock\Heading::class:
- $this->append_content( 'getLevel() . '>' );
- $this->pop_block();
- break;
- case ExtensionInline\Strong::class:
- $this->append_content( '' );
- break;
- case ExtensionInline\Emphasis::class:
- $this->append_content( '' );
- break;
- case ExtensionInline\Link::class:
- $this->append_content( '' );
- break;
- case TableSection::class:
- $table_section = $this->pop_block();
- $type = $table_section->attrs['type'];
- $tag = $type === 'head' ? 'th' : 'td';
-
- $parsed_rows = array();
- foreach ( $table_section->inner_blocks as $row ) {
- $parsed_row = array();
- foreach ( $row->inner_blocks as $cell ) {
- $parsed_row[] = array(
- 'tag' => $tag,
- 'content' => $cell->attrs['content'] ?? '',
- );
- }
- $parsed_rows[] = $parsed_row;
- }
-
- $table = $this->current_block;
- if ( $type === 'head' ) {
- $table->attrs[ $type ] = $parsed_rows[0];
- } else {
- $table->attrs[ $type ] = $parsed_rows;
- }
- $table->inner_blocks = array();
- break;
- case Table::class:
- $table = '';
- $table .= '';
- $table .= '';
- foreach ( $this->current_block->attrs['head'] as $cell ) {
- $table .= '' . $cell['content'] . ' | ';
- }
- $table .= '
';
- foreach ( $this->current_block->attrs['body'] as $row ) {
- $table .= '';
- foreach ( $row as $cell ) {
- $table .= '' . $cell['content'] . ' | ';
- }
- $table .= '
';
- }
- $table .= '
';
- $table .= ' ';
- $this->current_block->attrs['content'] = $table;
- $this->pop_block();
- break;
-
- case Block\Paragraph::class:
- if ( $this->current_block->block_name === 'list-item' ) {
- break;
- }
- $this->append_content( '' );
- $this->pop_block();
- break;
-
- case Inline\Text::class:
- case Inline\Newline::class:
- case Block\Document::class:
- case ExtensionInline\Code::class:
- case ExtensionInline\HtmlInline::class:
- case ExtensionInline\Image::class:
- // Ignore, don't pop any blocks.
- break;
- default:
- $this->pop_block();
- break;
- }
- }
- }
- $this->parsed_blocks = $this->root_block->inner_blocks;
- }
-
- private static function convert_blocks_to_markup( $blocks ) {
- $block_markup = '';
-
- foreach ( $blocks as $block ) {
- // Start of block comment
- $comment = '';
- $p = new WP_HTML_Tag_Processor( $comment );
- $p->next_token();
- $attrs = $block->attrs;
- $content = $block->attrs['content'] ?? '';
- unset( $attrs['content'] );
- $encoded_attrs = json_encode( $attrs );
- if ( $encoded_attrs === '[]' ) {
- $encoded_attrs = '';
- }
- $p->set_modifiable_text( " wp:{$block->block_name} " . $encoded_attrs . ' ' );
- $open_comment = $p->get_updated_html();
-
- $block_markup .= $open_comment . "\n";
- $block_markup .= $content . "\n";
- $block_markup .= self::convert_blocks_to_markup( $block->inner_blocks );
-
- // End of block comment
- $block_markup .= "\n";
- }
-
- return $block_markup;
- }
-
- private function append_content( $content ) {
- if ( ! isset( $this->current_block->attrs['content'] ) ) {
- $this->current_block->attrs['content'] = '';
- }
- $this->current_block->attrs['content'] .= $content;
- }
-
- private function push_block( $name, $attributes = array(), $inner_blocks = array() ) {
- $block = $this->create_block( $name, $attributes, $inner_blocks );
- $this->current_block->inner_blocks[] = $block;
- array_push( $this->block_stack, $block );
- $this->current_block = $block;
- }
-
- private function create_block( $name, $attributes = array(), $inner_blocks = array() ) {
- return new WP_Block_Object(
- $name,
- $attributes,
- $inner_blocks
- );
- }
-
- private function pop_block() {
- if ( ! empty( $this->block_stack ) ) {
- $popped = array_pop( $this->block_stack );
- $this->current_block = end( $this->block_stack );
- return $popped;
- }
- }
-}