Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Data Liberation] Move Markdown importer to a separate package #2093

Merged
merged 2 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
<?php

use WordPress\Filesystem\WP_Filesystem;

/**
 * Stream importer preconfigured for a directory tree of Markdown files.
 *
 * Entities are read through WP_Directory_Tree_Entity_Reader, which is given a
 * markup converter factory that builds WP_Markdown_To_Blocks instances. Asset
 * URLs that start with the `local_markdown_assets_url_prefix` option are
 * rewritten to `file://` URLs under the `local_markdown_assets_root` option.
 */
class WP_Markdown_Importer extends WP_Stream_Importer {

	/**
	 * Creates an importer that reads `.md` files from a local directory.
	 *
	 * @param string $markdown_directory Directory passed to the entity reader as `root_dir`.
	 * @param array  $options            Importer options, forwarded to create().
	 * @param mixed  $cursor             Optional cursor, forwarded to create().
	 * @return mixed Whatever WP_Markdown_Importer::create() returns.
	 */
	public static function create_for_markdown_directory( $markdown_directory, $options = array(), $cursor = null ) {
		return WP_Markdown_Importer::create(
			function ( $cursor = null ) use ( $markdown_directory ) {
				// @TODO: Handle $cursor
				return new WP_Directory_Tree_Entity_Reader(
					new WP_Filesystem(),
					array(
						'root_dir'                 => $markdown_directory,
						'first_post_id'            => 1,
						'allowed_extensions'       => array( 'md' ),
						'index_file_patterns'      => array( '#^index\.md$#' ),
						'markup_converter_factory' => function ( $content ) {
							return new WP_Markdown_To_Blocks( $content );
						},
					)
				);
			},
			$options,
			$cursor
		);
	}

	/**
	 * Validates and normalizes the Markdown-specific importer options.
	 *
	 * Requires `source_site_url` and `local_markdown_assets_root`; the latter
	 * must be an existing directory. Copies `source_site_url` into
	 * `default_source_site_url` and strips trailing slashes from
	 * `local_markdown_assets_root` before delegating to the parent.
	 *
	 * @param array $options Raw importer options.
	 * @return mixed False when a required option is missing or invalid,
	 *               otherwise the result of parent::parse_options().
	 */
	protected static function parse_options( $options ) {
		if ( ! isset( $options['source_site_url'] ) ) {
			_doing_it_wrong( __METHOD__, 'The source_site_url option is required.', '__WP_VERSION__' );
			return false;
		}
		$options['default_source_site_url'] = $options['source_site_url'];

		// The notices below name the option exactly as the caller must spell it.
		if ( ! isset( $options['local_markdown_assets_root'] ) ) {
			_doing_it_wrong( __METHOD__, 'The local_markdown_assets_root option is required.', '__WP_VERSION__' );
			return false;
		}
		if ( ! is_dir( $options['local_markdown_assets_root'] ) ) {
			_doing_it_wrong( __METHOD__, 'The local_markdown_assets_root option must point to a directory.', '__WP_VERSION__' );
			return false;
		}
		// Drop trailing slashes so the root can be joined with a "/"-prefixed relative path.
		$options['local_markdown_assets_root'] = rtrim( $options['local_markdown_assets_root'], '/' );

		return parent::parse_options( $options );
	}

	/**
	 * Maps prefixed local asset URLs to `file://` URLs before the parent rewrites them.
	 *
	 * @param string $raw_url      URL as found in the imported content.
	 * @param mixed  $context_path Passed through to the parent implementation.
	 * @return mixed Result of parent::rewrite_attachment_url().
	 */
	protected function rewrite_attachment_url( string $raw_url, $context_path = null ) {
		/**
		 * URLs starting with the configured `local_markdown_assets_url_prefix`
		 * (e.g. `@site` in Docusaurus docs) refer to local files. Let's convert
		 * them to file:// URLs rooted at `local_markdown_assets_root`.
		 */
		if (
			isset( $this->options['local_markdown_assets_url_prefix'] ) &&
			str_starts_with( $raw_url, $this->options['local_markdown_assets_url_prefix'] )
		) {
			// @TODO: Source the file from the current input stream if we can.
			// This would allow stream-importing zipped Markdown and WXR directory
			// structures.
			// Maybe for v1 we could just support importing them from ZIP files
			// that are already downloaded and available in a local directory just
			// to avoid additional data transfer and the hurdle with implementing
			// multiple range requests.
			$relative_asset_path = substr( $raw_url, strlen( $this->options['local_markdown_assets_url_prefix'] ) );
			// Normalize to exactly one leading slash.
			$relative_asset_path = '/' . ltrim( $relative_asset_path, '/' );
			$raw_url             = (
				'file://' .
				$this->options['local_markdown_assets_root'] .
				$relative_asset_path
			);
		}

		return parent::rewrite_attachment_url( $raw_url, $context_path );
	}

	/**
	 * When processing Markdown, we'll download all the images
	 * referenced in the image tags.
	 *
	 * @TODO: Actually, should we?
	 * @TODO: How can we process the videos?
	 * @TODO: What other asset types are there?
	 *
	 * @param WP_Block_Markup_Url_Processor $p Processor positioned on a matched URL.
	 * @return bool True only for the `src` attribute of an `IMG` tag.
	 */
	protected function url_processor_matched_asset_url( WP_Block_Markup_Url_Processor $p ) {
		return (
			$p->get_tag() === 'IMG' &&
			$p->get_inspected_attribute_name() === 'src'
		);
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
use League\CommonMark\Extension\Table\TableRow;
use League\CommonMark\Extension\Table\TableSection;


class WP_Markdown_To_Blocks {
class WP_Markdown_To_Blocks implements WP_Block_Markup_Converter {
const STATE_READY = 'STATE_READY';
const STATE_COMPLETE = 'STATE_COMPLETE';

Expand All @@ -40,19 +39,26 @@ public function __construct( $markdown ) {
$this->markdown = $markdown;
}

public function parse() {
public function convert() {
if ( self::STATE_READY !== $this->state ) {
return false;
}
$this->convert_markdown_to_blocks();
$this->block_markup = self::convert_blocks_to_markup( $this->parsed_blocks );
$this->block_markup = WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks );
return true;
}

public function get_frontmatter() {
public function get_all_metadata() {
return $this->frontmatter;
}

/**
 * Looks up a single frontmatter entry by key.
 *
 * @param string $key Frontmatter key to look up.
 * @return mixed|null Element at index 0 of the stored entry, or null when the key is absent.
 */
public function get_meta_value( $key ) {
	return array_key_exists( $key, $this->frontmatter )
		? $this->frontmatter[ $key ][0]
		: null;
}

/**
 * Returns the value currently stored in $this->block_markup.
 *
 * convert() assigns this property from the parsed blocks.
 * NOTE(review): the value before convert() has run is not visible
 * here — confirm the property's initial value.
 *
 * @return mixed The stored block markup.
 */
public function get_block_markup() {
return $this->block_markup;
}
Expand All @@ -74,7 +80,11 @@ private function convert_markdown_to_blocks() {
$parser = new MarkdownParser( $environment );

$document = $parser->parse( $this->markdown );
$this->frontmatter = $document->data;
$this->frontmatter = [];
foreach( $document->data as $key => $value ) {
// Use an array as a value to comply with the WP_Block_Markup_Converter interface.
$this->frontmatter[ $key ] = [$value];
}

$walker = $document->walker();
while ( true ) {
Expand Down Expand Up @@ -163,7 +173,7 @@ private function convert_markdown_to_blocks() {
'content' => '<pre class="wp-block-code"><code>' . trim( str_replace( "\n", '<br>', htmlspecialchars( $node->getLiteral() ) ) ) . '</code></pre>',
)
);
if ( $node->getInfo() ) {
if ( method_exists( $node, 'getInfo' ) && $node->getInfo() ) {
$this->current_block->attrs['language'] = preg_replace( '/[ \t\r\n\f].*/', '', $node->getInfo() );
}
break;
Expand Down Expand Up @@ -339,35 +349,6 @@ private function convert_markdown_to_blocks() {
$this->parsed_blocks = $this->root_block->inner_blocks;
}

/**
 * Serializes a list of block objects into block markup.
 *
 * Each block is emitted as an opening `wp:` comment carrying its JSON-encoded
 * attributes, then its `content` attribute, then the markup of its inner
 * blocks, then a closing `/wp:` comment. Every part ends with a newline.
 *
 * @param iterable $blocks Objects exposing block_name, attrs and inner_blocks.
 * @return string The concatenated markup; an empty string when there are no blocks.
 */
private static function convert_blocks_to_markup( $blocks ) {
	$markup = '';

	foreach ( $blocks as $block ) {
		// `content` becomes the block body, so it is kept out of the JSON attributes.
		$json_attrs = $block->attrs;
		$inner_html = $block->attrs['content'] ?? '';
		unset( $json_attrs['content'] );

		$serialized_attrs = json_encode( $json_attrs );
		if ( '[]' === $serialized_attrs ) {
			// Nothing left to serialize; omit the attributes entirely.
			$serialized_attrs = '';
		}

		// Build the opening delimiter by replacing the text of an empty HTML comment.
		$opener = new WP_HTML_Tag_Processor( '<!-- -->' );
		$opener->next_token();
		$opener->set_modifiable_text( " wp:{$block->block_name} " . $serialized_attrs . ' ' );

		$markup .= $opener->get_updated_html() . "\n"
			. $inner_html . "\n"
			. self::convert_blocks_to_markup( $block->inner_blocks )
			. "<!-- /wp:{$block->block_name} -->\n";
	}

	return $markup;
}

private function append_content( $content ) {
if ( ! isset( $this->current_block->attrs['content'] ) ) {
$this->current_block->attrs['content'] = '';
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?php

require_once __DIR__ . '/WP_Markdown_Importer.php';
require_once __DIR__ . '/WP_Markdown_To_Blocks.php';

require_once __DIR__ . '/../vendor/autoload.php';
14 changes: 0 additions & 14 deletions packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,6 @@

require_once __DIR__ . '/src/utf8_decoder.php';

/**
* Require conditionally – these files are missing from the data-liberation-core.phar
* to reduce the bundle size (we'd need to include a large markdown parser and its
* dependencies, too).
*
* @TODO: Build a separate "data-liberation-markdown" phar file plugin with the Markdown
* importing functionality.
*/
if ( file_exists( __DIR__ . '/src/markdown-api/WP_Markdown_To_Blocks.php' ) ) {
require_once __DIR__ . '/src/markdown-api/WP_Markdown_To_Blocks.php';
require_once __DIR__ . '/src/markdown-api/WP_Markdown_Directory_Tree_Reader.php';
require_once __DIR__ . '/src/markdown-api/WP_Markdown_HTML_Processor.php';
}

// When running in Playground, the composer autoloader script sees CLI SAPI and
// tries to use the STDERR, STDIN, and STDOUT constants.
// @TODO: Don't use the "cli" SAPI string and don't allow composer to run platform checks.
Expand Down
Loading
Loading