From 0732a6da2a7b0308df2e00767f934c0923743024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 10 Jan 2025 01:20:59 +0100 Subject: [PATCH 1/2] [Data Liberation] Introduce WP_Entity_Reader_Iterator Introduces a `new WP_Entity_Reader_Iterator( $entity_reader )` class to separate the iterator interface from the entity reader interface. ## Rationale Mixing the entity reader interface with the iterator interface is impractical, error-prone, and confusing: * Iterator methods overlap with the EntityReader methods and it's confusing to have both `next()` and `next_entity()` with different semantice. * The iterator interface expects the data to be ready on the first current() call, but the EntityReader interface assumes the first `get_current_entity()` call returns null until `next_entity()` is called * Maintaining a copy&paste `current()`, `next()` etc. methods over a number of classes is a maintenance burden ## Testing instructions TBD. I haven't tested this PR yet. There are no server-side unit tests for WP_Stream_Importer in `trunk` yet. AFAIR @zaerl you had something like that in your branch? It could come handy for testing this PR. Aside of that, manual testing in the Data Liberation plugin is appropriate. Instructions TBD. --- .../src/WP_Markdown_Importer.php | 26 +++++------ .../playground/data-liberation/bootstrap.php | 1 + .../WP_Directory_Tree_Entity_Reader.php | 43 +++++-------------- .../src/entity-readers/WP_Entity_Reader.php | 37 +--------------- .../entity-readers/WP_WXR_Entity_Reader.php | 35 --------------- .../src/import/WP_Stream_Importer.php | 20 +++++---- 6 files changed, 38 insertions(+), 124 deletions(-) diff --git a/packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php b/packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php index 9041b562d1..b891be91d5 100644 --- a/packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php +++ b/packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php @@ -8,18 +8,20 @@ public static function create_for_markdown_directory( $markdown_directory, $opti return WP_Markdown_Importer::create( function ( $cursor = null ) use ( $markdown_directory ) { // @TODO: Handle $cursor - return new WP_Directory_Tree_Entity_Reader( - new WP_Filesystem(), - array( - 'root_dir' => $markdown_directory, - 'first_post_id' => 1, - 'allowed_extensions' => array( 'md' ), - 'index_file_patterns' => array( '#^index\.md$#' ), - 'markup_converter_factory' => function ( $content ) { - return new WP_Markdown_To_Blocks( $content ); - }, - ) - ); + return new WP_Entity_Reader_Iterator( + new WP_Directory_Tree_Entity_Reader( + new WP_Filesystem(), + array( + 'root_dir' => $markdown_directory, + 'first_post_id' => 1, + 'allowed_extensions' => array( 'md' ), + 'index_file_patterns' => array( '#^index\.md$#' ), + 'markup_converter_factory' => function ( $content ) { + return new WP_Markdown_To_Blocks( $content ); + }, + ) + ) + ); }, $options, $cursor diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index b08856e838..8926b4bfe9 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -77,6 +77,7 @@ require_once __DIR__ . '/src/import/WP_Stream_Importer.php'; require_once __DIR__ . '/src/import/WP_Entity_Iterator_Chain.php'; require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php'; +require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader_Iterator.php'; require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php'; diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php index fcbcd70133..f4d4fdaa30 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php @@ -8,16 +8,16 @@ * * @TODO: Explore supporting a cursor to allow resuming from where we left off. */ -class WP_Directory_Tree_Entity_Reader implements \Iterator { +class WP_Directory_Tree_Entity_Reader extends WP_Entity_Reader { private $file_visitor; private $filesystem; private $entity; + private $is_finished = false; private $pending_directory_index; private $pending_files = array(); private $parent_ids = array(); private $next_post_id; - private $is_finished = false; private $entities_read_so_far = 0; private $allowed_extensions = array(); private $index_file_patterns = array(); @@ -154,6 +154,14 @@ public function next_entity() { return false; } + public function is_finished(): bool { + return $this->is_finished; + } + + public function get_last_error(): ?string { + return null; + } + public function get_entity(): ?\WP_Imported_Entity { return $this->entity; } @@ -308,35 +316,4 @@ protected function is_valid_file( $path ) { return in_array( $extension, $this->allowed_extensions, true ); } - /** - * @TODO: Either implement this method, or introduce a concept of - * reentrant and non-reentrant entity readers. - */ - public function get_reentrancy_cursor() { - return ''; - } - - public function current(): mixed { - if ( null === $this->entity && ! $this->is_finished ) { - $this->next(); - } - return $this->get_entity(); - } - - public function next(): void { - $this->next_entity(); - } - - public function key(): int { - return $this->entities_read_so_far - 1; - } - - public function valid(): bool { - return ! $this->is_finished; - } - - public function rewind(): void { - // @TODO: Either implement this method, or formalize the fact that - // entity readers are not rewindable. - } } diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php index a45017fd0f..237906bc61 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php @@ -7,7 +7,7 @@ * The reader implements Iterator so you can easily loop through entities: * foreach ($reader as $entity) { ... } */ -abstract class WP_Entity_Reader implements \Iterator { +abstract class WP_Entity_Reader { /** * Gets the current entity being processed. @@ -57,39 +57,4 @@ public function get_reentrancy_cursor() { return ''; } - // The iterator interface: - - public function current(): object { - if ( null === $this->get_entity() && ! $this->is_finished() && ! $this->get_last_error() ) { - $this->next(); - } - return $this->get_entity(); - } - - private $last_next_result = null; - public function next(): void { - // @TODO: Don't keep track of this. Just make sure the next_entity() - // call will make the is_finished() true. - $this->last_next_result = $this->next_entity(); - } - - public function key(): string { - return $this->get_reentrancy_cursor(); - } - - public function valid(): bool { - return false !== $this->last_next_result && ! $this->is_finished() && ! $this->get_last_error(); - } - - public function rewind(): void { - // Haven't started yet. - if ( null === $this->last_next_result ) { - return; - } - _doing_it_wrong( - __METHOD__, - 'WP_WXR_Entity_Reader does not support rewinding.', - null - ); - } } diff --git a/packages/playground/data-liberation/src/entity-readers/WP_WXR_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_WXR_Entity_Reader.php index a8247515f2..0d0dc2ac89 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_WXR_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_WXR_Entity_Reader.php @@ -905,39 +905,4 @@ private function after_entity() { $this->last_opener_attributes = array(); } - public function current(): object { - // Lazily initialize the iterator when it is first accessed. - // The alternative is eager initialization in the constructor. - if ( null === $this->entity_data && ! $this->is_finished() && ! $this->get_last_error() ) { - $this->next(); - } - return $this->get_entity(); - } - - private $last_next_result = null; - public function next(): void { - // @TODO: Don't keep track of this. Just make sure the next_entity() - // call will make the is_finished() true. - $this->last_next_result = $this->next_entity(); - } - - public function key(): string { - return $this->get_reentrancy_cursor(); - } - - public function valid(): bool { - return false !== $this->last_next_result && ! $this->is_finished() && ! $this->get_last_error(); - } - - public function rewind(): void { - // Haven't started yet. - if ( null === $this->last_next_result ) { - return; - } - _doing_it_wrong( - __METHOD__, - 'WP_WXR_Reader does not support rewinding.', - null - ); - } } diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index 77ebd09c00..928820db62 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -132,7 +132,9 @@ class WP_Stream_Importer { public static function create_for_wxr_file( $wxr_path, $options = array(), $cursor = null ) { return static::create( function ( $cursor = null ) use ( $wxr_path ) { - return WP_WXR_Entity_Reader::create( WP_File_Reader::create( $wxr_path ), $cursor ); + return new WP_Entity_Reader_Iterator( + WP_WXR_Entity_Reader::create( WP_File_Reader::create( $wxr_path ), $cursor ) + ); }, $options, $cursor @@ -142,7 +144,9 @@ function ( $cursor = null ) use ( $wxr_path ) { public static function create_for_wxr_url( $wxr_url, $options = array(), $cursor = null ) { return static::create( function ( $cursor = null ) use ( $wxr_url ) { - return WP_WXR_Entity_Reader::create( new WP_Remote_File_Reader( $wxr_url ), $cursor ); + return new WP_Entity_Reader_Iterator( + WP_WXR_Entity_Reader::create( new WP_Remote_File_Reader( $wxr_url ), $cursor ) + ); }, $options, $cursor @@ -240,11 +244,11 @@ public function get_reentrancy_cursor() { protected static function parse_options( $options ) { if ( ! isset( $options['new_site_url'] ) ) { - $options['new_site_url'] = get_site_url(); + // $options['new_site_url'] = get_site_url(); } if ( ! isset( $options['uploads_path'] ) ) { - $options['uploads_path'] = wp_get_upload_dir()['basedir']; + // $options['uploads_path'] = wp_get_upload_dir()['basedir']; } // Remove the trailing slash to make concatenation easier later. $options['uploads_path'] = rtrim( $options['uploads_path'], '/' ); @@ -435,7 +439,7 @@ protected function index_next_entities( $count = 10000 ) { $this->entity_iterator->next(); } - $this->resume_at_entity = $this->entity_iterator->get_reentrancy_cursor(); + $this->resume_at_entity = $this->entity_iterator->get_entity_reader()->get_reentrancy_cursor(); return true; } @@ -570,7 +574,7 @@ protected function frontload_next_entity() { * and enqueue them for download. */ $entity = $this->entity_iterator->current(); - $cursor = $this->entity_iterator->get_reentrancy_cursor(); + $cursor = $this->entity_iterator->get_entity_reader()->get_reentrancy_cursor(); $this->active_downloads[ $cursor ] = array(); $data = $entity->get_data(); @@ -715,7 +719,7 @@ protected function import_next_entity() { /** * @TODO: Update the progress information. */ - $this->resume_at_entity = $this->entity_iterator->get_reentrancy_cursor(); + $this->resume_at_entity = $this->entity_iterator->get_entity_reader()->get_reentrancy_cursor(); $this->entity_iterator->next(); return true; } @@ -744,7 +748,7 @@ protected function enqueue_attachment_download( string $raw_url, $options = arra return false; } - $entity_cursor = $this->entity_iterator->get_reentrancy_cursor(); + $entity_cursor = $this->entity_iterator->get_entity_reader()->get_reentrancy_cursor(); $this->active_downloads[ $entity_cursor ][ $raw_url ] = true; return true; } From ecfc8caa55027bbc6363a91cd113a27fb20c2163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 10 Jan 2025 15:10:54 +0100 Subject: [PATCH 2/2] Add the missing iterator class --- .../WP_Entity_Reader_Iterator.php | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader_Iterator.php diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader_Iterator.php b/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader_Iterator.php new file mode 100644 index 0000000000..83779b1169 --- /dev/null +++ b/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader_Iterator.php @@ -0,0 +1,65 @@ +entity_reader = $entity_reader; + } + + public function get_entity_reader() { + return $this->entity_reader; + } + + #[\ReturnTypeWillChange] + public function current() { + $this->ensure_initialized(); + return $this->entity_reader->get_entity(); + } + + #[\ReturnTypeWillChange] + public function next() { + $this->ensure_initialized(); + $this->advance_to_next_entity(); + } + + #[\ReturnTypeWillChange] + public function key() { + $this->ensure_initialized(); + return $this->key; + } + + #[\ReturnTypeWillChange] + public function valid() { + $this->ensure_initialized(); + return ! $this->entity_reader->is_finished(); + } + + #[\ReturnTypeWillChange] + public function rewind() { + throw new Data_Liberation_Exception( 'WP_Entity_Reader_Iterator does not support rewinding.' ); + } + + private function ensure_initialized() { + if ( ! $this->is_initialized ) { + $this->is_initialized = true; + $this->advance_to_next_entity(); + } + } + + private function advance_to_next_entity() { + if ( $this->entity_reader->next_entity() ) { + $this->key++; + } + } + +} \ No newline at end of file