diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0fab2fb --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/.idea +vendor +composer.lock \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index d173406..d3eead8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,13 @@ language: php php: - - 7.1 + - '7.2' + - '7.3' before_script: - travis_retry composer self-update - - travis_retry composer install --no-interaction --prefer-source --dev + - travis_retry composer global require hirak/prestissimo + - travis_retry composer install --no-interaction --dev - travis_retry phpenv rehash script: diff --git a/LICENSE b/LICENSE index 2c56b57..5e7b846 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2017 Benoit POLASZEK +Copyright (c) 2019 Benoit POLASZEK Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 068ffd0..0850806 100644 --- a/README.md +++ b/README.md @@ -5,84 +5,323 @@ [![Quality Score](https://img.shields.io/scrutinizer/g/bpolaszek/bentools-etl.svg?style=flat-square)](https://scrutinizer-ci.com/g/bpolaszek/bentools-etl) [![Total Downloads](https://poser.pugx.org/bentools/etl/downloads)](https://packagist.org/packages/bentools/etl) -This **PHP 7.1+** library provides a very simple implementation of the `Extract / Transform / Load` pattern. +Okay, so you heard about the [Extract / Transform / Load](https://en.wikipedia.org/wiki/Extract,_transform,_load) pattern and you're looking for a PHP library to do the stuff. -It is heavily inspired by the [knplabs/etl](https://github.com/docteurklein/php-etl) library, with a more generic approach and less dependencies. +Alright, let's go! + +Installation +------------ + +```bash +composer require bentools/etl:^3.0@alpha +``` + +_Warning: version 3.0 is a complete rewrite and involves major BC breaks. 
Don't upgrade from `^2.0` unless you know what you're doing!_ + +Usage +----- + +To sum up, you will apply _transformations_ onto an `iterable` of "anythings" in order to _load_ them in some place. +Sometimes your `iterable` is ready to go, sometimes you just don't need to perform transformations, but anyway you need to load that data somewhere. + +Let's start with a really simple example: -Overview --------- ```php -require_once __DIR__ . '/vendor/autoload.php'; - -use BenTools\ETL\Event\ContextElementEvent; -use BenTools\ETL\Extractor\IncrementorExtractor; -use BenTools\ETL\Loader\DebugLoader; -use BenTools\ETL\Runner\ETLRunner; -use BenTools\ETL\Transformer\CallbackTransformer; - -$items = [ - 'France', - 'Germany', - 'Poland', +use BenTools\ETL\EtlBuilder; +use BenTools\ETL\Loader\JsonFileLoader; + +$data = [ + 'foo', + 'bar', ]; -$extract = new IncrementorExtractor(); -$transform = new CallbackTransformer('strtolower'); -$load = new DebugLoader(); -$run = new ETLRunner(); - -// Optionnal: hook on the AFTER_EXTRACT event to skip some elements -$run->onExtract(function (ContextElementEvent $event) { - $element = $event->getElement(); - if ('Germany' === $element->getData()) { - $element->skip(); - } -}); -$run($items, $extract, $transform, $load); +$etl = EtlBuilder::init() + ->loadInto(JsonFileLoader::toFile(__DIR__.'/data.json')) + ->createEtl(); +$etl->process($data); ``` -Output: +Basically you just loaded the string `["foo","bar"]` into `data.json`. Yay! 
+ +Now let's apply a basic uppercase transformation: + ```php -array(2) { - [0]=> - string(6) "france" - [2]=> - string(7) "poland" -} +use BenTools\ETL\EtlBuilder; +use BenTools\ETL\Loader\JsonFileLoader; + +$data = [ + 'foo', + 'bar', +]; +$etl = EtlBuilder::init() + ->transformWith(new CallableTransformer('strtoupper')) + ->loadInto(JsonFileLoader::toFile(__DIR__.'/data.json')) + ->createEtl(); +$etl->process($data); ``` -Installation ------------- +Didn't you just write the string `["FOO","BAR"]` into `data.json` ? Yes, you did! + +Okay, but what if your source data is not an iterable (yet)? It can be a CSV file or a CSV string, for instance. Here's another example: + +```php +use BenTools\ETL\EtlBuilder; +use BenTools\ETL\Extractor\CsvExtractor; +use BenTools\ETL\Loader\JsonFileLoader; + +$data = <<<CSV +country_code,country_name,president +US,USA,"Donald Trump" +RU,Russia,"Vladimir Putin" +CSV; + +$etl = EtlBuilder::init() + ->extractFrom(new CsvExtractor( + $delimiter = ',', + $enclosure = '"', + $escapeString = '\\', + $createKeys = true, // 1st row will be used for keys + CsvExtractor::INPUT_STRING + )) + ->loadInto(JsonFileLoader::toFile(__DIR__.'/data.json', \JSON_PRETTY_PRINT)) + ->createEtl(); +$etl->process($data); ``` -composer require bentools/etl + +As you guessed, the following content was just written into `data.json`: + +```json +[ + { + "country_code": "US", + "country_name": "USA", + "president": "Donald Trump" + }, + { + "country_code": "RU", + "country_name": "Russia", + "president": "Vladimir Putin" + } +] ``` -Tests ------ +We provide helpful extractors and loaders to manipulate JSON, CSV, text, and you'll also find a `DoctrineORMLoader` for when your transformer yields Doctrine entities. + +Because yes, a transformer must return a `\Generator`. Why? Because a single extracted item can lead to several output items. 
Let's take a more sophisticated example: + +```php +use BenTools\ETL\EtlBuilder; +use BenTools\ETL\Extractor\JsonExtractor; + +$pdo = new \PDO('mysql:host=localhost;dbname=test'); +$etl = EtlBuilder::init() + ->extractFrom(new JsonExtractor()) + ->transformWith( + function ($item) use ($pdo) { + $stmt = $pdo->prepare('SELECT country_code FROM countries WHERE country_code = ?'); + $stmt->bindValue(1, $item['country_code'], \PDO::PARAM_STR); + $stmt->execute(); + if (0 === $stmt->rowCount()) { + yield ['INSERT INTO countries (country_code, country_name) VALUES (?, ?)', [$item['country_code'], $item['country_name']]]; + } + + yield ['REPLACE INTO presidents (country_code, president_name) VALUES (?, ?)', [$item['country_code'], $item['president']]]; + + } + ) + ->loadInto( + $loader = function (\Generator $queries) use ($pdo) { + foreach ($queries as $query) { + list($sql, $params) = $query; + $stmt = $pdo->prepare($sql); + foreach ($params as $i => $value) { + $stmt->bindValue($i + 1, $value); + } + $stmt->execute(); + } + } + ) + ->createEtl(); + +$etl->process(__DIR__.'/data.json'); // The JsonExtractor will convert that file to a PHP array ``` -./vendor/bin/phpunit + +As you can see, from a single item, we loaded up to 2 queries. + +And as you can notice, your _extractors_, _transformers_ and _loaders_ can implement `ExtractorInterface`, `TransformerInterface` or `LoaderInterface` as well as being simple `callables`. + + +Skipping items +-------------- + +Each _extractor_ / _transformer_ / _loader_ callback gets the current `Etl` object passed in their arguments. 
+ +This allows you to ask the ETL to skip an item, or even stop the process: + +```php +use BenTools\ETL\Etl; +use BenTools\ETL\EtlBuilder; +use BenTools\ETL\Transformer\CallableTransformer; + +$fruits = [ + 'apple', + 'banana', + 'strawberry', + 'pineapple', + 'pear', +]; + + +$storage = []; +$etl = EtlBuilder::init() + ->transformWith(new CallableTransformer('strtoupper')) + ->loadInto( + function ($generated, $key, Etl $etl) use (&$storage) { + foreach ($generated as $fruit) { + if ('BANANA' === $fruit) { + $etl->skipCurrentItem(); + break; + } + if ('PINEAPPLE' === $fruit) { + $etl->stopProcessing(); + break; + } + $storage[] = $fruit; + } + }) + ->createEtl(); + +$etl->process($fruits); + +var_dump($storage); // ['APPLE', 'STRAWBERRY'] +``` + + +Events +------ + +Now you're wondering how you can hook on the ETL lifecycle, to log things, handle exceptions, ... This library ships with a built-in Event Dispatcher that you can leverage when: + +* The ETL starts +* An item has been extracted +* The extraction failed +* An item has been transformed +* Transformation failed +* An item has been loaded +* Loading failed +* An item has been skipped +* The ETL was stopped +* A flush operation was completed +* A rollback operation was completed +* The ETL completed the whole process. + +The _item_ events will allow you to mark the current item to be skipped, or even handle runtime exceptions. 
Let's take another example: + +```php +use BenTools\ETL\EtlBuilder; +use BenTools\ETL\EventDispatcher\Event\ItemExceptionEvent; + +$fruits = [ + 'apple', + new \RuntimeException('Is tomato a fruit?'), + 'banana', +]; + + +$storage = []; +$etl = EtlBuilder::init() + ->transformWith( + function ($item, $key) { + if ($item instanceof \Exception) { + throw $item; + } + + yield $key => $item; + }) + ->loadInto( + function (iterable $transformed) use (&$storage) { + foreach ($transformed as $fruit) { + $storage[] = $fruit; + } + }) + ->onTransformException( + function (ItemExceptionEvent $event) { + echo $event->getException()->getMessage(); // Is tomato a fruit? + $event->ignoreException(); + }) + ->createEtl(); + +$etl->process($fruits); + +var_dump($storage); // ['apple', 'banana'] ``` +Here, we intentionally threw an exception during the _transform_ operation. But thanks to the event dispatcher, we could tell the ETL this exception can be safely ignored and it can carry on with the rest of the process. -Documentation and recipes -------------------------- -[Concept](doc/Concept.md) +You can attach as many event listeners as you wish and sort them by priority. -[Getting started](doc/GettingStarted.md) -[Extractors](doc/Extractors.md) +Recipes +------- -[Transformers](doc/Transformers.md) +A recipe is an ETL pattern that can be reused through different tasks. It's a kind of `EtlBuilder` factory, but you must know that a recipe can **replace** the builder's _extractor_, _transformer_ and _loader_ but will **add** event listeners. 
+If you want to log everything that goes through an ETL for example, use our built-in Logger recipe: -[Loaders](doc/Loaders.md) +```php +use BenTools\ETL\EtlBuilder; +use BenTools\ETL\Recipe\LoggerRecipe; -[Events](doc/Events.md) +$etl = EtlBuilder::init() + ->useRecipe(new LoggerRecipe($logger)) + ->createEtl(); +``` -[Advanced CSV to JSON conversion example](doc/Recipes/AdvancedCSVToJSON.md) +You can also create your own recipes: +```php +use BenTools\ETL\EtlBuilder; +use BenTools\ETL\Extractor\CsvExtractor; +use BenTools\ETL\Loader\JsonFileLoader; +use BenTools\ETL\Recipe\LoggerRecipe; +use BenTools\ETL\Recipe\Recipe; + +class CSVtoJSONRecipe extends Recipe +{ + private $outputFile; + + public function __construct(string $outputFile) + { + $this->outputFile = $outputFile; + } + + /** + * @inheritDoc + */ + public function updateBuilder(EtlBuilder $builder): EtlBuilder + { + return $builder + ->extractFrom(new CsvExtractor()) + ->loadInto(JsonFileLoader::toFile($this->outputFile)) + ; + } +} + +$etl = EtlBuilder::init() + ->useRecipe(new CSVtoJSONRecipe('output.json')) + ->useRecipe(new LoggerRecipe($logger)) + ->createEtl(); +$etl->process('input.csv'); +``` + + +Tests +----- + +```bash +./vendor/bin/phpunit +``` License ------- diff --git a/composer.json b/composer.json index afeda7f..624db72 100644 --- a/composer.json +++ b/composer.json @@ -1,6 +1,6 @@ { "name": "bentools/etl", - "description": "Some classes to implement the ETL pattern, with the ability to hook on each event.", + "description": "Extract / Transform / Load in PHP - Multiple formats - No dependency.", "license": "MIT", "authors": [ { @@ -8,15 +8,25 @@ } ], "require": { - "php": ">=7.1", - "psr/log": "^1.0" + "php": ">=7.2", + "thecodingmachine/safe": "^0.1.14", + "psr/event-dispatcher": "^1.0" + }, + "require-dev": { + "phpunit/phpunit": "^6.0", + "squizlabs/php_codesniffer": "~2.0", + "doctrine/orm": "^2.5", + "symfony/var-dumper": "^4.0", + "php-coveralls/php-coveralls": "^2.1", + "psr/log": 
"^1.1" }, "autoload-dev": { "psr-4": { "BenTools\\ETL\\Tests\\": "tests/src" }, "files": [ - "vendor/symfony/var-dumper/Resources/functions/dump.php" + "vendor/symfony/var-dumper/Resources/functions/dump.php", + "tests/functions.php" ] }, "autoload": { @@ -24,18 +34,9 @@ "BenTools\\ETL\\": "src" } }, - "require-dev": { - "phpunit/phpunit": "^6.0", - "squizlabs/php_codesniffer": "~2.0", - "symfony/event-dispatcher": "^3.2", - "doctrine/orm": "^2.5", - "guzzlehttp/promises": "^1.3", - "satooshi/php-coveralls": "^1.0", - "symfony/var-dumper": "^3.2" - }, - "suggest": { - "symfony/event-dispatcher": "An event dispatcher to hook into the ETL process.", - "guzzlehttp/promises": "Process Asynchronous ETL", - "cocur/slugify": "Generates slugs from strings - useful to create array keys." + "extra": { + "branch-alias": { + "dev-3.0": "3.0.x-dev" + } } } diff --git a/doc/Concept.md b/doc/Concept.md deleted file mode 100644 index 76528b1..0000000 --- a/doc/Concept.md +++ /dev/null @@ -1,76 +0,0 @@ -Concept -======= -The idea behind the ETL pattern is to loop over an [`iterable`](https://wiki.php.net/rfc/iterable) to transfer each key / value pair through 3 tasks: - -Extract -------- -The `Extractor` is a [`callable`](http://php.net/manual/en/language.types.callable.php) factory which is responsible to return a new `BenTools\ETL\Context\ContextElementInterface` object which contains the extracted data. - -Transform ---------- -The `Transformer` is a [`callable`](http://php.net/manual/en/language.types.callable.php) that takes the `ContextElement`'s extracted data, transforms it into the desired output, can even change the key, and hydrates back the `ContextElement`. - -Load ----- -The `Loader` is a [`callable`](http://php.net/manual/en/language.types.callable.php) which takes the `ContextElement` as argument and send the transformed data in a persistence layer, a HTTP Post, a file, ... 
- - - - -Implementation -============== - -All you have to do is to implement `BenTools\ETL\Extractor\ExtractorInterface`, `BenTools\ETL\Transformer\TransformerInterface` and `BenTools\ETL\Loader\LoaderInterface` or use already provided classes. - -The only method to implement is `__invoke()`. Thus, feel free to use simple _callables_ that respect the same arguments and return values. - -You then need an [`iterable`](https://wiki.php.net/rfc/iterable) - i.e an `array`, `\Traversable`, `\Iterator`, `\IteratorAggregate` or a `\Generator` to loop over. - - -Usage ------ - -The `BenTools\ETL\Runner\ETLRunner` class is the implementation of the ETL pattern. - -Here's the contract: - -```php -namespace BenTools\ETL\Runner; - -use BenTools\ETL\Extractor\ExtractorInterface; -use BenTools\ETL\Transformer\TransformerInterface; - -interface ETLRunnerInterface -{ - - /** - * @param iterable|\Generator|\Traversable|array $items - * @param callable|ExtractorInterface $extractor - * @param callable|TransformerInterface $transformer - * @param callable $loader - */ - public function __invoke(iterable $items, callable $extractor, callable $transformer = null, callable $loader); -} -``` - -How to use it: -```php -use BenTools\ETL\Runner\ETLRunner; -$run = new ETLRunner(); -$run($iterable, $extractor, $transformer, $loader); -``` - -When invoked, the runner will loop over `$iterable`, then call the `$extractor`, the `$transformer` and the `$loader` consecutively. - -As you can notice, the transformer is optionnal, meaning the extracted data can be directly loaded if no transformation is needed. 
- - -Advanced Usage --------------- -The `BenTools\ETL\Runner\ETLRunner` constructor accepts 2 optionnal arguments: - -* A `Psr\Log\LoggerInterface` logger like Monolog to get some info about the ETL process -* A `BenTools\ETL\Event\EventDispatcher\EventDispatcherInterface` event manager of your own (or use either our built-in Event Dispatcher or the Symfony Bridge provided) to hook on the ETL process (see [Events](Events.md)). - - -Next: [Getting started](GettingStarted.md) \ No newline at end of file diff --git a/doc/Events.md b/doc/Events.md deleted file mode 100644 index 505093d..0000000 --- a/doc/Events.md +++ /dev/null @@ -1,45 +0,0 @@ -Event Dispatcher -================ - -The library ships with a built-in Event dispatcher that allow you to hook at different points within the ETL process. - -If you're running Symfony, you can use Symfony's Event Dispatcher by wrapping it within into the `BenTools\ETL\Event\EventDispatcher\Bridge\Symfony\SymfonyEventDispatcherBridge` class. - -You're also free to create your own bridge if you're using another framework, just implement `BenTools\ETL\Event\EventDispatcher\EventDispatcherInterface`. - -ETL Events -========== - -These events (see `BenTools\ETL\Event\ETLEvents`) are fired by `BenTools\ETL\Runner\ETLRunner` during the loop. - -ETLEvents::START ------------------------------------ -This event is fired just before beginning iterating. - -ETLEvents::AFTER_EXTRACT -------------------------------------------- -This event is fired after an item's extraction. You get a fresh `BenTools\ETL\Context\ContextElement` object. - -ETLEvents::AFTER_TRANSFORM -------------------------------------------- -This event is fired once the item is transformed. You get the `BenTools\ETL\Context\ContextElement` object with the transformed data. - -ETLEvents::AFTER_LOAD -------------------------------------------- -This event is fired on load. You have access to the `BenTools\ETL\Context\ContextElement` object. 
- -**Note**: For loaders that implement `BenTools\ETL\Loader\FlushableLoaderInterface`, like `BenTools\ETL\Loader\DoctrineORMLoader` a loaded object does not necessarily mean it is already commited to the persistence layer. - -ETLEvents::AFTER_FLUSH -------------------------------------------- -This event is fired when a `BenTools\ETL\Loader\FlushableLoaderInterface` loader flushes waiting objects. - -You can't get a `BenTools\ETL\Context\ContextElement` object since it is a global event. - - -ETLEvents::END ------------------------------------ -This event is fired when the process is finished. - - -Next: [Recipes](Recipes/AdvancedCSVToJSON.md) \ No newline at end of file diff --git a/doc/Extractors.md b/doc/Extractors.md deleted file mode 100644 index 9c270d7..0000000 --- a/doc/Extractors.md +++ /dev/null @@ -1,224 +0,0 @@ -Extractors -========== - -Extractors are kind of factories: when you iterate over a PHP loop, like `foreach ($items AS $key => $value)`, you get a `$key` and a `$value`. - -However, the real way to identify of your resource may not be the `$key` itself, but something within your `$value` (`$value->getId()` for instance). - -Here comes the **ContextElement**. A **ContextElement** carries _your_ id (which may, or may not, differ from `$key`), and the data associated (`$value`). - -As a **ContextElement** factory, the role of an extractor is to return a `BenTools\ETL\Context\ContextElementInterface` object and hydrate its id and its data. - -To respect the ETL pattern, if you define your own extractors, it's also their responsibility to validate the source data (and they might throw exceptions or call `$contextElement->skip()` or `$contextElement->stop()` if this element is blocking the whole loop). 
- -To make things simpler we provide a default `BenTools\ETL\Context\ContextElement` class and default extractors: - -KeyValueExtractor ------------------ -This is the most basic extractor, since it extracts the key and the value provided by the iterator. - -```php -require_once __DIR__ . '/vendor/autoload.php'; - -use BenTools\ETL\Extractor\KeyValueExtractor; -use BenTools\ETL\Loader\DebugLoader; - -$items = [ - 'foo' => 'bar', - 'bar' => 'baz' -]; -$extract = new KeyValueExtractor(); -$load = new DebugLoader(); -foreach ($items AS $key => $value) { - $element = $extract($key, $value); - $load($element); -} -$load->flush(); -``` - -Ouputs: -```php -array (size=2) - 'foo' => string 'bar' (length=3) - 'bar' => string 'baz' (length=3) -``` - -IncrementorExtractor --------------------- - -This extractors provides an incremental key. - -```php -require_once __DIR__ . '/vendor/autoload.php'; - -use BenTools\ETL\Extractor\IncrementorExtractor; -use BenTools\ETL\Loader\DebugLoader; - -$items = [ - 'foo' => 'bar', - 'bar' => 'baz' -]; -$extract = new IncrementorExtractor(); -$load = new DebugLoader(); -foreach ($items AS $key => $value) { - $element = $extract($key, $value); - $load($element); -} -$load->flush(); -``` - -Outputs: -```php -array (size=2) - 0 => string 'bar' (length=3) - 1 => string 'baz' (length=3) -``` - -ArrayPropertyExtractor ----------------------- -When the value of each element of the loop is an array, you can specify which array key will be used to generate the id. - -```php -require_once __DIR__ . 
'/vendor/autoload.php'; - -use BenTools\ETL\Extractor\ArrayPropertyExtractor; -use BenTools\ETL\Loader\DebugLoader; - -$items = [ - 'foo' => [ - 'id' => '6ef02334-002e-11e7-93ae-92361f002671', - 'name' => 'Foo' - ], - 'bar' => [ - 'id' => 'a55b81da-f270-4de0-907a-25488e5ffcc8', - 'name' => 'Bar' - ], -]; -$extract = new ArrayPropertyExtractor('id'); -$load = new DebugLoader(); -foreach ($items AS $key => $value) { - $element = $extract($key, $value); - $load($element); -} -$load->flush(); -``` - -Outputs: -```php -array (size=2) - '6ef02334-002e-11e7-93ae-92361f002671' => - array (size=1) - 'name' => string 'Foo' (length=3) - 'a55b81da-f270-4de0-907a-25488e5ffcc8' => - array (size=1) - 'name' => string 'Bar' (length=3) -``` - -As a consequence, the `$value['id']` is unset. You can prevent this behaviour by setting the 2nd argument to false: `new ArrayPropertyExtractor('id', false);` - - -ObjectPropertyExtractor ------------------------ -The same, for objects with public properties. Note that it's not possible to unset `$value->id` in that case. -```php -require_once __DIR__ . 
'/vendor/autoload.php'; - -use BenTools\ETL\Extractor\ObjectPropertyExtractor; -use BenTools\ETL\Loader\DebugLoader; - -$foo = new stdClass(); -$foo->id = '6ef02334-002e-11e7-93ae-92361f002671'; -$foo->name = 'Foo'; - -$bar = new stdClass(); -$bar->id = 'a55b81da-f270-4de0-907a-25488e5ffcc8'; -$bar->name = 'Bar'; - -$items = [ - 'foo' => $foo, - 'bar' => $bar, -]; -$extract = new ObjectPropertyExtractor('id'); -$load = new DebugLoader(); -foreach ($items AS $key => $value) { - $element = $extract($key, $value); - $load($element); -} -$load->flush(); -``` - -Outputs: -```php -array (size=2) - '6ef02334-002e-11e7-93ae-92361f002671' => - object(stdClass)[3] - public 'id' => string '6ef02334-002e-11e7-93ae-92361f002671' (length=36) - public 'name' => string 'Foo' (length=3) - 'a55b81da-f270-4de0-907a-25488e5ffcc8' => - object(stdClass)[2] - public 'id' => string 'a55b81da-f270-4de0-907a-25488e5ffcc8' (length=36) - public 'name' => string 'Bar' (length=3) -``` - -CallbackExtractor ------------------ -This extractor uses a callback on a `ContextElement` created by a `KeyValueExtractor` to allow you to define the key and the value. -```php -require_once __DIR__ . 
'/vendor/autoload.php'; - -use BenTools\ETL\Context\ContextElementInterface; -use BenTools\ETL\Loader\DebugLoader; - -class MyObject { - private $id, $name; - public function __construct($id, $name) { - $this->id = $id; - $this->name = $name; - } - - public function getId() { - return $this->id; - } - - /** - * @return mixed - */ - public function getName() { - return $this->name; - } -} - -$foo = new MyObject('6ef02334-002e-11e7-93ae-92361f002671', 'Foo'); -$bar = new MyObject('a55b81da-f270-4de0-907a-25488e5ffcc8', 'Bar'); - -$items = [ - 'foo' => $foo, - 'bar' => $bar, -]; -$extract = new \BenTools\ETL\Extractor\CallbackExtractor(function (ContextElementInterface $element) { - /** @var MyObject $myObject */ - $myObject = $element->getData(); - $element->setId($myObject->getId()); -}); -$load = new DebugLoader(); -foreach ($items AS $key => $value) { - $element = $extract($key, $value); - $load($element); -} -$load->flush(); -``` - -Outputs: -```php -array (size=2) - '6ef02334-002e-11e7-93ae-92361f002671' => - object(MyObject)[3] - private 'id' => string '6ef02334-002e-11e7-93ae-92361f002671' (length=36) - private 'name' => string 'Foo' (length=3) - 'a55b81da-f270-4de0-907a-25488e5ffcc8' => - object(MyObject)[2] - private 'id' => string 'a55b81da-f270-4de0-907a-25488e5ffcc8' (length=36) - private 'name' => string 'Bar' (length=3) -``` - -Next: [Transformers](Transformers.md) \ No newline at end of file diff --git a/doc/GettingStarted.md b/doc/GettingStarted.md deleted file mode 100644 index 34f8c31..0000000 --- a/doc/GettingStarted.md +++ /dev/null @@ -1,58 +0,0 @@ -Getting started: A simple example ---------- -Input: **JSON** - Output: **CSV** - -```php -use BenTools\ETL\Context\ContextElementInterface; -use BenTools\ETL\Extractor\KeyValueExtractor; -use BenTools\ETL\Loader\CsvFileLoader; -use BenTools\ETL\Runner\ETLRunner; - -require_once __DIR__ . 
'/vendor/autoload.php'; - -$jsonInput = '{ - "dictators": [ - { - "country": "USA", - "name": "Donald Trump" - }, - { - "country": "Russia", - "name": "Vladimir Poutine" - } - ] -}'; - -// We'll iterate over $json -$json = json_decode($jsonInput, true)['dictators']; - -// We'll use the default extractor (key => value) -$extractor = new KeyValueExtractor(); - -// Data transformer -$transformer = function (ContextElementInterface $element) { - $dictator = $element->getData(); - $element->setData(array_values($dictator)); -}; - -// Init CSV output -$csvOutput = new SplFileObject(__DIR__ . '/output/dictators.csv', 'w'); -$csvOutput->fputcsv(['country', 'name']); - -// CSV File loader -$loader = new CsvFileLoader($csvOutput); - -// Run the ETL -$run = new ETLRunner(); -$run($json, $extractor, $transformer, $loader); -``` - -File contents: -```csv -country,name -USA,"Donald Trump" -Russia,"Vladimir Poutine" -``` - - -Next: [Extractors](Extractors.md) diff --git a/doc/Iterators.md b/doc/Iterators.md deleted file mode 100644 index ceaae29..0000000 --- a/doc/Iterators.md +++ /dev/null @@ -1,226 +0,0 @@ -Iterators -========= - -By default, you are free to use any [`iterable`](https://wiki.php.net/rfc/iterable) array, object or generator - anything that can go in a `foreach` loop. - -To simplify some use cases, you can also use those ones: - -JsonIterator ------------- -```php -require_once __DIR__ . '/vendor/autoload.php'; - -use BenTools\ETL\Iterator\JsonIterator; - -$iterator = new JsonIterator('{"cat":"meow","dog":"bark"}'); // Also accepts an already-decoded JSON -foreach ($iterator AS $key => $value) { - var_dump(sprintf('The %s %ss', $key, $value)); -} -``` - -Outputs: -```php -string 'The cat meows' (length=13) -string 'The dog barks' (length=13) -``` - -TextLinesIterator ------------------ -Takes a string, and yields each line. - -Example: -```php -require_once __DIR__ . 
'/vendor/autoload.php'; - -use BenTools\ETL\Iterator\TextLinesIterator; - -$text = << - string(7) "country" - [1]=> - string(4) "name" -} -array(2) { - [0]=> - string(3) "USA" - [1]=> - string(12) "Donald Trump" -} -array(2) { - [0]=> - string(6) "Russia" - [1]=> - string(16) "Vladimir Poutine" -} -``` - -CsvFileIterator ---------------- -Takes an `SplFileObject` as an argument (or use the `createFromFileName()` factory) and returns an indexed array of each csv line. - -Example: - -```php -use BenTools\ETL\Iterator\CsvFileIterator; - -require_once __DIR__ . '/vendor/autoload.php'; - -$iterator = CsvFileIterator::createFromFilename('dictators.csv', ','); -foreach ($iterator AS $item) { - var_dump($item); -} -``` - -Outputs: -``` -array(2) { - [0]=> - string(7) "country" - [1]=> - string(4) "name" -} -array(2) { - [0]=> - string(3) "USA" - [1]=> - string(12) "Donald Trump" -} -array(2) { - [0]=> - string(6) "Russia" - [1]=> - string(16) "Vladimir Poutine" -} -``` - -KeysAwareCsvIterator --------------------- -This iterator uses the _Decorator_ pattern to wrap a CSV Iterator (`CsvFileIterator` or `CsvStringIterator`) and allows you to: - -* Specify the array keys to apply to each row -* Specify that the keys are set in the 1st line of the CSV -* Skip the 1st line of the CSV (useful indeed is the 1st line represent the keys) - -Example: -```php -use BenTools\ETL\Iterator\CsvStringIterator; -use BenTools\ETL\Iterator\KeysAwareCsvIterator; - -require_once __DIR__ . '/vendor/autoload.php'; - -$text = << - string(3) "USA" - ["name"]=> - string(12) "Donald Trump" -} -array(2) { - ["country"]=> - string(6) "Russia" - ["name"]=> - string(16) "Vladimir Poutine" -} -``` - -Another example: -```php -use BenTools\ETL\Iterator\CsvStringIterator; -use BenTools\ETL\Iterator\KeysAwareCsvIterator; - -require_once __DIR__ . 
'/vendor/autoload.php'; - -$text = << - string(3) "USA" - ["actual_president"]=> - string(12) "Donald Trump" -} -array(2) { - ["country_name"]=> - string(6) "Russia" - ["actual_president"]=> - string(16) "Vladimir Poutine" -} -``` \ No newline at end of file diff --git a/doc/Loaders.md b/doc/Loaders.md deleted file mode 100644 index 7f0dad6..0000000 --- a/doc/Loaders.md +++ /dev/null @@ -1,34 +0,0 @@ -Loaders -======= -The loaders are responsible to save or output your transformed data. Here are some built-in loaders, feel free to implement your own ones. - - -ArrayLoader ------------ -This loader stores your data into an array. - -DebugLoader ------------ -At the end of the process, this loader `var_dump` your data. You can use another debug function in the constructor if needed. - -FileLoader ----------- -This loader saves your data in a file. Make sure your transformer adds a `PHP_EOL` if you want a line for each load (you can also hook on the `ETLEvents::AFTER_TRANSFORM` event) - -CsvFileLoader -------------- -This loader will take your `SplFileObject` and call `fputcsv` on each load. - -JsonFileLoader --------------- -This loader will store your data as a Json file. - - -Loaders and FlushableLoaders -============================ -By default, each loader will store your data when invoked. - -You can create your own loader by implementing `BenTools\ETL\Loader\FlushableLoaderInterface`, which will cause the loader to buffer elements and store them only when the `flush()` method is called (cf. `BenTools\ETL\Loader\DoctrineORMLoader`). - - -Next: [Events](Events.md) \ No newline at end of file diff --git a/doc/Recipes/AdvancedCSVToJSON.md b/doc/Recipes/AdvancedCSVToJSON.md deleted file mode 100644 index 57e4896..0000000 --- a/doc/Recipes/AdvancedCSVToJSON.md +++ /dev/null @@ -1,137 +0,0 @@ -Advanced example -================ - -Convert a **CSV** to a **JSON** and apply some transformations. 
- -The goal: ---------- - -Transform this: -```csv -Variable Name,Dataset,Code List,Definition -bmu,Community engagement,,Number of TB Basic Management Units in the country -community_data_available,Community engagement,A=No; B=Yes,Are data available on community-based referrals or any form of community treatment adherence support? -prevtx_data_available,Latent TB infection,A=No; E= Yes - available from the routine surveillance system; G=Yes - estimated from a survey ,Are data available on the number of children aged under 5 who are household contacts of TB cases and started on TB preventive therapy? - -``` - -into this: -```json -{ - "bmu": { - "dataset": "Community engagement", - "code_list": [], - "definition": "Number of TB Basic Management Units in the country" - }, - "community_data_available": { - "dataset": "Community engagement", - "code_list": { - "A": "No", - "B": "Yes" - }, - "definition": "Are data available on community-based referrals or any form of community treatment adherence support?" - }, - "prevtx_data_available": { - "dataset": "Latent TB infection", - "code_list": { - "A": "No", - "E": "Yes - available from the routine surveillance system", - "G": "Yes - estimated from a survey" - }, - "definition": "Are data available on the number of children aged under 5 who are household contacts of TB cases and started on TB preventive therapy?" 
- } -} -``` - -The challenge: --------------- - -* We want to skip the 1st row -* We want to use the 1st row as keys, and slug them -* We want to use the 1st column as the identifier of each row -* We do not want the 1st column to be part of the value -* We want the "Code list" column to be output as an associative array - -How to achieve this: --------------------- - -* We'll use `BenTools\ETL\Iterator\CsvFileIterator` to iterate over the CSV (we'll get an indexed array for each row) -* We'll use `BenTools\ETL\Extractor\ArrayPropertyExtractor` to use the 1st column as an identifier -* We'll hook on the `BenTools\ETL\Event\ETLEvents::AFTER_EXTRACT` event to create the keys and skip the 1st row -* We'll use an external library `Cocur\Slugify\Slugify` to make slugs from our keys -* We'll use a `callable` Transformer to create an associative array for each row (by combining keys and values) and transform the code list to an array - -The code: -------- -```php -use BenTools\ETL\Context\ContextElementInterface; -use BenTools\ETL\Event\ETLEvents; -use BenTools\ETL\Event\EventDispatcher\Bridge\Symfony\SymfonyEvent; -use BenTools\ETL\Event\EventDispatcher\Bridge\Symfony\SymfonyEventDispatcherBridge as SymfonyBridge; -use BenTools\ETL\Extractor\ArrayPropertyExtractor; -use BenTools\ETL\Iterator\CsvFileIterator; -use BenTools\ETL\Loader\JsonFileLoader; -use BenTools\ETL\Runner\ETLRunner; -use Cocur\Slugify\Slugify; - -require_once __DIR__ . '/vendor/autoload.php'; - -// We will iterate over the CSV rows. -$csvInput = new CsvFileIterator(new SplFileObject(__DIR__ . '/input/input.csv')); - -// We will extract each row - the 1st column (index 0) will define the identifier of each row. -$extractor = new ArrayPropertyExtractor(0, $shift = true); - -// We will load the data in a JSON file -$loader = new JsonFileLoader(new SplFileObject(__DIR__ . '/output/output.json', 'w'), JSON_PRETTY_PRINT); - -// Here's our transformer function. 
-$transformer = function (ContextElementInterface $element) use (&$keys) { - - // Combine keys (1st row) and values (current row) - $data = array_combine($keys, $element->getData()); - - // Process code list - $codeList = array_map('trim', explode(';', trim($data['code_list']))); - $data['code_list'] = []; - foreach ($codeList AS $code) { - if (false !== strpos($code, '=')) { - list($key, $value) = explode('=', $code); - $data['code_list'][trim($key)] = trim($value); - } - } - - // Hydrate back our element - $element->setData($data); -}; - -// We need to hook on the BenTools\ETL\Event\ETLEvents::AFTER_EXTRACT event to generate the keys -$eventDispatcher = new SymfonyBridge(); -$keys = []; -$eventDispatcher->getWrappedDispatcher()->addListener(ETLEvents::AFTER_EXTRACT, function (SymfonyEvent $event) use (&$keys) { - - // The 1st CSV row will give us the keys - if (empty($keys)) { - - // The Symfony event wraps the "real" event. - $event = $event->getWrappedEvent(); - - // Retrieve element - $contextElement = $event->getElement(); - - // Remove spaces and caps - $slugify = function ($key) { - return Slugify::create()->slugify($key, ['separator' => '_']); - }; - $keys = array_map($slugify, array_values($contextElement->getData())); - - // We don't want that row to be sent to the loader. - $contextElement->skip(); - } -}); - -$run = new ETLRunner(null, $eventDispatcher); -$run($csvInput, $extractor, $transformer, $loader); - - -``` \ No newline at end of file diff --git a/doc/Transformers.md b/doc/Transformers.md deleted file mode 100644 index cbc5403..0000000 --- a/doc/Transformers.md +++ /dev/null @@ -1,125 +0,0 @@ -Transformers -============ - -This library voluntarily does not provide any transformer. You have to implement your owns. - -Nevertheless, if you do not need to change the id, you can use our `CallbackTransformer` to apply modification on your data: - -```php -require_once __DIR__ . 
'/vendor/autoload.php'; - -use BenTools\ETL\Context\ContextElement; -use BenTools\ETL\Transformer\CallbackTransformer; - -$element = new ContextElement('foo', 'bar'); -$transform = new CallbackTransformer('strtoupper'); -$transform($element); -var_dump($element->getId()); -var_dump($element->getData()); -``` - -Outputs: -```php -string 'foo' (length=3) -string 'BAR' (length=3) -``` - -Otherwise, here's how we could achieve this by ourselves: -```php -use BenTools\ETL\Context\ContextElementInterface; -use BenTools\ETL\Transformer\TransformerInterface; - -class MyTransformer implements TransformerInterface -{ - /** - * @inheritDoc - */ - public function __invoke(ContextElementInterface $element): void - { - $element->setData(strtoupper($element->getData())); - } - -} -``` - -Transformer Stack ------------------ - -Provides a stack of transformers with priority management. - -```php -require_once __DIR__ . '/vendor/autoload.php'; - -use BenTools\ETL\Context\ContextElement; -use BenTools\ETL\Context\ContextElementInterface; -use BenTools\ETL\Transformer\TransformerStack; - -$transformer1 = function (ContextElementInterface $element) { - $element->setData('foo'); -}; -$transformer2 = function (ContextElementInterface $element) { - $element->setData('bar'); -}; -$transformer3 = function (ContextElementInterface $element) { - if (1000 === $element->getId()) { - $element->skip(); - } -}; - -$stack = new TransformerStack(); -$stack->registerTransformer($transformer1, 100); -$stack->registerTransformer($transformer2, 50); -$stack->registerTransformer($transformer3, 75); - -$element = new ContextElement(1000); - -/** - * This will execute in the following order: - * - $transformer1 - * - $transformer3 - * - $transformer2 - */ -$stack($element); - -/** - * Because $transformer3, executed in 2nd position, has set the element to be skipped, - * $element data will not be set to 'bar' - */ -var_dump($element->getData()); // 'foo' -``` - - -Step Transformer ----------------- - -You 
can also chain transformers with nameable steps. This can be useful to hook an additionnal transformer on a specific step of the transform workflow: -```php -require_once __DIR__ . '/vendor/autoload.php'; - -use BenTools\ETL\Context\ContextElement; -use BenTools\ETL\Context\ContextElementInterface; -use BenTools\ETL\Transformer\StepTransformer; - -$stack = new StepTransformer([ - 'step_1', - 'step_2' -]); - -$stack->registerTransformer('step_1', function (ContextElementInterface $element) { - $element->setData('foo'); -}); - -$stack->registerTransformer('step_2', function (ContextElementInterface $element) { - $element->setData('bar'); -}); - -$stack->registerTransformer('step_2', function (ContextElementInterface $element) { - $element->setData('baz'); -}, 100); // Will be executed on top of step_2 - -$element = new ContextElement(); -$stack($element); -var_dump($element->getData()); // 'bar' -``` - -Next: [Loaders](Loaders.md) \ No newline at end of file diff --git a/src/Context/ContextElement.php b/src/Context/ContextElement.php deleted file mode 100644 index 23a56a5..0000000 --- a/src/Context/ContextElement.php +++ /dev/null @@ -1,106 +0,0 @@ -id = $id; - $this->data = $data; - } - - /** - * @inheritDoc - */ - public function setId($id): void - { - $this->id = $id; - } - - /** - * @inheritDoc - */ - public function getId() - { - return $this->id; - } - - /** - * @inheritDoc - */ - public function setData($data): void - { - $this->data = $data; - } - - /** - * @inheritDoc - */ - public function getData() - { - return $this->data; - } - - /** - * @inheritDoc - */ - public function skip(): void - { - $this->skip = true; - } - - /** - * @inheritDoc - */ - public function stop(bool $flush = true): void - { - $this->stop = true; - $this->flush = $flush; - } - - /** - * @inheritDoc - */ - public function flush(): void - { - $this->flush = true; - } - - /** - * @inheritDoc - */ - public function shouldSkip(): bool - { - return $this->skip; - } - - /** - * @inheritDoc - 
*/ - public function shouldStop(): bool - { - return $this->stop; - } - - /** - * @inheritDoc - */ - public function shouldFlush(): bool - { - return $this->flush; - } -} diff --git a/src/Context/ContextElementInterface.php b/src/Context/ContextElementInterface.php deleted file mode 100644 index 87cfb2a..0000000 --- a/src/Context/ContextElementInterface.php +++ /dev/null @@ -1,65 +0,0 @@ -extract = $extract; + $this->transform = $transform ?? self::defaultTransformer(); + $this->init = $initLoader; + $this->load = $load ?? new NullLoader(); + $this->flush = $flush; + $this->rollback = $rollback; + $this->flushEvery = null !== $flushEvery ? max(1, $flushEvery) : null; + $this->eventDispatcher = $eventDispatcher ?? new EventDispatcher(); + } + + /** + * Run the ETL on the given input. + * + * @param $data + * @throws EtlException + */ + public function process($data): void + { + $flushCounter = $totalCounter = 0; + $this->start(); + + foreach ($this->extract($data) as $key => $item) { + if ($this->shouldSkip) { + $this->skip($item, $key); + continue; + } + + if ($this->shouldStop) { + $this->stop($item, $key); + break; + } + + $transformed = $this->transform($item, $key); + + if ($this->shouldSkip) { + $this->skip($item, $key); + continue; + } + + if ($this->shouldStop) { + $this->stop($item, $key); + break; + } + + $flushCounter++; + $totalCounter++; + + if (1 === $totalCounter) { + $this->initLoader(); + } + + $flush = (null === $this->flushEvery ? false : (0 === ($totalCounter % $this->flushEvery))); + $this->load($transformed(), $item, $key, $flush, $flushCounter, $totalCounter); + } + + $this->end($flushCounter, $totalCounter); + } + + private function start(): void + { + $this->reset(); + $this->eventDispatcher->dispatch(new StartProcessEvent($this)); + } + + /** + * Mark the current item to be skipped. + */ + public function skipCurrentItem(): void + { + $this->shouldSkip = true; + } + + /** + * Process item skip. 
+ * + * @param $item + * @param $key + */ + private function skip($item, $key): void + { + $this->shouldSkip = false; + $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::SKIP, $item, $key, $this)); + } + + /** + * Ask the ETl to stop. + * + * @param bool $rollback - if the loader should rollback instead of flushing. + */ + public function stopProcessing(bool $rollback = false): void + { + $this->shouldStop = true; + $this->shouldRollback = $rollback; + } + + /** + * @param $item + * @param $key + */ + private function stop($item, $key): void + { + $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::STOP, $item, $key, $this)); + } + + /** + * Reset the ETL. + */ + private function reset(): void + { + $this->shouldSkip = false; + $this->shouldStop = false; + $this->shouldRollback = false; + } + + /** + * Extract data. + * + * @param $data + * @return iterable + * @throws EtlException + */ + private function extract($data): iterable + { + $items = null === $this->extract ? $data : ($this->extract)($data, $this); + + if (null === $items) { + $items = new \EmptyIterator(); + } + + if (!\is_iterable($items)) { + throw new EtlException('Could not extract data.'); + } + + try { + foreach ($items as $key => $item) { + try { + $this->shouldSkip = false; + $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::EXTRACT, $item, $key, $this)); + yield $key => $item; + } catch (\Exception $e) { + continue; + } + } + } catch (\Throwable $e) { + /** @var ItemExceptionEvent $event */ + $event = $this->eventDispatcher->dispatch(new ItemExceptionEvent(EtlEvents::EXTRACT_EXCEPTION, $item ?? null, $key ?? null, $this, $e)); + if ($event->shouldThrowException()) { + throw $e; + } + } + } + + /** + * Transform data. 
+ * + * @param $item + * @param $key + * @return callable + * @throws EtlException + */ + private function transform($item, $key) + { + $transformed = ($this->transform)($item, $key, $this); + + if (!$transformed instanceof \Generator) { + throw new EtlException('The transformer must return a generator.'); + } + + // Traverse generator to trigger events + try { + $transformed = \iterator_to_array($transformed); + $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::TRANSFORM, $item, $key, $this)); + } catch (\Exception $e) { + /** @var ItemExceptionEvent $event */ + $event = $this->eventDispatcher->dispatch(new ItemExceptionEvent(EtlEvents::TRANSFORM_EXCEPTION, $item, $key, $this, $e)); + if ($event->shouldThrowException()) { + throw $e; + } + } + + return function () use ($transformed) { + yield from $transformed; + }; + } + + /** + * Init the loader on the 1st item. + */ + private function initLoader(): void + { + if (null === $this->init) { + return; + } + + ($this->init)(); + } + + /** + * Load data. + * + * @param iterable $data + */ + private function load(iterable $data, $item, $key, bool $flush, int &$flushCounter, int &$totalCounter): void + { + try { + ($this->load)($data, $key, $this); + $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::LOAD, $item, $key, $this)); + } catch (\Throwable $e) { + /** @var ItemExceptionEvent $event */ + $event = $this->eventDispatcher->dispatch(new ItemExceptionEvent(EtlEvents::LOAD_EXCEPTION, $item, $key, $this, $e)); + if ($event->shouldThrowException()) { + throw $e; + } + $flushCounter--; + $totalCounter--; + } + + if (true === $flush) { + $this->flush($flushCounter, true); + } + } + + /** + * Flush elements. + */ + private function flush(int &$flushCounter, bool $partial): void + { + if (null === $this->flush) { + return; + } + + ($this->flush)($partial); + $this->eventDispatcher->dispatch(new FlushEvent($this, $flushCounter, $partial)); + $flushCounter = 0; + } + + /** + * Restore loader's initial state. 
+ */ + private function rollback(int &$flushCounter): void + { + if (null === $this->rollback) { + return; + } + + ($this->rollback)(); + $this->eventDispatcher->dispatch(new RollbackEvent($this, $flushCounter)); + $flushCounter = 0; + } + + /** + * Process the end of the ETL. + * + * @param int $flushCounter + * @param int $totalCounter + */ + private function end(int $flushCounter, int $totalCounter): void + { + if ($this->shouldRollback) { + $this->rollback($flushCounter); + $totalCounter = max(0, $totalCounter - $flushCounter); + } else { + $this->flush($flushCounter, false); + } + $this->eventDispatcher->dispatch(new EndProcessEvent($this, $totalCounter)); + $this->reset(); + } + + /** + * @return callable + */ + private static function defaultTransformer(): callable + { + return function ($item, $key): \Generator { + yield $key => $item; + }; + } +} diff --git a/src/EtlBuilder.php b/src/EtlBuilder.php new file mode 100644 index 0000000..6cb9a12 --- /dev/null +++ b/src/EtlBuilder.php @@ -0,0 +1,350 @@ +extractFrom($extractor); + } + + if (null !== $transformer) { + $this->transformWith($transformer); + } + + if (null !== $loader) { + $this->loadInto($loader); + } + } + + /** + * @return EtlBuilder + */ + public static function init($extractor = null, $transformer = null, $loader = null): self + { + return new self($extractor, $transformer, $loader); + } + + /** + * @param $extractor + * @return EtlBuilder + * @throws \InvalidArgumentException + */ + public function extractFrom($extractor): self + { + if ($extractor instanceof ExtractorInterface) { + $this->extractor = [$extractor, 'extract']; + + return $this; + } + + if (\is_callable($extractor) || null === $extractor) { + $this->extractor = $extractor; + + return $this; + } + + + throw new \InvalidArgumentException(sprintf('Expected callable, null or instance of %s, got %s', ExtractorInterface::class, \is_object($extractor) ? 
\get_class($extractor) : \gettype($extractor))); + } + + /** + * @param $transformer + * @return EtlBuilder + * @throws \InvalidArgumentException + */ + public function transformWith($transformer): self + { + + if ($transformer instanceof TransformerInterface) { + $this->transformer = [$transformer, 'transform']; + + return $this; + } + + if (\is_callable($transformer) || null === $transformer) { + $this->transformer = $transformer; + + return $this; + } + + throw new \InvalidArgumentException(sprintf('Expected callable, null or instance of %s, got %s', TransformerInterface::class, \is_object($transformer) ? \get_class($transformer) : \gettype($transformer))); + } + + /** + * @param $loader + * @return EtlBuilder + * @throws \InvalidArgumentException + */ + public function loadInto($loader): self + { + if ($loader instanceof LoaderInterface) { + $this->loader = [$loader, 'load']; + $this->initLoader = [$loader, 'init']; + $this->committer = [$loader, 'commit']; + $this->restorer = [$loader, 'rollback']; + + return $this; + } + + if (\is_callable($loader)) { + $this->loader = $loader; + + return $this; + } + + + throw new \InvalidArgumentException(sprintf('Expected callable or instance of %s, got %s', LoaderInterface::class, \is_object($loader) ? 
\get_class($loader) : \gettype($loader))); + } + + /** + * @param int|null $nbItems + * @return EtlBuilder + */ + public function flushEvery(?int $nbItems): self + { + $this->flushEvery = $nbItems; + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onStart(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::START, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onExtract(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::EXTRACT, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onExtractException(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::EXTRACT_EXCEPTION, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onTransform(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::TRANSFORM, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onTransformException(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::TRANSFORM_EXCEPTION, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onLoad(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::LOAD, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public 
function onLoadException(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::LOAD_EXCEPTION, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onFlush(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::FLUSH, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onSkip(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::SKIP, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onStop(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::STOP, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onRollback(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::ROLLBACK, $callable, $priority); + + return $this; + } + + /** + * @param callable $callable + * @param int $priority + * @return EtlBuilder + */ + public function onEnd(callable $callable, int $priority = 0): self + { + $this->listeners[] = new EventListener(EtlEvents::END, $callable, $priority); + + return $this; + } + + /** + * @param Recipe $recipe + * @return EtlBuilder + */ + public function useRecipe(Recipe $recipe): self + { + return $recipe->updateBuilder($this); + } + + /** + * @return Etl + * @throws \RuntimeException + */ + public function createEtl(): Etl + { + $this->checkValidity(); + + return new Etl( + $this->extractor, + $this->transformer, + $this->loader, + $this->initLoader, + $this->committer, + $this->restorer, + $this->flushEvery, + new 
EventDispatcher($this->listeners) + ); + } + + /** + * @return bool + */ + private function checkValidity(): void + { + if (null === $this->loader) { + throw new \RuntimeException('Loader has not been provided.'); + } + + if (null !== $this->flushEvery && $this->flushEvery <= 0) { + throw new \RuntimeException('The "flushEvery" option must be null or an integer > 0.'); + } + } +} diff --git a/src/Event/ContextElementEvent.php b/src/Event/ContextElementEvent.php deleted file mode 100644 index 6695b20..0000000 --- a/src/Event/ContextElementEvent.php +++ /dev/null @@ -1,109 +0,0 @@ -name = $name; - $this->element = $element; - } - - /** - * @inheritDoc - */ - public function getName(): string - { - return $this->name; - } - - /** - * @return ContextElementInterface - */ - public function getElement(): ContextElementInterface - { - return $this->element; - } - - /** - * @inheritDoc - */ - public function isPropagationStopped(): bool - { - return !$this->running; - } - - /** - * @inheritDoc - */ - public function stopPropagation(): void - { - $this->running = false; - } - - /** - * @inheritDoc - */ - public function hasException(): bool - { - return null !== $this->exception; - } - - /** - * @inheritDoc - */ - public function getException(): ?Throwable - { - return $this->exception; - } - - /** - * @param Throwable $exception - * @return $this - Provides Fluent Interface - */ - public function setException(Throwable $exception = null) - { - $this->exception = $exception; - return $this; - } - - /** - * Removes the exception, if any. 
- * - * @return $this - */ - public function removeException() - { - $this->exception = null; - return $this; - } -} diff --git a/src/Event/ETLEvent.php b/src/Event/ETLEvent.php deleted file mode 100644 index eb108aa..0000000 --- a/src/Event/ETLEvent.php +++ /dev/null @@ -1,94 +0,0 @@ -name = $name; - } - - /** - * @inheritDoc - */ - public function getName(): string - { - return $this->name; - } - - /** - * @inheritDoc - */ - public function isPropagationStopped(): bool - { - return !$this->running; - } - - /** - * @inheritDoc - */ - public function stopPropagation(): void - { - $this->running = false; - } - - /** - * @inheritDoc - */ - public function hasException(): bool - { - return null !== $this->exception; - } - - /** - * @inheritDoc - */ - public function getException(): ?Throwable - { - return $this->exception; - } - - /** - * @param Throwable $exception - * @return $this - Provides Fluent Interface - */ - public function setException(Throwable $exception = null) - { - $this->exception = $exception; - return $this; - } - - /** - * Removes the exception, if any. - * - * @return $this - */ - public function removeException() - { - $this->exception = null; - return $this; - } -} diff --git a/src/Event/ETLEvents.php b/src/Event/ETLEvents.php deleted file mode 100644 index 30e5243..0000000 --- a/src/Event/ETLEvents.php +++ /dev/null @@ -1,18 +0,0 @@ -wrappedEvent->isPropagationStopped(); - } - - /** - * @inheritDoc - */ - public function stopPropagation() - { - $this->wrappedEvent->stopPropagation(); - } -} diff --git a/src/Event/EventDispatcher/Bridge/Symfony/SymfonyEventDispatcherBridge.php b/src/Event/EventDispatcher/Bridge/Symfony/SymfonyEventDispatcherBridge.php deleted file mode 100644 index de85731..0000000 --- a/src/Event/EventDispatcher/Bridge/Symfony/SymfonyEventDispatcherBridge.php +++ /dev/null @@ -1,42 +0,0 @@ -wrappedDispatcher = $eventDispatcher ?? 
new EventDispatcher(); - } - - /** - * @inheritDoc - */ - public function addListener(string $eventName, callable $callback): void - { - $this->wrappedDispatcher->addListener($eventName, $callback); - } - - /** - * @inheritdoc - */ - public function trigger(EventInterface $event): void - { - $symfonyEvent = new SymfonyEvent($event); - $this->wrappedDispatcher->dispatch($event->getName(), $symfonyEvent); - } -} diff --git a/src/Event/EventDispatcher/Bridge/WrappedDispatcherTrait.php b/src/Event/EventDispatcher/Bridge/WrappedDispatcherTrait.php deleted file mode 100644 index 600fbd1..0000000 --- a/src/Event/EventDispatcher/Bridge/WrappedDispatcherTrait.php +++ /dev/null @@ -1,28 +0,0 @@ -wrappedDispatcher; - return $wrappedEvent->$name(...$arguments); - } - - /** - * @return mixed - */ - public function getWrappedDispatcher() - { - return $this->wrappedDispatcher; - } -} diff --git a/src/Event/EventDispatcher/Bridge/WrappedEventTrait.php b/src/Event/EventDispatcher/Bridge/WrappedEventTrait.php deleted file mode 100644 index b1bd052..0000000 --- a/src/Event/EventDispatcher/Bridge/WrappedEventTrait.php +++ /dev/null @@ -1,40 +0,0 @@ -wrappedEvent = $wrappedEvent; - } - - /** - * @return EventInterface - */ - public function getWrappedEvent() - { - return $this->wrappedEvent; - } - - /** - * @inheritDoc - */ - public function __call($name, $arguments) - { - $wrappedEvent = $this->wrappedEvent; - return $wrappedEvent->$name(...$arguments); - } -} diff --git a/src/Event/EventDispatcher/ETLEventDispatcher.php b/src/Event/EventDispatcher/ETLEventDispatcher.php deleted file mode 100644 index 3725025..0000000 --- a/src/Event/EventDispatcher/ETLEventDispatcher.php +++ /dev/null @@ -1,37 +0,0 @@ -listeners)) { - $this->listeners[$eventName] = []; - } - $this->listeners[$eventName][] = $listener; - } - - /** - * @inheritDoc - */ - public function trigger(EventInterface $event): void - { - if (!empty($this->listeners[$event->getName()])) { - foreach 
($this->listeners[$event->getName()] as $listen) { - if (!$event->isPropagationStopped()) { - $listen($event); - } - } - } - } -} diff --git a/src/Event/EventDispatcher/EventDispatcherInterface.php b/src/Event/EventDispatcher/EventDispatcherInterface.php deleted file mode 100644 index 4705567..0000000 --- a/src/Event/EventDispatcher/EventDispatcherInterface.php +++ /dev/null @@ -1,19 +0,0 @@ -exception = $exception; - $this->key = $key; - $this->value = $value; - } - - /** - * @return mixed - */ - public function getKey() - { - return $this->key; - } - - /** - * @return mixed - */ - public function getValue() - { - return $this->value; - } - - /** - * @param bool $shouldIgnore - */ - public function ignore(bool $shouldIgnore) - { - $this->exception->ignore($shouldIgnore); - } - - /** - * @param bool $shouldStop - * @param bool $flush - */ - public function stop(bool $shouldStop, bool $flush = true) - { - $this->exception->stop($shouldStop, $flush); - } - - /** - * @return bool - */ - public function shouldIgnore() - { - return $this->exception->shouldIgnore(); - } - - /** - * @return bool - */ - public function shouldStop() - { - return $this->exception->shouldStop(); - } - - /** - * @return bool - */ - public function shouldFlush() - { - return $this->exception->shouldFlush(); - } - - /** - * @return ExtractionFailedException - */ - public function getException(): ?Throwable - { - return $this->exception; - } - - /** - * @inheritdoc - */ - public function setException(Throwable $exception = null) - { - throw new \LogicException(sprintf('Calling %s is not allowed.', __METHOD__)); - } - - /** - * @inheritdoc - */ - public function removeException() - { - throw new \LogicException(sprintf('Calling %s is not allowed.', __METHOD__)); - } -} diff --git a/src/EventDispatcher/EtlEvents.php b/src/EventDispatcher/EtlEvents.php new file mode 100644 index 0000000..5f59a28 --- /dev/null +++ b/src/EventDispatcher/EtlEvents.php @@ -0,0 +1,78 @@ +counter = $counter; + } + + /** + 
* @return int + */ + public function getCounter(): int + { + return $this->counter; + } + + /** + * @inheritDoc + */ + public function getName(): string + { + return EtlEvents::END; + } +} diff --git a/src/EventDispatcher/Event/EtlEvent.php b/src/EventDispatcher/Event/EtlEvent.php new file mode 100644 index 0000000..f8d776e --- /dev/null +++ b/src/EventDispatcher/Event/EtlEvent.php @@ -0,0 +1,58 @@ +etl = $etl; + } + + /** + * @return Etl + */ + public function getEtl(): Etl + { + return $this->etl; + } + + /** + * @return string + */ + abstract public function getName(): string; + + /** + * Stop event propagation. + */ + final public function stopPropagation(): void + { + $this->propagationStopped = true; + } + + /** + * @inheritDoc + */ + final public function isPropagationStopped(): bool + { + return true === $this->propagationStopped; + } +} diff --git a/src/EventDispatcher/Event/FlushEvent.php b/src/EventDispatcher/Event/FlushEvent.php new file mode 100644 index 0000000..e6e2a53 --- /dev/null +++ b/src/EventDispatcher/Event/FlushEvent.php @@ -0,0 +1,54 @@ +counter = $counter; + $this->partial = $partial; + } + + /** + * @return int + */ + public function getCounter(): int + { + return $this->counter; + } + + /** + * @return bool + */ + public function isPartial(): bool + { + return $this->partial; + } + + /** + * @inheritDoc + */ + public function getName(): string + { + return EtlEvents::FLUSH; + } +} diff --git a/src/EventDispatcher/Event/ItemEvent.php b/src/EventDispatcher/Event/ItemEvent.php new file mode 100644 index 0000000..15ebfbc --- /dev/null +++ b/src/EventDispatcher/Event/ItemEvent.php @@ -0,0 +1,55 @@ +name = $name; + $this->item = $item; + $this->key = $key; + parent::__construct($etl); + } + + /** + * @inheritDoc + */ + public function getName(): string + { + return $this->name; + } + + /** + * @return mixed + */ + public function getItem() + { + return $this->item; + } + + /** + * @return mixed + */ + public function getKey() + { + return 
$this->key; + } +} diff --git a/src/EventDispatcher/Event/ItemExceptionEvent.php b/src/EventDispatcher/Event/ItemExceptionEvent.php new file mode 100644 index 0000000..43de09f --- /dev/null +++ b/src/EventDispatcher/Event/ItemExceptionEvent.php @@ -0,0 +1,94 @@ +name = $name; + $this->item = $item; + $this->key = $key; + $this->exception = $exception; + parent::__construct($etl); + } + + /** + * @return \Throwable + */ + public function getException(): \Throwable + { + return $this->exception; + } + + /** + * @return bool + */ + public function shouldThrowException(): bool + { + return $this->shouldBeThrown; + } + + /** + * Exception should not be thrown. + * Implicitely skips the current item. + */ + public function ignoreException(): void + { + $this->shouldBeThrown = false; + $this->etl->skipCurrentItem(); + } + + /** + * @return mixed + */ + public function getItem() + { + return $this->item; + } + + /** + * @return mixed + */ + public function getKey() + { + return $this->key; + } + + /** + * @inheritDoc + */ + public function getName(): string + { + return $this->name; + } +} diff --git a/src/EventDispatcher/Event/RollbackEvent.php b/src/EventDispatcher/Event/RollbackEvent.php new file mode 100644 index 0000000..08ffb5b --- /dev/null +++ b/src/EventDispatcher/Event/RollbackEvent.php @@ -0,0 +1,41 @@ +counter = $counter; + } + + /** + * @return int + */ + public function getCounter(): int + { + return $this->counter; + } + + /** + * @inheritDoc + */ + public function getName(): string + { + return EtlEvents::ROLLBACK; + } +} diff --git a/src/EventDispatcher/Event/StartProcessEvent.php b/src/EventDispatcher/Event/StartProcessEvent.php new file mode 100644 index 0000000..c7c8166 --- /dev/null +++ b/src/EventDispatcher/Event/StartProcessEvent.php @@ -0,0 +1,16 @@ +addListener($listener); + } + } + + /** + * @param EventListener $eventListener + */ + private function addListener(EventListener $eventListener): void + { + $this->listeners[] = $eventListener; + } + + 
+ /** + * @inheritDoc + */ + public function getListenersForEvent(object $event): iterable + { + if (!$event instanceof EtlEvent) { + return []; + } + + $listenersForEvent = \array_filter( + $this->listeners, + function (EventListener $eventListener) use ($event) { + return $eventListener->getEventName() === $event->getName(); + } + ); + + \usort( + $listenersForEvent, + function (EventListener $a, EventListener $b) { + return $b->getPriority() <=> $a->getPriority(); + } + ); + + return \array_map( + function (EventListener $eventListener) { + return $eventListener->getCallable(); + }, + $listenersForEvent + ); + } + + /** + * @inheritDoc + */ + public function dispatch(object $event) + { + if (!$event instanceof EtlEvent) { + return $event; + } + + $listeners = $this->getListenersForEvent($event); + + foreach ($listeners as $listen) { + if ($event->isPropagationStopped()) { + break; + } + + $listen($event); + } + + return $event; + } +} diff --git a/src/EventDispatcher/EventListener.php b/src/EventDispatcher/EventListener.php new file mode 100644 index 0000000..1e23483 --- /dev/null +++ b/src/EventDispatcher/EventListener.php @@ -0,0 +1,55 @@ +eventName = $eventName; + $this->listener = $listener; + $this->priority = $priority; + } + + /** + * @return string + */ + public function getEventName(): ?string + { + return $this->eventName; + } + + /** + * @return callable + */ + public function getCallable(): callable + { + return $this->listener; + } + + /** + * @return int + */ + public function getPriority(): int + { + return $this->priority; + } +} diff --git a/src/Exception/EtlException.php b/src/Exception/EtlException.php new file mode 100644 index 0000000..9fb02ec --- /dev/null +++ b/src/Exception/EtlException.php @@ -0,0 +1,8 @@ +ignore = $shouldIgnore; - } - - /** - * @param bool $shouldStop - * @param bool $shouldFlush - */ - public function stop(bool $shouldStop, $shouldFlush = true) - { - $this->stop = $shouldStop; - $this->flush = $shouldFlush; - } - - /** 
- * @return bool - */ - public function shouldIgnore() - { - return $this->ignore; - } - - /** - * @return bool - */ - public function shouldStop() - { - return $this->stop; - } - - /** - * @return bool - */ - public function shouldFlush() - { - return $this->flush; - } -} diff --git a/src/Extractor/ArrayPropertyExtractor.php b/src/Extractor/ArrayPropertyExtractor.php deleted file mode 100644 index b5d1e41..0000000 --- a/src/Extractor/ArrayPropertyExtractor.php +++ /dev/null @@ -1,62 +0,0 @@ -property = $property; - $this->shift = $shift; - } - - /** - * @inheritdoc - */ - public function __invoke($key, $value): ContextElementInterface - { - $class = $this->getClass(); - if (!is_a($class, ContextElementInterface::class, true)) { - throw new \RuntimeException(sprintf('%s should implement %s.', $class, ContextElementInterface::class)); - } - - /** - * @var ContextElementInterface $element - */ - $element = new $class; - - if (is_array($value)) { - if (!array_key_exists($this->property, $value)) { - throw new \RuntimeException(sprintf('This array does not contain a \'%s\' property', $this->property)); - } - $element->setId($value[$this->property]); - if (true === $this->shift) { - unset($value[$this->property]); - } - } - - $element->setData($value); - return $element; - } -} diff --git a/src/Extractor/CallbackExtractor.php b/src/Extractor/CallbackExtractor.php deleted file mode 100644 index 8a2d8c8..0000000 --- a/src/Extractor/CallbackExtractor.php +++ /dev/null @@ -1,41 +0,0 @@ -callback = $callback; - } - - /** - * @inheritDoc - */ - public function __invoke($key, $value): ContextElementInterface - { - $element = parent::__invoke($key, $value); - $callback = $this->callback; - $callback($element); - return $element; - } -} diff --git a/src/Extractor/CsvExtractor.php b/src/Extractor/CsvExtractor.php new file mode 100644 index 0000000..10029d2 --- /dev/null +++ b/src/Extractor/CsvExtractor.php @@ -0,0 +1,78 @@ +delimiter = $delimiter; + $this->enclosure = $enclosure; 
+ $this->escapeString = $escapeString; + $this->createKeys = $createKeys; + $this->inputType = $inputType; + } + + /** + * @inheritDoc + */ + public function extract($input, Etl $etl): iterable + { + switch ($this->inputType) { + case self::INPUT_STRING: + $iterator = CsvStringIterator::createFromText($input, $this->delimiter, $this->enclosure, $this->escapeString); + break; + case self::INPUT_FILE: + $iterator = CsvFileIterator::createFromFilename($input, $this->delimiter, $this->enclosure, $this->escapeString); + break; + default: + throw new \InvalidArgumentException('Invalid input.'); + } + + return true === $this->createKeys ? new KeysAwareCsvIterator($iterator) : $iterator; + } +} diff --git a/src/Extractor/ExtractorInterface.php b/src/Extractor/ExtractorInterface.php index f6d9424..6cccd3a 100644 --- a/src/Extractor/ExtractorInterface.php +++ b/src/Extractor/ExtractorInterface.php @@ -2,17 +2,15 @@ namespace BenTools\ETL\Extractor; -use BenTools\ETL\Context\ContextElementInterface; +use BenTools\ETL\Etl; interface ExtractorInterface { /** - * Creates a context element. 
- * - * @param $key - * @param $value - * @return ContextElementInterface + * @param mixed $input + * @param Etl $etl + * @return iterable */ - public function __invoke($key, $value): ContextElementInterface; + public function extract($input, Etl $etl): iterable; } diff --git a/src/Extractor/FileExtractor.php b/src/Extractor/FileExtractor.php new file mode 100644 index 0000000..89e198f --- /dev/null +++ b/src/Extractor/FileExtractor.php @@ -0,0 +1,31 @@ +contentExtractor = $contentExtractor; + } + + /** + * @inheritDoc + */ + public function extract(/*string */$filename, Etl $etl): iterable + { + return $this->contentExtractor->extract(file_get_contents($filename), $etl); + } +} diff --git a/src/Extractor/IncrementorExtractor.php b/src/Extractor/IncrementorExtractor.php deleted file mode 100644 index cdf3f48..0000000 --- a/src/Extractor/IncrementorExtractor.php +++ /dev/null @@ -1,40 +0,0 @@ -index = $startAt - 1; - parent::__construct($class); - } - - /** - * @return int - */ - public function getIndex() - { - return $this->index; - } - - /** - * @inheritDoc - */ - public function __invoke($key, $value): ContextElementInterface - { - $element = parent::__invoke($key, $value); - $element->setId(++$this->index); - return $element; - } -} diff --git a/src/Extractor/JsonExtractor.php b/src/Extractor/JsonExtractor.php new file mode 100644 index 0000000..082ff36 --- /dev/null +++ b/src/Extractor/JsonExtractor.php @@ -0,0 +1,31 @@ +extract(\Safe\file_get_contents($json), $etl); + } + + throw $e; + } + } +} diff --git a/src/Extractor/KeyValueExtractor.php b/src/Extractor/KeyValueExtractor.php deleted file mode 100644 index 4df30a8..0000000 --- a/src/Extractor/KeyValueExtractor.php +++ /dev/null @@ -1,64 +0,0 @@ - value iterator (=> the key) - */ -class KeyValueExtractor implements ExtractorInterface -{ - - const DEFAULT_CLASS = ContextElement::class; - - protected $class = self::DEFAULT_CLASS; - - /** - * KeyValueExtractor constructor. 
- * - * @param string $class - */ - public function __construct(string $class = self::DEFAULT_CLASS) - { - $this->class = $class; - } - - /** - * @return string - */ - public function getClass() - { - return $this->class; - } - - /** - * @param string $class - * @return $this - Provides Fluent Interface - */ - public function setClass($class) - { - $this->class = $class; - return $this; - } - - /** - * @inheritDoc - */ - public function __invoke($key, $value): ContextElementInterface - { - $class = $this->getClass(); - if (!is_a($class, ContextElementInterface::class, true)) { - throw new \RuntimeException(sprintf('%s should implement %s.', $class, ContextElementInterface::class)); - } - /** - * @var ContextElementInterface $element - */ - $element = new $class; - $element->setId($key); - $element->setData($value); - return $element; - } -} diff --git a/src/Extractor/ObjectPropertyExtractor.php b/src/Extractor/ObjectPropertyExtractor.php deleted file mode 100644 index 7c3a426..0000000 --- a/src/Extractor/ObjectPropertyExtractor.php +++ /dev/null @@ -1,57 +0,0 @@ -property = $property; - } - - /** - * @inheritdoc - */ - public function __invoke($key, $value): ContextElementInterface - { - $class = $this->getClass(); - if (!is_a($class, ContextElementInterface::class, true)) { - throw new \RuntimeException(sprintf('%s should implement %s.', $class, ContextElementInterface::class)); - } - - /** - * @var ContextElementInterface $element - */ - $element = new $class; - - if (is_object($value)) { - if (!property_exists($value, $this->property)) { - throw new \RuntimeException(sprintf('This object does not contain a \'%s\' property', $this->property)); - } - $element->setId($value->{$this->property}); - } - - $element->setData($value); - return $element; - } -} diff --git a/src/Iterator/CsvFileIterator.php b/src/Iterator/CsvFileIterator.php index 5ae05b2..d980136 100644 --- a/src/Iterator/CsvFileIterator.php +++ b/src/Iterator/CsvFileIterator.php @@ -5,7 +5,7 @@ use 
FilterIterator; use SplFileObject; -class CsvFileIterator extends FilterIterator implements CsvIteratorInterface, \Countable +final class CsvFileIterator extends FilterIterator implements CsvIteratorInterface, \Countable { private $nbLines; @@ -45,7 +45,7 @@ public function accept() { $current = $this->getInnerIterator()->current(); return !empty( - array_filter( + \array_filter( $current, function ($cell) { return null !== $cell; @@ -61,7 +61,7 @@ public function count() { if (null === $this->nbLines) { $this->rewind(); - $this->nbLines = count(iterator_to_array($this)); + $this->nbLines = \count(\iterator_to_array($this)); } return $this->nbLines; diff --git a/src/Iterator/CsvStringIterator.php b/src/Iterator/CsvStringIterator.php index c4a078e..dfc2f7f 100644 --- a/src/Iterator/CsvStringIterator.php +++ b/src/Iterator/CsvStringIterator.php @@ -4,7 +4,7 @@ use IteratorAggregate; -class CsvStringIterator implements IteratorAggregate, CsvIteratorInterface +final class CsvStringIterator implements IteratorAggregate, CsvIteratorInterface { /** @@ -49,7 +49,6 @@ public function __construct( /** * @param string $text - * @param bool $skipEmptyLines * @param string $delimiter * @param string $enclosure * @param string $escapeString @@ -57,13 +56,12 @@ public function __construct( */ public static function createFromText( string $text, - bool $skipEmptyLines = true, $delimiter = ',', $enclosure = '"', $escapeString = '\\' ) { - return new static(new TextLinesIterator($text, $skipEmptyLines), $delimiter, $enclosure, $escapeString); + return new static(new TextLinesIterator($text, true), $delimiter, $enclosure, $escapeString); } /** @@ -72,7 +70,7 @@ public static function createFromText( public function getIterator() { foreach ($this->stringIterator as $line) { - yield str_getcsv($line, $this->delimiter, $this->enclosure, $this->escapeString); + yield \str_getcsv($line, $this->delimiter, $this->enclosure, $this->escapeString); } } } diff --git 
a/src/Iterator/JsonIterator.php b/src/Iterator/JsonIterator.php deleted file mode 100644 index f59497c..0000000 --- a/src/Iterator/JsonIterator.php +++ /dev/null @@ -1,38 +0,0 @@ -json = $json; - } elseif (is_string($json)) { - $this->json = new \ArrayIterator(json_decode($json, true)); - } elseif ($json instanceof \stdClass || is_array($json)) { - $this->json = new \ArrayIterator((array) $json); - } else { - throw new \InvalidArgumentException("Invalid json input"); - } - } - - /** - * @inheritDoc - */ - public function getIterator() - { - return $this->json; - } -} diff --git a/src/Iterator/KeysAwareCsvIterator.php b/src/Iterator/KeysAwareCsvIterator.php index 36eb646..024210a 100644 --- a/src/Iterator/KeysAwareCsvIterator.php +++ b/src/Iterator/KeysAwareCsvIterator.php @@ -4,7 +4,7 @@ use IteratorAggregate; -class KeysAwareCsvIterator implements IteratorAggregate, CsvIteratorInterface +final class KeysAwareCsvIterator implements IteratorAggregate, CsvIteratorInterface { /** * @var CsvIteratorInterface diff --git a/src/Iterator/TextLinesIterator.php b/src/Iterator/TextLinesIterator.php index 116fe1b..ab16777 100644 --- a/src/Iterator/TextLinesIterator.php +++ b/src/Iterator/TextLinesIterator.php @@ -3,8 +3,9 @@ namespace BenTools\ETL\Iterator; use IteratorAggregate; +use function Safe\preg_split; -class TextLinesIterator implements IteratorAggregate, StringIteratorInterface +final class TextLinesIterator implements IteratorAggregate, StringIteratorInterface { /** * @var string @@ -57,10 +58,10 @@ private function traverseWithPregSplit() */ private function traverseWithStrTok() { - $tok = strtok($this->content, "\r\n"); + $tok = \strtok($this->content, "\r\n"); while (false !== $tok) { $line = $tok; - $tok = strtok("\n\r"); + $tok = \strtok("\n\r"); yield $line; } } diff --git a/src/Loader/ArrayLoader.php b/src/Loader/ArrayLoader.php index 43e7d87..5ae5f31 100644 --- a/src/Loader/ArrayLoader.php +++ b/src/Loader/ArrayLoader.php @@ -2,9 +2,9 @@ namespace 
BenTools\ETL\Loader; -use BenTools\ETL\Context\ContextElementInterface; +use BenTools\ETL\Etl; -class ArrayLoader implements LoaderInterface +final class ArrayLoader implements LoaderInterface { /** @@ -12,29 +12,62 @@ class ArrayLoader implements LoaderInterface */ protected $array; + /** + * @var bool + */ + private $preserveKeys; + /** * ArrayLoader constructor. * * @param array $array */ - public function __construct(array &$array = []) + public function __construct(bool $preserveKeys = true, array &$array = []) { $this->array = &$array; + $this->preserveKeys = $preserveKeys; } /** - * @return array + * @inheritDoc */ - public function getArray() + public function init(): void { - return $this->array; } /** * @inheritDoc */ - public function __invoke(ContextElementInterface $element): void + public function load(\Generator $items, $key, Etl $etl): void { - $this->array[$element->getId()] = $element->getData(); + foreach ($items as $v) { + if ($this->preserveKeys) { + $this->array[$key] = $v; + } else { + $this->array[] = $v; + } + } + } + + /** + * @inheritDoc + */ + public function rollback(): void + { + } + + /** + * @inheritDoc + */ + public function commit(bool $partial): void + { + } + + /** + * @return array + */ + public function getArray(): array + { + return $this->array; } } diff --git a/src/Loader/CsvFileLoader.php b/src/Loader/CsvFileLoader.php index 61ef6cd..25d3fbd 100644 --- a/src/Loader/CsvFileLoader.php +++ b/src/Loader/CsvFileLoader.php @@ -2,11 +2,12 @@ namespace BenTools\ETL\Loader; -use BenTools\ETL\Context\ContextElementInterface; -use Psr\Log\LoggerInterface; +use BenTools\ETL\Etl; +use SplFileObject; -class CsvFileLoader extends FileLoader +final class CsvFileLoader implements LoaderInterface { + private $file; /** * @var string @@ -17,6 +18,7 @@ class CsvFileLoader extends FileLoader * @var string */ private $enclosure; + /** * @var string */ @@ -27,76 +29,54 @@ class CsvFileLoader extends FileLoader */ private $keys; - /** - * @var 
bool - */ - private $startedWriting = false; - /** * @inheritDoc */ public function __construct( - \SplFileObject $file, - LoggerInterface $logger = null, + SplFileObject $file, $delimiter = ',', $enclosure = '"', $escape = '\\', array $keys = [] ) { - parent::__construct($file, $logger); + $this->file = $file; $this->delimiter = $delimiter; $this->enclosure = $enclosure; - $this->escape = $escape; + $this->escape = $escape; $this->keys = $keys; } /** - * @return array + * @inheritDoc */ - public function getKeys() + public function load(\Generator $generator, $key, Etl $etl): void { - return $this->keys; + foreach ($generator as $row) { + $this->file->fputcsv($row, $this->delimiter, $this->enclosure, $this->escape); + } } /** - * @param array $keys - * @return $this - Provides Fluent Interface + * @inheritDoc */ - public function setKeys(array $keys) + public function init(): void { - if (true === $this->startedWriting) { - throw new \RuntimeException("It is too late to set the keys, the loader has already started writing."); + if (!empty($this->keys)) { + $this->file->fputcsv($this->keys, $this->delimiter, $this->enclosure, $this->escape); } - - $this->keys = $keys; - return $this; } /** * @inheritDoc */ - public function __invoke(ContextElementInterface $element): void + public function commit(bool $partial): void { - if (!empty($this->keys) && false === $this->startedWriting) { - if (false !== (bool) $this->file->fputcsv($this->keys, $this->delimiter, $this->enclosure, $this->escape)) { - $this->startedWriting = true; - } - } - - $bytes = $this->file->fputcsv($element->getData(), $this->delimiter, $this->enclosure, $this->escape); - - if (0 !== $bytes && false === $this->startedWriting) { - $this->startedWriting = true; - } + } - $this->logger->debug( - 'Write a field array as a CSV line', - [ - 'id' => $element->getId(), - 'data' => $element->getData(), - 'filename' => $this->file->getBasename(), - 'bytes' => $bytes - ] - ); + /** + * @inheritDoc + */ + public 
function rollback(): void + { } } diff --git a/src/Loader/DebugLoader.php b/src/Loader/DebugLoader.php deleted file mode 100644 index 522a46c..0000000 --- a/src/Loader/DebugLoader.php +++ /dev/null @@ -1,40 +0,0 @@ -debugFn = $debugFn; - } - - /** - * @inheritDoc - */ - public function shouldFlushAfterLoad(): bool - { - return false; - } - - /** - * @inheritDoc - */ - public function flush(): void - { - $debugFn = $this->debugFn; - if (!is_callable($debugFn)) { - throw new \RuntimeException("The debug function is not callable"); - } - $debugFn($this->array); - } -} diff --git a/src/Loader/DoctrineORMLoader.php b/src/Loader/DoctrineORMLoader.php index 37bdb4a..e0980c0 100644 --- a/src/Loader/DoctrineORMLoader.php +++ b/src/Loader/DoctrineORMLoader.php @@ -2,14 +2,13 @@ namespace BenTools\ETL\Loader; -use BenTools\ETL\Context\ContextElementInterface; +use BenTools\ETL\Etl; use Doctrine\Common\Persistence\ManagerRegistry; use Doctrine\Common\Persistence\ObjectManager; +use Doctrine\Common\Util\ClassUtils; use Psr\Log\LoggerAwareTrait; -use Psr\Log\LoggerInterface; -use Psr\Log\NullLogger; -class DoctrineORMLoader implements FlushableLoaderInterface +final class DoctrineORMLoader implements LoaderInterface { use LoggerAwareTrait; @@ -19,16 +18,6 @@ class DoctrineORMLoader implements FlushableLoaderInterface */ private $managerRegistry; - /** - * @var int - */ - private $flushEvery = 1; - - /** - * @var int - */ - private $counter = 0; - /** * @var ObjectManager[] */ @@ -38,83 +27,63 @@ class DoctrineORMLoader implements FlushableLoaderInterface * DoctrineORMLoader constructor. 
* * @param ManagerRegistry $managerRegistry - * @param int $flushEvery - * @param LoggerInterface|null $logger */ - public function __construct(ManagerRegistry $managerRegistry, int $flushEvery = 1, LoggerInterface $logger = null) + public function __construct(ManagerRegistry $managerRegistry) { $this->managerRegistry = $managerRegistry; - $this->flushEvery = $flushEvery; - $this->logger = $logger ?? new NullLogger(); } /** - * @param int $flushEvery - * @return $this - Provides Fluent Interface + * @inheritDoc */ - public function setFlushEvery(int $flushEvery) + public function init(): void { - $this->flushEvery = $flushEvery; - return $this; } /** * @inheritDoc */ - public function shouldFlushAfterLoad(): bool + public function load(\Generator $entities, $key, Etl $etl): void { - return 0 !== $this->flushEvery // Otherwise we'll wait on an explicit flush() call - && 0 === ($this->counter % $this->flushEvery); + foreach ($entities as $entity) { + if (!is_object($entity)) { + throw new \InvalidArgumentException("The transformed data should return a generator of entities."); + } + + $className = ClassUtils::getClass($entity); + $objectManager = $this->managerRegistry->getManagerForClass($className); + if (null === $objectManager) { + throw new \RuntimeException(sprintf("Unable to locate Doctrine manager for class %s.", $className)); + } + + $objectManager->persist($entity); + + if (!in_array($objectManager, $this->objectManagers)) { + $this->objectManagers[] = $objectManager; + } + } } + /** * @inheritDoc */ - public function flush(): void + public function rollback(): void { foreach ($this->objectManagers as $objectManager) { - $objectManager->flush(); + $objectManager->clear(); } - $this->logger->debug(sprintf('Doctrine: flushed %d entities', $this->counter)); $this->objectManagers = []; - $this->counter = 0; } /** * @inheritDoc */ - public function __invoke(ContextElementInterface $element): void + public function commit(bool $partial): void { - $entity = 
$element->getData(); - - if (!is_object($entity)) { - throw new \InvalidArgumentException("The transformed data should return an entity object."); - } - - $className = get_class($entity); - $objectManager = $this->managerRegistry->getManagerForClass($className); - if (null === $objectManager) { - throw new \RuntimeException(sprintf("Unable to locate Doctrine manager for class %s.", $className)); - } - - $objectManager->persist($entity); - $this->logger->debug( - 'Loading entity', - [ - 'class' => $className, - 'id' => $element->getId(), - 'data', $element->getData() - ] - ); - - if (!in_array($objectManager, $this->objectManagers)) { - $this->objectManagers[] = $objectManager; - } - - if (1 === $this->flushEvery) { - $this->flush(); + foreach ($this->objectManagers as $objectManager) { + $objectManager->flush(); } - - $this->counter++; + $this->objectManagers = []; } } diff --git a/src/Loader/FileLoader.php b/src/Loader/FileLoader.php index 8f3ec91..25a0e02 100644 --- a/src/Loader/FileLoader.php +++ b/src/Loader/FileLoader.php @@ -2,48 +2,55 @@ namespace BenTools\ETL\Loader; -use BenTools\ETL\Context\ContextElementInterface; -use Psr\Log\LoggerAwareInterface; -use Psr\Log\LoggerAwareTrait; -use Psr\Log\LoggerInterface; -use Psr\Log\NullLogger; +use BenTools\ETL\Etl; +use SplFileObject; -class FileLoader implements LoaderInterface, LoggerAwareInterface +final class FileLoader implements LoaderInterface { - use LoggerAwareTrait; - /** - * @var \SplFileObject + * @var SplFileObject */ protected $file; /** * FileLoader constructor. * - * @param \SplFileObject $file - * @param LoggerInterface $logger + * @param SplFileObject $file */ - public function __construct(\SplFileObject $file, LoggerInterface $logger = null) + public function __construct(SplFileObject $file) { $this->file = $file; - $this->logger = $logger ?? 
new NullLogger(); } /** * @inheritDoc */ - public function __invoke(ContextElementInterface $element): void + public function load(\Generator $items, $key, Etl $etl): void + { + foreach ($items as $item) { + $this->file->fwrite($item); + } + } + + /** + * @inheritDoc + */ + public function init(): void + { + } + + /** + * @inheritDoc + */ + public function rollback(): void + { + } + + /** + * @inheritDoc + */ + public function commit(bool $partial): void { - $bytes = $this->file->fwrite($element->getData()); - $this->logger->debug( - 'Write to file', - [ - 'id' => $element->getId(), - 'data' => $element->getData(), - 'filename' => $this->file->getBasename(), - 'bytes' => $bytes - ] - ); } } diff --git a/src/Loader/FlushableLoaderInterface.php b/src/Loader/FlushableLoaderInterface.php deleted file mode 100644 index 2eb9427..0000000 --- a/src/Loader/FlushableLoaderInterface.php +++ /dev/null @@ -1,19 +0,0 @@ -file = $file; $this->jsonOptions = $jsonOptions; $this->jsonDepth = $jsonDepth; @@ -39,18 +45,52 @@ public function __construct(\SplFileObject $file, int $jsonOptions = 0, int $jso /** * @inheritDoc */ - public function shouldFlushAfterLoad(): bool + public function load(\Generator $items, $identifier, Etl $etl): void { - return false; + foreach ($items as $key => $value) { + $this->data[$key] = $value; + } } /** * @inheritDoc */ - public function flush(): void + public function init(): void { - if (0 === $this->file->fwrite(json_encode($this->getArray(), $this->jsonOptions, $this->jsonDepth))) { + } + + /** + * @inheritDoc + */ + public function rollback(): void + { + } + + + /** + * @inheritDoc + */ + public function commit(bool $partial): void + { + if (true === $partial) { + return; + } + + if (0 === $this->file->fwrite(json_encode($this->data, $this->jsonOptions, $this->jsonDepth))) { throw new \RuntimeException(sprintf('Unable to write to %s', $this->file->getPathname())); } } + + /** + * @param string $filename + * @param int $jsonOptions + * @param int 
$jsonDepth + * @return JsonFileLoader + * @throws \LogicException + * @throws \RuntimeException + */ + public static function toFile(string $filename, int $jsonOptions = 0, int $jsonDepth = 512): self + { + return new self(new SplFileObject($filename, 'w'), $jsonOptions, $jsonDepth); + } } diff --git a/src/Loader/LoaderInterface.php b/src/Loader/LoaderInterface.php index 3d7e957..16d9cc2 100644 --- a/src/Loader/LoaderInterface.php +++ b/src/Loader/LoaderInterface.php @@ -2,15 +2,34 @@ namespace BenTools\ETL\Loader; -use BenTools\ETL\Context\ContextElementInterface; +use BenTools\ETL\Etl; interface LoaderInterface { /** - * Loads the element into the persistence layer (ORM, file, HTTP, ...) + * Init loader (start a transaction, if supported). + */ + public function init(): void; + + /** + * Load elements. + * + * @param \Generator $items + * @param $key + * @param Etl $etl + */ + public function load(\Generator $items, $key, Etl $etl): void; + + /** + * Flush elements (if supported). * - * @param ContextElementInterface $element + * @param bool $partial - whether or not there remains elements to process. + */ + public function commit(bool $partial): void; + + /** + * Rollback (if supported). 
*/ - public function __invoke(ContextElementInterface $element): void; + public function rollback(): void; } diff --git a/src/Loader/NullLoader.php b/src/Loader/NullLoader.php new file mode 100644 index 0000000..c97deee --- /dev/null +++ b/src/Loader/NullLoader.php @@ -0,0 +1,40 @@ + LogLevel::INFO, + EtlEvents::EXTRACT => LogLevel::INFO, + EtlEvents::TRANSFORM => LogLevel::INFO, + EtlEvents::LOAD => LogLevel::INFO, + EtlEvents::FLUSH => LogLevel::INFO, + EtlEvents::SKIP => LogLevel::INFO, + EtlEvents::STOP => LogLevel::INFO, + EtlEvents::ROLLBACK => LogLevel::INFO, + EtlEvents::END => LogLevel::INFO, + ]; + + private const DEFAULT_EVENT_PRIORITIES = [ + EtlEvents::START => 128, + EtlEvents::EXTRACT => 128, + EtlEvents::TRANSFORM => 128, + EtlEvents::LOAD => 128, + EtlEvents::FLUSH => 128, + EtlEvents::SKIP => 128, + EtlEvents::STOP => 128, + EtlEvents::ROLLBACK => 128, + EtlEvents::END => 128, + ]; + + /** + * @var LoggerInterface + */ + private $logger; + + /** + * @var array + */ + private $logLevels; + /** + * @var array + */ + private $eventPriorities; + + /** + * LoggerRecipe constructor. 
+ */ + public function __construct(LoggerInterface $logger, array $logLevels = [], array $eventPriorities = []) + { + $this->logger = $logger; + $this->logLevels = \array_replace(self::DEFAULT_LOG_LEVELS, $logLevels); + $this->eventPriorities = \array_replace(self::DEFAULT_EVENT_PRIORITIES, $eventPriorities); + } + + /** + * @inheritDoc + */ + public function updateBuilder(EtlBuilder $builder): EtlBuilder + { + return $builder + ->onStart( + function (EtlEvent $event) { + $this->logger->log($this->getLogLevel($event), 'Starting ETL...'); + }, + $this->getPriority(EtlEvents::START) + ) + ->onExtract( + function (ItemEvent $event) { + $this->logger->log($this->getLogLevel($event), sprintf('Extracted %s.', $event->getKey())); + }, + $this->getPriority(EtlEvents::EXTRACT) + ) + ->onTransform( + function (ItemEvent $event) { + $this->logger->log($this->getLogLevel($event), sprintf('Transformed %s.', $event->getKey())); + }, + $this->getPriority(EtlEvents::TRANSFORM) + ) + ->onLoad( + function (ItemEvent $event) { + $this->logger->log($this->getLogLevel($event), sprintf('Loaded %s.', $event->getKey())); + }, + $this->getPriority(EtlEvents::LOAD) + ) + ->onSkip( + function (ItemEvent $event) { + $this->logger->log($this->getLogLevel($event), sprintf('Skipping item %s.', $event->getKey())); + }, + $this->getPriority(EtlEvents::SKIP) + ) + ->onStop( + function (ItemEvent $event) { + $this->logger->log($this->getLogLevel($event), sprintf('Stopping on item %s.', $event->getKey())); + }, + $this->getPriority(EtlEvents::STOP) + ) + ->onFlush( + function (FlushEvent $event) { + $this->logger->log($this->getLogLevel($event), sprintf('Flushed %d items.', $event->getCounter())); + }, + $this->getPriority(EtlEvents::FLUSH) + ) + ->onRollback( + function (RollbackEvent $event) { + $this->logger->log($this->getLogLevel($event), sprintf('Rollback %d items.', $event->getCounter())); + }, + $this->getPriority(EtlEvents::ROLLBACK) + ) + ->onEnd( + function (EndProcessEvent $event) { + 
$this->logger->log($this->getLogLevel($event), sprintf('ETL completed on %d items.', $event->getCounter())); + }, + $this->getPriority(EtlEvents::END) + ); + } + + /** + * @param EtlEvent $event + * @return string + */ + private function getLogLevel(EtlEvent $event): string + { + return $this->logLevels[$event->getName()] ?? LogLevel::INFO; + } + + /** + * @param EtlEvent $event + * @return int + */ + private function getPriority(string $eventName): int + { + return $this->eventPriorities[$eventName] ?? 128; + } +} diff --git a/src/Recipe/Recipe.php b/src/Recipe/Recipe.php new file mode 100644 index 0000000..97094f0 --- /dev/null +++ b/src/Recipe/Recipe.php @@ -0,0 +1,15 @@ +resolve(parent::__invoke($items, $extractor, $transformer, $loader)); - } - ); - return $promise; - } -} diff --git a/src/Runner/ETLRunner.php b/src/Runner/ETLRunner.php deleted file mode 100644 index 5cf861b..0000000 --- a/src/Runner/ETLRunner.php +++ /dev/null @@ -1,364 +0,0 @@ -logger = $logger ?? new NullLogger(); - $this->eventDispatcher = $eventDispatcher ?? 
new ETLEventDispatcher(); - } - - /** - * @inheritDoc - */ - public function __invoke(iterable $items, callable $extractor, callable $transformer = null, callable $loader) - { - - $this->start(); - - foreach ($items as $key => $value) { - try { - // Extract and create element - $element = $this->extract($extractor, $key, $value); - - if ($element->shouldSkip()) { - $this->skip($key); - continue; - } - if ($element->shouldStop()) { - $this->stop($key, $element); - break; - } - - // Transform element - $this->transform($transformer, $element); - - if ($element->shouldSkip()) { - $this->skip($key); - continue; - } - if ($element->shouldStop()) { - $this->stop($key, $element); - break; - } - - // Load element - $this->load($loader, $element); - - if ($element->shouldStop()) { - $this->stop($key, $element); - break; - } - - // Flush if necessary (the loader will decide) - $this->flush($loader, false); - } catch (ExtractionFailedException $exception) { // If extraction failed - - // We may prevent the ETL to flush. - if (false === $exception->shouldFlush()) { - $this->flush = false; - } - - // And/or we could stop right now. - if ($exception->shouldStop()) { - $this->stop($key); - break; - } - - // If the exception should be blocking, throw it - if (!$exception->shouldIgnore()) { - throw $exception; - } - } - } - - // Flush remaining data - $this->flush($loader, true); - - $this->end(); - } - - /** - * Shortcut to ETLEvents::AFTER_EXTRACT listener creation. - * - * @param callable $callback - */ - public function onExtract(callable $callback): void - { - $this->eventDispatcher->addListener(ETLEvents::AFTER_EXTRACT, $callback); - } - - /** - * Shortcut to ETLEvents::ON_EXTRACT_EXCEPTION listener creation. - * - * @param callable $callback - */ - public function onExtractException(callable $callback): void - { - $this->eventDispatcher->addListener(ETLEvents::ON_EXTRACT_EXCEPTION, $callback); - } - - /** - * Shortcut to ETLEvents::AFTER_TRANSFORM listener creation. 
- * - * @param callable $callback - */ - public function onTransform(callable $callback): void - { - $this->eventDispatcher->addListener(ETLEvents::AFTER_TRANSFORM, $callback); - } - - /** - * Shortcut to ETLEvents::ON_TRANSFORM_EXCEPTION listener creation. - * - * @param callable $callback - */ - public function onTransformException(callable $callback): void - { - $this->eventDispatcher->addListener(ETLEvents::ON_TRANSFORM_EXCEPTION, $callback); - } - - /** - * Shortcut to ETLEvents::AFTER_LOAD listener creation. - * - * @param callable $callback - */ - public function onLoad(callable $callback): void - { - $this->eventDispatcher->addListener(ETLEvents::AFTER_LOAD, $callback); - } - - /** - * Shortcut to ETLEvents::ON_LOAD_EXCEPTION listener creation. - * - * @param callable $callback - */ - public function onLoadException(callable $callback): void - { - $this->eventDispatcher->addListener(ETLEvents::ON_LOAD_EXCEPTION, $callback); - } - - /** - * Shortcut to ETLEvents::AFTER_FLUSH listener creation. - * - * @param callable $callback - */ - public function onFlush(callable $callback): void - { - $this->eventDispatcher->addListener(ETLEvents::AFTER_FLUSH, $callback); - } - - /** - * Shortcut to ETLEvents::ON_FLUSH_EXCEPTION listener creation. 
- * - * @param callable $callback - */ - public function onFlushException(callable $callback): void - { - $this->eventDispatcher->addListener(ETLEvents::ON_FLUSH_EXCEPTION, $callback); - } - - private function reset() - { - $this->start = 0.0; - $this->end = 0.0; - $this->flush = true; - } - - private function start() - { - $this->reset(); - $this->start = microtime(true); - $this->eventDispatcher->trigger(new ETLEvent(ETLEvents::START)); - $this->logger->info('Starting ETL...'); - } - - private function end() - { - $this->end = microtime(true); - $this->eventDispatcher->trigger(new ETLEvent(ETLEvents::END)); - $this->logger->info(sprintf('ETL completed in %ss', round($this->end - $this->start, 3))); - } - - /** - * @param callable|ExtractorInterface $extract - * @param $key - * @param $value - * @return ContextElementInterface - */ - private function extract(callable $extract, $key, $value): ContextElementInterface - { - $this->logger->info(sprintf('Extracting key %s...', $key)); - - try { - - /** - * @var ContextElementInterface $element - */ - $element = $extract($key, $value); - $this->eventDispatcher->trigger(new ContextElementEvent(ETLEvents::AFTER_EXTRACT, $element)); - } catch (\Throwable $exception) { - $extractionFailedException = new ExtractionFailedException( - $exception->getMessage(), - $exception->getCode(), - $exception - ); - $event = new ExtractExceptionEvent($extractionFailedException, $key, $value); - $this->eventDispatcher->trigger($event); - throw $extractionFailedException; - } - $this->logger->debug( - sprintf('Key %s extracted.', $key), - [ - 'id' => $element->getId(), - 'data' => $element->getData(), - ] - ); - - return $element; - } - - /** - * @param callable|TransformerInterface $transform - * @param ContextElementInterface $element - */ - private function transform(callable $transform = null, ContextElementInterface $element): void - { - if (null !== $transform) { - $identifier = $element->getId(); - 
$this->logger->info(sprintf('Transforming key %s...', $identifier)); - try { - $transform($element); - $this->eventDispatcher->trigger(new ContextElementEvent(ETLEvents::AFTER_TRANSFORM, $element)); - } catch (\Throwable $exception) { - $event = new ContextElementEvent(ETLEvents::ON_TRANSFORM_EXCEPTION, $element); - $event->setException($exception); - $this->eventDispatcher->trigger($event); // Event listeners may handle and remove the exception - if ($event->hasException()) { - throw $event->getException(); // Otherwise, throw it - } - } - $this->logger->debug( - sprintf('Key %s transformed.', $identifier), - [ - 'id' => $element->getId(), - 'data' => $element->getData(), - ] - ); - } - } - - /** - * @param callable|LoaderInterface $load - * @param ContextElementInterface $element - * @return $this - */ - private function load(callable $load, ContextElementInterface $element): self - { - $identifier = $element->getId(); - $this->logger->info(sprintf('Loading key %s...', $identifier)); - try { - $load($element); - $this->eventDispatcher->trigger(new ContextElementEvent(ETLEvents::AFTER_LOAD, $element)); - } catch (\Throwable $exception) { - $event = new ContextElementEvent(ETLEvents::ON_LOAD_EXCEPTION, $element); - $event->setException($exception); - $this->eventDispatcher->trigger($event); // Event listeners may handle and remove the exception - if ($event->hasException()) { - throw $event->getException(); // Otherwise, throw it - } - } - $this->logger->debug( - sprintf('Key %s loaded.', $identifier), - [ - 'id' => $element->getId(), - ] - ); - return $this; - } - - /** - * @param $loader - */ - private function flush($loader, bool $forceFlush = false): void - { - if ($this->shouldFlush() - && ($loader instanceof FlushableLoaderInterface - && (true === $forceFlush || $loader->shouldFlushAfterLoad())) - ) { - $this->logger->info('Flushing elements...'); - $loader->flush(); - $this->eventDispatcher->trigger(new ETLEvent(ETLEvents::AFTER_FLUSH)); - } - } - - /** - * 
@param $key - */ - private function skip($key) - { - $this->logger->info(sprintf('Skipping key %s...', $key)); - } - - /** - * @param $key - * @param ContextElementInterface|null $element - */ - private function stop($key, ContextElementInterface $element = null) - { - $this->logger->info(sprintf('Stopping on key %s...', $key)); - if (null !== $element) { - $this->flush = $element->shouldFlush(); - } - } - - /** - * @return bool - */ - private function shouldFlush(): bool - { - return $this->flush; - } -} diff --git a/src/Runner/ETLRunnerInterface.php b/src/Runner/ETLRunnerInterface.php deleted file mode 100644 index e14a8ad..0000000 --- a/src/Runner/ETLRunnerInterface.php +++ /dev/null @@ -1,18 +0,0 @@ -callable = $callable; + } + + /** + * @inheritDoc + */ + public function transform($value, $key, Etl $etl): \Generator + { + yield $key => ($this->callable)($value); + } +} diff --git a/src/Transformer/CallbackTransformer.php b/src/Transformer/CallbackTransformer.php deleted file mode 100644 index 8cd3535..0000000 --- a/src/Transformer/CallbackTransformer.php +++ /dev/null @@ -1,32 +0,0 @@ -callback = $callback; - } - - /** - * @inheritDoc - */ - public function __invoke(ContextElementInterface $element): void - { - $callback = $this->callback; - $element->setData($callback($element->getData())); - } -} diff --git a/src/Transformer/StepTransformer.php b/src/Transformer/StepTransformer.php deleted file mode 100644 index 3f03ad2..0000000 --- a/src/Transformer/StepTransformer.php +++ /dev/null @@ -1,109 +0,0 @@ -registerSteps($steps); - } - - /** - * Reset stop state - */ - public function reset(): void - { - $this->stop = false; - $this->stoppedSteps = []; - } - - /** - * Register steps, in the order they should be executed. 
- * - * @param array $steps - */ - public function registerSteps(array $steps): void - { - foreach ($steps as $step) { - if (!is_scalar($step)) { - throw new \InvalidArgumentException("Steps must be an array of scalar values."); - } - } - - $this->steps = $steps; - } - - /** - * Register one or several transformers for a particular step. - * - * @param $step - * @param callable|TransformerInterface $transformer - * @param int $priority - * @throws \InvalidArgumentException - */ - public function registerTransformer($step, callable $transformer, int $priority = 0): void - { - if (!in_array($step, $this->steps)) { - throw new \InvalidArgumentException(sprintf('Step "%s" is not registered.', $step)); - } - - $stack = $this->transformers[$step] ?? new TransformerStack(); - $stack->registerTransformer($transformer, $priority); - $this->transformers[$step] = $stack; - } - - /** - * @param null $step - * @throws \InvalidArgumentException - */ - public function stop($step = null) - { - if (null !== $step) { - if (!in_array($step, $this->steps)) { - throw new \InvalidArgumentException(sprintf('Step "%s" is not registered.', $step)); - } - if (null !== ($transformer = $this->getTransformerFor($step))) { - $transformer->stop(); - $this->stoppedSteps[] = $step; - } - } else { - $this->stop = true; - } - } - - /** - * @param $step - * @return TransformerStack|null - */ - public function getTransformerFor($step) - { - return $this->transformers[$step] ?? 
null; - } - - /** - * @inheritdoc - */ - public function __invoke(ContextElementInterface $element): void - { - $this->reset(); - foreach ($this->steps as $step) { - if (false === $this->stop - && false === in_array($step, $this->stoppedSteps, true) - && null !== ($transform = $this->getTransformerFor($step))) { - $transform($element); - } - } - } -} diff --git a/src/Transformer/TransformerInterface.php b/src/Transformer/TransformerInterface.php index 0cda655..516bc02 100644 --- a/src/Transformer/TransformerInterface.php +++ b/src/Transformer/TransformerInterface.php @@ -2,15 +2,18 @@ namespace BenTools\ETL\Transformer; -use BenTools\ETL\Context\ContextElementInterface; +use BenTools\ETL\Etl; interface TransformerInterface { /** - * Transforms data and hydrates element (should call $element->setData()) + * Transform $value. * - * @param ContextElementInterface $element + * @param $value + * @param $key + * @param Etl $etl + * @return \Generator - yield values to load */ - public function __invoke(ContextElementInterface $element): void; + public function transform($value, $key, Etl $etl): \Generator; } diff --git a/src/Transformer/TransformerStack.php b/src/Transformer/TransformerStack.php deleted file mode 100644 index aace7e1..0000000 --- a/src/Transformer/TransformerStack.php +++ /dev/null @@ -1,70 +0,0 @@ -registerTransformer($transformer); - } - } - - /** - * @param callable $transformer - * @param int $priority - */ - public function registerTransformer(callable $transformer, int $priority = 0): void - { - $this->transformers[] = [ - 'p' => $priority, - 'c' => $transformer, - ]; - } - - /** - * @inheritDoc - */ - public function getIterator() - { - usort($this->transformers, function ($a, $b) { - return $b['p'] <=> $a['p']; - }); - - foreach ($this->transformers as $transformer) { - yield $transformer['c']; - } - } - - /** - * Stops the transformer chain. 
- */ - public function stop() - { - $this->stop = true; - } - - /** - * @inheritDoc - */ - public function __invoke(ContextElementInterface $element): void - { - foreach ($this as $transform) { - if (false === $this->stop) { - $transform($element); - } - } - } -} diff --git a/tests/functions.php b/tests/functions.php new file mode 100644 index 0000000..6e449bb --- /dev/null +++ b/tests/functions.php @@ -0,0 +1,26 @@ +element = new ContextElement('foo', 'bar'); - } - - public function testInit() - { - $this->assertEquals('foo', $this->element->getId()); - $this->assertEquals('bar', $this->element->getData()); - $this->assertEquals(false, $this->element->shouldSkip()); - $this->assertEquals(false, $this->element->shouldStop()); - $this->assertEquals(false, $this->element->shouldFlush()); - } - - public function testChangeIdAndData() - { - $this->element->setId('bar'); - $this->element->setData('baz'); - $this->assertEquals('bar', $this->element->getId()); - $this->assertEquals('baz', $this->element->getData()); - $this->assertEquals(false, $this->element->shouldSkip()); - $this->assertEquals(false, $this->element->shouldStop()); - $this->assertEquals(false, $this->element->shouldFlush()); - } - - public function testSkip() - { - $this->element->skip(); - $this->assertEquals('foo', $this->element->getId()); - $this->assertEquals('bar', $this->element->getData()); - $this->assertEquals(true, $this->element->shouldSkip()); - $this->assertEquals(false, $this->element->shouldStop()); - $this->assertEquals(false, $this->element->shouldFlush()); - } - - public function testStopAndFlush() - { - $this->element->stop(true); - $this->assertEquals('foo', $this->element->getId()); - $this->assertEquals('bar', $this->element->getData()); - $this->assertEquals(false, $this->element->shouldSkip()); - $this->assertEquals(true, $this->element->shouldStop()); - $this->assertEquals(true, $this->element->shouldFlush()); - } - - public function testStopWithoutFlushing() - { - 
$this->element->stop(false); - $this->assertEquals('foo', $this->element->getId()); - $this->assertEquals('bar', $this->element->getData()); - $this->assertEquals(false, $this->element->shouldSkip()); - $this->assertEquals(true, $this->element->shouldStop()); - $this->assertEquals(false, $this->element->shouldFlush()); - } - - public function testFlush() - { - $this->element->flush(); - $this->assertEquals('foo', $this->element->getId()); - $this->assertEquals('bar', $this->element->getData()); - $this->assertEquals(false, $this->element->shouldSkip()); - $this->assertEquals(false, $this->element->shouldStop()); - $this->assertEquals(true, $this->element->shouldFlush()); - } -} diff --git a/tests/src/EtlBuilderTest.php b/tests/src/EtlBuilderTest.php new file mode 100644 index 0000000..00e387d --- /dev/null +++ b/tests/src/EtlBuilderTest.php @@ -0,0 +1,310 @@ +createEtl(); + } + + /** + * @test + * @expectedException \RuntimeException + * @expectedExceptionMessage The "flushEvery" option must be null or an integer > 0. 
+ */ + public function it_yells_on_invalid_flush_setting() + { + $builder = EtlBuilder::init()->loadInto( + function () { + + })->flushEvery(0); + $builder->createEtl(); + } + + /** + * @test + */ + public function it_builds_an_etl_object() + { + $builder = EtlBuilder::init( + null, + null, + function () { + + } + ); + $etl = $builder->createEtl(); + $this->assertInstanceOf(Etl::class, $etl); + } + + /** + * @test + */ + public function it_correctly_builds_an_etl_object() + { + $extractor = new class implements ExtractorInterface + { + public function extract($input, Etl $etl): iterable + { + return $input['foos']; + } + }; + + $transformer = new CallableTransformer('strtoupper'); + + $loader = new class implements LoaderInterface + { + public $initiated; + public $storage; + public $committed; + public $rollback; + + public function reset() + { + $this->initiated = false; + $this->storage = []; + $this->committed = false; + $this->rollback = false; + return $this; + } + + /** + * @inheritDoc + */ + public function init(): void + { + $this->initiated = true; + } + + /** + * @inheritDoc + */ + public function load(\Generator $items, $key, Etl $etl): void + { + foreach ($items as $item) { + $this->storage[] = $item; + } + } + + /** + * @inheritDoc + */ + public function commit(bool $partial): void + { + $this->committed = true; + } + + /** + * @inheritDoc + */ + public function rollback(): void + { + $this->rollback = true; + } + }; + + $etl = EtlBuilder::init($extractor, $transformer, $loader)->createEtl(); + + $data = [ + 'foos' => [ + 'foo', + 'bar', + ], + ]; + + $etl->process($data); + + $this->assertTrue($loader->initiated); + $this->assertTrue($loader->committed); + $this->assertEquals(['FOO', 'BAR'], $loader->storage); + + $loader = $loader->reset(); + $etl = EtlBuilder::init($extractor, function ($item, $key, Etl $etl) { + $etl->stopProcessing(true); + yield; + }, + $loader + )->createEtl(); + $etl->process($data); + + $this->assertTrue($loader->rollback); + 
+ } + + /** + * @test + */ + public function it_correctly_maps_events() + { + $data = ['foo']; + $calledEvents = []; + $logEvent = function (EtlEvent $event) use (&$calledEvents) { + $calledEvents[] = $event->getName(); + }; + $builder = EtlBuilder::init()->loadInto(new NullLoader()) + ->onStart($logEvent) + ->onExtract($logEvent) + ->onTransform($logEvent) + ->onLoad($logEvent) + ->onFlush($logEvent) + ->onSkip($logEvent) + ->onStop($logEvent) + ->onEnd($logEvent) + ->onRollback($logEvent) + ; + + $etl = $builder->createEtl(); + $etl->process($data); + + $this->assertEquals([ + EtlEvents::START, + EtlEvents::EXTRACT, + EtlEvents::TRANSFORM, + EtlEvents::LOAD, + EtlEvents::FLUSH, + EtlEvents::END, + ], $calledEvents); + } + + /** + * @test + */ + public function it_correctly_maps_skipping_events() + { + $data = ['foo', 'bar']; + $calledEvents = []; + $logEvent = function (EtlEvent $event) use (&$calledEvents) { + $calledEvents[] = $event->getName(); + }; + $builder = EtlBuilder::init()->loadInto(new NullLoader()) + ->onStart($logEvent) + ->onExtract( + function (ItemEvent $event) { + if ('foo' === $event->getItem()) { + $event->getEtl()->skipCurrentItem(); + } + }) + ->onExtract($logEvent) + ->onTransform($logEvent) + ->onLoad($logEvent) + ->onFlush($logEvent) + ->onSkip($logEvent) + ->onStop($logEvent) + ->onEnd($logEvent) + ->onRollback($logEvent) + ; + + $etl = $builder->createEtl(); + $etl->process($data); + + $this->assertEquals([ + EtlEvents::START, + EtlEvents::EXTRACT, + EtlEvents::SKIP, + EtlEvents::EXTRACT, + EtlEvents::TRANSFORM, + EtlEvents::LOAD, + EtlEvents::FLUSH, + EtlEvents::END, + ], $calledEvents); + } + + /** + * @test + */ + public function it_correctly_maps_stop_events() + { + $data = ['foo', 'bar']; + $calledEvents = []; + $logEvent = function (EtlEvent $event) use (&$calledEvents) { + $calledEvents[] = $event->getName(); + }; + $builder = EtlBuilder::init()->loadInto(new NullLoader()) + ->onStart($logEvent) + ->onExtract( + function 
(ItemEvent $event) { + if ('foo' === $event->getItem()) { + $event->getEtl()->stopProcessing(); + } + }) + ->onExtract($logEvent) + ->onTransform($logEvent) + ->onLoad($logEvent) + ->onFlush($logEvent) + ->onSkip($logEvent) + ->onStop($logEvent) + ->onEnd($logEvent) + ->onRollback($logEvent) + ; + + $etl = $builder->createEtl(); + $etl->process($data); + + $this->assertEquals([ + EtlEvents::START, + EtlEvents::EXTRACT, + EtlEvents::STOP, + EtlEvents::FLUSH, + EtlEvents::END, + ], $calledEvents); + } + + /** + * @test + */ + public function it_correctly_maps_rollback_events() + { + $data = ['foo', 'bar']; + $calledEvents = []; + $logEvent = function (EtlEvent $event) use (&$calledEvents) { + $calledEvents[] = $event->getName(); + }; + $builder = EtlBuilder::init()->loadInto(new NullLoader()) + ->onStart($logEvent) + ->onExtract( + function (ItemEvent $event) { + if ('foo' === $event->getItem()) { + $event->getEtl()->stopProcessing(true); + } + }) + ->onExtract($logEvent) + ->onTransform($logEvent) + ->onLoad($logEvent) + ->onFlush($logEvent) + ->onSkip($logEvent) + ->onStop($logEvent) + ->onEnd($logEvent) + ->onRollback($logEvent) + ; + + $etl = $builder->createEtl(); + $etl->process($data); + + $this->assertEquals([ + EtlEvents::START, + EtlEvents::EXTRACT, + EtlEvents::STOP, + EtlEvents::ROLLBACK, + EtlEvents::END, + ], $calledEvents); + } +} diff --git a/tests/src/EtlExceptionsTest.php b/tests/src/EtlExceptionsTest.php new file mode 100644 index 0000000..7c56502 --- /dev/null +++ b/tests/src/EtlExceptionsTest.php @@ -0,0 +1,165 @@ +loadInto(new NullLoader()) + ->transformWith( + function ($item) { + if ($item instanceof \RuntimeException) { + throw $item; + } + yield $item; + }) + ->createEtl() + ; + + $etl->process($data()); + } + + /** + * @test + */ + public function exception_can_be_processed_on_extract() + { + + $data = [ + 'foo', + new \RuntimeException('Something wrong happened.'), + 'bar', + ]; + + $extractor = function (iterable $items): iterable { + + 
foreach ($items as $item) { + if ($item instanceof \RuntimeException) { + throw $item; + } + yield $item; + } + }; + + $etl = EtlBuilder::init() + ->loadInto($loader = new ArrayLoader($preserveKeys = false)) + ->extractFrom($extractor) + ->onExtractException( + function (ItemExceptionEvent $event) { + $event->ignoreException(); + } + ) + ->onEnd( + function (EndProcessEvent $event) use (&$counter) { + $counter = $event->getCounter(); + } + ) + ->createEtl(); + + $etl->process($data); + $this->assertEquals(['foo'], $loader->getArray()); + $this->assertEquals(1, $counter); + } + + /** + * @test + */ + public function exception_can_be_processed_on_transform() + { + + $data = function () { + yield 'foo'; + yield 'bar'; + yield 'baz'; + }; + $counter = null; + $etl = EtlBuilder::init() + ->loadInto($loader = new ArrayLoader($preserveKeys = false)) + ->transformWith( + function ($item) { + if ('bar' === $item) { + throw new \RuntimeException('I don\'t like bar.'); + } + yield $item; + }) + ->onTransformException( + function (ItemExceptionEvent $event) { + $event->ignoreException(); + }) + ->onEnd( + function (EndProcessEvent $event) use (&$counter) { + $counter = $event->getCounter(); + }) + ->createEtl() + ; + + $etl->process($data()); + + $this->assertEquals(['foo', 'baz'], $loader->getArray()); + $this->assertEquals(2, $counter); + } + + /** + * @test + */ + public function exception_can_be_processed_on_load() + { + + $data = function () { + yield 'foo'; + yield 'bar'; + yield 'baz'; + }; + $counter = null; + $array = []; + $etl = EtlBuilder::init() + ->loadInto( + function (\Generator $items) use (&$array) { + foreach ($items as $item) { + if ('bar' === $item) { + throw new \RuntimeException('I don\'t like bar.'); + } + } + $array[] = $item; + }) + ->onLoadException( + function (ItemExceptionEvent $event) { + $event->ignoreException(); + }) + ->onEnd( + function (EndProcessEvent $event) use (&$counter) { + $counter = $event->getCounter(); + }) + ->createEtl() + ; + + 
$etl->process($data()); + + $this->assertEquals(['foo', 'baz'], $array); + $this->assertEquals(2, $counter); + } + +} diff --git a/tests/src/Event/ContextElementEventTest.php b/tests/src/Event/ContextElementEventTest.php deleted file mode 100644 index cd6030f..0000000 --- a/tests/src/Event/ContextElementEventTest.php +++ /dev/null @@ -1,31 +0,0 @@ -assertEquals('foo', $event->getName()); - $this->assertEquals('bar', $event->getElement()->getId()); - $this->assertEquals('baz', $event->getElement()->getData()); - return $event; - } - - /** - * @depends testEventName - */ - public function testStopPropagation(ContextElementEvent $event) - { - $this->assertEquals(false, $event->isPropagationStopped()); - $event->stopPropagation(); - $this->assertEquals(true, $event->isPropagationStopped()); - } - -} diff --git a/tests/src/Event/ETLEventTest.php b/tests/src/Event/ETLEventTest.php deleted file mode 100644 index f35359a..0000000 --- a/tests/src/Event/ETLEventTest.php +++ /dev/null @@ -1,31 +0,0 @@ -assertEquals('foo', $event->getName()); - return $event; - } - - /** - * @depends testEventName - */ - public function testStopPropagation(ETLEvent $event) - { - $this->assertEquals(false, $event->isPropagationStopped()); - $event->stopPropagation(); - $this->assertEquals(true, $event->isPropagationStopped()); - } - - - -} diff --git a/tests/src/Event/EventDispatcher/Bridge/SymfonyEventDispatcherBridgeTest.php b/tests/src/Event/EventDispatcher/Bridge/SymfonyEventDispatcherBridgeTest.php deleted file mode 100644 index bb4bb04..0000000 --- a/tests/src/Event/EventDispatcher/Bridge/SymfonyEventDispatcherBridgeTest.php +++ /dev/null @@ -1,84 +0,0 @@ -eventDispatcher = new SymfonyEventDispatcherBridge(); - } - - public function testWrappedDispatcher() - { - $this->assertInstanceOf(EventDispatcher::class, $this->eventDispatcher->getWrappedDispatcher()); - } - - public function testMagicCall() - { - $bar = function () {}; - $this->eventDispatcher->addListener('foo', $bar); - $listeners 
= $this->eventDispatcher->getListeners('foo'); - $this->assertInternalType('array', $listeners); - $this->assertCount(1, $listeners); - $this->assertArrayHasKey(0, $listeners); - $this->assertSame($bar, $listeners[0]); - } - - public function testTrigger() - { - $received = false; - $this->eventDispatcher->addListener('foo', function () use (&$received) { - $received = true; - }); - - // Test with another name to ensure the correct event is dispatched - $this->eventDispatcher->trigger(new ETLEvent('bar')); - $this->assertEquals(false, $received); - - // Test with the correct name - $this->eventDispatcher->trigger(new ETLEvent('foo')); - $this->assertEquals(true, $received); - } - - public function testPropagation() - { - $value = null; - $this->eventDispatcher->addListener('foo', function () use (&$value) { - $value = 'bar'; - }); - $this->eventDispatcher->addListener('foo', function () use (&$value) { - $value = 'baz'; - }); - - $this->eventDispatcher->trigger(new ETLEvent('foo')); - $this->assertEquals('baz', $value); - } - - public function testStopPropagation() - { - $value = null; - $this->eventDispatcher->addListener('foo', function (SymfonyEvent $event) use (&$value) { - $value = 'bar'; - $event->stopPropagation(); - }); - $this->eventDispatcher->addListener('foo', function () use (&$value) { - $value = 'baz'; - }); - - $this->eventDispatcher->trigger(new ETLEvent('foo')); - $this->assertEquals('bar', $value); - } - -} diff --git a/tests/src/Event/EventDispatcher/Bridge/SymfonyEventTest.php b/tests/src/Event/EventDispatcher/Bridge/SymfonyEventTest.php deleted file mode 100644 index 2740064..0000000 --- a/tests/src/Event/EventDispatcher/Bridge/SymfonyEventTest.php +++ /dev/null @@ -1,33 +0,0 @@ -assertEquals('foo', $event->getName()); - $this->assertEquals('bar', $event->getElement()->getId()); - $this->assertEquals('baz', $event->getElement()->getData()); - return $event; - } - - /** - * @depends testEventName - */ - public function 
testStopPropagation(SymfonyEvent $event) - { - $this->assertEquals(false, $event->isPropagationStopped()); - $event->stopPropagation(); - $this->assertEquals(true, $event->isPropagationStopped()); - } - -} diff --git a/tests/src/Event/EventDispatcher/ETLEventDispatcherTest.php b/tests/src/Event/EventDispatcher/ETLEventDispatcherTest.php deleted file mode 100644 index c12f589..0000000 --- a/tests/src/Event/EventDispatcher/ETLEventDispatcherTest.php +++ /dev/null @@ -1,66 +0,0 @@ -eventDispatcher = new ETLEventDispatcher(); - } - - public function testTrigger() - { - $received = false; - $this->eventDispatcher->addListener('foo', function () use (&$received) { - $received = true; - }); - - // Test with another name to ensure the correct event is dispatched - $this->eventDispatcher->trigger(new ETLEvent('bar')); - $this->assertEquals(false, $received); - - // Test with the correct name - $this->eventDispatcher->trigger(new ETLEvent('foo')); - $this->assertEquals(true, $received); - } - - public function testPropagation() - { - $value = null; - $this->eventDispatcher->addListener('foo', function () use (&$value) { - $value = 'bar'; - }); - $this->eventDispatcher->addListener('foo', function () use (&$value) { - $value = 'baz'; - }); - - $this->eventDispatcher->trigger(new ETLEvent('foo')); - $this->assertEquals('baz', $value); - } - - public function testStopPropagation() - { - $value = null; - $this->eventDispatcher->addListener('foo', function (ETLEvent $event) use (&$value) { - $value = 'bar'; - $event->stopPropagation(); - }); - $this->eventDispatcher->addListener('foo', function () use (&$value) { - $value = 'baz'; - }); - - $this->eventDispatcher->trigger(new ETLEvent('foo')); - $this->assertEquals('bar', $value); - } -} diff --git a/tests/src/EventDispatcher/EventDispatcherTest.php b/tests/src/EventDispatcher/EventDispatcherTest.php new file mode 100644 index 0000000..28d7e61 --- /dev/null +++ b/tests/src/EventDispatcher/EventDispatcherTest.php @@ -0,0 +1,129 @@ 
+dispatch($event); + + $this->assertEquals(['foo', 'bar'], $stack); + } + + /** + * @test + */ + public function it_knows_how_to_handle_priorities() + { + $event = new class extends EtlEvent + { + public function __construct() + { + parent::__construct(dummy_etl()); + } + + public function getName(): string + { + return 'foo'; + } + + }; + + $stack = []; + + $dispatcher = new EventDispatcher([ + new EventListener('foo', function () use (&$stack) { + $stack[] = 'foo'; + }, -50), + new EventListener('foo', function () use (&$stack) { + $stack[] = 'bar'; + }, 100), + new EventListener('foo', function () use (&$stack) { + $stack[] = 'baz'; + }), + ]); + + $dispatcher->dispatch($event); + + $this->assertEquals(['bar', 'baz', 'foo'], $stack); + } + + /** + * @test + */ + public function it_stops_propagation_when_asked_to() + { + $event = new class extends EtlEvent + { + public function __construct() + { + parent::__construct(dummy_etl()); + } + + public function getName(): string + { + return 'foo'; + } + + }; + + $stack = []; + + $dispatcher = new EventDispatcher([ + new EventListener('foo', function () use (&$stack) { + $stack[] = 'foo'; + }, -50), + new EventListener('foo', function () use (&$stack) { + $stack[] = 'bar'; + }, 100), + new EventListener('foo', function (EtlEvent $event) use (&$stack) { + $stack[] = 'baz'; + $event->stopPropagation(); + }), + ]); + + $dispatcher->dispatch($event); + + $this->assertEquals(['bar', 'baz'], $stack); + } + +} diff --git a/tests/src/Extractor/ArrayPropertyExtractorTest.php b/tests/src/Extractor/ArrayPropertyExtractorTest.php deleted file mode 100644 index 9d765f2..0000000 --- a/tests/src/Extractor/ArrayPropertyExtractorTest.php +++ /dev/null @@ -1,66 +0,0 @@ - 'ipsum', - 'bar' => 'baz', - 'dolor' => 'sit amet', - ]); - $this->assertInstanceOf(ContextElementInterface::class, $element); - $this->assertEquals('baz', $element->getId()); - $this->assertEquals([ - 'lorem' => 'ipsum', - 'dolor' => 'sit amet', - ], 
$element->getData()); - } - - public function testExtractorNotShift() - { - $extract = new ArrayPropertyExtractor('bar', false); - $element = $extract('foo', [ - 'lorem' => 'ipsum', - 'bar' => 'baz', - 'dolor' => 'sit amet', - ]); - $this->assertInstanceOf(ContextElementInterface::class, $element); - $this->assertEquals('baz', $element->getId()); - $this->assertEquals([ - 'lorem' => 'ipsum', - 'bar' => 'baz', - 'dolor' => 'sit amet', - ], $element->getData()); - } - - - /** - * @expectedException \RuntimeException - */ - public function testExtractorWithNonExistentProperty() - { - - $extract = new ArrayPropertyExtractor('bar'); - $extract('foo', []); - } - - /** - * @expectedException \RuntimeException - */ - public function testExtractorWithAnInvalidContextClass() - { - $context = new class() {}; - $class = get_class($context); - $extract = new ArrayPropertyExtractor('bar', true, $class); - $extract('foo', ['bar' => 'baz']); - } -} diff --git a/tests/src/Extractor/CallbackExtractorTest.php b/tests/src/Extractor/CallbackExtractorTest.php deleted file mode 100644 index af5cc3c..0000000 --- a/tests/src/Extractor/CallbackExtractorTest.php +++ /dev/null @@ -1,49 +0,0 @@ -getData(); - $this->assertEquals('foo', $element->getId()); - $element->setId($data->bar); - }; - $extract = new CallbackExtractor($callback); - $element = $extract('foo', (object) [ - 'lorem' => 'ipsum', - 'bar' => 'baz', - 'dolor' => 'sit amet', - ]); - $this->assertInstanceOf(ContextElementInterface::class, $element); - $this->assertEquals('baz', $element->getId()); - $this->assertEquals((object) [ - 'lorem' => 'ipsum', - 'bar' => 'baz', - 'dolor' => 'sit amet', - ], $element->getData()); - } - - /** - * @expectedException \RuntimeException - */ - public function testExtractorWithAnInvalidContextClass() - { - $callback = function (ContextElementInterface $element) { - $data = $element->getData(); - $this->assertEquals('foo', $element->getId()); - $element->setId($data->bar); - }; - $context = new 
class() {}; - $class = get_class($context); - $extract = new CallbackExtractor($callback, $class); - $element = $extract('foo', 'bar'); - } -} diff --git a/tests/src/Extractor/IncrementorExtractorTest.php b/tests/src/Extractor/IncrementorExtractorTest.php deleted file mode 100644 index 210ca7b..0000000 --- a/tests/src/Extractor/IncrementorExtractorTest.php +++ /dev/null @@ -1,46 +0,0 @@ -assertInstanceOf(ContextElementInterface::class, $element); - $this->assertEquals(0, $element->getId()); - $this->assertEquals('bar', $element->getData()); - - $element = $extract('bar', 'baz'); - $this->assertInstanceOf(ContextElementInterface::class, $element); - $this->assertEquals(1, $element->getId()); - $this->assertEquals('baz', $element->getData()); - - } - - public function testExtractorWithAnotherStartIndex() - { - $extract = new IncrementorExtractor(10); - $this->assertEquals(9, $extract->getIndex()); - - $element = $extract('foo', 'bar'); - $this->assertInstanceOf(ContextElementInterface::class, $element); - $this->assertEquals(10, $element->getId()); - $this->assertEquals('bar', $element->getData()); - - $element = $extract('bar', 'baz'); - $this->assertInstanceOf(ContextElementInterface::class, $element); - $this->assertEquals(11, $element->getId()); - $this->assertEquals('baz', $element->getData()); - - } - -} diff --git a/tests/src/Extractor/KeyValueExtractorTest.php b/tests/src/Extractor/KeyValueExtractorTest.php deleted file mode 100644 index 217030f..0000000 --- a/tests/src/Extractor/KeyValueExtractorTest.php +++ /dev/null @@ -1,67 +0,0 @@ -assertInstanceOf(ContextElementInterface::class, $element); - $this->assertInstanceOf(ContextElement::class, $element); - $this->assertEquals('foo', $element->getId()); - $this->assertEquals('bar', $element->getData()); - } - - public function testExtractorWithADifferentContextClass() - { - $context = new class() implements ContextElementInterface - { - public function setId($id): void {} - public function getId() {} - public 
function setData($data): void {} - public function getData() {} - public function skip(): void {} - public function stop(bool $flush = true): void {} - public function flush(): void {} - public function shouldSkip(): bool {} - public function shouldStop(): bool {} - public function shouldFlush(): bool {} - }; - - $class = get_class($context); - - // Check constructor - $extract = new KeyValueExtractor($class); - $element = $extract('foo', 'bar'); - $this->assertInstanceOf(ContextElementInterface::class, $element); - $this->assertNotInstanceOf(ContextElement::class, $element); - $this->assertInstanceOf($class, $element); - - // Check setter - $extract = new KeyValueExtractor(); - $extract->setClass($class); - $element = $extract('foo', 'bar'); - $this->assertInstanceOf(ContextElementInterface::class, $element); - $this->assertNotInstanceOf(ContextElement::class, $element); - $this->assertInstanceOf($class, $element); - } - - /** - * @expectedException \RuntimeException - */ - public function testExtractorWithAnInvalidContextClass() - { - $context = new class() {}; - $class = get_class($context); - $extract = new KeyValueExtractor($class); - $extract('foo', 'bar'); - } -} diff --git a/tests/src/Extractor/ObjectPropertyExtractorTest.php b/tests/src/Extractor/ObjectPropertyExtractorTest.php deleted file mode 100644 index a5ae1bc..0000000 --- a/tests/src/Extractor/ObjectPropertyExtractorTest.php +++ /dev/null @@ -1,49 +0,0 @@ - 'ipsum', - 'bar' => 'baz', - 'dolor' => 'sit amet', - ]); - $this->assertInstanceOf(ContextElementInterface::class, $element); - $this->assertEquals('baz', $element->getId()); - $this->assertEquals((object) [ - 'lorem' => 'ipsum', - 'bar' => 'baz', - 'dolor' => 'sit amet', - ], $element->getData()); - } - - /** - * @expectedException \RuntimeException - */ - public function testExtractorWithNonExistentProperty() - { - - $extract = new ObjectPropertyExtractor('bar'); - $extract('foo', new \stdClass()); - } - - /** - * @expectedException 
\RuntimeException - */ - public function testExtractorWithAnInvalidContextClass() - { - $context = new class() {}; - $class = get_class($context); - $extract = new ObjectPropertyExtractor('bar', $class); - $extract('foo', (object) ['bar' => 'baz']); - } -} diff --git a/tests/src/Iterator/CsvFileIteratorTest.php b/tests/src/Iterator/CsvFileIteratorTest.php index e493e58..943d6c9 100644 --- a/tests/src/Iterator/CsvFileIteratorTest.php +++ b/tests/src/Iterator/CsvFileIteratorTest.php @@ -8,8 +8,10 @@ class CsvFileIteratorTest extends TestCase { - - public function testIterator() + /** + * @test + */ + public function it_iterates() { $iterator = CsvFileIterator::createFromFilename(TestSuite::getDataFile('dictators.csv')); $this->assertCount(3, $iterator); diff --git a/tests/src/Iterator/CsvStringIteratorTest.php b/tests/src/Iterator/CsvStringIteratorTest.php index 5315e1a..aebd989 100644 --- a/tests/src/Iterator/CsvStringIteratorTest.php +++ b/tests/src/Iterator/CsvStringIteratorTest.php @@ -8,7 +8,10 @@ class CsvStringIteratorTest extends TestCase { - public function testIterator() + /** + * @test + */ + public function it_iterates() { $text = file_get_contents(TestSuite::getDataFile('dictators.csv')); $iterator = CsvStringIterator::createFromText($text); diff --git a/tests/src/Iterator/JsonIteratorTest.php b/tests/src/Iterator/JsonIteratorTest.php deleted file mode 100644 index 04de144..0000000 --- a/tests/src/Iterator/JsonIteratorTest.php +++ /dev/null @@ -1,72 +0,0 @@ - - [ - 'country' => 'USA', - 'name' => 'Donald Trump', - ], - 'russia' => - [ - 'country' => 'Russia', - 'name' => 'Vladimir Poutine', - ], - ]; - static::$expectedObject = [ - 'usa' => - (object) [ - 'country' => 'USA', - 'name' => 'Donald Trump', - ], - 'russia' => - (object) [ - 'country' => 'Russia', - 'name' => 'Vladimir Poutine', - ], - ]; - } - - public function testIteratorWithArrayIterator() - { - $json = new \ArrayIterator(json_decode(static::$jsonString, true)); - $iterator = new 
JsonIterator($json); - $this->assertEquals(static::$expectedArray, iterator_to_array($iterator)); - } - - public function testIteratorWithJsonString() - { - $json = static::$jsonString; - $iterator = new JsonIterator($json); - $this->assertEquals(static::$expectedArray, iterator_to_array($iterator)); - } - - public function testIteratorWithJsonArray() - { - $json = json_decode(static::$jsonString, true); - $iterator = new JsonIterator($json); - $this->assertEquals(static::$expectedArray, iterator_to_array($iterator)); - } - - public function testIteratorWithJsonObject() - { - $json = json_decode(static::$jsonString, false); - $iterator = new JsonIterator($json); - $this->assertEquals(static::$expectedObject, iterator_to_array($iterator)); - } -} diff --git a/tests/src/Loader/ArrayLoaderTest.php b/tests/src/Loader/ArrayLoaderTest.php index 3a4b2bd..daa54a5 100644 --- a/tests/src/Loader/ArrayLoaderTest.php +++ b/tests/src/Loader/ArrayLoaderTest.php @@ -2,11 +2,10 @@ namespace BenTools\ETL\Tests\Loader; -use BenTools\ETL\Extractor\KeyValueExtractor; -use BenTools\ETL\Runner\ETLRunner; -use PHPUnit\Framework\TestCase; - use BenTools\ETL\Loader\ArrayLoader; +use PHPUnit\Framework\TestCase; +use function BenTools\ETL\Tests\create_generator; +use function BenTools\ETL\Tests\dummy_etl; class ArrayLoaderTest extends TestCase { @@ -17,10 +16,11 @@ public function testLoader() 'foo' => 'bar', 'bar' => 'baz' ]; - $extractor = new KeyValueExtractor(); + $loader = new ArrayLoader(); - $run = new ETLRunner(); - $run($items, $extractor, null, $loader); - $this->assertEquals($loader->getArray(), $items); + foreach ($items as $key => $value) { + $loader->load(create_generator([$key => $value]), $key, dummy_etl()); + } + $this->assertEquals($items, $loader->getArray()); } } diff --git a/tests/src/Loader/CsvFileLoaderTest.php b/tests/src/Loader/CsvFileLoaderTest.php index 5ae7ff9..5a658c5 100644 --- a/tests/src/Loader/CsvFileLoaderTest.php +++ b/tests/src/Loader/CsvFileLoaderTest.php 
@@ -2,63 +2,33 @@ namespace BenTools\ETL\Tests\Loader; -use BenTools\ETL\Context\ContextElement; -use BenTools\ETL\Context\ContextElementInterface; -use BenTools\ETL\Extractor\IncrementorExtractor; -use BenTools\ETL\Iterator\JsonIterator; -use BenTools\ETL\Runner\ETLRunner; use BenTools\ETL\Loader\CsvFileLoader; -use BenTools\ETL\Tests\TestSuite; use PHPUnit\Framework\TestCase; use SplTempFileObject; +use function BenTools\ETL\Tests\create_generator; +use function BenTools\ETL\Tests\dummy_etl; class CsvFileLoaderTest extends TestCase { - public function testLoader() + public function testLoaderWithoutKeys() { - $keys = ['country', 'name']; - $items = new JsonIterator(file_get_contents(TestSuite::getDataFile('dictators.json'))); - $extractor = new IncrementorExtractor(); - $transformer = function (ContextElementInterface $element) { - $data = array_values($element->getData()); - $element->setData($data); - }; - $output = new SplTempFileObject(); - $loader = new CsvFileLoader($output, null, ',', '"', '\\', $keys); - $run = new ETLRunner(); - $run($items, $extractor, $transformer, $loader); + $file = new SplTempFileObject(); + $loader = new CsvFileLoader($file, '|'); + $data = [ + ['Bill', 'Clinton'], + ['Richard', 'Nixon'], + ]; - $compared = file_get_contents(TestSuite::getDataFile('dictators.csv')); + $loader->load(create_generator($data), null, dummy_etl()); - $output->rewind(); - $generated = implode(null, iterator_to_array($output)); - $this->assertSame($compared, $generated); - } - - public function testKeys() - { - // Test constructor - $keys = ['country', 'name']; - $output = new SplTempFileObject(); - $loader = new CsvFileLoader($output, null, ',', '"', '\\', $keys); - $this->assertEquals(['country', 'name'], $loader->getKeys()); + $file->rewind(); - // Test setter - $loader = $loader->setKeys(['foo', 'bar']); - $this->assertInstanceOf(CsvFileLoader::class, $loader); - $this->assertEquals(['foo', 'bar'], $loader->getKeys()); - } - - /** - * 
@expectedException \RuntimeException - */ - public function testSetKeysTooLate() - { + $expected = [ + 'Bill|Clinton' . PHP_EOL, + 'Richard|Nixon' . PHP_EOL, + ]; + $this->assertEquals($expected, iterator_to_array($file)); - $output = new SplTempFileObject(); - $loader = new CsvFileLoader($output); - $loader(new ContextElement('foo', ['bar', 'baz'])); - $loader->setKeys(['key1', 'key2']); } } diff --git a/tests/src/Loader/DebugLoaderTest.php b/tests/src/Loader/DebugLoaderTest.php deleted file mode 100644 index 3d7bbee..0000000 --- a/tests/src/Loader/DebugLoaderTest.php +++ /dev/null @@ -1,44 +0,0 @@ - 'bar', - 'bar' => 'baz' - ]; - $extractor = new KeyValueExtractor(); - $debug = null; - $loader = new DebugLoader([], function ($data) use (&$debug) { - $debug = [ - 'myData' => $data, - ]; - }); - $run = new ETLRunner(); - $run($items, $extractor, null, $loader); - $this->assertEquals([ - 'myData' => $items, - ], $debug); - } - - /** - * @expectedException \RuntimeException - */ - public function testInvalidCallable() - { - $load = new DebugLoader([], 'not_callable'); - $load(new ContextElement('foo', 'bar')); - $load->flush(); - } -} diff --git a/tests/src/Loader/DoctrineORMLoaderTest.php b/tests/src/Loader/DoctrineORMLoaderTest.php index b07466c..e754164 100644 --- a/tests/src/Loader/DoctrineORMLoaderTest.php +++ b/tests/src/Loader/DoctrineORMLoaderTest.php @@ -2,13 +2,9 @@ namespace BenTools\ETL\Tests\Loader; -use BenTools\ETL\Context\ContextElement; -use BenTools\ETL\Event\ContextElementEvent; -use BenTools\ETL\Event\ETLEvents; -use BenTools\ETL\Event\EventDispatcher\ETLEventDispatcher; -use BenTools\ETL\Extractor\KeyValueExtractor; use BenTools\ETL\Loader\DoctrineORMLoader; -use BenTools\ETL\Runner\ETLRunner; +use function BenTools\ETL\Tests\create_generator; +use function BenTools\ETL\Tests\dummy_etl; use Doctrine\Common\Persistence\ManagerRegistry; use Doctrine\Common\Persistence\ObjectManager; use Doctrine\Common\Persistence\ObjectRepository; @@ -29,7 +25,7 
@@ private function fakeEntity($id, $name) public function __construct($id, $name) { - $this->id = $id; + $this->id = $id; $this->name = $name; } @@ -56,7 +52,7 @@ private function fakeRepository($className): ObjectRepository $fakeRepository = new class($className) implements ObjectRepository { - private $storage = []; + private $storage = []; private $className = ''; public function __construct($className) @@ -116,7 +112,7 @@ private function fakeObjectManager($repositories): ObjectManager { private $repositories = []; - private $tmpStorage = []; + private $tmpStorage = []; public function __construct(array $repositories) { @@ -294,7 +290,7 @@ public function getManagerForClass($class) public function testFakeEntity() { - $entity = $this->fakeEntity('foo', 'bar'); + $entity = $this->fakeEntity('foo', 'bar'); $anotherEntity = $this->fakeEntity('bar', 'baz'); $this->assertEquals(get_class($entity), get_class($anotherEntity)); $this->assertEquals('foo', $entity->getId()); @@ -306,7 +302,7 @@ public function testFakeEntity() public function testFakeRepository() { - $entity = $this->fakeEntity('foo', 'bar'); + $entity = $this->fakeEntity('foo', 'bar'); $className = get_class($entity); $repository = $this->fakeRepository($className); @@ -322,10 +318,10 @@ public function testFakeRepository() public function testFakeObjectManager() { - $entity = $this->fakeEntity('foo', 'bar'); - $className = get_class($entity); + $entity = $this->fakeEntity('foo', 'bar'); + $className = get_class($entity); $repository = $this->fakeRepository($className); - $em = $this->fakeObjectManager([$className => $repository]); + $em = $this->fakeObjectManager([$className => $repository]); $this->assertNull($em->find($className, $entity->getId())); $this->assertFalse($em->contains($entity)); @@ -343,19 +339,19 @@ public function testFakeObjectManager() public function testFakeRegistry() { - $entity = $this->fakeEntity('foo', 'bar'); - $className = get_class($entity); + $entity = 
$this->fakeEntity('foo', 'bar'); + $className = get_class($entity); $repository = $this->fakeRepository($className); - $em = $this->fakeObjectManager([$className => $repository]); - $registry = $this->fakeManagerRegistry(['default' => $em]); + $em = $this->fakeObjectManager([$className => $repository]); + $registry = $this->fakeManagerRegistry(['default' => $em]); $this->assertSame($registry->getManagerForClass($className), $em); } public function testLoaderWithDefaultSettings() { - $entity = $this->fakeEntity('foo', 'bar'); + $entity = $this->fakeEntity('foo', 'bar'); $anotherEntity = $this->fakeEntity('bar', 'baz'); - $className = get_class($entity); + $className = get_class($entity); $registry = $this->fakeManagerRegistry( [ @@ -372,22 +368,26 @@ public function testLoaderWithDefaultSettings() $this->assertNull($repository->find($anotherEntity->getId())); // Try to load 1st entity. - $load = new DoctrineORMLoader($registry); - $load(new ContextElement($entity->getId(), $entity)); + $loader = new DoctrineORMLoader($registry); + $loader->load(create_generator([$entity->getId() => $entity]), $entity->getId(), dummy_etl()); + $loader->commit(false); + $this->assertTrue($em->contains($entity)); + $this->assertNotNull($repository->find($entity->getId())); $this->assertTrue($em->contains($entity)); $this->assertNotNull($repository->find($entity->getId())); // Try to load 2nd entity - $load(new ContextElement($anotherEntity->getId(), $anotherEntity)); + $loader->load(create_generator([$anotherEntity->getId() => $anotherEntity]), $anotherEntity->getId(), dummy_etl()); + $loader->commit(false); $this->assertTrue($em->contains($anotherEntity)); $this->assertNotNull($repository->find($anotherEntity->getId())); } public function testLoaderWithBufferedFlush() { - $entity = $this->fakeEntity('foo', 'bar'); + $entity = $this->fakeEntity('foo', 'bar'); $anotherEntity = $this->fakeEntity('bar', 'baz'); - $className = get_class($entity); + $className = get_class($entity); 
$registry = $this->fakeManagerRegistry( [ @@ -403,129 +403,34 @@ public function testLoaderWithBufferedFlush() $this->assertNull($repository->find($entity->getId())); $this->assertNull($repository->find($anotherEntity->getId())); - $eventDispatcher = new ETLEventDispatcher(); - $eventDispatcher->addListener(ETLEvents::AFTER_LOAD, function (ContextElementEvent $event) use ($em, $repository) { - $loadedEntity = $event->getElement()->getData(); - $this->assertTrue($em->contains($loadedEntity)); // After load, the entity should be present in the unit of work - $this->assertNull($repository->find($loadedEntity->getId())); // But it should not be flushed yet - }); - - // Init ETL - $entities = [ - $entity, - $anotherEntity, - ]; - $extract = new KeyValueExtractor(); - - // Test with constructor - $flushEvery = 2; - $load = new DoctrineORMLoader($registry, $flushEvery); - $run = new ETLRunner(null, $eventDispatcher); - - $run($entities, $extract, null, $load); - $this->assertTrue($em->contains($entity)); - $this->assertNotNull($repository->find($entity->getId())); - $this->assertTrue($em->contains($anotherEntity)); - $this->assertNotNull($repository->find($anotherEntity->getId())); + $loader = new DoctrineORMLoader($registry); + $loader->load(create_generator([$entity->getId() => $entity]), $entity->getId(), dummy_etl()); + $this->assertTrue($em->contains($entity)); // After load, the entity should be present in the unit of work + $this->assertNull($repository->find($entity->getId())); // But it should not be flushed yet - } - public function testLoaderWithBufferedFlushSetter() - { - $entity = $this->fakeEntity('foo', 'bar'); - $anotherEntity = $this->fakeEntity('bar', 'baz'); - $className = get_class($entity); + $loader->load(create_generator([$anotherEntity->getId() => $anotherEntity]), $anotherEntity->getId(), dummy_etl()); + $this->assertTrue($em->contains($anotherEntity)); // After load, the entity should be present in the unit of work + 
$this->assertNull($repository->find($anotherEntity->getId())); // But it should not be flushed yet - $registry = $this->fakeManagerRegistry( - [ - 'default' => $em = $this->fakeObjectManager([ - $className => $repository = $this->fakeRepository($className) - ]) - ] - ); - - // The storage should be empty - $this->assertFalse($em->contains($entity)); - $this->assertFalse($em->contains($anotherEntity)); - $this->assertNull($repository->find($entity->getId())); - $this->assertNull($repository->find($anotherEntity->getId())); + $loader->commit(false); - $eventDispatcher = new ETLEventDispatcher(); - $eventDispatcher->addListener(ETLEvents::AFTER_LOAD, function (ContextElementEvent $event) use ($em, $repository) { - $loadedEntity = $event->getElement()->getData(); - $this->assertTrue($em->contains($loadedEntity)); // After load, the entity should be present in the unit of work - $this->assertNull($repository->find($loadedEntity->getId())); // But it should not be flushed yet - }); - - // Init ETL - $entities = [ - $entity, - $anotherEntity, - ]; - $extract = new KeyValueExtractor(); - - // Test with constructor - $flushEvery = 2; - $load = new DoctrineORMLoader($registry); - $run = new ETLRunner(null, $eventDispatcher); - - $load->setFlushEvery($flushEvery); - $run($entities, $extract, null, $load); - $this->assertTrue($em->contains($entity)); + // Both entities should be flushed now $this->assertNotNull($repository->find($entity->getId())); - $this->assertTrue($em->contains($anotherEntity)); $this->assertNotNull($repository->find($anotherEntity->getId())); } - - public function testLoaderWillWaitForFlush() - { - $entity = $this->fakeEntity('foo', 'bar'); - $anotherEntity = $this->fakeEntity('bar', 'baz'); - $className = get_class($entity); - - $registry = $this->fakeManagerRegistry( - [ - 'default' => $em = $this->fakeObjectManager([ - $className => $repository = $this->fakeRepository($className) - ]) - ] - ); - - // The storage should be empty - 
$this->assertFalse($em->contains($entity)); - $this->assertFalse($em->contains($anotherEntity)); - $this->assertNull($repository->find($entity->getId())); - $this->assertNull($repository->find($anotherEntity->getId())); - - // Try to load 1st entity - it should not be flushed - $load = new DoctrineORMLoader($registry, 0); - $load(new ContextElement($entity->getId(), $entity)); - $this->assertTrue($em->contains($entity)); - $this->assertNull($repository->find($entity->getId())); - - // Try to load 2nd entity - it should not be flushed - $load(new ContextElement($anotherEntity->getId(), $anotherEntity)); - $this->assertTrue($em->contains($anotherEntity)); - $this->assertNull($repository->find($anotherEntity->getId())); - - // Now, flush manually - $load->flush(); - $this->assertNotNull($repository->find($entity->getId())); - $this->assertNotNull($repository->find($anotherEntity->getId())); - } - /** * @expectedException \InvalidArgumentException - * @expectedExceptionMessageRegExp #The transformed data should return an entity object.# + * @expectedExceptionMessageRegExp #The transformed data should return a generator of entities.# */ public function testInvalidData() { $registry = $this->fakeManagerRegistry([]); - $load = new DoctrineORMLoader($registry); - $load(new ContextElement('foo', ['bar'])); + $loader = new DoctrineORMLoader($registry); + $loader->load(create_generator(['foo' => 'bar']), null, dummy_etl()); } /** @@ -535,10 +440,9 @@ public function testInvalidData() public function testInvalidEntityManager() { $registry = $this->fakeManagerRegistry([]); - $load = new DoctrineORMLoader($registry); - $load(new ContextElement('foo', new \stdClass())); + $loader = new DoctrineORMLoader($registry); + $loader->load(create_generator([new \stdClass()]), null, dummy_etl()); } - } diff --git a/tests/src/Loader/FileLoaderTest.php b/tests/src/Loader/FileLoaderTest.php index 0310605..40f8514 100644 --- a/tests/src/Loader/FileLoaderTest.php +++ 
b/tests/src/Loader/FileLoaderTest.php @@ -5,6 +5,8 @@ use BenTools\ETL\Extractor\IncrementorExtractor; use BenTools\ETL\Runner\ETLRunner; use BenTools\ETL\Loader\FileLoader; +use function BenTools\ETL\Tests\create_generator; +use function BenTools\ETL\Tests\dummy_etl; use PHPUnit\Framework\TestCase; use SplTempFileObject; @@ -16,11 +18,12 @@ public function testLoader() 'foo' => 'bar', 'bar' => 'baz' ]; - $extractor = new IncrementorExtractor(); $file = new SplTempFileObject(); $loader = new FileLoader($file); - $run = new ETLRunner(); - $run($items, $extractor, null, $loader); + + foreach ($items as $key => $value) { + $loader->load(create_generator([$key => $value]), $key, dummy_etl()); + } $file->rewind(); $this->assertEquals(implode('', [ diff --git a/tests/src/Loader/FlushableLoaderExample.php b/tests/src/Loader/FlushableLoaderExample.php deleted file mode 100644 index 63bf537..0000000 --- a/tests/src/Loader/FlushableLoaderExample.php +++ /dev/null @@ -1,46 +0,0 @@ -flushEvery = $flushEvery; - } - - public function shouldFlushAfterLoad(): bool - { - return 0 !== $this->flushEvery // Otherwise we'll wait on an explicit flush() call - && 0 === (count($this->waitingElements) % $this->flushEvery); - } - - public function flush(): void - { - $this->flushedElements = array_merge($this->flushedElements, $this->waitingElements); - $this->waitingElements = []; - } - - public function __invoke(ContextElementInterface $element): void - { - $this->waitingElements[] = $element->getData(); - } - - public function getWaitingElements() - { - return $this->waitingElements; - } - - public function getFlushedElements() - { - return $this->flushedElements; - } - -} \ No newline at end of file diff --git a/tests/src/Loader/JsonFileLoaderTest.php b/tests/src/Loader/JsonFileLoaderTest.php index c2b1fb3..5f392e8 100644 --- a/tests/src/Loader/JsonFileLoaderTest.php +++ b/tests/src/Loader/JsonFileLoaderTest.php @@ -11,6 +11,8 @@ use BenTools\ETL\Iterator\CsvFileIterator; use 
BenTools\ETL\Loader\JsonFileLoader; use BenTools\ETL\Runner\ETLRunner; +use function BenTools\ETL\Tests\create_generator; +use function BenTools\ETL\Tests\dummy_etl; use BenTools\ETL\Tests\TestSuite; use PHPUnit\Framework\TestCase; use SplFileObject; @@ -21,42 +23,19 @@ class JsonFileLoaderTest extends TestCase public function testLoader() { - - $keys = []; - $eventDispatcher = new ETLEventDispatcher(); - $eventDispatcher->addListener(ETLEvents::AFTER_EXTRACT, function (ContextElementEvent $event) use (&$keys) { - if (empty($keys)) { - $contextElement = $event->getElement(); - $keys = array_values($contextElement->getData()); - $contextElement->skip(); - } - }); - $items = new CsvFileIterator(new SplFileObject(TestSuite::getDataFile('dictators.csv'))); - $extractor = new IncrementorExtractor(); - $transformer = function (ContextElementInterface $element) use (&$keys) { - $data = array_combine($keys, $element->getData()); - $element->setData($data); - $element->setId(strtolower($data['country'])); - }; - $output = new SplTempFileObject(); - $loader = new JsonFileLoader($output, JSON_PRETTY_PRINT); - $run = new ETLRunner(null, $eventDispatcher); - $run($items, $extractor, $transformer, $loader); - - $compared = file_get_contents(TestSuite::getDataFile('dictators.json')); - - $output->rewind(); - $generated = implode(null, iterator_to_array($output)); - $this->assertSame($compared, $generated); + $file = new SplTempFileObject(); + $loader = new JsonFileLoader($file); + $data = ['foo', 'bar']; + foreach ($data as $key => $value) { + $loader->load(create_generator([$key => $value]), $key, dummy_etl()); + } + $loader->commit(false); + $file->rewind(); + $content = ''; + while (!$file->eof()) { + $content .= $file->fgets(); + } + $this->assertEquals(json_encode($data), trim($content)); } - /** - * @expectedException \RuntimeException - */ - public function testFileNotWritableShouldThrowException() - { - $output = new SplFileObject('foo.txt', 'r'); - $load = new 
JsonFileLoader($output); - $load(new ContextElement('foo', ['bar' => 'baz'])); - } } diff --git a/tests/src/Recipe/LoggerRecipeTest.php b/tests/src/Recipe/LoggerRecipeTest.php new file mode 100644 index 0000000..39d916b --- /dev/null +++ b/tests/src/Recipe/LoggerRecipeTest.php @@ -0,0 +1,166 @@ +createLogger(); + $builder = EtlBuilder::init() + ->loadInto(new NullLoader()) + ->useRecipe(new LoggerRecipe($logger)); + $etl = $builder->createEtl(); + $etl->process([ + 'foo' => 'bar', + 'bar' => 'baz', + ]); + + $expected = [ + 'Starting ETL...', + 'Extracted foo.', + 'Transformed foo.', + 'Loaded foo.', + 'Extracted bar.', + 'Transformed bar.', + 'Loaded bar.', + 'Flushed 2 items.', + 'ETL completed on 2 items.', + ]; + + $this->assertEquals($expected, $logger->stack); + } + + /** + * @test + */ + public function it_also_logs_skipping_items() + { + $logger = $this->createLogger(); + $builder = EtlBuilder::init() + ->loadInto(new NullLoader()) + ->onExtract( + function (ItemEvent $event) { + if ('foo' === $event->getKey()) { + $event->getEtl()->skipCurrentItem(); + } + }) + ->useRecipe(new LoggerRecipe($logger)) + ; + $etl = $builder->createEtl(); + $etl->process([ + 'foo' => 'bar', + 'bar' => 'baz', + ]); + + $expected = [ + 'Starting ETL...', + 'Extracted foo.', + 'Skipping item foo.', + 'Extracted bar.', + 'Transformed bar.', + 'Loaded bar.', + 'Flushed 1 items.', + 'ETL completed on 1 items.', + ]; + + $this->assertEquals($expected, $logger->stack); + } + + /** + * @test + */ + public function it_also_logs_stop_event() + { + $logger = $this->createLogger(); + $builder = EtlBuilder::init() + ->loadInto(new NullLoader()) + ->onExtract( + function (ItemEvent $event) { + if ('foo' === $event->getKey()) { + $event->getEtl()->stopProcessing(); + } + }) + ->useRecipe(new LoggerRecipe($logger)) + ; + $etl = $builder->createEtl(); + $etl->process([ + 'foo' => 'bar', + 'bar' => 'baz', + ]); + + $expected = [ + 'Starting ETL...', + 'Extracted foo.', + 'Stopping on item foo.', 
+ 'Flushed 0 items.', + 'ETL completed on 0 items.', + ]; + + $this->assertEquals($expected, $logger->stack); + } + + /** + * @test + */ + public function it_also_logs_rollback_event() + { + $logger = $this->createLogger(); + $builder = EtlBuilder::init() + ->loadInto(new NullLoader()) + ->onExtract( + function (ItemEvent $event) { + if ('foo' === $event->getKey()) { + $event->getEtl()->stopProcessing(true); + } + }) + ->useRecipe(new LoggerRecipe($logger)) + ; + $etl = $builder->createEtl(); + $etl->process([ + 'foo' => 'bar', + 'bar' => 'baz', + ]); + + $expected = [ + 'Starting ETL...', + 'Extracted foo.', + 'Stopping on item foo.', + 'Rollback 0 items.', + 'ETL completed on 0 items.', + ]; + + $this->assertEquals($expected, $logger->stack); + } + + private function createLogger(): LoggerInterface + { + return new class implements LoggerInterface + { + public $stack = []; + public function emergency($message, array $context = []) {} + public function alert($message, array $context = []) {} + public function critical($message, array $context = []) {} + public function error($message, array $context = []) {} + public function warning($message, array $context = []) {} + public function notice($message, array $context = []) {} + public function info($message, array $context = []) {} + public function debug($message, array $context = []) {} + public function log($level, $message, array $context = []) + { + $this->stack[] = $message; + } + }; + } + +} diff --git a/tests/src/Runner/AsynchronousRunnerTest.php b/tests/src/Runner/AsynchronousRunnerTest.php deleted file mode 100644 index 4631a57..0000000 --- a/tests/src/Runner/AsynchronousRunnerTest.php +++ /dev/null @@ -1,33 +0,0 @@ -assertInstanceOf(PromiseInterface::class, $promise); - $this->assertEquals([], $load->getArray()); - - $promise->wait(); - - $this->assertEquals(PromiseInterface::FULFILLED, $promise->getState()); - $this->assertEquals(['FOO', 'BAR'], $load->getArray()); - } -} diff --git 
a/tests/src/Runner/ETLRunnerExtractExceptionsTest.php b/tests/src/Runner/ETLRunnerExtractExceptionsTest.php deleted file mode 100644 index 3992e18..0000000 --- a/tests/src/Runner/ETLRunnerExtractExceptionsTest.php +++ /dev/null @@ -1,110 +0,0 @@ -runner = new ETLRunner(); - $this->loader = new FlushableLoaderExample(); - $this->scenario = function () { - $data = [ - 'foo', - 'bar', - 'baz', - ]; - - $extract = new class extends IncrementorExtractor - { - public function __invoke($key, $value): ContextElementInterface - { - if ('bar' === $value) { - throw new \RuntimeException(); - } - return parent::__invoke($key, $value); - } - - }; - $transform = function (ContextElementInterface $element) { - return $element; - }; - $load = $this->loader; - $run = $this->runner; - $run($data, $extract, $transform, $load); - }; - } - - /** - * @expectedException \BenTools\ETL\Exception\ExtractionFailedException - */ - public function testExtractExceptionIsThrown() - { - $runScenario = $this->scenario; - $runScenario(); - } - - public function testExtractExceptionIsCaughtAndIgnored() - { - $runScenario = $this->scenario; - $runner = $this->runner; - $runner->onExtractException(function (ExtractExceptionEvent $event) { - $event->ignore(true); - }); - $runScenario(); - $this->assertEquals(['foo', 'baz'], array_values($this->loader->getFlushedElements())); - } - - public function testExtractExceptionIsCaughtAndETLIsStopped() - { - $runScenario = $this->scenario; - $runner = $this->runner; - $runner->onExtractException(function (ExtractExceptionEvent $event) { - $event->ignore(true); - $event->stop(true); - }); - $runScenario(); - $this->assertEquals(['foo'], array_values($this->loader->getFlushedElements())); - } - - public function testExtractExceptionIsCaughtAndETLIsStoppedAndNothingIsFlushed() - { - $runScenario = $this->scenario; - $runner = $this->runner; - $this->loader->setFlushEvery(5); - $runner->onExtractException(function (ExtractExceptionEvent $event) { - 
$event->ignore(true); - $event->stop(true, false); - }); - $runScenario(); - $this->assertEquals([], array_values($this->loader->getFlushedElements())); - } - - -} diff --git a/tests/src/Runner/ETLRunnerLoadExceptionsTest.php b/tests/src/Runner/ETLRunnerLoadExceptionsTest.php deleted file mode 100644 index e5fc21b..0000000 --- a/tests/src/Runner/ETLRunnerLoadExceptionsTest.php +++ /dev/null @@ -1,111 +0,0 @@ -runner = new ETLRunner(); - $this->loader = new class extends FlushableLoaderExample - { - public function __invoke(ContextElementInterface $element): void - { - if ('bar' === $element->getData()) { - throw new \RuntimeException(); - } - parent::__invoke($element); - } - - }; - $this->scenario = function () { - $data = [ - 'foo', - 'bar', - 'baz', - ]; - - $extract = new IncrementorExtractor(); - $transform = function (ContextElementInterface $element) { - return $element; - }; - $load = $this->loader; - $run = $this->runner; - $run($data, $extract, $transform, $load); - }; - } - - /** - * @expectedException \RuntimeException - */ - public function testLoadExceptionIsThrown() - { - $runScenario = $this->scenario; - $runScenario(); - } - - public function testLoadExceptionIsCaughtAndSkipped() - { - $runScenario = $this->scenario; - $runner = $this->runner; - $runner->onLoadException(function (ContextElementEvent $event) { - $event->getElement()->skip(); // Useless at that point - because an exception occured, the element could no be loaded. 
- $event->setException(null); - }); - $runScenario(); - $this->assertEquals(['foo', 'baz'], array_values($this->loader->getFlushedElements())); - } - - public function testLoadExceptionIsCaughtAndSkippedAndETLIsStopped() - { - $runScenario = $this->scenario; - $runner = $this->runner; - $runner->onLoadException(function (ContextElementEvent $event) { - $event->getElement()->stop(); - $event->setException(null); - }); - $runScenario(); - $this->assertEquals(['foo'], array_values($this->loader->getFlushedElements())); - } - - public function testLoadExceptionIsCaughtAndSkippedAndETLIsStoppedAndNothingIsFlushed() - { - $runScenario = $this->scenario; - $runner = $this->runner; - $this->loader->setFlushEvery(5); - $runner->onLoadException(function (ContextElementEvent $event) { - $event->getElement()->stop(false); - $event->setException(null); - }); - $runScenario(); - $this->assertEquals([], array_values($this->loader->getFlushedElements())); - } - - -} diff --git a/tests/src/Runner/ETLRunnerTest.php b/tests/src/Runner/ETLRunnerTest.php deleted file mode 100644 index 1a3cd23..0000000 --- a/tests/src/Runner/ETLRunnerTest.php +++ /dev/null @@ -1,378 +0,0 @@ -getData(); - $element->setData(implode('|', [ - $data['country_code'], - $data['periods'][0]['effective_from'], - $data['periods'][0]['rates']['standard'], - ])); - }; - $loader = function (ContextElementInterface $element) use (&$output) { - $output[$element->getId()] = $element->getData(); - }; - - $run = new ETLRunner(); - $run($items, $extractor, $transformer, $loader); - $this->assertCount(count($output), $items); - $this->assertSame('HU|0000-01-01|27', $output[2]); - return $output; - } - - /** - * @depends testSimpleETL - */ - public function testETLWithFlushableLoader($input) - { - $items = json_decode(file_get_contents(TestSuite::getDataFile('vat.json')), true)['rates']; - $extractor = new IncrementorExtractor(); - $transformer = function (ContextElementInterface $element) { - $data = $element->getData(); - 
$element->setData(implode('|', [ - $data['country_code'], - $data['periods'][0]['effective_from'], - $data['periods'][0]['rates']['standard'], - ])); - }; - $flushEvery = 5; - $loader = new class($flushEvery) implements FlushableLoaderInterface - { - - private $tmp = []; - private $output = []; - private $counter = 0; - private $nbFlush = 0; - private $flushEvery; - - public function __construct(int $flushEvery) - { - $this->flushEvery = $flushEvery; - } - - public function shouldFlushAfterLoad(): bool - { - return 0 === ($this->counter % $this->flushEvery); - } - - public function flush(): void - { - $this->output = array_replace($this->output, $this->tmp); - $this->tmp = []; - $this->nbFlush++; - } - - public function getOutput() - { - return $this->output; - } - - public function getNbFlush() - { - return $this->nbFlush; - } - - public function __invoke(ContextElementInterface $element): void - { - $this->tmp[$element->getId()] = $element->getData(); - $this->counter++; - } - }; - $run = new ETLRunner(); - $run($items, $extractor, $transformer, $loader); - - $this->assertCount(count($loader->getOutput()), $items); - $this->assertSame('HU|0000-01-01|27', $loader->getOutput()[2]); - $this->assertSame($loader->getOutput(), $input); - $this->assertSame($loader->getNbFlush(), (int) ceil(count($items) / $flushEvery)); - } - - /** - * Skip 1 item - * @depends testSimpleETL - */ - public function testSkip($input) - { - $items = json_decode(file_get_contents(TestSuite::getDataFile('vat.json')), true)['rates']; - $extractor = new IncrementorExtractor(); - $transformer = function (ContextElementInterface $element) { - $data = $element->getData(); - $element->setData(implode('|', [ - $data['country_code'], - $data['periods'][0]['effective_from'], - $data['periods'][0]['rates']['standard'], - ])); - if ($data['code'] == 'PL') { - $element->skip(); - } - }; - $flushEvery = 5; - $loader = new class($flushEvery) implements FlushableLoaderInterface - { - - private $tmp = []; - 
private $output = []; - private $counter = 0; - private $nbFlush = 0; - private $flushEvery; - - public function __construct(int $flushEvery) - { - $this->flushEvery = $flushEvery; - } - - public function flush(): void - { - $this->output = array_replace($this->output, $this->tmp); - $this->tmp = []; - $this->nbFlush++; - } - - public function shouldFlushAfterLoad(): bool - { - return 0 === ($this->counter % $this->flushEvery); - } - - public function getOutput() - { - return $this->output; - } - - public function getNbFlush() - { - return $this->nbFlush; - } - - public function __invoke(ContextElementInterface $element): void - { - $this->tmp[$element->getId()] = $element->getData(); - $this->counter++; - } - }; - $run = new ETLRunner(); - $run($items, $extractor, $transformer, $loader); - - $this->assertSame(count($loader->getOutput()), count($items) - 1); - $this->assertSame('HU|0000-01-01|27', $loader->getOutput()[2]); - $this->assertSame($loader->getNbFlush(), (int) ceil((count($items) - 1) / $flushEvery)); - } - - /** - * Abort and flush - * @depends testSimpleETL - */ - public function testAbortAndFlush($input) - { - $items = json_decode(file_get_contents(TestSuite::getDataFile('vat.json')), true)['rates']; - $extractor = new IncrementorExtractor(); - $transformer = function (ContextElementInterface $element) { - $data = $element->getData(); - $element->setData(implode('|', [ - $data['country_code'], - $data['periods'][0]['effective_from'], - $data['periods'][0]['rates']['standard'], - ])); - if ($data['code'] == 'FR') { - $element->stop(true); - } - }; - $flushEvery = 5; - $loader = new class($flushEvery) implements FlushableLoaderInterface - { - - private $tmp = []; - private $output = []; - private $counter = 0; - private $nbFlush = 0; - private $flushEvery; - - public function __construct(int $flushEvery) - { - $this->flushEvery = $flushEvery; - } - - public function flush(): void - { - $this->output = array_replace($this->output, $this->tmp); - 
$this->tmp = []; - $this->nbFlush++; - } - - public function shouldFlushAfterLoad(): bool - { - return 0 === ($this->counter % $this->flushEvery); - } - - public function getOutput() - { - return $this->output; - } - - public function getNbFlush() - { - return $this->nbFlush; - } - - public function __invoke(ContextElementInterface $element): void - { - $this->tmp[$element->getId()] = $element->getData(); - $this->counter++; - } - }; - $run = new ETLRunner(); - $run($items, $extractor, $transformer, $loader); - - $this->assertCount(6, $loader->getOutput()); - } - - /** - * Abort and flush - * @depends testSimpleETL - */ - public function testAbortAndDoNotFlush($input) - { - $items = json_decode(file_get_contents(TestSuite::getDataFile('vat.json')), true)['rates']; - $extractor = new IncrementorExtractor(); - $transformer = function (ContextElementInterface $element) { - $data = $element->getData(); - $element->setData(implode('|', [ - $data['country_code'], - $data['periods'][0]['effective_from'], - $data['periods'][0]['rates']['standard'], - ])); - if ($data['code'] == 'FR') { - $element->stop(false); - } - }; - $flushEvery = 5; - $loader = new class($flushEvery) implements FlushableLoaderInterface - { - - private $tmp = []; - private $output = []; - private $counter = 0; - private $nbFlush = 0; - private $flushEvery; - - public function __construct(int $flushEvery) - { - $this->flushEvery = $flushEvery; - } - - public function flush(): void - { - $this->output = array_replace($this->output, $this->tmp); - $this->tmp = []; - $this->nbFlush++; - } - - public function shouldFlushAfterLoad(): bool - { - return 0 === ($this->counter % $this->flushEvery); - } - - public function getOutput() - { - return $this->output; - } - - public function getNbFlush() - { - return $this->nbFlush; - } - - public function __invoke(ContextElementInterface $element): void - { - $this->tmp[$element->getId()] = $element->getData(); - $this->counter++; - } - }; - $run = new ETLRunner(); - 
$run($items, $extractor, $transformer, $loader); - - $this->assertCount(5, $loader->getOutput()); - } - - public function testTransformerCanBeOmitted() - { - $items = json_decode(file_get_contents(TestSuite::getDataFile('vat.json')), true)['rates']; - $extractor = new IncrementorExtractor(); - $loader = new ArrayLoader(); - $run = new ETLRunner(); - $run($items, $extractor, null, $loader); - $result = $loader->getArray(); - $this->assertCount(count($result), $items); - $this->assertArrayHasKey(0, $result); - $this->assertSame([ - 'name' => 'Germany', - 'code' => 'DE', - 'country_code' => 'DE', - 'periods' => [ - [ - 'effective_from' => '0000-01-01', - 'rates' => [ - 'reduced' => 7.0, - 'standard' => 19.0, - ] - ] - ] - ], $result[0]); - } - - public function testBuiltInEventDispatcher() - { - $items = [ - 'foo', - 'bar', - 'baz', - ]; - $extract = new IncrementorExtractor(); - $doNothing = $transform = function () {}; - $load = new DebugLoader([], $doNothing); - $run = new ETLRunner(); - - $extractEventReceived = $transformEventReceived = $loadEventReceived = $flushEventReceived = false; - - $run->onExtract(function () use (&$extractEventReceived) { - $extractEventReceived = true; - }); - - $run->onTransform(function () use (&$transformEventReceived) { - $transformEventReceived = true; - }); - - $run->onLoad(function () use (&$loadEventReceived) { - $loadEventReceived = true; - }); - - $run->onFlush(function () use (&$flushEventReceived) { - $flushEventReceived = true; - }); - - $run($items, $extract, $transform, $load); - - $this->assertTrue($extractEventReceived); - $this->assertTrue($transformEventReceived); - $this->assertTrue($loadEventReceived); - $this->assertTrue($flushEventReceived); - } -} diff --git a/tests/src/Runner/ETLRunnerTransformExceptionsTest.php b/tests/src/Runner/ETLRunnerTransformExceptionsTest.php deleted file mode 100644 index 50ed5a4..0000000 --- a/tests/src/Runner/ETLRunnerTransformExceptionsTest.php +++ /dev/null @@ -1,104 +0,0 @@ -runner 
= new ETLRunner(); - $this->loader = new FlushableLoaderExample(); - $this->scenario = function () { - $data = [ - 'foo', - 'bar', - 'baz', - ]; - - $extract = new IncrementorExtractor(); - $transform = function (ContextElementInterface $element) { - if ('bar' === $element->getData()) { - throw new \RuntimeException(); - } - return $element; - }; - $load = $this->loader; - $run = $this->runner; - $run($data, $extract, $transform, $load); - }; - } - - /** - * @expectedException \RuntimeException - */ - public function testTransformExceptionIsThrown() - { - $runScenario = $this->scenario; - $runScenario(); - } - - public function testTransformExceptionIsCaughtAndSkipped() - { - $runScenario = $this->scenario; - $runner = $this->runner; - $runner->onTransformException(function (ContextElementEvent $event) { - $event->getElement()->skip(); - $event->setException(null); - }); - $runScenario(); - $this->assertEquals(['foo', 'baz'], array_values($this->loader->getFlushedElements())); - } - - public function testTransformExceptionIsCaughtAndSkippedAndETLIsStopped() - { - $runScenario = $this->scenario; - $runner = $this->runner; - $runner->onTransformException(function (ContextElementEvent $event) { - $event->getElement()->stop(); - $event->setException(null); - }); - $runScenario(); - $this->assertEquals(['foo'], array_values($this->loader->getFlushedElements())); - } - - public function testTransformExceptionIsCaughtAndSkippedAndETLIsStoppedAndNothingIsFlushed() - { - $runScenario = $this->scenario; - $runner = $this->runner; - $this->loader->setFlushEvery(5); - $runner->onTransformException(function (ContextElementEvent $event) { - $event->getElement()->stop(false); - $event->setException(null); - }); - $runScenario(); - $this->assertEquals([], array_values($this->loader->getFlushedElements())); - } - - -} diff --git a/tests/src/Transformer/CallableTransformerTest.php b/tests/src/Transformer/CallableTransformerTest.php new file mode 100644 index 0000000..2aeddeb --- 
/dev/null +++ b/tests/src/Transformer/CallableTransformerTest.php @@ -0,0 +1,20 @@ +transform($item, 0, dummy_etl()); + $this->assertSame('caps are hell', \iterator_to_array($transformed)[0]); + + } +} diff --git a/tests/src/Transformer/CallbackTransformerTest.php b/tests/src/Transformer/CallbackTransformerTest.php deleted file mode 100644 index e19e850..0000000 --- a/tests/src/Transformer/CallbackTransformerTest.php +++ /dev/null @@ -1,37 +0,0 @@ -assertSame('123e4567-e89b-12d3-a456-426655440000', $element->getId()); - $this->assertSame('caps are hell', $element->getData()); - } - - public function testTransformerByClosure() - { - $element = new ContextElement('123e4567-e89b-12d3-a456-426655440000', ['WTF' => 'CAPS ARE HELL']); - $transform = new CallbackTransformer(function ($arrayOfStrings) { - return array_map(function ($string) { - return strtolower($string); - }, $arrayOfStrings); - }); - - $transform($element); - - $this->assertSame('123e4567-e89b-12d3-a456-426655440000', $element->getId()); - $this->assertSame(['WTF' => 'caps are hell'], $element->getData()); - } -} diff --git a/tests/src/Transformer/StepTransformerTest.php b/tests/src/Transformer/StepTransformerTest.php deleted file mode 100644 index ab8a728..0000000 --- a/tests/src/Transformer/StepTransformerTest.php +++ /dev/null @@ -1,168 +0,0 @@ -stack = new StepTransformer(['first', 'second', 'third']); - } - - /** - * @expectedException \InvalidArgumentException - */ - public function testInvalidSteps() - { - $this->stack->registerSteps([new \stdClass()]); - } - - public function testTransformer() - { - $context = new ContextElement(); - $stack = $this->stack; - $foo = function (ContextElementInterface $element) { - $element->setData('foo'); - }; - $bar = function (ContextElementInterface $element) { - $element->setData('bar'); - }; - $stack->registerTransformer('first', $foo); - $stack->registerTransformer('second', $bar); - - $stack($context); - - $this->assertEquals('bar', $context->getData()); - 
- } - - public function testTransformerWithInvertedSteps() - { - $context = new ContextElement(); - $stack = $this->stack; - $foo = function (ContextElementInterface $element) { - $element->setData('foo'); - }; - $bar = function (ContextElementInterface $element) { - $element->setData('bar'); - }; - $stack->registerTransformer('first', $bar); - $stack->registerTransformer('second', $foo); - - $stack($context); - - $this->assertEquals('foo', $context->getData()); - } - - public function testMultipleTransformers() - { - - $context = new ContextElement(); - $stack = $this->stack; - $foo = function (ContextElementInterface $element) { - $element->setData('foo'); - }; - $bar = function (ContextElementInterface $element) { - $element->setData('bar'); - }; - $stack->registerTransformer('first', $bar); - $stack->registerTransformer('first', $foo); - $stack($context); - $this->assertEquals('foo', $context->getData()); - } - - public function testMultipleTransformersWithPriority() - { - - $context = new ContextElement(); - $stack = $this->stack; - $foo = function (ContextElementInterface $element) { - $element->setData('foo'); - }; - $bar = function (ContextElementInterface $element) { - $element->setData('bar'); - }; - $stack->registerTransformer('first', $bar); - $stack->registerTransformer('first', $foo, 100); - $stack($context); - $this->assertEquals('bar', $context->getData()); - } - - public function testStop() - { - $context = new ContextElement(); - $stack = $this->stack; - $foo = function (ContextElementInterface $element) use ($stack) { - $element->setData('foo'); - $stack->stop(); - }; - $bar = function (ContextElementInterface $element) use ($stack) { - $element->setData('bar'); - $stack->stop(); - }; - $baz = function (ContextElementInterface $element) use ($stack) { - $element->setData('baz'); - $stack->stop(); - }; - $stack->registerTransformer('first', $foo); - $stack->registerTransformer('second', $bar); - $stack->registerTransformer('third', $baz); - 
$stack($context); - $this->assertEquals('foo', $context->getData()); - - } - - public function testStopStep() - { - $context = new ContextElement(); - $stack = $this->stack; - $barHasNotBeenCalled = true; - $foo = function (ContextElementInterface $element) use ($stack) { - $element->setData('foo'); - $stack->stop('first'); - }; - $bar = function (ContextElementInterface $element) use ($stack, &$barHasNotBeenCalled) { - $barHasNotBeenCalled = false; - $element->setData('bar'); - }; - $baz = function (ContextElementInterface $element) use ($stack) { - $element->setData('baz'); - }; - $stack->registerTransformer('first', $foo); - $stack->registerTransformer('first', $bar); - $stack->registerTransformer('second', $baz); - $stack($context); - $this->assertEquals('baz', $context->getData()); - $this->assertTrue($barHasNotBeenCalled); - } - - public function testStopStepBeforeItBegins() - { - $context = new ContextElement(); - $stack = $this->stack; - $foo = function (ContextElementInterface $element) use ($stack) { - $element->setData('foo'); - $stack->stop('first'); - }; - $bar = function (ContextElementInterface $element) use ($stack) { - $element->setData('bar'); - $stack->stop(); - }; - $stack->registerTransformer('first', $foo); - $stack->registerTransformer('second', $bar); - $stack->stop('second'); - $stack($context); - $this->assertEquals('foo', $context->getData()); - } - -} diff --git a/tests/src/Transformer/TransformerStackTest.php b/tests/src/Transformer/TransformerStackTest.php deleted file mode 100644 index f6abb1d..0000000 --- a/tests/src/Transformer/TransformerStackTest.php +++ /dev/null @@ -1,95 +0,0 @@ -assertCount(2, iterator_to_array($stack)); - } - - public function testAddTransformer() - { - $stack = new TransformerStack(); - $this->assertCount(0, iterator_to_array($stack)); - - $stack->registerTransformer(function () {}); - $this->assertCount(1, iterator_to_array($stack)); - - $stack->registerTransformer(function () {}); - $this->assertCount(2, 
iterator_to_array($stack)); - } - - public function testDataIsTransformed() - { - $stack = new TransformerStack(); - $foo = function (ContextElementInterface $element) { - $element->setData('foo'); - }; - $bar = function (ContextElementInterface $element) { - $element->setData('bar'); - }; - $stack->registerTransformer($foo); - $stack->registerTransformer($bar); - - $context = new ContextElement(); - $stack($context); - $this->assertEquals('bar', $context->getData()); - } - - public function testTransformerPriorities() - { - $stack = new TransformerStack(); - $foo = function (ContextElementInterface $element) { - $element->setData('foo'); - }; - $bar = function (ContextElementInterface $element) { - $element->setData('bar'); - }; - $baz = function (ContextElementInterface $element) { - $element->setData('baz'); - }; - $stack->registerTransformer($foo, 50); - $stack->registerTransformer($bar, 0); - $stack->registerTransformer($baz, 100); - - $context = new ContextElement(); - $stack($context); - $this->assertEquals('bar', $context->getData()); - } - - public function testStop() - { - $stack = new TransformerStack(); - $foo = function (ContextElementInterface $element) use ($stack) { - $element->setData('foo'); - $stack->stop(); - }; - $bar = function (ContextElementInterface $element) use ($stack) { - $element->setData('bar'); - $stack->stop(); - }; - $baz = function (ContextElementInterface $element) use ($stack) { - $element->setData('baz'); - $stack->stop(); - }; - $stack->registerTransformer($foo, 50); - $stack->registerTransformer($bar, 100); - $stack->registerTransformer($baz, 0); - - $context = new ContextElement(); - $stack($context); - $this->assertEquals('bar', $context->getData()); - } -}