From 9608d061788c67ff65184248e45cb85dd8586456 Mon Sep 17 00:00:00 2001 From: Beno!t POLASZEK Date: Thu, 26 Oct 2023 10:23:55 +0200 Subject: [PATCH] Refactor: 4.0 (#10) --- .github/workflows/ci.yml | 36 ++ .gitignore | 6 +- .php-cs-fixer.dist.php | 16 + .scrutinizer.yml | 38 -- .travis.yml | 20 - LICENSE | 4 +- README.md | 394 +++++--------- composer.json | 70 ++- phpstan.dist.neon | 7 + phpunit.xml.dist | 46 +- src/Etl.php | 390 -------------- src/EtlBuilder.php | 362 ------------- src/EtlConfiguration.php | 13 + src/EtlExecutor.php | 189 +++++++ src/EtlState.php | 123 +++++ src/EventDispatcher/EtlEvents.php | 84 --- src/EventDispatcher/Event/EndEvent.php | 15 + src/EventDispatcher/Event/EndProcessEvent.php | 41 -- src/EventDispatcher/Event/EtlEvent.php | 58 --- src/EventDispatcher/Event/Event.php | 12 + src/EventDispatcher/Event/ExtractEvent.php | 20 + .../Event/ExtractExceptionEvent.php | 21 + src/EventDispatcher/Event/FlushEvent.php | 59 +-- .../Event/FlushExceptionEvent.php | 26 + src/EventDispatcher/Event/InitEvent.php | 19 + src/EventDispatcher/Event/ItemEvent.php | 55 -- .../Event/ItemExceptionEvent.php | 94 ---- src/EventDispatcher/Event/LoadEvent.php | 20 + .../Event/LoadExceptionEvent.php | 26 + src/EventDispatcher/Event/RollbackEvent.php | 41 -- src/EventDispatcher/Event/StartEvent.php | 19 + .../Event/StartProcessEvent.php | 16 - src/EventDispatcher/Event/TransformEvent.php | 23 + .../Event/TransformExceptionEvent.php | 26 + src/EventDispatcher/EventDispatcher.php | 87 +--- src/EventDispatcher/EventListener.php | 55 -- .../PrioritizedListenerProvider.php | 38 ++ src/EventDispatcher/StoppableEventTrait.php | 20 + src/Exception/EtlException.php | 9 +- src/Exception/ExtractException.php | 9 + src/Exception/FlushException.php | 9 + src/Exception/LoadException.php | 9 + src/Exception/SkipRequest.php | 12 + src/Exception/StopRequest.php | 12 + src/Exception/TransformException.php | 9 + src/Exception/UnexpectedTypeException.php | 49 -- 
src/Extractor/CSVExtractor.php | 42 ++ src/Extractor/CallableExtractor.php | 21 + src/Extractor/CsvExtractor.php | 116 ----- src/Extractor/ExtractorInterface.php | 17 +- src/Extractor/FileExtractor.php | 43 +- src/Extractor/IterableExtractor.php | 33 ++ src/Extractor/JSONExtractor.php | 55 ++ src/Extractor/JsonExtractor.php | 113 ---- src/Extractor/NullExtractor.php | 18 + src/Extractor/TextLinesExtractor.php | 48 ++ src/Internal/ClonableTrait.php | 43 ++ src/Internal/EtlBuilderTrait.php | 68 +++ src/Internal/EtlEventListenersTrait.php | 124 +++++ src/Internal/EtlExceptionsTrait.php | 72 +++ src/Iterator/CSVIterator.php | 120 +++++ src/Iterator/CsvFileIterator.php | 69 --- src/Iterator/CsvIteratorInterface.php | 13 - src/Iterator/CsvStringIterator.php | 76 --- src/Iterator/FileIterator.php | 49 ++ src/Iterator/FileLinesIterator.php | 43 -- src/Iterator/KeysAwareCsvIterator.php | 83 --- src/Iterator/PregSplitIterator.php | 32 ++ src/Iterator/StrTokIterator.php | 36 ++ src/Iterator/StringIteratorInterface.php | 13 - src/Iterator/TextLinesIterator.php | 68 --- src/Loader/ArrayLoader.php | 73 --- src/Loader/CSVLoader.php | 111 ++++ src/Loader/CallableLoader.php | 37 ++ src/Loader/CsvFileLoader.php | 128 ----- src/Loader/DoctrineORMLoader.php | 103 ++-- src/Loader/FileLoader.php | 109 ---- src/Loader/InMemoryLoader.php | 32 ++ src/Loader/JSONLoader.php | 94 ++++ src/Loader/JsonFileLoader.php | 140 ----- src/Loader/LoaderInterface.php | 37 +- src/Loader/NullLoader.php | 40 -- .../EmptyStringToNullNormalizer.php | 17 + .../NumericStringToNumberNormalizer.php | 20 + src/Normalizer/ValueNormalizerInterface.php | 10 + src/Recipe/LoggerRecipe.php | 260 +++++----- src/Recipe/Recipe.php | 27 +- src/Transformer/CallableTransformer.php | 30 +- src/Transformer/NullTransformer.php | 16 + src/Transformer/TransformerInterface.php | 20 +- tests/Behavior/Events/EndEventTest.php | 29 ++ tests/Behavior/Events/ExtractEventTest.php | 27 + .../Events/ExtractExceptionEventTest.php | 22 + 
tests/Behavior/Events/FlushEventTest.php | 28 + .../Events/FlushExceptionEventTest.php | 61 +++ tests/Behavior/Events/InitEventTest.php | 29 ++ tests/Behavior/Events/LoadEventTest.php | 31 ++ .../Events/LoadExceptionEventTest.php | 31 ++ tests/Behavior/Events/StartEventTest.php | 30 ++ tests/Behavior/Events/TransformEventTest.php | 31 ++ .../Events/TransformExceptionEventTest.php | 34 ++ tests/Behavior/ExtractExceptionTest.php | 29 ++ tests/Behavior/FlushExceptionTest.php | 50 ++ tests/Behavior/FlushTest.php | 75 +++ tests/Behavior/LoadExceptionTest.php | 51 ++ tests/Behavior/SkipTest.php | 86 +++ tests/Behavior/StopTest.php | 107 ++++ tests/Behavior/TransformExceptionTest.php | 56 ++ tests/Unit/EtlExecutorTest.php | 34 ++ .../EventDispatcher/EventDispatcherTest.php | 75 +++ tests/Unit/Extractor/CSVExtractorTest.php | 46 ++ .../Unit/Extractor/CallableExtractorTest.php | 20 + .../Unit/Extractor/IterableExtractorTest.php | 28 + tests/Unit/Extractor/JSONExtractorTest.php | 39 ++ .../Unit/Extractor/TextLinesExtractorTest.php | 54 ++ tests/Unit/Iterator/CSVIteratorTest.php | 144 ++++++ tests/Unit/Iterator/PregSplitIteratorTest.php | 28 + tests/Unit/Iterator/StrTokIteratorTest.php | 26 + tests/Unit/Loader/CSVLoaderTest.php | 82 +++ tests/Unit/Loader/CallableLoaderTest.php | 35 ++ tests/Unit/Loader/Doctrine/Book.php | 22 + tests/Unit/Loader/DoctrineORMLoaderTest.php | 46 ++ tests/Unit/Loader/JSONLoaderTest.php | 41 ++ .../EmptyStringToNullNormalizerTest.php | 26 + .../NumericStringToNumberNormalizerTest.php | 28 + tests/Unit/Recipe/RecipeTest.php | 27 + .../Transformer/CallableTransformerTest.php | 22 + .../Unit/Transformer/NullTransformerTest.php | 23 + tests/data/10-biggest-cities.csv | 11 + tests/data/10-biggest-cities.json | 72 +++ tests/data/10-biggest-cities.php | 74 +++ tests/data/dictators.csv | 3 - tests/data/dictators.json | 10 - tests/data/vat.json | 489 ------------------ tests/functions.php | 26 - tests/src/EtlBuilderTest.php | 365 ------------- 
tests/src/EtlExceptionsTest.php | 171 ------ .../EventDispatcher/EventDispatcherTest.php | 128 ----- tests/src/Iterator/CsvFileIteratorTest.php | 33 -- tests/src/Iterator/CsvStringIteratorTest.php | 33 -- tests/src/Iterator/FileLinesIteratorTest.php | 28 - .../src/Iterator/KeysAwareCsvIteratorTest.php | 110 ---- tests/src/Iterator/TextLinesIteratorTest.php | 44 -- tests/src/Loader/ArrayLoaderTest.php | 26 - tests/src/Loader/CsvFileLoaderTest.php | 33 -- tests/src/Loader/DoctrineORMLoaderTest.php | 445 ---------------- tests/src/Loader/FileLoaderTest.php | 61 --- tests/src/Loader/FlushableLoaderTest.php | 159 ------ tests/src/Loader/JsonFileLoaderTest.php | 40 -- tests/src/Recipe/LoggerRecipeTest.php | 186 ------- tests/src/TestSuite.php | 18 - .../Transformer/CallableTransformerTest.php | 19 - 152 files changed, 4130 insertions(+), 5533 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .php-cs-fixer.dist.php delete mode 100644 .scrutinizer.yml delete mode 100644 .travis.yml create mode 100644 phpstan.dist.neon delete mode 100644 src/Etl.php delete mode 100644 src/EtlBuilder.php create mode 100644 src/EtlConfiguration.php create mode 100644 src/EtlExecutor.php create mode 100644 src/EtlState.php delete mode 100644 src/EventDispatcher/EtlEvents.php create mode 100644 src/EventDispatcher/Event/EndEvent.php delete mode 100644 src/EventDispatcher/Event/EndProcessEvent.php delete mode 100644 src/EventDispatcher/Event/EtlEvent.php create mode 100644 src/EventDispatcher/Event/Event.php create mode 100644 src/EventDispatcher/Event/ExtractEvent.php create mode 100644 src/EventDispatcher/Event/ExtractExceptionEvent.php create mode 100644 src/EventDispatcher/Event/FlushExceptionEvent.php create mode 100644 src/EventDispatcher/Event/InitEvent.php delete mode 100644 src/EventDispatcher/Event/ItemEvent.php delete mode 100644 src/EventDispatcher/Event/ItemExceptionEvent.php create mode 100644 src/EventDispatcher/Event/LoadEvent.php create mode 100644 
src/EventDispatcher/Event/LoadExceptionEvent.php delete mode 100644 src/EventDispatcher/Event/RollbackEvent.php create mode 100644 src/EventDispatcher/Event/StartEvent.php delete mode 100644 src/EventDispatcher/Event/StartProcessEvent.php create mode 100644 src/EventDispatcher/Event/TransformEvent.php create mode 100644 src/EventDispatcher/Event/TransformExceptionEvent.php delete mode 100644 src/EventDispatcher/EventListener.php create mode 100644 src/EventDispatcher/PrioritizedListenerProvider.php create mode 100644 src/EventDispatcher/StoppableEventTrait.php create mode 100644 src/Exception/ExtractException.php create mode 100644 src/Exception/FlushException.php create mode 100644 src/Exception/LoadException.php create mode 100644 src/Exception/SkipRequest.php create mode 100644 src/Exception/StopRequest.php create mode 100644 src/Exception/TransformException.php delete mode 100644 src/Exception/UnexpectedTypeException.php create mode 100644 src/Extractor/CSVExtractor.php create mode 100644 src/Extractor/CallableExtractor.php delete mode 100644 src/Extractor/CsvExtractor.php create mode 100644 src/Extractor/IterableExtractor.php create mode 100644 src/Extractor/JSONExtractor.php delete mode 100644 src/Extractor/JsonExtractor.php create mode 100644 src/Extractor/NullExtractor.php create mode 100644 src/Extractor/TextLinesExtractor.php create mode 100644 src/Internal/ClonableTrait.php create mode 100644 src/Internal/EtlBuilderTrait.php create mode 100644 src/Internal/EtlEventListenersTrait.php create mode 100644 src/Internal/EtlExceptionsTrait.php create mode 100644 src/Iterator/CSVIterator.php delete mode 100644 src/Iterator/CsvFileIterator.php delete mode 100644 src/Iterator/CsvIteratorInterface.php delete mode 100644 src/Iterator/CsvStringIterator.php create mode 100644 src/Iterator/FileIterator.php delete mode 100644 src/Iterator/FileLinesIterator.php delete mode 100644 src/Iterator/KeysAwareCsvIterator.php create mode 100644 src/Iterator/PregSplitIterator.php 
create mode 100644 src/Iterator/StrTokIterator.php delete mode 100644 src/Iterator/StringIteratorInterface.php delete mode 100644 src/Iterator/TextLinesIterator.php delete mode 100644 src/Loader/ArrayLoader.php create mode 100644 src/Loader/CSVLoader.php create mode 100644 src/Loader/CallableLoader.php delete mode 100644 src/Loader/CsvFileLoader.php delete mode 100644 src/Loader/FileLoader.php create mode 100644 src/Loader/InMemoryLoader.php create mode 100644 src/Loader/JSONLoader.php delete mode 100644 src/Loader/JsonFileLoader.php delete mode 100644 src/Loader/NullLoader.php create mode 100644 src/Normalizer/EmptyStringToNullNormalizer.php create mode 100644 src/Normalizer/NumericStringToNumberNormalizer.php create mode 100644 src/Normalizer/ValueNormalizerInterface.php create mode 100644 src/Transformer/NullTransformer.php create mode 100644 tests/Behavior/Events/EndEventTest.php create mode 100644 tests/Behavior/Events/ExtractEventTest.php create mode 100644 tests/Behavior/Events/ExtractExceptionEventTest.php create mode 100644 tests/Behavior/Events/FlushEventTest.php create mode 100644 tests/Behavior/Events/FlushExceptionEventTest.php create mode 100644 tests/Behavior/Events/InitEventTest.php create mode 100644 tests/Behavior/Events/LoadEventTest.php create mode 100644 tests/Behavior/Events/LoadExceptionEventTest.php create mode 100644 tests/Behavior/Events/StartEventTest.php create mode 100644 tests/Behavior/Events/TransformEventTest.php create mode 100644 tests/Behavior/Events/TransformExceptionEventTest.php create mode 100644 tests/Behavior/ExtractExceptionTest.php create mode 100644 tests/Behavior/FlushExceptionTest.php create mode 100644 tests/Behavior/FlushTest.php create mode 100644 tests/Behavior/LoadExceptionTest.php create mode 100644 tests/Behavior/SkipTest.php create mode 100644 tests/Behavior/StopTest.php create mode 100644 tests/Behavior/TransformExceptionTest.php create mode 100644 tests/Unit/EtlExecutorTest.php create mode 100644 
tests/Unit/EventDispatcher/EventDispatcherTest.php create mode 100644 tests/Unit/Extractor/CSVExtractorTest.php create mode 100644 tests/Unit/Extractor/CallableExtractorTest.php create mode 100644 tests/Unit/Extractor/IterableExtractorTest.php create mode 100644 tests/Unit/Extractor/JSONExtractorTest.php create mode 100644 tests/Unit/Extractor/TextLinesExtractorTest.php create mode 100644 tests/Unit/Iterator/CSVIteratorTest.php create mode 100644 tests/Unit/Iterator/PregSplitIteratorTest.php create mode 100644 tests/Unit/Iterator/StrTokIteratorTest.php create mode 100644 tests/Unit/Loader/CSVLoaderTest.php create mode 100644 tests/Unit/Loader/CallableLoaderTest.php create mode 100644 tests/Unit/Loader/Doctrine/Book.php create mode 100644 tests/Unit/Loader/DoctrineORMLoaderTest.php create mode 100644 tests/Unit/Loader/JSONLoaderTest.php create mode 100644 tests/Unit/Normalizer/EmptyStringToNullNormalizerTest.php create mode 100644 tests/Unit/Normalizer/NumericStringToNumberNormalizerTest.php create mode 100644 tests/Unit/Recipe/RecipeTest.php create mode 100644 tests/Unit/Transformer/CallableTransformerTest.php create mode 100644 tests/Unit/Transformer/NullTransformerTest.php create mode 100644 tests/data/10-biggest-cities.csv create mode 100644 tests/data/10-biggest-cities.json create mode 100644 tests/data/10-biggest-cities.php delete mode 100644 tests/data/dictators.csv delete mode 100644 tests/data/dictators.json delete mode 100644 tests/data/vat.json delete mode 100644 tests/functions.php delete mode 100644 tests/src/EtlBuilderTest.php delete mode 100644 tests/src/EtlExceptionsTest.php delete mode 100644 tests/src/EventDispatcher/EventDispatcherTest.php delete mode 100644 tests/src/Iterator/CsvFileIteratorTest.php delete mode 100644 tests/src/Iterator/CsvStringIteratorTest.php delete mode 100644 tests/src/Iterator/FileLinesIteratorTest.php delete mode 100644 tests/src/Iterator/KeysAwareCsvIteratorTest.php delete mode 100644 
tests/src/Iterator/TextLinesIteratorTest.php delete mode 100644 tests/src/Loader/ArrayLoaderTest.php delete mode 100644 tests/src/Loader/CsvFileLoaderTest.php delete mode 100644 tests/src/Loader/DoctrineORMLoaderTest.php delete mode 100644 tests/src/Loader/FileLoaderTest.php delete mode 100644 tests/src/Loader/FlushableLoaderTest.php delete mode 100644 tests/src/Loader/JsonFileLoaderTest.php delete mode 100644 tests/src/Recipe/LoggerRecipeTest.php delete mode 100644 tests/src/TestSuite.php delete mode 100644 tests/src/Transformer/CallableTransformerTest.php diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..aa0911a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,36 @@ +name: CI Workflow +on: + push: + branches: [ master, '4.0' ] + pull_request: + branches: [ master, '4.0' ] + +jobs: + tests: + runs-on: ubuntu-22.04 + strategy: + matrix: + php: + - 8.2 + - 8.3 + steps: + - uses: actions/checkout@v2 + + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php }} + + - name: Install dependencies + run: composer install --prefer-dist --no-progress + + - name: Check types + run: vendor/bin/phpstan analyse + + - name: Run test suite + run: vendor/bin/pest --coverage --coverage-clover=coverage.xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.gitignore b/.gitignore index 0fab2fb..0486d9e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -/.idea -vendor -composer.lock \ No newline at end of file +vendor/ +composer.lock +.php-cs-fixer.cache diff --git a/.php-cs-fixer.dist.php b/.php-cs-fixer.dist.php new file mode 100644 index 0000000..2be2b9e --- /dev/null +++ b/.php-cs-fixer.dist.php @@ -0,0 +1,16 @@ +in(__DIR__) +; + +return (new PhpCsFixer\Config()) + ->setRules([ + '@Symfony' => true, + 'global_namespace_import' => [ + 'import_functions' => true, + 'import_constants' => true, + ], + ]) + 
->setFinder($finder) +; diff --git a/.scrutinizer.yml b/.scrutinizer.yml deleted file mode 100644 index ef7119e..0000000 --- a/.scrutinizer.yml +++ /dev/null @@ -1,38 +0,0 @@ -filter: - excluded_paths: [tests/*] -checks: - php: - code_rating: true - remove_extra_empty_lines: true - remove_php_closing_tag: true - remove_trailing_whitespace: true - fix_use_statements: - remove_unused: true - preserve_multiple: false - preserve_blanklines: true - order_alphabetically: true - fix_php_opening_tag: true - fix_linefeed: true - fix_line_ending: true - fix_identation_4spaces: true - fix_doc_comments: true -tools: - php_analyzer: true - php_code_coverage: false - php_code_sniffer: - config: - standard: PSR2 - filter: - paths: ['src'] - php_loc: - enabled: true - excluded_dirs: [vendor, tests] - php_cpd: - enabled: true - excluded_dirs: [vendor, tests] -build: - nodes: - analysis: - tests: - override: - - php-scrutinizer-run \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9cc942e..0000000 --- a/.travis.yml +++ /dev/null @@ -1,20 +0,0 @@ -language: php - -php: - - '7.2' - - '7.3' - - '7.4' - -before_script: - - travis_retry composer self-update - - travis_retry composer global require hirak/prestissimo - - travis_retry composer install --no-interaction --dev - - travis_retry phpenv rehash - -script: - - ./vendor/bin/phpcs --standard=psr2 -n src/ - - mkdir -p build/logs - - ./vendor/bin/phpunit --coverage-clover build/logs/clover.xml - -after_script: - - php vendor/bin/coveralls -v diff --git a/LICENSE b/LICENSE index 5e7b846..83470fc 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2019 Benoit POLASZEK +Copyright (c) 2016-2023 Beno!t POLASZEK Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file +THE SOFTWARE. diff --git a/README.md b/README.md index 0b209e6..57c1eb5 100644 --- a/README.md +++ b/README.md @@ -1,330 +1,222 @@ [![Latest Stable Version](https://poser.pugx.org/bentools/etl/v/stable)](https://packagist.org/packages/bentools/etl) [![License](https://poser.pugx.org/bentools/etl/license)](https://packagist.org/packages/bentools/etl) -[![Build Status](https://img.shields.io/travis/bpolaszek/bentools-etl/master.svg?style=flat-square)](https://travis-ci.org/bpolaszek/bentools-etl) -[![Coverage Status](https://coveralls.io/repos/github/bpolaszek/bentools-etl/badge.svg?branch=master)](https://coveralls.io/github/bpolaszek/bentools-etl?branch=master) -[![Quality Score](https://img.shields.io/scrutinizer/g/bpolaszek/bentools-etl.svg?style=flat-square)](https://scrutinizer-ci.com/g/bpolaszek/bentools-etl) +[![CI Workflow](https://github.com/bpolaszek/bentools-etl/actions/workflows/ci.yml/badge.svg)](https://github.com/bpolaszek/bentools-etl/actions/workflows/ci.yml) +[![Coverage](https://codecov.io/gh/bpolaszek/bentools-etl/branch/main/graph/badge.svg?token=L5ulTaymbt)](https://codecov.io/gh/bpolaszek/bentools-etl) [![Total Downloads](https://poser.pugx.org/bentools/etl/downloads)](https://packagist.org/packages/bentools/etl) -Okay, so you heard about the [Extract / Transform / Load](https://en.wikipedia.org/wiki/Extract,_transform,_load) pattern and you're looking for a PHP library to do the stuff. +Okay, so you heard about the [Extract / Transform / Load](https://en.wikipedia.org/wiki/Extract,_transform,_load) pattern, +and you're looking for a PHP library to do the stuff. -Alright, let's go! +Alright, let's go! 
Installation ------------ ```bash -composer require bentools/etl:^3.0@alpha +composer require bentools/etl ``` -_Warning: version 3.0 is a complete rewrite and a involves important BC breaks. Don't upgrade from `^2.0` unless you know what you're doing!_ +_Warning: version 3.1 is a complete rewrite and involves important BC breaks._ + +_Don't upgrade from `^2.0` unless you know what you're doing!_ Usage ----- -To sum up, you will apply _transformations_ onto an `iterable` of any _things_ in order to _load_ them in some place. -Sometimes your `iterable` is ready to go, sometimes you just don't need to perform transformations, but anyway you need to load that data somewhere. +Let's cover the basic concepts: +- **Extract**: you have a source of data (a database, a CSV file, whatever) - an **extractor** is able to read that data and provide an iterator of items +- **Transform**: apply transformation to each item. A **transformer** may generate 0, 1 or several items to **load** (for example, 1 item may generate multiple SQL queries) +- **Load**: load transformed item to the destination. For example, **extracted items** have been **transformed** to SQL queries, and your **loader** will run those queries against your database. -Let's start with a really simple example: +Now let's have a look at how simple it is: ```php -use BenTools\ETL\EtlBuilder; -use BenTools\ETL\Loader\JsonFileLoader; - -$data = [ - 'foo', - 'bar', -]; - -$etl = EtlBuilder::init() - ->loadInto(JsonFileLoader::toFile(__DIR__.'/data.json')) - ->createEtl(); -$etl->process($data); +use Bentools\ETL\EtlExecutor; + +$etl = (new EtlExecutor()) + ->transformWith(function (string $name) { + yield strtoupper($name); + }); + +$singers = ['Bob Marley', 'Amy Winehouse']; +$report = $etl->process($singers); +dump($report->output); // ["BOB MARLEY", "AMY WINEHOUSE"] ``` -Basically you just loaded the string `["foo","bar"]` into `data.json`. Yay! 
+OK, that wasn't really hard, here we basically don't have to extract anything (we can already iterate on `$singers`), +and we're not loading anywhere, except into PHP's memory. -Now let's apply a basic uppercase transformation: +Now let's take this to the next level: -```php -use BenTools\ETL\EtlBuilder; -use BenTools\ETL\Loader\JsonFileLoader; - -$data = [ - 'foo', - 'bar', -]; - -$etl = EtlBuilder::init() - ->transformWith(new CallableTransformer('strtoupper')) - ->loadInto(JsonFileLoader::factory()) - ->createEtl(); -$etl->process($data, __DIR__.'/data.json'); // You can also set the output file when processing :-) +```csv +city_english_name,city_local_name,country_iso_code,continent,population +"New York","New York",US,"North America",8537673 +"Los Angeles","Los Angeles",US,"North America",39776830 +Tokyo,東京,JP,Asia,13929286 ``` -Didn't you just write the string `["FOO","BAR"]` into `data.json` ? Yes, you did! +```php +use Bentools\ETL\EtlExecutor; -Okay, but what if your source data is not an iterable (yet)? It can be a CSV file or a CSV string, for instance. 
Here's another example: +$etl = (new EtlExecutor()) + ->extractFrom(new CSVExtractor(options: ['columns' => 'auto'])) + ->loadInto(new JSONLoader()); -```php -use BenTools\ETL\EtlBuilder; -use BenTools\ETL\Extractor\CsvExtractor; -use BenTools\ETL\Loader\JsonFileLoader; - -$data = <<extractFrom(new CsvExtractor()) - ->loadInto(JsonFileLoader::factory(['json_options' => \JSON_PRETTY_PRINT])) - ->createEtl(); -$etl->process($data, __DIR__.'/data.json'); +$report = $etl->process('file:///tmp/cities.csv', 'file:///tmp/cities.json'); +dump($report->output); // file:///tmp/cities.json ``` -As you guessed, the following content was just written into `presidents.json`: - +Then, let's have a look at `/tmp/cities.json`: ```json [ { - "country_code": "US", - "country_name": "USA", - "president": "Donald Trump" + "city_english_name": "New York", + "city_local_name": "New York", + "country_iso_code": "US", + "continent": "North America", + "population": 8537673 }, { - "country_code": "RU", - "country_name": "Russia", - "president": "Vladimir Putin" + "city_english_name": "Los Angeles", + "city_local_name": "Los Angeles", + "country_iso_code": "US", + "continent": "North America", + "population": 39776830 + }, + { + "city_english_name": "Tokyo", + "city_local_name": "東京", + "country_iso_code": "JP", + "continent": "Asia", + "population": 13929286 } ] ``` -We provide helpful extractors and loaders to manipulate JSON, CSV, text, and you'll also find a `DoctrineORMLoader` for when your transformer _yields_ Doctrine entities. +Notice that we didn't _transform_ anything here, we just denormalized the CSV file to an array, then serialized that array to a JSON file. -Because yes, a transformer must return a `\Generator`. Why? Because a single extracted item can lead to several output items. Let's take a more sophisticated example: +The `CSVExtractor` has some options to _read_ the data, such as considering that the 1st row is the column keys. 
-```php -use BenTools\ETL\EtlBuilder; -use BenTools\ETL\Extractor\JsonExtractor; - -$pdo = new \PDO('mysql:host=localhost;dbname=test'); -$input = __DIR__.'/presidents.json'; - -$etl = EtlBuilder::init() - ->extractFrom(new JsonExtractor()) - ->transformWith( - function ($item) use ($pdo) { - $stmt = $pdo->prepare('SELECT country_code FROM countries WHERE country_code = ?'); - $stmt->bindValue(1, $item['country_code'], \PDO::PARAM_STR); - $stmt->execute(); - if (0 === $stmt->rowCount()) { - yield ['INSERT INTO countries (country_code, country_name) VALUES (?, ?)', [$item['country_code'], $item['country_name']]]; - } +Creating your own Extractor / Transformers / Loaders +-------------------------------------------------- - yield ['REPLACE INTO presidents (country_code, president_name) VALUES (?, ?)', [$item['country_code'], $item['president']]]; - - } - ) - ->loadInto( - $loader = function (\Generator $queries) use ($pdo) { - foreach ($queries as $query) { - list($sql, $params) = $query; - $stmt = $pdo->prepare($sql); - foreach ($params as $i => $value) { - $stmt->bindValue($i + 1, $value); - } - $stmt->execute(); - } - } - ) - ->createEtl(); +You can implement `ExtractorInterface`, `TransformerInterface` and `LoaderInterface`, or basically use simple `callable` with the same signatures. 
-$etl->process(__DIR__.'/presidents.json'); // The JsonExtractor will convert that file to a PHP array +Here's another example: +```php +$pdo = new \PDO('mysql:host=localhost;dbname=cities'); +$etl = (new EtlExecutor()) + ->extractFrom(new CSVExtractor(options: ['columns' => 'auto'])) + ->transformWith(function (mixed $city) { + yield [ + 'INSERT INTO countries (country_code, continent) VALUES (?, ?)', + [$city['country_iso_code'], $city['continent']], + ]; + yield [ + 'INSERT INTO cities (english_name, local_name, country_code, population) VALUES (?, ?, ?, ?)', + [$city['city_english_name'], $city['city_local_name'], $city['country_iso_code'], $city['population']], + ]; + }) + ->loadInto(function (array $query, EtlState $state) { + /** @var \PDO $pdo */ + $pdo = $state->destination; + [$sql, $params] = $query; + $stmt = $pdo->prepare($sql); + $stmt->execute($params); + }); +$etl->process('file:///tmp/cities.csv', $pdo); ``` -As you can see, from a single item, we loaded up to 2 queries. - -Your _extractors_, _transformers_ and _loaders_ can implement [`ExtractorInterface`](https://github.com/bpolaszek/bentools-etl/tree/master/src/Extractor/ExtractorInterface.php), [`TransformerInterface`](https://github.com/bpolaszek/bentools-etl/tree/master/src/Transformer/TransformerInterface.php) or [`LoaderInterface`](https://github.com/bpolaszek/bentools-etl/tree/master/src/Loader/LoaderInterface.php) as well as being simple `callables`. - +As you can see, you can use `EtlState.destination` to retrieve the second argument you passed to `$etl->process()`. -Skipping items --------------- +The `EtlState` object contains all elements related to the state of your running ETL workflow. -Each _extractor_ / _transformer_ / _loader_ callback gets the current `Etl` object injected in their arguments. 
+Using events +------------ -This allows you to ask the ETL to skip an item, or even to stop the whole process: +The `EtlExecutor` emits various events during the workflow: +- `InitEvent` when `process()` was just called +- `StartEvent` when extraction just started (we might know the total number of items to extract at this time, if the extractor provides this) +- `ExtractEvent` upon each extracted item +- `ExtractExceptionEvent` when something wrong occurred during extraction (this is generally not recoverable) +- `TransformEvent` upon each transformed item +- `TransformExceptionEvent` when something wrong occurred during transformation (the exception can be dismissed) +- `LoadEvent` upon each loaded item +- `LoadExceptionEvent` when something wrong occurred during loading (the exception can be dismissed) +- `FlushEvent` at each flush +- `FlushExceptionEvent` when something wrong occurred during flush (the exception can be dismissed) +- `EndEvent` whenever the workflow is complete. + +You can hook into those events during `EtlExecutor` instantiation, e.g.: ```php -use BenTools\ETL\Etl; -use BenTools\ETL\EtlBuilder; -use BenTools\ETL\Transformer\CallableTransformer; - -$fruits = [ - 'apple', - 'banana', - 'strawberry', - 'pineapple', - 'pear', -]; - - -$storage = []; -$etl = EtlBuilder::init() - ->transformWith(new CallableTransformer('strtoupper')) - ->loadInto( - function ($generated, $key, Etl $etl) use (&$storage) { - foreach ($generated as $fruit) { - if ('BANANA' === $fruit) { - $etl->skipCurrentItem(); - break; - } - if ('PINEAPPLE' === $fruit) { - $etl->stopProcessing(); - break; - } - $storage[] = $fruit; - } - }) - ->createEtl(); - -$etl->process($fruits); - -var_dump($storage); // ['APPLE', 'STRAWBERRY'] +$etl = (new EtlExecutor()) + ->onExtract( + fn (ExtractEvent $event) => $logger->info('Extracting item #{key}', ['key' => $event->state->currentItemKey]), + ); ``` +Skipping items +-------------- -Events ------- +You can skip items at any time. 
-Now you're wondering how you can hook on the ETL lifecycle, to log things, handle exceptions, ... This library ships with a built-in Event Dispatcher that you can leverage when: +Use the `$state->skip()` method from the `EtlState` object as soon as your business logic requires it. -* The ETL starts -* An item has been extracted -* The extraction failed -* An item has been transformed -* Transformation failed -* Loader is initialized (1st item is about to be loaded) -* An item has been loaded -* Loading failed -* An item has been skipped -* The ETL was stopped -* A flush operation was completed -* A rollback operation was completed -* The ETL completed the whole process. +Stopping the workflow +--------------------- -The _ItemEvents_ (on extract, transform, load) will allow you to mark the current item to be skipped, or even handle runtime exceptions. Let's take another example: +You can stop the workflow at any time. -```php -use BenTools\ETL\EtlBuilder; -use BenTools\ETL\EventDispatcher\Event\ItemExceptionEvent; - -$fruits = [ - 'apple', - new \RuntimeException('Is tomato a fruit?'), - 'banana', -]; - - -$storage = []; -$etl = EtlBuilder::init() - ->transformWith( - function ($item, $key) { - if ($item instanceof \Exception) { - throw $item; - } +Use the `$state->stop()` method from the `EtlState` object as soon as your business logic requires it. - yield $key => $item; - }) - ->loadInto( - function (iterable $transformed) use (&$storage) { - foreach ($transformed as $fruit) { - $storage[] = $fruit; - } - }) - ->onTransformException( - function (ItemExceptionEvent $event) { - echo $event->getException()->getMessage(); // Is tomato a fruit? 
- $event->ignoreException(); - }) - ->createEtl(); +Early flush +----------- -$etl->process($fruits); +You can define the flush frequency (defaults to 1) and optionally flush earlier than expected at any time: -var_dump($storage); // ['apple', 'banana'] +```php +$etl = (new EtlExecutor(options: new EtlConfiguration(flushEvery: 10))) + ->onLoad( + function (LoadEvent $event) { + if (/* whatever reason */) { + $event->state->flush(); + } + }, + ); ``` -Here, we intentionnally threw an exception during the _transform_ operation. But thanks to the event dispatcher, we could tell the ETL this exception can be safely ignored and it can pursue the rest of the process. - -You can attach as many event listeners as you wish, and sort them by priority. - - Recipes ------- -A recipe is an ETL pattern that can be reused through different tasks. -If you want to log everything that goes through an ETL for example, use our built-in Logger recipe: +Recipes are reusable configurations of an `EtlExecutor`. +For example, to enable logging, use the `LoggerRecipe`: ```php -use BenTools\ETL\EtlBuilder; -use BenTools\ETL\Recipe\LoggerRecipe; +use Bentools\ETL\EtlExecutor; +use Bentools\ETL\Recipe\LoggerRecipe; +use Monolog\Logger; -$etl = EtlBuilder::init() - ->useRecipe(new LoggerRecipe($logger)) - ->createEtl(); +$logger = new Logger(); +$etl = (new EtlExecutor()) + ->withRecipe(new LoggerRecipe($logger)); ``` -You can also create your own recipes: +This will basically listen to all events and fire log entries. 
-```php -use BenTools\ETL\EtlBuilder; -use BenTools\ETL\EventDispatcher\Event\ItemEvent; -use BenTools\ETL\Extractor\JsonExtractor; -use BenTools\ETL\Loader\CsvFileLoader; -use BenTools\ETL\Recipe\Recipe; - - -class JSONToCSVRecipe extends Recipe -{ - /** - * @inheritDoc - */ - public function updateBuilder(EtlBuilder $builder): EtlBuilder - { - return $builder - ->extractFrom(new JsonExtractor()) - ->loadInto($loader = CsvFileLoader::factory(['delimiter' => ';'])) - ->onLoaderInit( - function (ItemEvent $event) use ($loader) { - $loader::factory(['keys' => array_keys($event->getItem())], $loader); - }) - ; - } - -} - -$builder = EtlBuilder::init()->useRecipe(new JSONToCSVRecipe()); -$etl = $builder->createEtl(); -$etl->process(__DIR__.'/presidents.json', __DIR__.'/presidents.csv'); -``` +You can create your own recipes by implementing `Bentools\ETL\Recipe\Recipe` or using a callable with the same signature. -The above example will result in `presidents.csv` containing: -```csv -country_code;country_name;president -US;USA;"Donald Trump" -RU;Russia;"Vladimir Putin" -``` +Contribute +---------- -To sum up, a _recipe_ is a kind of an `ETLBuilder` factory, but keep in mind that a recipe will only **add** event listeners to the existing builder but can also **replace** the builder's _extractor_, _transformer_ and/or _loader_. - -Tests ------ +Contributions are welcome! +Before sending your PRs, run this command to ensure tests pass and 100% of the code is covered. ```bash -./vendor/bin/phpunit +composer ci:check ``` License ------- -MIT +MIT. diff --git a/composer.json b/composer.json index 9119635..a73a721 100644 --- a/composer.json +++ b/composer.json @@ -1,52 +1,46 @@ { "name": "bentools/etl", - "description": "PHP ETL (Extract / Transform / Load) implementation. 
Almost no dependency.", - "keywords": [ - "extract", - "transform", - "load", - "extractor", - "transformer", - "loader", - "etl" - ], - "license": "MIT", - "authors": [ - { - "name": "Beno!t POLASZEK" - } - ], + "description": "PHP ETL (Extract / Transform / Load) implementation, with very few dependencies.", + "type": "library", "require": { - "php": ">=7.2", - "thecodingmachine/safe": "^1.0", - "psr/event-dispatcher": "^1.0" + "php": ">=8.2", + "psr/event-dispatcher": "^1.0", + "psr/log": "^3.0", + "symfony/options-resolver": "@stable", + "thecodingmachine/safe": "^2.5" }, "require-dev": { - "phpunit/phpunit": "^6.0", - "squizlabs/php_codesniffer": "~2.0", - "doctrine/orm": "^2.7", - "symfony/var-dumper": "^4.0", - "php-coveralls/php-coveralls": "^2.1", - "psr/log": "^1.1", - "phpstan/phpstan": "^0.11.5" + "doctrine/orm": "^2.16", + "friendsofphp/php-cs-fixer": "^3.35", + "mockery/mockery": "^1.6", + "pestphp/pest": "^2.24", + "phpstan/phpstan": "^1.10", + "phpstan/phpstan-mockery": "^1.1", + "symfony/var-dumper": "*" }, - "autoload-dev": { + "license": "MIT", + "autoload": { "psr-4": { - "BenTools\\ETL\\Tests\\": "tests/src" - }, - "files": [ - "vendor/symfony/var-dumper/Resources/functions/dump.php", - "tests/functions.php" - ] + "Bentools\\ETL\\": "src/" + } }, - "autoload": { + "autoload-dev": { "psr-4": { - "BenTools\\ETL\\": "src" + "BenTools\\ETL\\Tests\\": "tests/" } }, - "extra": { - "branch-alias": { - "dev-master": "3.0.x-dev" + "scripts": { + "ci:check": [ + "vendor/bin/php-cs-fixer fix", + "vendor/bin/phpstan analyse", + "vendor/bin/pest --coverage" + ] + }, + "minimum-stability": "stable", + "config": { + "sort-packages": true, + "allow-plugins": { + "pestphp/pest-plugin": true } } } diff --git a/phpstan.dist.neon b/phpstan.dist.neon new file mode 100644 index 0000000..b76d47f --- /dev/null +++ b/phpstan.dist.neon @@ -0,0 +1,7 @@ +parameters: + level: 6 + paths: + - src/ + - tests/ +includes: + - vendor/phpstan/phpstan-mockery/extension.neon diff 
--git a/phpunit.xml.dist b/phpunit.xml.dist index a99db0b..f12db56 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,28 +1,32 @@ - + + + + + + + + + + - - ./tests + + tests - - - ./src - - + + + + + + src + + diff --git a/src/Etl.php b/src/Etl.php deleted file mode 100644 index 842a095..0000000 --- a/src/Etl.php +++ /dev/null @@ -1,390 +0,0 @@ -extract = $extract; - $this->transform = $transform ?? self::defaultTransformer(); - $this->init = $initLoader; - $this->load = $load ?? new NullLoader(); - $this->flush = $flush; - $this->rollback = $rollback; - $this->flushEvery = null !== $flushEvery ? max(1, $flushEvery) : null; - $this->eventDispatcher = $eventDispatcher ?? new EventDispatcher(); - } - - /** - * Run the ETL on the given input. - * - * @param mixed|null $data - * @param ...$initLoaderArgs - Optional arguments for loader init - * @throws EtlException - */ - public function process($data = null, ...$initLoaderArgs): void - { - $flushCounter = $totalCounter = 0; - $this->start(); - - foreach ($this->extract($data) as $key => $item) { - if ($this->shouldSkip) { - $this->skip($item, $key); - continue; - } - - if ($this->shouldStop) { - $this->stop($item, $key); - break; - } - - $transformed = $this->transform($item, $key); - - if ($this->shouldSkip) { - $this->skip($item, $key); - continue; - } - - if ($this->shouldStop) { - $this->stop($item, $key); - break; - } - - $flushCounter++; - $totalCounter++; - - if (1 === $totalCounter) { - $this->initLoader($item, $key, ...$initLoaderArgs); - } - - $flush = (null === $this->flushEvery ? false : (0 === ($totalCounter % $this->flushEvery))); - $this->load($transformed(), $item, $key, $flush, $flushCounter, $totalCounter); - } - - $this->end($flushCounter, $totalCounter); - } - - private function start(): void - { - $this->reset(); - $this->eventDispatcher->dispatch(new StartProcessEvent($this)); - } - - /** - * Mark the current item to be skipped. 
- */ - public function skipCurrentItem(): void - { - $this->shouldSkip = true; - } - - /** - * Ask the loader to trigger flush ASAP. - */ - public function triggerFlush(): void - { - $this->shouldFlush = true; - } - - /** - * Process item skip. - * - * @param $item - * @param $key - */ - private function skip($item, $key): void - { - $this->shouldSkip = false; - $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::SKIP, $item, $key, $this)); - } - - /** - * Ask the ETl to stop. - * - * @param bool $rollback - if the loader should rollback instead of flushing. - */ - public function stopProcessing(bool $rollback = false): void - { - $this->shouldStop = true; - $this->shouldRollback = $rollback; - } - - /** - * @param $item - * @param $key - */ - private function stop($item, $key): void - { - $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::STOP, $item, $key, $this)); - } - - /** - * Reset the ETL. - */ - private function reset(): void - { - $this->shouldSkip = false; - $this->shouldFlush = false; - $this->shouldStop = false; - $this->shouldRollback = false; - } - - /** - * Extract data. - * - * @param $data - * @return iterable - * @throws EtlException - */ - private function extract($data): iterable - { - $items = null === $this->extract ? $data : ($this->extract)($data, $this); - - if (null === $items) { - $items = new \EmptyIterator(); - } - - if (!\is_iterable($items)) { - throw new EtlException('Could not extract data.'); - } - - try { - foreach ($items as $key => $item) { - try { - $this->shouldSkip = false; - $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::EXTRACT, $item, $key, $this)); - yield $key => $item; - } catch (\Exception $e) { - continue; - } - } - } catch (\Throwable $e) { - /** @var ItemExceptionEvent $event */ - $event = $this->eventDispatcher->dispatch(new ItemExceptionEvent(EtlEvents::EXTRACT_EXCEPTION, $item ?? null, $key ?? 
null, $this, $e)); - if ($event->shouldThrowException()) { - throw $e; - } - } - } - - /** - * Transform data. - * - * @param $item - * @param $key - * @return callable - * @throws EtlException - */ - private function transform($item, $key) - { - $transformed = ($this->transform)($item, $key, $this); - - if (!$transformed instanceof \Generator) { - throw new EtlException('The transformer must return a generator.'); - } - - $output = []; - - // Traverse generator to trigger events - try { - foreach ($transformed as $key => $value) { - $output[] = [$key, $value]; - } - $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::TRANSFORM, $item, $key, $this)); - } catch (\Exception $e) { - /** @var ItemExceptionEvent $event */ - $event = $this->eventDispatcher->dispatch(new ItemExceptionEvent(EtlEvents::TRANSFORM_EXCEPTION, $item, $key, $this, $e)); - if ($event->shouldThrowException()) { - throw $e; - } - } - - return static function () use ($output) { - foreach ($output as [$key, $value]) { - yield $key => $value; - } - }; - } - - /** - * Init the loader on the 1st item. - */ - private function initLoader($item, $key, ...$initLoaderArgs): void - { - $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::LOADER_INIT, $item, $key, $this)); - - if (null === $this->init) { - return; - } - - ($this->init)(...$initLoaderArgs); - } - - /** - * Load data. 
- * - * @param iterable $data - */ - private function load(iterable $data, $item, $key, bool $flush, int &$flushCounter, int &$totalCounter): void - { - try { - ($this->load)($data, $key, $this); - $this->eventDispatcher->dispatch(new ItemEvent(EtlEvents::LOAD, $item, $key, $this)); - } catch (\Throwable $e) { - /** @var ItemExceptionEvent $event */ - $event = $this->eventDispatcher->dispatch(new ItemExceptionEvent(EtlEvents::LOAD_EXCEPTION, $item, $key, $this, $e)); - if ($event->shouldThrowException()) { - throw $e; - } - $flushCounter--; - $totalCounter--; - } - - $flush = $this->shouldFlush || $flush; - - if (true === $flush) { - $this->flush($flushCounter, true); - } - } - - /** - * Flush elements. - */ - private function flush(int &$flushCounter, bool $partial): void - { - if (null === $this->flush) { - return; - } - - ($this->flush)($partial); - $this->eventDispatcher->dispatch(new FlushEvent($this, $flushCounter, $partial)); - $flushCounter = 0; - $this->shouldFlush = false; - } - - /** - * Restore loader's initial state. - */ - private function rollback(int &$flushCounter): void - { - if (null === $this->rollback) { - return; - } - - ($this->rollback)(); - $this->eventDispatcher->dispatch(new RollbackEvent($this, $flushCounter)); - $flushCounter = 0; - } - - /** - * Process the end of the ETL. 
- * - * @param int $flushCounter - * @param int $totalCounter - */ - private function end(int $flushCounter, int $totalCounter): void - { - if ($this->shouldRollback) { - $this->rollback($flushCounter); - $totalCounter = max(0, $totalCounter - $flushCounter); - } else { - $this->flush($flushCounter, false); - } - $this->eventDispatcher->dispatch(new EndProcessEvent($this, $totalCounter)); - $this->reset(); - } - - /** - * @return callable - */ - private static function defaultTransformer(): callable - { - return function ($item, $key): \Generator { - yield $key => $item; - }; - } -} diff --git a/src/EtlBuilder.php b/src/EtlBuilder.php deleted file mode 100644 index 183464e..0000000 --- a/src/EtlBuilder.php +++ /dev/null @@ -1,362 +0,0 @@ -extractFrom($extractor); - } - - if (null !== $transformer) { - $this->transformWith($transformer); - } - - if (null !== $loader) { - $this->loadInto($loader); - } - } - - /** - * @return EtlBuilder - */ - public static function init($extractor = null, $transformer = null, $loader = null): self - { - return new self($extractor, $transformer, $loader); - } - - /** - * @param $extractor - * @return EtlBuilder - * @throws \InvalidArgumentException - */ - public function extractFrom($extractor): self - { - if ($extractor instanceof ExtractorInterface) { - $this->extractor = [$extractor, 'extract']; - - return $this; - } - - if (\is_callable($extractor) || null === $extractor) { - $this->extractor = $extractor; - - return $this; - } - - - throw new \InvalidArgumentException(sprintf('Expected callable, null or instance of %s, got %s', ExtractorInterface::class, \is_object($extractor) ? 
\get_class($extractor) : \gettype($extractor))); - } - - /** - * @param $transformer - * @return EtlBuilder - * @throws \InvalidArgumentException - */ - public function transformWith($transformer): self - { - - if ($transformer instanceof TransformerInterface) { - $this->transformer = [$transformer, 'transform']; - - return $this; - } - - if (\is_callable($transformer) || null === $transformer) { - $this->transformer = $transformer; - - return $this; - } - - throw new \InvalidArgumentException(sprintf('Expected callable, null or instance of %s, got %s', TransformerInterface::class, \is_object($transformer) ? \get_class($transformer) : \gettype($transformer))); - } - - /** - * @param $loader - * @return EtlBuilder - * @throws \InvalidArgumentException - */ - public function loadInto($loader): self - { - if ($loader instanceof LoaderInterface) { - $this->loader = [$loader, 'load']; - $this->initLoader = [$loader, 'init']; - $this->committer = [$loader, 'commit']; - $this->restorer = [$loader, 'rollback']; - - return $this; - } - - if (\is_callable($loader)) { - $this->loader = $loader; - - return $this; - } - - - throw new \InvalidArgumentException(sprintf('Expected callable or instance of %s, got %s', LoaderInterface::class, \is_object($loader) ? 
\get_class($loader) : \gettype($loader))); - } - - /** - * @param int|null $nbItems - * @return EtlBuilder - */ - public function flushEvery(?int $nbItems): self - { - $this->flushEvery = $nbItems; - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onStart(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::START, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onExtract(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::EXTRACT, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onExtractException(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::EXTRACT_EXCEPTION, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onTransform(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::TRANSFORM, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onTransformException(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::TRANSFORM_EXCEPTION, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onLoaderInit(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::LOADER_INIT, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder 
- */ - public function onLoad(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::LOAD, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onLoadException(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::LOAD_EXCEPTION, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onFlush(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::FLUSH, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onSkip(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::SKIP, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onStop(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::STOP, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onRollback(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::ROLLBACK, $callable, $priority); - - return $this; - } - - /** - * @param callable $callable - * @param int $priority - * @return EtlBuilder - */ - public function onEnd(callable $callable, int $priority = 0): self - { - $this->listeners[] = new EventListener(EtlEvents::END, $callable, $priority); - - return $this; - } - - /** - * @param Recipe $recipe - * @return EtlBuilder - */ - public function useRecipe(Recipe $recipe): self - { - return $recipe->updateBuilder($this); - } - - /** - * @return 
Etl - * @throws \RuntimeException - */ - public function createEtl(): Etl - { - $this->checkValidity(); - - return new Etl( - $this->extractor, - $this->transformer, - $this->loader, - $this->initLoader, - $this->committer, - $this->restorer, - $this->flushEvery, - new EventDispatcher($this->listeners) - ); - } - - /** - * @return bool - */ - private function checkValidity(): void - { - if (null === $this->loader) { - throw new \RuntimeException('Loader has not been provided.'); - } - - if (null !== $this->flushEvery && $this->flushEvery <= 0) { - throw new \RuntimeException('The "flushEvery" option must be null or an integer > 0.'); - } - } -} diff --git a/src/EtlConfiguration.php b/src/EtlConfiguration.php new file mode 100644 index 0000000..b60e4bf --- /dev/null +++ b/src/EtlConfiguration.php @@ -0,0 +1,13 @@ + + */ + use EtlBuilderTrait; + + /** + * @use EtlExceptionsTrait + */ + use EtlExceptionsTrait; + + private EventDispatcherInterface $eventDispatcher; + + public function __construct( + private ExtractorInterface $extractor = new IterableExtractor(), + private TransformerInterface $transformer = new NullTransformer(), + private LoaderInterface $loader = new InMemoryLoader(), + private EtlConfiguration $options = new EtlConfiguration(), + ) { + $this->listenerProvider = new PrioritizedListenerProvider(); + $this->eventDispatcher = new EventDispatcher($this->listenerProvider); + } + + public function process(mixed $source = null, mixed $destination = null): EtlState + { + $state = new EtlState(options: $this->options, source: $source, destination: $destination); + + try { + $this->dispatch(new InitEvent($state)); + + foreach ($this->extract($state) as $extractedItem) { + try { + $transformedItems = $this->transform($extractedItem, $state); + $this->load($transformedItems, $state); + } catch (SkipRequest) { + } + } + } catch (StopRequest) { + } + + $output = $this->flush($state, false); + + if (!$state->nbTotalItems) { + $state = 
$state->withNbTotalItems($state->nbLoadedItems); + } + + $state = $state->withOutput($output); + $this->dispatch(new EndEvent($state)); + + gc_collect_cycles(); + + return $state; + } + + private function extract(EtlState &$state): Generator + { + try { + $items = $this->extractor->extract($state); + if (is_countable($items)) { + $state = $state->withNbTotalItems(count($items)); + } + $this->dispatch(new StartEvent($state)); + foreach ($items as $key => $value) { + try { + $state = $state->withUpdatedItemKey($key); + $event = $this->dispatch(new ExtractEvent($state, $value)); + yield $event->item; + } catch (SkipRequest) { + } + } + } catch (StopRequest) { + return; + } catch (Throwable $exception) { + $this->throwExtractException($exception, $state); + } + } + + /** + * @return list + */ + private function transform(mixed $item, EtlState $state): array + { + try { + $items = [...$this->transformer->transform($item, $state)]; + + return $this->dispatch(new TransformEvent($state, $items))->items; + } catch (SkipRequest|StopRequest $e) { + throw $e; + } catch (Throwable $e) { + $this->throwTransformException($e, $state); + } + + return []; + } + + /** + * @param list $items + */ + private function load(array $items, EtlState &$state): void + { + try { + foreach ($items as $item) { + $this->loader->load($item, $state); + $state = $state->withIncrementedNbLoadedItems(); + $this->dispatch(new LoadEvent($state, $item)); + } + } catch (SkipRequest|StopRequest $e) { + throw $e; + } catch (Throwable $e) { + $this->throwLoadException($e, $state); + } + + $this->flush($state, true); + } + + private function flush(EtlState &$state, bool $isPartial): mixed + { + if ($isPartial && !$state->shouldFlush()) { + return null; + } + + if (0 === $state->nbLoadedItems) { + return null; + } + + $output = null; + $state->flush(); + try { + $output = $this->loader->flush($isPartial, $state); + } catch (Throwable $e) { + $this->throwFlushException($e, $state); + } + $this->dispatch(new 
FlushEvent($state, $isPartial, $output)); + $state = $state->withClearedFlush(); + + return $output; + } + + /** + * @template T of object + * + * @param T $event + * + * @return T + */ + private function dispatch(object $event): object + { + $this->eventDispatcher->dispatch($event); + + return $event; + } +} diff --git a/src/EtlState.php b/src/EtlState.php new file mode 100644 index 0000000..02ecacd --- /dev/null +++ b/src/EtlState.php @@ -0,0 +1,123 @@ + $context + */ + public function __construct( + public readonly EtlConfiguration $options = new EtlConfiguration(), + public readonly mixed $source = null, + public readonly mixed $destination = null, + public readonly mixed $currentItemKey = null, + public readonly int $currentItemIndex = -1, + public readonly int $nbExtractedItems = 0, + public readonly int $nbLoadedItems = 0, + public readonly ?int $nbTotalItems = null, + public readonly DateTimeImmutable $startedAt = new DateTimeImmutable(), + public readonly ?DateTimeImmutable $endedAt = null, + public readonly mixed $output = null, + private readonly int $nbLoadedItemsSinceLastFlush = 0, + private bool $flush = false, + public array $context = [], + ) { + } + + /** + * Flush after current item. + */ + public function flush(): void + { + $this->flush = true; + } + + /** + * Skip current item. + */ + public function skip(): void + { + throw new SkipRequest(); + } + + /** + * Stop after current item. + */ + public function stop(): void + { + throw new StopRequest(); + } + + public function shouldFlush(): bool + { + return $this->flush + || (0 === ($this->nbLoadedItemsSinceLastFlush % $this->options->flushEvery)); + } + + public function getDuration(): float + { + $endedAt = $this->endedAt ?? 
new DateTimeImmutable(); + + return (float) ($endedAt->format('U.u') - $this->startedAt->format('U.u')); + } + + /** + * @internal + */ + public function withUpdatedItemKey(mixed $key): self + { + return $this->clone([ + 'currentItemKey' => $key, + 'currentItemIndex' => $this->currentItemIndex + 1, + 'nbExtractedItems' => $this->nbExtractedItems + 1, + ]); + } + + /** + * @internal + */ + public function withIncrementedNbLoadedItems(): self + { + return $this->clone([ + 'nbLoadedItems' => $this->nbLoadedItems + 1, + 'nbLoadedItemsSinceLastFlush' => $this->nbLoadedItemsSinceLastFlush + 1, + ]); + } + + /** + * @internal + */ + public function withNbTotalItems(?int $nbTotalItems): self + { + return $this->clone(['nbTotalItems' => $nbTotalItems]); + } + + /** + * @internal + */ + public function withOutput(mixed $output): self + { + return $this->clone(['output' => $output]); + } + + /** + * @internal + */ + public function withClearedFlush(): self + { + return $this->clone([ + 'flush' => false, + 'nbLoadedItemsSinceLastFlush' => 0, + ]); + } +} diff --git a/src/EventDispatcher/EtlEvents.php b/src/EventDispatcher/EtlEvents.php deleted file mode 100644 index d6ca38d..0000000 --- a/src/EventDispatcher/EtlEvents.php +++ /dev/null @@ -1,84 +0,0 @@ -counter = $counter; - } - - /** - * @return int - */ - public function getCounter(): int - { - return $this->counter; - } - - /** - * @inheritDoc - */ - public function getName(): string - { - return EtlEvents::END; - } -} diff --git a/src/EventDispatcher/Event/EtlEvent.php b/src/EventDispatcher/Event/EtlEvent.php deleted file mode 100644 index f8d776e..0000000 --- a/src/EventDispatcher/Event/EtlEvent.php +++ /dev/null @@ -1,58 +0,0 @@ -etl = $etl; - } - - /** - * @return Etl - */ - public function getEtl(): Etl - { - return $this->etl; - } - - /** - * @return string - */ - abstract public function getName(): string; - - /** - * Stop event propagation. 
- */ - final public function stopPropagation(): void - { - $this->propagationStopped = true; - } - - /** - * @inheritDoc - */ - final public function isPropagationStopped(): bool - { - return true === $this->propagationStopped; - } -} diff --git a/src/EventDispatcher/Event/Event.php b/src/EventDispatcher/Event/Event.php new file mode 100644 index 0000000..82fb645 --- /dev/null +++ b/src/EventDispatcher/Event/Event.php @@ -0,0 +1,12 @@ +counter = $counter; - $this->partial = $partial; - } +use Bentools\ETL\EtlState; +use Bentools\ETL\EventDispatcher\StoppableEventTrait; +use Psr\EventDispatcher\StoppableEventInterface; - /** - * @return int - */ - public function getCounter(): int - { - return $this->counter; - } - - /** - * @return bool - */ - public function isPartial(): bool - { - return $this->partial; - } +final class FlushEvent extends Event implements StoppableEventInterface +{ + use StoppableEventTrait; - /** - * @inheritDoc - */ - public function getName(): string - { - return EtlEvents::FLUSH; + public function __construct( + public readonly EtlState $state, + public readonly bool $partial, + public mixed $output, + ) { } } diff --git a/src/EventDispatcher/Event/FlushExceptionEvent.php b/src/EventDispatcher/Event/FlushExceptionEvent.php new file mode 100644 index 0000000..ba9739e --- /dev/null +++ b/src/EventDispatcher/Event/FlushExceptionEvent.php @@ -0,0 +1,26 @@ +exception = null; + } +} diff --git a/src/EventDispatcher/Event/InitEvent.php b/src/EventDispatcher/Event/InitEvent.php new file mode 100644 index 0000000..a78a966 --- /dev/null +++ b/src/EventDispatcher/Event/InitEvent.php @@ -0,0 +1,19 @@ +name = $name; - $this->item = $item; - $this->key = $key; - parent::__construct($etl); - } - - /** - * @inheritDoc - */ - public function getName(): string - { - return $this->name; - } - - /** - * @return mixed - */ - public function getItem() - { - return $this->item; - } - - /** - * @return mixed - */ - public function getKey() - { - return $this->key; - 
} -} diff --git a/src/EventDispatcher/Event/ItemExceptionEvent.php b/src/EventDispatcher/Event/ItemExceptionEvent.php deleted file mode 100644 index 43de09f..0000000 --- a/src/EventDispatcher/Event/ItemExceptionEvent.php +++ /dev/null @@ -1,94 +0,0 @@ -name = $name; - $this->item = $item; - $this->key = $key; - $this->exception = $exception; - parent::__construct($etl); - } - - /** - * @return \Throwable - */ - public function getException(): \Throwable - { - return $this->exception; - } - - /** - * @return bool - */ - public function shouldThrowException(): bool - { - return $this->shouldBeThrown; - } - - /** - * Exception should not be thrown. - * Implicitely skips the current item. - */ - public function ignoreException(): void - { - $this->shouldBeThrown = false; - $this->etl->skipCurrentItem(); - } - - /** - * @return mixed - */ - public function getItem() - { - return $this->item; - } - - /** - * @return mixed - */ - public function getKey() - { - return $this->key; - } - - /** - * @inheritDoc - */ - public function getName(): string - { - return $this->name; - } -} diff --git a/src/EventDispatcher/Event/LoadEvent.php b/src/EventDispatcher/Event/LoadEvent.php new file mode 100644 index 0000000..4cd7899 --- /dev/null +++ b/src/EventDispatcher/Event/LoadEvent.php @@ -0,0 +1,20 @@ +exception = null; + } +} diff --git a/src/EventDispatcher/Event/RollbackEvent.php b/src/EventDispatcher/Event/RollbackEvent.php deleted file mode 100644 index 08ffb5b..0000000 --- a/src/EventDispatcher/Event/RollbackEvent.php +++ /dev/null @@ -1,41 +0,0 @@ -counter = $counter; - } - - /** - * @return int - */ - public function getCounter(): int - { - return $this->counter; - } - - /** - * @inheritDoc - */ - public function getName(): string - { - return EtlEvents::ROLLBACK; - } -} diff --git a/src/EventDispatcher/Event/StartEvent.php b/src/EventDispatcher/Event/StartEvent.php new file mode 100644 index 0000000..5efdc54 --- /dev/null +++ b/src/EventDispatcher/Event/StartEvent.php @@ 
-0,0 +1,19 @@ + $items + */ + public function __construct( + public readonly EtlState $state, + public array $items, + ) { + } +} diff --git a/src/EventDispatcher/Event/TransformExceptionEvent.php b/src/EventDispatcher/Event/TransformExceptionEvent.php new file mode 100644 index 0000000..5612707 --- /dev/null +++ b/src/EventDispatcher/Event/TransformExceptionEvent.php @@ -0,0 +1,26 @@ +exception = null; + } +} diff --git a/src/EventDispatcher/EventDispatcher.php b/src/EventDispatcher/EventDispatcher.php index 5445bbb..aa4b63f 100644 --- a/src/EventDispatcher/EventDispatcher.php +++ b/src/EventDispatcher/EventDispatcher.php @@ -1,89 +1,36 @@ addListener($listener); - } - } - - /** - * @param EventListener $eventListener - */ - private function addListener(EventListener $eventListener): void - { - $this->listeners[] = $eventListener; - } - - - /** - * @inheritDoc - */ - public function getListenersForEvent(object $event): iterable - { - if (!$event instanceof EtlEvent) { - return []; - } - - $listenersForEvent = \array_filter( - $this->listeners, - function (EventListener $eventListener) use ($event) { - return $eventListener->getEventName() === $event->getName(); - } - ); - - \usort( - $listenersForEvent, - function (EventListener $a, EventListener $b) { - return $b->getPriority() <=> $a->getPriority(); - } - ); - - return \array_map( - function (EventListener $eventListener) { - return $eventListener->getCallable(); - }, - $listenersForEvent - ); + public function __construct( + private ListenerProviderInterface $listenerProvider, + ) { } /** - * @inheritDoc + * @template T of object + * + * @param T $event + * + * @return T */ - public function dispatch(object $event) + public function dispatch(object $event): object { - if (!$event instanceof EtlEvent) { - return $event; - } - - $listeners = $this->getListenersForEvent($event); + $listeners = $this->listenerProvider->getListenersForEvent($event); + $isStoppable = $event instanceof StoppableEventInterface; - 
foreach ($listeners as $listen) { - if ($event->isPropagationStopped()) { + foreach ($listeners as $callback) { + if ($isStoppable && $event->isPropagationStopped()) { break; } - $listen($event); + $callback($event); } return $event; diff --git a/src/EventDispatcher/EventListener.php b/src/EventDispatcher/EventListener.php deleted file mode 100644 index 1e23483..0000000 --- a/src/EventDispatcher/EventListener.php +++ /dev/null @@ -1,55 +0,0 @@ -eventName = $eventName; - $this->listener = $listener; - $this->priority = $priority; - } - - /** - * @return string - */ - public function getEventName(): ?string - { - return $this->eventName; - } - - /** - * @return callable - */ - public function getCallable(): callable - { - return $this->listener; - } - - /** - * @return int - */ - public function getPriority(): int - { - return $this->priority; - } -} diff --git a/src/EventDispatcher/PrioritizedListenerProvider.php b/src/EventDispatcher/PrioritizedListenerProvider.php new file mode 100644 index 0000000..97d5ec2 --- /dev/null +++ b/src/EventDispatcher/PrioritizedListenerProvider.php @@ -0,0 +1,38 @@ +>> + */ + private array $prioritizedListeners = []; + + /** + * @var array> + */ + private array $flattenedListeners = []; + + public function listenTo(string $eventClass, callable $callback, int $priority = 0): void + { + $this->prioritizedListeners[$eventClass][$priority][] = $callback; + krsort($this->prioritizedListeners[$eventClass]); + $this->flattenedListeners[$eventClass] = array_merge(...$this->prioritizedListeners[$eventClass]); + } + + /** + * @return iterable + */ + public function getListenersForEvent(object $event): iterable + { + return $this->flattenedListeners[$event::class] ?? 
[]; + } +} diff --git a/src/EventDispatcher/StoppableEventTrait.php b/src/EventDispatcher/StoppableEventTrait.php new file mode 100644 index 0000000..d43193e --- /dev/null +++ b/src/EventDispatcher/StoppableEventTrait.php @@ -0,0 +1,20 @@ +propagationStopped = true; + } + + final public function isPropagationStopped(): bool + { + return true === $this->propagationStopped; + } +} diff --git a/src/Exception/EtlException.php b/src/Exception/EtlException.php index 9fb02ec..f86f5d6 100644 --- a/src/Exception/EtlException.php +++ b/src/Exception/EtlException.php @@ -1,8 +1,11 @@ source ?? $this->content; + + if (!is_string($content)) { + throw new ExtractException('Invalid source.'); + } + + if (str_starts_with($content, 'file://')) { + $iterator = (new FileExtractor(substr($content, 7), $this->options))->extract($state); + } else { + $iterator = (new TextLinesExtractor($content, $this->options))->extract($state); + } + + return new CSVIterator($iterator, $this->options); + } +} diff --git a/src/Extractor/CallableExtractor.php b/src/Extractor/CallableExtractor.php new file mode 100644 index 0000000..2f0a6d7 --- /dev/null +++ b/src/Extractor/CallableExtractor.php @@ -0,0 +1,21 @@ +closure)($state); + } +} diff --git a/src/Extractor/CsvExtractor.php b/src/Extractor/CsvExtractor.php deleted file mode 100644 index 840adc2..0000000 --- a/src/Extractor/CsvExtractor.php +++ /dev/null @@ -1,116 +0,0 @@ -delimiter = $options['delimiter'] ?? ','; - $this->enclosure = $options['enclosure'] ?? '"'; - $this->escapeString = $options['escape_string'] ?? '\\'; - $this->createKeys = $options['create_keys'] ?? false; - $this->type = $options['type'] ?? 
self::EXTRACT_AUTO; - } - - /** - * @inheritDoc - */ - public function extract($input, Etl $etl): iterable - { - switch ($this->type) { - case self::EXTRACT_FROM_STRING: - $iterator = $this->extractFromString($input); - break; - case self::EXTRACT_FROM_FILE: - $iterator = $this->extractFromFile($input); - break; - case self::EXTRACT_AUTO: - $iterator = $this->extractAuto($input); - break; - default: - throw new \InvalidArgumentException('Invalid input.'); - } - - return true === $this->createKeys ? new KeysAwareCsvIterator($iterator) : $iterator; - } - - /** - * @param $string - * @return CsvStringIterator - */ - private function extractFromString($string) - { - return CsvStringIterator::createFromText($string, $this->delimiter, $this->enclosure, $this->escapeString); - } - - /** - * @param $file - * @return CsvFileIterator - * @throws UnexpectedTypeException - */ - private function extractFromFile($file) - { - if ($file instanceof \SplFileInfo) { - return new CsvFileIterator($file, $this->delimiter, $this->enclosure, $this->escapeString); - }; - - UnexpectedTypeException::throwIfNot($file, 'string'); - - return CsvFileIterator::createFromFilename($file, $this->delimiter, $this->enclosure, $this->escapeString); - } - - /** - * @param $input - * @return CsvFileIterator|CsvStringIterator - * @throws UnexpectedTypeException - */ - private function extractAuto($input) - { - if (\strlen($input) < 3000 && \file_exists($input)) { - return $this->extractFromFile($input); - } - - return $this->extractFromString($input); - } -} diff --git a/src/Extractor/ExtractorInterface.php b/src/Extractor/ExtractorInterface.php index 758d789..b4542f0 100644 --- a/src/Extractor/ExtractorInterface.php +++ b/src/Extractor/ExtractorInterface.php @@ -1,20 +1,15 @@ */ - public function extract($input, Etl $etl): iterable; + public function extract(EtlState $state): iterable; } diff --git a/src/Extractor/FileExtractor.php b/src/Extractor/FileExtractor.php index 89e198f..53549d9 100644 --- 
a/src/Extractor/FileExtractor.php +++ b/src/Extractor/FileExtractor.php @@ -1,31 +1,40 @@ contentExtractor = $contentExtractor; + $file = $state->source ?? $this->file; + + return new FileIterator($this->resolveFile($file), $this->options); } - /** - * @inheritDoc - */ - public function extract(/*string */$filename, Etl $etl): iterable + private function resolveFile(mixed $file): SplFileObject { - return $this->contentExtractor->extract(file_get_contents($filename), $etl); + return match (true) { + $file instanceof SplFileObject => $file, + is_string($file) => new SplFileObject($file), + default => throw new ExtractException('Invalid file.'), + }; } } diff --git a/src/Extractor/IterableExtractor.php b/src/Extractor/IterableExtractor.php new file mode 100644 index 0000000..0ae89d6 --- /dev/null +++ b/src/Extractor/IterableExtractor.php @@ -0,0 +1,33 @@ + $source + */ + public function __construct( + public iterable $source = new EmptyIterator(), + ) { + } + + public function extract(EtlState $state): iterable + { + $source = $state->source ?? $this->source; + + if (!is_iterable($source)) { + throw new ExtractException('Provided source is not iterable.'); + } + + return $source; + } +} diff --git a/src/Extractor/JSONExtractor.php b/src/Extractor/JSONExtractor.php new file mode 100644 index 0000000..4e867e8 --- /dev/null +++ b/src/Extractor/JSONExtractor.php @@ -0,0 +1,55 @@ +source ?? 
$this->source; + + $source = $this->resolveFile($source); + if ($source instanceof SplFileObject) { + $content = $source->fread($source->getSize()); + } + + if (is_string($content)) { + $content = json_decode($content, true); + } + + if (null === $content) { + return new EmptyIterator(); + } + + if (!is_iterable($content)) { + throw new ExtractException('Provided JSON is not iterable.'); + } + + yield from $content; + } + + private function resolveFile(mixed $source): ?SplFileObject + { + return match (true) { + $source instanceof SplFileObject => $source, + is_string($source) && str_starts_with($source, 'file://') => new SplFileObject(substr($source, 7)), + default => null, + }; + } +} diff --git a/src/Extractor/JsonExtractor.php b/src/Extractor/JsonExtractor.php deleted file mode 100644 index 6546f0c..0000000 --- a/src/Extractor/JsonExtractor.php +++ /dev/null @@ -1,113 +0,0 @@ -type = $options['type'] ?? self::EXTRACT_AUTO; - } - - /** - * @param $json - * @return iterable - * @throws UnexpectedTypeException - */ - private function extractFromArray($json): iterable - { - UnexpectedTypeException::throwIfNot($json, 'array'); - - return $json; - } - - /** - * @param $json - * @return iterable - * @throws UnexpectedTypeException - */ - private function extractFromString($json): iterable - { - UnexpectedTypeException::throwIfNot($json, 'string'); - - return \Safe\json_decode($json, true); - } - - /** - * @param $json - * @return iterable - * @throws UnexpectedTypeException - */ - private function extractFromFile($file): iterable - { - if ($file instanceof \SplFileInfo) { - $file = $file->getPathname(); - } - - UnexpectedTypeException::throwIfNot($file, 'string'); - - if (!\is_readable($file)) { - throw new \InvalidArgumentException(sprintf('File %s is not readable.', $file)); - } - - return \Safe\json_decode( - \Safe\file_get_contents($file), - true - ); - } - - private function extractAuto($json) - { - if (\is_array($json)) { - return $this->extractFromArray($json); 
- } - - try { - $json = \Safe\json_decode($json, true); - - return $this->extractFromArray($json); - } catch (JsonException $e) { - // Is it a file? - if (\strlen($json) < 3000 && \file_exists($json)) { - return $this->extractFromFile($json); - } - - throw $e; - } - } - - /** - * @inheritDoc - */ - public function extract($json, Etl $etl): iterable - { - switch ($this->type) { - case self::EXTRACT_FROM_ARRAY: - return $this->extractFromArray($json); - case self::EXTRACT_FROM_FILE: - return $this->extractFromFile($json); - case self::EXTRACT_FROM_STRING: - return $this->extractFromString($json); - case self::EXTRACT_AUTO: - return $this->extractAuto($json); - } - - throw new \RuntimeException('Invalid type provided for '.self::class); - } -} diff --git a/src/Extractor/NullExtractor.php b/src/Extractor/NullExtractor.php new file mode 100644 index 0000000..b2e3f92 --- /dev/null +++ b/src/Extractor/NullExtractor.php @@ -0,0 +1,18 @@ +setIgnoreUndefined(); + $resolver->setDefaults(['skipEmptyLines' => true]); + $resolver->setAllowedTypes('skipEmptyLines', 'bool'); + $this->options = $resolver->resolve($options); + } + + public function extract(EtlState $state): StrTokIterator|PregSplitIterator|EmptyIterator + { + $content = $state->source ?? 
$this->content; + + if (null === $content) { + return new EmptyIterator(); + } + + if ($this->options['skipEmptyLines']) { + return new StrTokIterator($content); + } + + return new PregSplitIterator($content); + } +} diff --git a/src/Internal/ClonableTrait.php b/src/Internal/ClonableTrait.php new file mode 100644 index 0000000..9c0bdad --- /dev/null +++ b/src/Internal/ClonableTrait.php @@ -0,0 +1,43 @@ + $overridenProps + */ + private function clone(array $overridenProps = []): self + { + static $refl, $properties, $constructorParams; + $refl ??= new ReflectionClass($this); + $properties ??= array_combine(array_column($refl->getProperties(), 'name'), $refl->getProperties()); + $constructorParams ??= $refl->getConstructor()->getParameters(); + + $args = (function () use ($properties, $constructorParams) { + foreach ($constructorParams as $param) { + $key = $param->getName(); + yield $key => $properties[$key]->getValue($this); + } + })(); + + return new self(...[ + ...$args, + ...$overridenProps, + ]); + } +} diff --git a/src/Internal/EtlBuilderTrait.php b/src/Internal/EtlBuilderTrait.php new file mode 100644 index 0000000..33416dc --- /dev/null +++ b/src/Internal/EtlBuilderTrait.php @@ -0,0 +1,68 @@ + + */ + use EtlEventListenersTrait; + + public function extractFrom(ExtractorInterface|callable $extractor): self + { + if (!$extractor instanceof ExtractorInterface) { + $extractor = new CallableExtractor($extractor(...)); + } + + return $this->clone(['extractor' => $extractor]); + } + + public function transformWith(TransformerInterface|callable $transformer): self + { + if (!$transformer instanceof TransformerInterface) { + $transformer = new CallableTransformer($transformer(...)); + } + + return $this->clone(['transformer' => $transformer]); + } + + public function loadInto(LoaderInterface|callable $loader): self + { + if (!$loader instanceof LoaderInterface) { + $loader = new CallableLoader($loader(...)); + } + + return $this->clone(['loader' => $loader]); + } + + 
public function withOptions(EtlConfiguration $configuration): self + { + return $this->clone(['options' => $configuration]); + } + + public function withRecipe(Recipe|callable $recipe): self + { + if (!$recipe instanceof Recipe) { + $recipe = Recipe::fromCallable($recipe); + } + + return $recipe->fork($this); + } +} diff --git a/src/Internal/EtlEventListenersTrait.php b/src/Internal/EtlEventListenersTrait.php new file mode 100644 index 0000000..b0f07ca --- /dev/null +++ b/src/Internal/EtlEventListenersTrait.php @@ -0,0 +1,124 @@ +listenTo(InitEvent::class, $callback, $priority); + } + + /** + * @param callable(StartEvent): void $callback + */ + public function onStart(callable $callback, int $priority = 0): self + { + return $this->listenTo(StartEvent::class, $callback, $priority); + } + + /** + * @param callable(ExtractEvent): void $callback + */ + public function onExtract(callable $callback, int $priority = 0): self + { + return $this->listenTo(ExtractEvent::class, $callback, $priority); + } + + /** + * @param callable(ExtractExceptionEvent): void $callback + */ + public function onExtractException(callable $callback, int $priority = 0): self + { + return $this->listenTo(ExtractExceptionEvent::class, $callback, $priority); + } + + /** + * @param callable(TransformEvent): void $callback + */ + public function onTransform(callable $callback, int $priority = 0): self + { + return $this->listenTo(TransformEvent::class, $callback, $priority); + } + + /** + * @param callable(TransformExceptionEvent): void $callback + */ + public function onTransformException(callable $callback, int $priority = 0): self + { + return $this->listenTo(TransformExceptionEvent::class, $callback, $priority); + } + + /** + * @param callable(LoadEvent): void $callback + */ + public function onLoad(callable $callback, int $priority = 0): self + { + return $this->listenTo(LoadEvent::class, $callback, $priority); + } + + /** + * @param callable(LoadExceptionEvent): void $callback + */ + public 
function onLoadException(callable $callback, int $priority = 0): self + { + return $this->listenTo(LoadExceptionEvent::class, $callback, $priority); + } + + /** + * @param callable(FlushEvent): void $callback + */ + public function onFlush(callable $callback, int $priority = 0): self + { + return $this->listenTo(FlushEvent::class, $callback, $priority); + } + + /** + * @param callable(FlushExceptionEvent): void $callback + */ + public function onFlushException(callable $callback, int $priority = 0): self + { + return $this->listenTo(FlushExceptionEvent::class, $callback, $priority); + } + + /** + * @param callable(EndEvent): void $callback + */ + public function onEnd(callable $callback, int $priority = 0): self + { + return $this->listenTo(EndEvent::class, $callback, $priority); + } + + private function listenTo(string $eventClass, callable $callback, int $priority = 0): self + { + $clone = $this->clone(); + $clone->listenerProvider->listenTo($eventClass, $callback, $priority); + + return $clone; + } +} diff --git a/src/Internal/EtlExceptionsTrait.php b/src/Internal/EtlExceptionsTrait.php new file mode 100644 index 0000000..4eb87ef --- /dev/null +++ b/src/Internal/EtlExceptionsTrait.php @@ -0,0 +1,72 @@ +dispatch(new ExtractExceptionEvent($state, $exception))->exception; + } + + private function throwTransformException(Throwable $exception, EtlState $state): void + { + if (!$exception instanceof TransformException) { + $exception = new TransformException('Error during transformation.', previous: $exception); + } + + $exception = $this->dispatch(new TransformExceptionEvent($state, $exception))->exception; + + if ($exception) { + throw $exception; + } + } + + private function throwLoadException(Throwable $exception, EtlState $state): void + { + if (!$exception instanceof LoadException) { + $exception = new LoadException('Error during loading.', previous: $exception); + } + + $exception = $this->dispatch(new LoadExceptionEvent($state, $exception))->exception; + + if 
($exception) { + throw $exception; + } + } + + private function throwFlushException(Throwable $exception, EtlState $state): void + { + if (!$exception instanceof FlushException) { + $exception = new FlushException('Error during flush.', previous: $exception); + } + + $exception = $this->dispatch(new FlushExceptionEvent($state, $exception))->exception; + + if ($exception) { + throw $exception; + } + } +} diff --git a/src/Iterator/CSVIterator.php b/src/Iterator/CSVIterator.php new file mode 100644 index 0000000..0b0358d --- /dev/null +++ b/src/Iterator/CSVIterator.php @@ -0,0 +1,120 @@ +> + */ +final readonly class CSVIterator implements IteratorAggregate +{ + /** + * @var array{delimiter: string, enclosure: string, escapeString: string, columns: 'auto'|string[]|null, normalizers: ValueNormalizerInterface[]} + */ + private array $options; + + /** + * @param array{delimiter?: string, enclosure?: string, escapeString?: string, columns?: 'auto'|string[]|null, normalizers?: ValueNormalizerInterface[]} $options + */ + public function __construct( + private PregSplitIterator|StrTokIterator|FileIterator $text, + array $options = [], + ) { + $resolver = (new OptionsResolver())->setIgnoreUndefined(); + $resolver->setDefaults([ + 'delimiter' => ',', + 'enclosure' => '"', + 'escapeString' => '\\', + 'columns' => null, + 'normalizers' => [ + new NumericStringToNumberNormalizer(), + new EmptyStringToNullNormalizer(), + ], + ]); + $resolver->setAllowedTypes('delimiter', 'string'); + $resolver->setAllowedTypes('enclosure', 'string'); + $resolver->setAllowedTypes('escapeString', 'string'); + $resolver->setAllowedTypes('normalizers', ValueNormalizerInterface::class.'[]'); + $resolver->setAllowedTypes('columns', ['string[]', 'null', 'string']); + $resolver->setAllowedValues('columns', function (array|string|null $value) { + return 'auto' === $value || null === $value || is_array($value); + }); + $this->options = $resolver->resolve($options); + } + + public function getIterator(): 
Traversable + { + $columns = $this->options['columns']; + if ('auto' === $columns) { + $columns = null; + } + foreach ($this->text as $r => $row) { + $fields = str_getcsv( + $row, + $this->options['delimiter'], + $this->options['enclosure'], + $this->options['escapeString'], + ); + if (0 === $r && 'auto' === $this->options['columns']) { + $columns ??= $fields; + continue; + } + + if ($this->options['normalizers']) { + array_walk($fields, function (&$value) { + foreach ($this->options['normalizers'] as $normalizer) { + $value = $normalizer->normalize($value); + } + + return $value; + }); + } + + if (!empty($columns)) { + yield self::combine($columns, $fields); + continue; + } + yield $fields; + } + } + + /** + * @param string[] $keys + * @param string[] $values + * + * @return string[] + */ + private static function combine(array $keys, array $values): array + { + $nbKeys = count($keys); + $nbValues = count($values); + + if ($nbKeys < $nbValues) { + return array_combine($keys, array_slice(array_values($values), 0, $nbKeys)); + } + + if ($nbKeys > $nbValues) { + return array_combine($keys, array_merge($values, array_fill(0, $nbKeys - $nbValues, null))); + } + + return array_combine($keys, $values); + } +} diff --git a/src/Iterator/CsvFileIterator.php b/src/Iterator/CsvFileIterator.php deleted file mode 100644 index d980136..0000000 --- a/src/Iterator/CsvFileIterator.php +++ /dev/null @@ -1,69 +0,0 @@ -file = $file; - $this->file->setCsvControl($delimiter, $enclosure, $escapeString); - $this->file->setFlags(SplFileObject::READ_CSV); - parent::__construct($this->file); - } - - /** - * @param string $filename - * @param string $delimiter - * @param string $enclosure - * @param string $escape - * @return static - */ - public static function createFromFilename(string $filename, $delimiter = ',', $enclosure = '"', $escape = '\\') - { - return new static(new SplFileObject($filename, 'r'), $delimiter, $enclosure, $escape); - } - - /** - * @inheritDoc - */ - public function 
accept() - { - $current = $this->getInnerIterator()->current(); - return !empty( - \array_filter( - $current, - function ($cell) { - return null !== $cell; - } - ) - ); - } - - /** - * @inheritdoc - */ - public function count() - { - if (null === $this->nbLines) { - $this->rewind(); - $this->nbLines = \count(\iterator_to_array($this)); - } - - return $this->nbLines; - } -} diff --git a/src/Iterator/CsvIteratorInterface.php b/src/Iterator/CsvIteratorInterface.php deleted file mode 100644 index 1a5a641..0000000 --- a/src/Iterator/CsvIteratorInterface.php +++ /dev/null @@ -1,13 +0,0 @@ -stringIterator = $iterator; - $this->delimiter = $delimiter; - $this->enclosure = $enclosure; - $this->escapeString = $escapeString; - } - - /** - * @param string $text - * @param string $delimiter - * @param string $enclosure - * @param string $escapeString - * @return static - */ - public static function createFromText( - string $text, - $delimiter = ',', - $enclosure = '"', - $escapeString = '\\' - ) { - - return new static(new TextLinesIterator($text, true), $delimiter, $enclosure, $escapeString); - } - - /** - * @inheritDoc - */ - public function getIterator() - { - foreach ($this->stringIterator as $line) { - yield \str_getcsv($line, $this->delimiter, $this->enclosure, $this->escapeString); - } - } -} diff --git a/src/Iterator/FileIterator.php b/src/Iterator/FileIterator.php new file mode 100644 index 0000000..f067d4a --- /dev/null +++ b/src/Iterator/FileIterator.php @@ -0,0 +1,49 @@ + + */ +final readonly class FileIterator implements IteratorAggregate +{ + /** + * @var array{skipEmptyLines: bool} + */ + private array $options; + + /** + * @param array{skipEmptyLines?: bool} $options + */ + public function __construct( + private SplFileObject $file, + array $options = [], + ) { + $resolver = (new OptionsResolver())->setIgnoreUndefined(); + $resolver->setDefaults(['skipEmptyLines' => true]); + $resolver->setAllowedTypes('skipEmptyLines', 'bool'); + $this->options = 
$resolver->resolve($options); + } + + public function getIterator(): Traversable + { + foreach ($this->file as $row) { + $line = rtrim($row, PHP_EOL); + if ($this->options['skipEmptyLines'] && '' === $line) { + continue; + } + yield $line; + } + } +} diff --git a/src/Iterator/FileLinesIterator.php b/src/Iterator/FileLinesIterator.php deleted file mode 100644 index 4e9670b..0000000 --- a/src/Iterator/FileLinesIterator.php +++ /dev/null @@ -1,43 +0,0 @@ -file = $file; - } - - /** - * @inheritDoc - */ - public function getIterator() - { - foreach ($this->file as $row) { - yield rtrim($row, \PHP_EOL); - } - } - - /** - * @param string $fileName - * @return FileLinesIterator - * @throws \LogicException - * @throws \RuntimeException - */ - public static function createFromFilename(string $fileName): self - { - return new self(new \SplFileObject($fileName)); - } -} diff --git a/src/Iterator/KeysAwareCsvIterator.php b/src/Iterator/KeysAwareCsvIterator.php deleted file mode 100644 index 8dc6c9a..0000000 --- a/src/Iterator/KeysAwareCsvIterator.php +++ /dev/null @@ -1,83 +0,0 @@ -csvIterator = $csvIterator; - $this->keys = $keys; - $this->skipFirstRow = $skipFirstRow; - } - - /** - * @inheritDoc - */ - public function getIterator() - { - foreach ($this->csvIterator as $value) { - if (false === $this->started) { - $this->started = true; - if (empty($this->keys)) { - $this->keys = $value; - } - if (true === $this->skipFirstRow) { - continue; - } - } - yield self::combine($this->keys, $value); - } - } - - /** - * Combine keys & values - * - * @param array $keys - * @param array $values - * @return array - */ - private static function combine(array $keys, array $values): array - { - $nbKeys = \count($keys); - $nbValues = \count($values); - - if ($nbKeys < $nbValues) { - return \array_combine($keys, \array_slice(\array_values($values), 0, $nbKeys)); - } - - if ($nbKeys > $nbValues) { - return \array_combine($keys, \array_merge($values, \array_fill(0, $nbKeys - $nbValues, null))); 
- } - - return \array_combine($keys, $values); - } -} diff --git a/src/Iterator/PregSplitIterator.php b/src/Iterator/PregSplitIterator.php new file mode 100644 index 0000000..1fab004 --- /dev/null +++ b/src/Iterator/PregSplitIterator.php @@ -0,0 +1,32 @@ + + */ +final readonly class PregSplitIterator implements IteratorAggregate +{ + public function __construct( + public string $content, + ) { + } + + public function getIterator(): Traversable + { + $lines = preg_split("/((\r?\n)|(\r\n?))/", $this->content); + foreach ($lines as $line) { + yield rtrim($line, PHP_EOL); + } + } +} diff --git a/src/Iterator/StrTokIterator.php b/src/Iterator/StrTokIterator.php new file mode 100644 index 0000000..18c7dc8 --- /dev/null +++ b/src/Iterator/StrTokIterator.php @@ -0,0 +1,36 @@ + + */ +final readonly class StrTokIterator implements IteratorAggregate +{ + public function __construct( + public string $content, + ) { + } + + public function getIterator(): Traversable + { + $tok = strtok($this->content, "\r\n"); + while (false !== $tok) { + $line = $tok; + $tok = strtok("\n\r"); + yield rtrim($line, PHP_EOL); + } + } +} diff --git a/src/Iterator/StringIteratorInterface.php b/src/Iterator/StringIteratorInterface.php deleted file mode 100644 index 21a489f..0000000 --- a/src/Iterator/StringIteratorInterface.php +++ /dev/null @@ -1,13 +0,0 @@ -content = $content; - $this->skipEmptyLines = $skipEmptyLines; - } - - /** - * @return string[] - */ - public function getIterator() - { - if (true === $this->skipEmptyLines) { - return $this->traverseWithStrTok(); - } else { - return $this->traverseWithPregSplit(); - } - } - - /** - * Uses a regex to split lines. - * @return \Generator|string[] - */ - private function traverseWithPregSplit() - { - $lines = preg_split("/((\r?\n)|(\r\n?))/", $this->content); - foreach ($lines as $line) { - yield $line; - } - } - - /** - * Uses strtok to split lines. Provides better performance, but skips empty lines. 
- * @return \Generator|string[] - */ - private function traverseWithStrTok() - { - $tok = \strtok($this->content, "\r\n"); - while (false !== $tok) { - $line = $tok; - $tok = \strtok("\n\r"); - yield $line; - } - } -} diff --git a/src/Loader/ArrayLoader.php b/src/Loader/ArrayLoader.php deleted file mode 100644 index 5ae5f31..0000000 --- a/src/Loader/ArrayLoader.php +++ /dev/null @@ -1,73 +0,0 @@ -array = &$array; - $this->preserveKeys = $preserveKeys; - } - - /** - * @inheritDoc - */ - public function init(): void - { - } - - /** - * @inheritDoc - */ - public function load(\Generator $items, $key, Etl $etl): void - { - foreach ($items as $v) { - if ($this->preserveKeys) { - $this->array[$key] = $v; - } else { - $this->array[] = $v; - } - } - } - - /** - * @inheritDoc - */ - public function rollback(): void - { - } - - /** - * @inheritDoc - */ - public function commit(bool $partial): void - { - } - - /** - * @return array - */ - public function getArray(): array - { - return $this->array; - } -} diff --git a/src/Loader/CSVLoader.php b/src/Loader/CSVLoader.php new file mode 100644 index 0000000..3e545c3 --- /dev/null +++ b/src/Loader/CSVLoader.php @@ -0,0 +1,111 @@ +setIgnoreUndefined(); + $resolver->setDefaults([ + 'delimiter' => ',', + 'enclosure' => '"', + 'escapeString' => '\\', + 'columns' => null, + 'eol' => PHP_EOL, + ]); + $resolver->setAllowedTypes('delimiter', 'string'); + $resolver->setAllowedTypes('enclosure', 'string'); + $resolver->setAllowedTypes('escapeString', 'string'); + $resolver->setAllowedTypes('columns', ['string[]', 'null', 'string']); + $resolver->setAllowedValues('columns', function (array|string|null $value) { + return 'auto' === $value || null === $value || is_array($value); + }); + $resolver->setAllowedTypes('eol', 'string'); + $this->options = $resolver->resolve($options); + } + + public function load(mixed $item, EtlState $state): void + { + $context = &$state->context[__CLASS__]; + $context['columsWritten'] ??= false; + + if 
(!$context['columsWritten']) { + if (is_array($this->options['columns'])) { + $context['pending'][] = $this->options['columns']; + $context['columsWritten'] = true; + } elseif ('auto' === $this->options['columns']) { + $context['pending'][] = array_keys($item); + $context['columsWritten'] = true; + } + } + + $context['pending'][] = $item; + } + + public function flush(bool $isPartial, EtlState $state): string + { + $context = &$state->context[__CLASS__]; + $context['pending'] ??= []; + $file = $context['file'] ??= $this->resolveDestination($state->destination ?? $this->destination); + foreach ($context['pending'] as $item) { + $this->write($file, $item); + } + + $context['pending'] = []; + + if (!$isPartial && $file instanceof SplTempFileObject) { + $file->rewind(); + + return implode('', [...$file]); // @phpstan-ignore-line + } + + return 'file://'.$file->getPathname(); + } + + /** + * @param array $item + */ + private function write(SplFileObject $file, array $item): void + { + $options = $this->options; + $file->fputcsv($item, $options['delimiter'], $options['enclosure'], $options['escapeString'], $options['eol']); + } + + private function resolveDestination(mixed $destination): SplFileObject + { + $isFileName = is_string($destination) && str_starts_with($destination, 'file://'); + + return match (true) { + $destination instanceof SplFileObject => $destination, + $isFileName => new SplFileObject(substr($destination, 7), 'w'), + null === $destination => new SplTempFileObject(), + default => throw new LoadException('Invalid destination.'), + }; + } +} diff --git a/src/Loader/CallableLoader.php b/src/Loader/CallableLoader.php new file mode 100644 index 0000000..cebf810 --- /dev/null +++ b/src/Loader/CallableLoader.php @@ -0,0 +1,37 @@ +destination ?? 
$this->closure; + if (!is_callable($callback)) { + throw new LoadException('Invalid destination.'); + } + $state->context['output'] = $callback($item, $state); + $state->flush(); + } + + /** + * @codeCoverageIgnore + */ + public function flush(bool $isPartial, EtlState $state): mixed + { + return $state->context['output']; + } +} diff --git a/src/Loader/CsvFileLoader.php b/src/Loader/CsvFileLoader.php deleted file mode 100644 index f36aacb..0000000 --- a/src/Loader/CsvFileLoader.php +++ /dev/null @@ -1,128 +0,0 @@ -file, SplFileObject::class); - foreach ($generator as $row) { - $this->file->fputcsv($row, $this->delimiter, $this->enclosure, $this->escape); - } - } - - /** - * @inheritDoc - */ - public function init($options = null): void - { - if (\func_num_args() > 0) { - if (!\is_array($file = \func_get_arg(0))) { - self::factory(['file' => $file], $this); - } else { - self::factory($options, $this); - } - } - - if (!empty($this->keys)) { - $this->file->fputcsv($this->keys, $this->delimiter, $this->enclosure, $this->escape); - } - } - - /** - * @inheritDoc - */ - public function commit(bool $partial): void - { - } - - /** - * @inheritDoc - */ - public function rollback(): void - { - } - - /** - * @param $file - * @param array $options - * @return FileLoader - * @throws \LogicException - * @throws \RuntimeException - */ - public static function toFile($file, array $options = []): self - { - return self::factory(\array_replace($options, ['file' => $file])); - } - - /** - * @param array $options - * @param self|null $that - * @return CsvFileLoader - * @throws UnexpectedTypeException - * @throws \LogicException - * @throws \RuntimeException - */ - public static function factory(array $options = [], self $that = null): self - { - $that = $that ?? new self; - - $file = $options['file'] ?? $that->file ?? 
null; - if ($file instanceof SplFileObject) { - $that->file = $file; - } elseif (is_string($file)) { - $that->file = new SplFileObject($file, 'w'); - } - UnexpectedTypeException::throwIfNot($that->file, SplFileObject::class, true); - - $that->delimiter = $options['delimiter'] ?? $that->delimiter ?? ','; - $that->enclosure = $options['enclosure'] ?? $that->enclosure ?? '"'; - $that->escape = $options['escape'] ?? $that->escape ?? '\\'; - $that->keys = $options['keys'] ?? $that->keys ?? []; - - return $that; - } -} diff --git a/src/Loader/DoctrineORMLoader.php b/src/Loader/DoctrineORMLoader.php index 7254ea4..49e7d30 100644 --- a/src/Loader/DoctrineORMLoader.php +++ b/src/Loader/DoctrineORMLoader.php @@ -1,92 +1,47 @@ managerRegistry = $managerRegistry; - } +use function gettype; +use function is_object; +use function sprintf; - /** - * @inheritDoc - */ - public function init(): void - { +final readonly class DoctrineORMLoader implements LoaderInterface +{ + public function __construct( + private ManagerRegistry $managerRegistry, + ) { } - /** - * @inheritDoc - */ - public function load(\Generator $entities, $key, Etl $etl): void + public function load(mixed $item, EtlState $state): void { - foreach ($entities as $entity) { - if (!is_object($entity)) { - throw new \InvalidArgumentException("The transformed data should return a generator of entities."); - } - - $className = ClassUtils::getClass($entity); - $objectManager = $this->managerRegistry->getManagerForClass($className); - if (null === $objectManager) { - throw new \RuntimeException(sprintf("Unable to locate Doctrine manager for class %s.", $className)); - } - - $objectManager->persist($entity); - - if (!in_array($objectManager, $this->objectManagers)) { - $this->objectManagers[] = $objectManager; - } + if (!is_object($item)) { + throw new LoadException(sprintf('Expecting object, got %s.', gettype($item))); } - } + $manager = $this->managerRegistry->getManagerForClass($item::class) + ?? 
throw new LoadException(sprintf('Could not find manager for class %s.', $item::class)); - /** - * @inheritDoc - */ - public function rollback(): void - { - foreach ($this->objectManagers as $objectManager) { - $objectManager->clear(); - } - $this->objectManagers = []; + $managers = $state->context[__CLASS__]['managers'] ??= new SplObjectStorage(); + $managers->attach($manager); + $manager->persist($item); } - /** - * @inheritDoc - */ - public function commit(bool $partial): void + public function flush(bool $isPartial, EtlState $state): null { - foreach ($this->objectManagers as $objectManager) { - $objectManager->flush(); + $managers = $state->context[__CLASS__]['managers'] ??= new SplObjectStorage(); + foreach ($managers as $manager) { + $manager->flush(); + $managers->detach($manager); } - $this->objectManagers = []; + + return null; } } diff --git a/src/Loader/FileLoader.php b/src/Loader/FileLoader.php deleted file mode 100644 index c0b6ca6..0000000 --- a/src/Loader/FileLoader.php +++ /dev/null @@ -1,109 +0,0 @@ - 0) { - if (!\is_array($file = \func_get_arg(0))) { - self::factory(['file' => $file], $this); - } else { - self::factory($options, $this); - } - } - } - - /** - * @inheritDoc - */ - public function load(\Generator $items, $key, Etl $etl): void - { - UnexpectedTypeException::throwIfNot($this->file, SplFileObject::class); - foreach ($items as $item) { - $this->file->fwrite($item.$this->eol); - } - } - - /** - * @inheritDoc - */ - public function rollback(): void - { - } - - /** - * @inheritDoc - */ - public function commit(bool $partial): void - { - } - - /** - * @param string|SplFileObject $file - * @param array $options - * @return FileLoader - * @throws \LogicException - * @throws \RuntimeException - */ - public static function toFile($file, array $options = []): self - { - return self::factory(\array_replace($options, ['file' => $file])); - } - - /** - * @param array $options - * @param self|null $that - * @return FileLoader - * @throws 
\LogicException - * @throws \RuntimeException - */ - public static function factory(array $options = [], self $that = null): self - { - $that = $that ?? new self; - - $file = $options['file'] ?? $that->file ?? null; - if ($file instanceof SplFileObject) { - $that->file = $file; - } elseif (is_string($file)) { - $that->file = new SplFileObject($file, 'w'); - } - UnexpectedTypeException::throwIfNot($that->file, SplFileObject::class, true); - - $that->eol = $options['eol'] ?? $that->eol ?? \PHP_EOL; - - return $that; - } -} diff --git a/src/Loader/InMemoryLoader.php b/src/Loader/InMemoryLoader.php new file mode 100644 index 0000000..20a8bad --- /dev/null +++ b/src/Loader/InMemoryLoader.php @@ -0,0 +1,32 @@ +context['pending'][] = $item; + } + + /** + * @return list> + */ + public function flush(bool $isPartial, EtlState $state): array + { + $state->context['batchNumber'] ??= 0; + foreach ($state->context['pending'] as $key => $value) { + $state->context['batches'][$state->context['batchNumber']][] = $value; + } + $state->context['pending'] = []; + ++$state->context['batchNumber']; + + return array_merge(...$state->context['batches'] ?? []); + } +} diff --git a/src/Loader/JSONLoader.php b/src/Loader/JSONLoader.php new file mode 100644 index 0000000..0d4683a --- /dev/null +++ b/src/Loader/JSONLoader.php @@ -0,0 +1,94 @@ +context[__CLASS__]['pending'][] = $item; + } + + public function flush(bool $isPartial, EtlState $state): string + { + $context = &$state->context[__CLASS__]; + $context['hasStarted'] ??= false; + $context['pending'] ??= []; + + $file = $context['file'] ??= $this->resolveDestination($state->destination ?? 
$this->destination); + // $this->writeOpeningBracketIfNotDoneYet($state, $file); + match ($isPartial) { + true => $this->partialFlush($state, $file), + false => $this->finalFlush($state, $file), + }; + $context['pending'] = []; + + if (!$isPartial && $file instanceof SplTempFileObject) { + $file->rewind(); + + return implode('', [...$file]); // @phpstan-ignore-line + } + + return 'file://'.$file->getPathname(); + } + + private function partialFlush(EtlState $state, SplFileObject $file): void + { + $context = &$state->context[__CLASS__]; + $serialized = json_encode($context['pending'], JSON_THROW_ON_ERROR | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE); + $serialized = ltrim($serialized, '['); + $serialized = rtrim($serialized, ']'); + $serialized = trim($serialized); + + if (!($context['openingBracket'] ?? false)) { + $file->fwrite('['); + $context['openingBracket'] = true; + $file->fwrite(PHP_EOL.' '.$serialized); + } elseif ([] !== $context['pending']) { + $file->fwrite(','); + $file->fwrite(PHP_EOL.' 
'.$serialized); + } + } + + private function finalFlush(EtlState $state, SplFileObject $file): void + { + $this->partialFlush($state, $file); + if ($state->nbLoadedItems > 0) { + $file->fwrite(PHP_EOL); + } + $file->fwrite(']'.PHP_EOL); + } + + private function resolveDestination(mixed $destination): SplFileObject + { + $isFileName = is_string($destination) && str_starts_with($destination, 'file://'); + + return match (true) { + $destination instanceof SplFileObject => $destination, + $isFileName => new SplFileObject(substr($destination, 7), 'w'), + null === $destination => new SplTempFileObject(), + default => throw new LoadException('Invalid destination.'), + }; + } +} diff --git a/src/Loader/JsonFileLoader.php b/src/Loader/JsonFileLoader.php deleted file mode 100644 index 846465d..0000000 --- a/src/Loader/JsonFileLoader.php +++ /dev/null @@ -1,140 +0,0 @@ - 0) { - if (!\is_array($file = \func_get_arg(0))) { - self::factory(['file' => $file], $this); - } else { - self::factory($options, $this); - } - } - $this->data = []; - } - - - /** - * @inheritDoc - */ - public function load(\Generator $items, $identifier, Etl $etl): void - { - UnexpectedTypeException::throwIfNot($this->file, SplFileObject::class); - foreach ($items as $key => $value) { - $this->data[$key] = $value; - } - } - - /** - * @inheritDoc - */ - public function rollback(): void - { - } - - - /** - * @inheritDoc - */ - public function commit(bool $partial): void - { - if (true === $partial) { - return; - } - - if (0 === $this->file->fwrite(json_encode($this->data, $this->jsonOptions, $this->jsonDepth))) { - throw new \RuntimeException(sprintf('Unable to write to %s', $this->file->getPathname())); - } - } - - /** - * @param string|SplFileObject $file - * @param int $jsonOptions - * @param int $jsonDepth - * @return JsonFileLoader - * @throws \InvalidArgumentException - * @throws \LogicException - * @throws \RuntimeException - */ - public static function toFile($file, int $jsonOptions = 0, int 
$jsonDepth = 512): self - { - return self::factory( - [ - 'file' => $file, - 'json_options' => $jsonOptions, - 'json_depth' => $jsonDepth, - ] - ); - } - - /** - * @param array $options - * @param self|null $that - * @return JsonFileLoader - * @throws \InvalidArgumentException - * @throws \LogicException - * @throws \RuntimeException - */ - public static function factory(array $options = [], self $that = null): self - { - $that = $that ?? new self; - - $that->jsonOptions = $options['json_options'] ?? $that->jsonOptions ?? 0; - $that->jsonDepth = $options['json_depth'] ?? $that->jsonDepth ?? 512; - - $file = $options['file'] ?? $that->file ?? null; - if ($file instanceof SplFileObject) { - $that->file = $file; - } elseif (is_string($file)) { - $that->file = new SplFileObject($file, 'w'); - } - UnexpectedTypeException::throwIfNot($that->file, SplFileObject::class, true); - - return $that; - } -} diff --git a/src/Loader/LoaderInterface.php b/src/Loader/LoaderInterface.php index 0143581..ac04fb0 100644 --- a/src/Loader/LoaderInterface.php +++ b/src/Loader/LoaderInterface.php @@ -1,39 +1,22 @@ LogLevel::INFO, - EtlEvents::EXTRACT => LogLevel::INFO, - EtlEvents::TRANSFORM => LogLevel::INFO, - EtlEvents::LOADER_INIT => LogLevel::INFO, - EtlEvents::LOAD => LogLevel::INFO, - EtlEvents::FLUSH => LogLevel::INFO, - EtlEvents::SKIP => LogLevel::INFO, - EtlEvents::STOP => LogLevel::INFO, - EtlEvents::ROLLBACK => LogLevel::INFO, - EtlEvents::END => LogLevel::INFO, - ]; - - private const DEFAULT_EVENT_PRIORITIES = [ - EtlEvents::START => 128, - EtlEvents::EXTRACT => 128, - EtlEvents::TRANSFORM => 128, - EtlEvents::LOADER_INIT => 128, - EtlEvents::LOAD => 128, - EtlEvents::FLUSH => 128, - EtlEvents::SKIP => 128, - EtlEvents::STOP => 128, - EtlEvents::ROLLBACK => 128, - EtlEvents::END => 128, - ]; - - /** - * @var LoggerInterface - */ - private $logger; - - /** - * @var array - */ - private $logLevels; - - /** - * @var array - */ - private $eventPriorities; - /** - * LoggerRecipe 
constructor. + * @param array $logLevels + * @param array $priorities */ - public function __construct(LoggerInterface $logger, array $logLevels = [], array $eventPriorities = []) - { - $this->logger = $logger; - $this->logLevels = \array_replace(self::DEFAULT_LOG_LEVELS, $logLevels); - $this->eventPriorities = \array_replace(self::DEFAULT_EVENT_PRIORITIES, $eventPriorities); + public function __construct( + private readonly LoggerInterface $logger = new NullLogger(), + private readonly array $logLevels = [ + StartEvent::class => LogLevel::INFO, + FlushEvent::class => LogLevel::INFO, + EndEvent::class => LogLevel::INFO, + ExtractExceptionEvent::class => LogLevel::ERROR, + TransformExceptionEvent::class => LogLevel::ERROR, + LoadExceptionEvent::class => LogLevel::ERROR, + FlushExceptionEvent::class => LogLevel::ERROR, + ], + private readonly string $defaultLogLevel = LogLevel::DEBUG, + private readonly array $priorities = [], + private readonly int $defaultPriority = -1, + ) { } - /** - * @inheritDoc - */ - public function updateBuilder(EtlBuilder $builder): EtlBuilder + public function fork(EtlExecutor $executor): EtlExecutor { - return $builder - ->onStart( - function (EtlEvent $event) { - $this->logger->log($this->getLogLevel($event), 'Starting ETL...'); - }, - $this->getPriority(EtlEvents::START) - ) + return $executor + ->onInit(fn (InitEvent $event) => $this->log($event, 'Initializing ETL...', ['state' => $event->state]), + $this->priorities[InitEvent::class] ?? $this->defaultPriority) + ->onStart(fn (StartEvent $event) => $this->log($event, 'Starting ETL...', ['state' => $event->state]), + $this->priorities[StartEvent::class] ?? 
$this->defaultPriority) ->onExtract( - function (ItemEvent $event) { - $this->logger->log($this->getLogLevel($event), sprintf('Extracted %s.', $event->getKey())); - }, - $this->getPriority(EtlEvents::EXTRACT) + fn (ExtractEvent $event) => $this->log( + $event, + 'Extracting item #{key}', + [ + 'key' => $event->state->currentItemKey, + 'state' => $event->state, + 'item' => $event->item, + ], + ), + $this->priorities[ExtractEvent::class] ?? $this->defaultPriority, + ) + ->onExtractException( + fn (ExtractExceptionEvent $event) => $this->log( + $event, + 'Extract exception on key #{key}: {msg}', + [ + 'msg' => $event->exception->getMessage(), + 'key' => $event->state->currentItemKey, + 'state' => $event->state, + ], + ), + $this->priorities[ExtractExceptionEvent::class] ?? $this->defaultPriority, ) ->onTransform( - function (ItemEvent $event) { - $this->logger->log($this->getLogLevel($event), sprintf('Transformed %s.', $event->getKey())); - }, - $this->getPriority(EtlEvents::TRANSFORM) + fn (TransformEvent $event) => $this->log( + $event, + 'Transformed item #{key}', + [ + 'key' => $event->state->currentItemKey, + 'state' => $event->state, + 'items' => $event->items, + ], + ), + $this->priorities[TransformEvent::class] ?? $this->defaultPriority, ) - ->onLoaderInit( - function (ItemEvent $event) { - $this->logger->log($this->getLogLevel($event), 'Initializing loader...'); - }, - $this->getPriority(EtlEvents::LOAD) + ->onTransformException( + fn (TransformExceptionEvent $event) => $this->log( + $event, + 'Transform exception on key #{key}: {msg}', + [ + 'msg' => $event->exception->getMessage(), + 'key' => $event->state->currentItemKey, + 'state' => $event->state, + ], + ), + $this->priorities[TransformExceptionEvent::class] ?? 
$this->defaultPriority, ) ->onLoad( - function (ItemEvent $event) { - $this->logger->log($this->getLogLevel($event), sprintf('Loaded %s.', $event->getKey())); - }, - $this->getPriority(EtlEvents::LOAD) + fn (LoadEvent $event) => $this->log( + $event, + 'Loaded item #{key}', + [ + 'key' => $event->state->currentItemKey, + 'state' => $event->state, + 'item' => $event->item, + ], + ), + $this->priorities[LoadEvent::class] ?? $this->defaultPriority, ) - ->onSkip( - function (ItemEvent $event) { - $this->logger->log($this->getLogLevel($event), sprintf('Skipping item %s.', $event->getKey())); - }, - $this->getPriority(EtlEvents::SKIP) - ) - ->onStop( - function (ItemEvent $event) { - $this->logger->log($this->getLogLevel($event), sprintf('Stopping on item %s.', $event->getKey())); - }, - $this->getPriority(EtlEvents::STOP) + ->onLoadException( + fn (LoadExceptionEvent $event) => $this->log( + $event, + 'Load exception on key #{key}: {msg}', + [ + 'msg' => $event->exception->getMessage(), + 'key' => $event->state->currentItemKey, + 'state' => $event->state, + ], + ), + $this->priorities[LoadExceptionEvent::class] ?? $this->defaultPriority, ) ->onFlush( - function (FlushEvent $event) { - $this->logger->log($this->getLogLevel($event), sprintf('Flushed %d items.', $event->getCounter())); - }, - $this->getPriority(EtlEvents::FLUSH) + fn (FlushEvent $event) => $this->log( + $event, + $event->partial ? 'Flushing items (partial)...' : 'Flushing items...', + [ + 'key' => $event->state->currentItemKey, + 'state' => $event->state, + ], + ), + $this->priorities[FlushEvent::class] ?? 
$this->defaultPriority, ) - ->onRollback( - function (RollbackEvent $event) { - $this->logger->log($this->getLogLevel($event), sprintf('Rollback %d items.', $event->getCounter())); - }, - $this->getPriority(EtlEvents::ROLLBACK) + ->onFlushException( + fn (FlushExceptionEvent $event) => $this->log( + $event, + 'Flush exception: {msg}', + [ + 'msg' => $event->exception->getMessage(), + 'state' => $event->state, + ], + ), + $this->priorities[FlushExceptionEvent::class] ?? $this->defaultPriority, ) ->onEnd( - function (EndProcessEvent $event) { - $this->logger->log($this->getLogLevel($event), sprintf('ETL completed on %d items.', $event->getCounter())); - }, - $this->getPriority(EtlEvents::END) + fn (EndEvent $event) => $this->log( + $event, + 'ETL complete. {nb} items were loaded in {duration}s.', + [ + 'nb' => $event->state->nbLoadedItems, + 'duration' => $event->state->getDuration(), + 'state' => $event->state, + ], + ), + $this->priorities[EndEvent::class] ?? $this->defaultPriority, ); } /** - * @param EtlEvent $event - * @return string + * @param array $context */ - private function getLogLevel(EtlEvent $event): string + private function log(Event $event, string|Stringable $message, array $context = []): void { - return $this->logLevels[$event->getName()] ?? LogLevel::INFO; - } + $level = $this->logLevels[$event::class] ?? $this->defaultLogLevel; - /** - * @param EtlEvent $event - * @return int - */ - private function getPriority(string $eventName): int - { - return $this->eventPriorities[$eventName] ?? 
128; + $this->logger->log($level, $message, $context); } } diff --git a/src/Recipe/Recipe.php b/src/Recipe/Recipe.php index 97094f0..7fb44e7 100644 --- a/src/Recipe/Recipe.php +++ b/src/Recipe/Recipe.php @@ -1,15 +1,28 @@ recipe)($executor); + } + }; + } } diff --git a/src/Transformer/CallableTransformer.php b/src/Transformer/CallableTransformer.php index aaa432e..cd494f2 100644 --- a/src/Transformer/CallableTransformer.php +++ b/src/Transformer/CallableTransformer.php @@ -1,29 +1,25 @@ callable = $callable; +final readonly class CallableTransformer implements TransformerInterface +{ + public function __construct( + public Closure $closure, + ) { } /** - * @inheritDoc + * @return Generator */ - public function transform($value, $key, Etl $etl): \Generator + public function transform(mixed $item, EtlState $state): Generator { - yield $key => ($this->callable)($value); + return ($this->closure)($item, $state); } } diff --git a/src/Transformer/NullTransformer.php b/src/Transformer/NullTransformer.php new file mode 100644 index 0000000..6b81f63 --- /dev/null +++ b/src/Transformer/NullTransformer.php @@ -0,0 +1,16 @@ + */ - public function transform($value, $key, Etl $etl): \Generator; + public function transform(mixed $item, EtlState $state): Generator; } diff --git a/tests/Behavior/Events/EndEventTest.php b/tests/Behavior/Events/EndEventTest.php new file mode 100644 index 0000000..79a2ef0 --- /dev/null +++ b/tests/Behavior/Events/EndEventTest.php @@ -0,0 +1,29 @@ +onEnd(function (EndEvent $e) use (&$event) { + $event = $e; + }); + + // When + $report = $executor->process(['foo', 'bar']); + + // Then + expect($event)->toBeInstanceOf(EndEvent::class) + ->and($report->nbTotalItems)->toBe(2) + ->and($report->nbLoadedItems)->toBe(2) + ; +}); diff --git a/tests/Behavior/Events/ExtractEventTest.php b/tests/Behavior/Events/ExtractEventTest.php new file mode 100644 index 0000000..d7f550a --- /dev/null +++ b/tests/Behavior/Events/ExtractEventTest.php @@ -0,0 +1,27 @@ + 'foo', 3 
=> 'bar']; + $extractedItems = []; + + // Given + $executor = (new EtlExecutor()) + ->onExtract(function (ExtractEvent $event) use (&$extractedItems) { + $extractedItems[$event->state->currentItemKey] = $event->item; + }); + + // When + $executor->process($items); + + // Then + expect($extractedItems)->toBe($items); +}); diff --git a/tests/Behavior/Events/ExtractExceptionEventTest.php b/tests/Behavior/Events/ExtractExceptionEventTest.php new file mode 100644 index 0000000..77a9461 --- /dev/null +++ b/tests/Behavior/Events/ExtractExceptionEventTest.php @@ -0,0 +1,22 @@ +onExtractException(function (ExtractExceptionEvent $event) { + $event->exception = new ExtractException('It miserably failed.'); + }); + $executor->process($items()); +})->throws(ExtractException::class, 'It miserably failed.'); diff --git a/tests/Behavior/Events/FlushEventTest.php b/tests/Behavior/Events/FlushEventTest.php new file mode 100644 index 0000000..36387b5 --- /dev/null +++ b/tests/Behavior/Events/FlushEventTest.php @@ -0,0 +1,28 @@ +withOptions(new EtlConfiguration(flushEvery: 2)) + ->onFlush(function (FlushEvent $e) use (&$flushEventsCounter) { + ++$flushEventsCounter; + }); + + // When + $executor->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); + + // Then + expect($flushEventsCounter)->toBe(3); +}); diff --git a/tests/Behavior/Events/FlushExceptionEventTest.php b/tests/Behavior/Events/FlushExceptionEventTest.php new file mode 100644 index 0000000..e212220 --- /dev/null +++ b/tests/Behavior/Events/FlushExceptionEventTest.php @@ -0,0 +1,61 @@ +loadInto(new FlushFailsLoader()) + ->onFlushException(function (FlushExceptionEvent $event) { + $event->removeException(); + }) + ; + $report = $executor->process($items); + expect($report->output)->toBe([ + ['strawberry', 'raspberry'], + ['peach'], + ]); +}); + +class FlushFailsLoader implements LoaderInterface +{ + public function load(mixed $item, EtlState $state): void + { + $state->context['pending'][] = $item; + } + + /** + 
* @return list> + */ + public function flush(bool $isPartial, EtlState $state): array + { + $state->context['batchNumber'] ??= 0; + $state->context['hasFailed'] ??= false; + + // Trigger failure on 1st flush + if (!$state->context['hasFailed']) { + $state->context['hasFailed'] = true; + $state->context['pending'] = []; + throw new RuntimeException('Flush failed.'); + } + foreach ($state->context['pending'] as $key => $value) { + $state->context['batches'][$state->context['batchNumber']][] = $value; + } + $state->context['pending'] = []; + ++$state->context['batchNumber']; + + return $state->context['batches']; + } +} diff --git a/tests/Behavior/Events/InitEventTest.php b/tests/Behavior/Events/InitEventTest.php new file mode 100644 index 0000000..5e7920c --- /dev/null +++ b/tests/Behavior/Events/InitEventTest.php @@ -0,0 +1,29 @@ +onInit(function (InitEvent $e) use (&$event) { + $event = $e; + $e->state->stop(); + }); + + // When + $executor->process('sourceArgs', 'destArgs'); + + // Then + expect($event)->toBeInstanceOf(InitEvent::class) + ->and($event->state->source)->toBe('sourceArgs') + ->and($event->state->destination)->toBe('destArgs'); +}); diff --git a/tests/Behavior/Events/LoadEventTest.php b/tests/Behavior/Events/LoadEventTest.php new file mode 100644 index 0000000..1569c86 --- /dev/null +++ b/tests/Behavior/Events/LoadEventTest.php @@ -0,0 +1,31 @@ +transformWith(function (mixed $value) { + yield $value; + yield strtoupper($value); + }) + ->onLoad(function (LoadEvent $e) use (&$loadedItems) { + $loadedItems[] = $e->item; + }); + + // When + $executor->process([2 => 'foo', 3 => 'bar']); + + // Then + expect($loadedItems)->toHaveCount(4) + ->and($loadedItems)->toBe(['foo', 'FOO', 'bar', 'BAR']); +}); diff --git a/tests/Behavior/Events/LoadExceptionEventTest.php b/tests/Behavior/Events/LoadExceptionEventTest.php new file mode 100644 index 0000000..dfd9375 --- /dev/null +++ b/tests/Behavior/Events/LoadExceptionEventTest.php @@ -0,0 +1,31 @@ +loadInto(function 
(mixed $value) use (&$loadedItems) { + if ('bar' === $value) { + throw new LoadException('Cannot load `bar`.'); + } + $loadedItems[] = $value; + }) + ->onLoadException(function (LoadExceptionEvent $event) { + $event->removeException(); + }) + ; + $executor->process($items); + + expect($loadedItems)->toBe(['foo', 'baz']); +}); diff --git a/tests/Behavior/Events/StartEventTest.php b/tests/Behavior/Events/StartEventTest.php new file mode 100644 index 0000000..39680ad --- /dev/null +++ b/tests/Behavior/Events/StartEventTest.php @@ -0,0 +1,30 @@ +onStart(function (StartEvent $e) use (&$event) { + $event = $e; + $e->state->stop(); + }); + + // When + $executor->process(['foo', 'bar']); + + // Then + expect($event)->toBeInstanceOf(StartEvent::class) + ->and($event->state->nbTotalItems)->toBe(2) + ->and($event->state->nbLoadedItems)->toBe(0) + ; +}); diff --git a/tests/Behavior/Events/TransformEventTest.php b/tests/Behavior/Events/TransformEventTest.php new file mode 100644 index 0000000..16500a8 --- /dev/null +++ b/tests/Behavior/Events/TransformEventTest.php @@ -0,0 +1,31 @@ +transformWith(function (mixed $value) { + yield $value; + yield strtoupper($value); + }) + ->onTransform(function (TransformEvent $e) use (&$transformedItems) { + $transformedItems = [...$transformedItems, ...$e->items]; + }); + + // When + $executor->process([2 => 'foo', 3 => 'bar']); + + // Then + expect($transformedItems)->toHaveCount(4) + ->and($transformedItems)->toBe(['foo', 'FOO', 'bar', 'BAR']); +}); diff --git a/tests/Behavior/Events/TransformExceptionEventTest.php b/tests/Behavior/Events/TransformExceptionEventTest.php new file mode 100644 index 0000000..4188f58 --- /dev/null +++ b/tests/Behavior/Events/TransformExceptionEventTest.php @@ -0,0 +1,34 @@ +transformWith(function (mixed $value) { + if ('bar' === $value) { + throw new TransformException('Cannot transform `bar`.'); + } + yield $value; + }) + ->loadInto(function (mixed $value) use (&$loadedItems) { + $loadedItems[] = $value; + }) 
+ ->onTransformException(function (TransformExceptionEvent $event) { + $event->removeException(); + }) + ; + $executor->process($items); + + expect($loadedItems)->toBe(['foo', 'baz']); +}); diff --git a/tests/Behavior/ExtractExceptionTest.php b/tests/Behavior/ExtractExceptionTest.php new file mode 100644 index 0000000..a900d3e --- /dev/null +++ b/tests/Behavior/ExtractExceptionTest.php @@ -0,0 +1,29 @@ +process($items()); +})->throws(ExtractException::class, 'Something bad happened.'); + +it('throws an extract exception when some other exception is thrown', function () { + $items = function () { + yield 'foo'; + throw new RuntimeException('Something bad happened.'); + }; + + $executor = new EtlExecutor(); + $executor->process($items()); +})->throws(ExtractException::class, 'Error during extraction.'); diff --git a/tests/Behavior/FlushExceptionTest.php b/tests/Behavior/FlushExceptionTest.php new file mode 100644 index 0000000..167f9d6 --- /dev/null +++ b/tests/Behavior/FlushExceptionTest.php @@ -0,0 +1,50 @@ +process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); +})->throws(FlushException::class, 'Flush failed.'); + +it('throws a load exception when some other exception is thrown', function () { + // Given + $loader = new FlushFailsLoader(new RuntimeException('Flush failed.')); + $etl = (new EtlExecutor(loader: $loader, options: new EtlConfiguration(flushEvery: 2))); + + // When + $etl->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); +})->throws(FlushException::class, 'Error during flush.'); + +class FlushFailsLoader implements LoaderInterface +{ + public function __construct( + private Exception $failure, + ) { + } + + public function load(mixed $item, EtlState $state): void + { + } + + public function flush(bool $isPartial, EtlState $state): never + { + throw $this->failure; + } +} diff --git a/tests/Behavior/FlushTest.php b/tests/Behavior/FlushTest.php new file mode 100644 index 0000000..6d53f34 --- /dev/null +++ 
b/tests/Behavior/FlushTest.php @@ -0,0 +1,75 @@ +process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); + + // Then + expect($report->output)->toBeArray() + ->and($report->output)->toHaveCount(3) + ->and($report->output[0])->toBe(['banana', 'apple']) + ->and($report->output[1])->toBe(['strawberry', 'raspberry']) + ->and($report->output[2])->toBe(['peach']); +}); + +it('forces flushes', function () { + $loader = new BatchLoader(); + + // Given + $etl = (new EtlExecutor(loader: $loader, options: new EtlConfiguration(flushEvery: 2))) + ->onExtract(function (ExtractEvent $event) { + if (0 === $event->state->currentItemIndex) { + $event->state->flush(); + } + }); + + // When + $report = $etl->process(['banana', 'apple', 'strawberry', 'raspberry', 'peach']); + + // Then + expect($report->output)->toBeArray() + ->and($report->output)->toHaveCount(3) + ->and($report->output[0])->toBe(['banana']) + ->and($report->output[1])->toBe(['apple', 'strawberry']) + ->and($report->output[2])->toBe(['raspberry', 'peach']); +}); + +class BatchLoader implements LoaderInterface +{ + public function load(mixed $item, EtlState $state): void + { + $state->context['pending'][] = $item; + } + + /** + * @return list> + */ + public function flush(bool $isPartial, EtlState $state): array + { + $state->context['batchNumber'] ??= 0; + foreach ($state->context['pending'] as $key => $value) { + $state->context['batches'][$state->context['batchNumber']][] = $value; + } + $state->context['pending'] = []; + ++$state->context['batchNumber']; + + return $state->context['batches']; + } +} diff --git a/tests/Behavior/LoadExceptionTest.php b/tests/Behavior/LoadExceptionTest.php new file mode 100644 index 0000000..aa90181 --- /dev/null +++ b/tests/Behavior/LoadExceptionTest.php @@ -0,0 +1,51 @@ +loadInto(function (mixed $value) { + if ('bar' === $value) { + throw new LoadException('Cannot load `bar`.'); + } + }); + $executor->process($items); +})->throws(LoadException::class, 'Cannot load 
`bar`.'); + +it('throws a load exception when some other exception is thrown', function () { + $items = ['foo', 'bar', 'baz']; + $executor = (new EtlExecutor())->loadInto(function (mixed $value) { + if ('bar' === $value) { + throw new RuntimeException('Cannot load `bar`.'); + } + }); + $executor->process($items); +})->throws(LoadException::class, 'Error during loading.'); + +it('has stopped processing items, but has loaded the previous ones', function () { + $items = ['foo', 'bar', 'baz']; + $loadedItems = []; + $executor = (new EtlExecutor()) + ->loadInto(function (mixed $value) use (&$loadedItems) { + if ('bar' === $value) { + throw new LoadException('Cannot load `bar`.'); + } + $loadedItems[] = $value; + }) + ; + try { + $executor->process($items); + } catch (LoadException) { + } + + expect($loadedItems)->toBe(['foo']); +}); diff --git a/tests/Behavior/SkipTest.php b/tests/Behavior/SkipTest.php new file mode 100644 index 0000000..b698a81 --- /dev/null +++ b/tests/Behavior/SkipTest.php @@ -0,0 +1,86 @@ + 'auto', + ]); + $cities = []; + + // Given + $executor = (new EtlExecutor(extractor: $extractor)) + ->transformWith(function (mixed $value) { + yield $value['city_english_name']; + }) + ->loadInto(function (string $city) use (&$cities) { + $cities[] = $city; + }) + ->onExtract(function (ExtractEvent $event) { + if ('US' === $event->item['country_iso_code']) { + $event->state->skip(); + } + }); + + // When + $executor->process(); + + // Then + expect($cities)->toBe([ + 'Tokyo', + 'Shanghai', + 'Mumbai', + 'Istanbul', + 'Moscow', + 'Cairo', + 'Lima', + 'London', + ]); +}); + +it('skips items during transformation', function () { + $extractor = new CSVExtractor('file://'.dirname(__DIR__).'/data/10-biggest-cities.csv', [ + 'columns' => 'auto', + ]); + $cities = []; + + // Given + $executor = (new EtlExecutor(extractor: $extractor)) + ->transformWith(function (mixed $value) { + yield $value['city_english_name']; + }) + ->loadInto(function (string $city) use (&$cities) 
{ + $cities[] = $city; + }) + ->onTransform(function (TransformEvent $event) { + if ('Tokyo' === [...$event->items][0]) { + $event->state->skip(); + } + }); + + // When + $executor->process(); + + // Then + expect($cities)->toBe([ + 'New York', + 'Los Angeles', + 'Shanghai', + 'Mumbai', + 'Istanbul', + 'Moscow', + 'Cairo', + 'Lima', + 'London', + ]); +}); diff --git a/tests/Behavior/StopTest.php b/tests/Behavior/StopTest.php new file mode 100644 index 0000000..11fdaf3 --- /dev/null +++ b/tests/Behavior/StopTest.php @@ -0,0 +1,107 @@ + 'auto', + ]); + $cities = []; + + // Given + $executor = (new EtlExecutor(extractor: $extractor)) + ->transformWith(function (mixed $value) { + yield $value['city_english_name']; + }) + ->loadInto(function (string $city) use (&$cities) { + $cities[] = $city; + }) + ->onExtract(function (ExtractEvent $event) { + if ('JP' === $event->item['country_iso_code']) { + $event->state->stop(); + } + }); + + // When + $executor->process(); + + // Then + expect($cities)->toBe([ + 'New York', + 'Los Angeles', + ]); +}); + +it('stops the process during transformation', function () { + $extractor = new CSVExtractor('file://'.dirname(__DIR__).'/data/10-biggest-cities.csv', [ + 'columns' => 'auto', + ]); + $cities = []; + + // Given + $executor = (new EtlExecutor(extractor: $extractor)) + ->transformWith(function (mixed $value) { + yield $value['city_english_name']; + }) + ->loadInto(function (string $city) use (&$cities) { + $cities[] = $city; + }) + ->onTransform(function (TransformEvent $event) { + if ('Shanghai' === [...$event->items][0]) { + $event->state->stop(); + } + }); + + // When + $executor->process(); + + // Then + expect($cities)->toBe([ + 'New York', + 'Los Angeles', + 'Tokyo', + ]); +}); + +it('stops the process during loading', function () { + $extractor = new CSVExtractor('file://'.dirname(__DIR__).'/data/10-biggest-cities.csv', [ + 'columns' => 'auto', + ]); + $cities = []; + + // Given + $executor = (new EtlExecutor(extractor: 
$extractor)) + ->transformWith(function (mixed $value) { + yield $value['city_english_name']; + }) + ->loadInto(function (string $city) use (&$cities) { + $cities[] = $city; + }) + ->onLoad(function (LoadEvent $event) { + if ('Shanghai' === $event->item) { + $event->state->stop(); + } + }); + + // When + $executor->process(); + + // Then + expect($cities)->toBe([ + 'New York', + 'Los Angeles', + 'Tokyo', + 'Shanghai', + ]); +}); diff --git a/tests/Behavior/TransformExceptionTest.php b/tests/Behavior/TransformExceptionTest.php new file mode 100644 index 0000000..9f02815 --- /dev/null +++ b/tests/Behavior/TransformExceptionTest.php @@ -0,0 +1,56 @@ +transformWith(function (mixed $value) { + if ('bar' === $value) { + throw new TransformException('Cannot transform `bar`.'); + } + yield $value; + }); + $executor->process($items); +})->throws(TransformException::class, 'Cannot transform `bar`.'); + +it('throws a transform exception when some other exception is thrown', function () { + $items = ['foo', 'bar', 'baz']; + $executor = (new EtlExecutor())->transformWith(function (mixed $value) { + if ('bar' === $value) { + throw new RuntimeException('Cannot transform `bar`.'); + } + yield $value; + }); + $executor->process($items); +})->throws(TransformException::class, 'Error during transformation.'); + +it('has stopped processing items, but has loaded the previous ones', function () { + $items = ['foo', 'bar', 'baz']; + $loadedItems = []; + $executor = (new EtlExecutor()) + ->transformWith(function (mixed $value) { + if ('bar' === $value) { + throw new TransformException('Cannot transform `bar`.'); + } + yield $value; + }) + ->loadInto(function (mixed $value) use (&$loadedItems) { + $loadedItems[] = $value; + }) + ; + try { + $executor->process($items); + } catch (TransformException) { + } + + expect($loadedItems)->toBe(['foo']); +}); diff --git a/tests/Unit/EtlExecutorTest.php b/tests/Unit/EtlExecutorTest.php new file mode 100644 index 0000000..1e7493f --- /dev/null +++ 
b/tests/Unit/EtlExecutorTest.php @@ -0,0 +1,34 @@ +extractFrom(fn () => yield from ['foo', 'bar']) + ->transformWith(fn (mixed $value) => yield strtoupper($value)) + ->loadInto(function (string $item) use (&$items) { + $items[] = $item; + }) + ->withOptions(new EtlConfiguration(flushEvery: 1)); + + // When + $report = $etl->process(); + + // Then + expect($items)->toBe(['FOO', 'BAR']) + ->and($report->nbTotalItems)->toBe(2) + ->and($report->nbLoadedItems)->toBe(2) + ->and($report->getDuration())->toBeBetween(0, 1) + ; +}); diff --git a/tests/Unit/EventDispatcher/EventDispatcherTest.php b/tests/Unit/EventDispatcher/EventDispatcherTest.php new file mode 100644 index 0000000..d44b1ac --- /dev/null +++ b/tests/Unit/EventDispatcher/EventDispatcherTest.php @@ -0,0 +1,75 @@ +visitors[] = $visitor; + if (2 === count($this->visitors)) { + $this->stopPropagation(); + } + } +} + +it('dispatches events, to the appropriate listeners, in the correct order', function () { + $listenerProvider = new PrioritizedListenerProvider(); + $bus = new EventDispatcher($listenerProvider); + $ignored = new class() { + use EventVisitor; + }; + $event = new class() { + use EventVisitor; + }; + + // Given + $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('A')); + $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('B'), -1); + $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('C'), 1); + + // When + $dispatched = $bus->dispatch($event); + + // Then + expect($dispatched) + ->toBe($event) + ->and($event->visitors)->toBe(['C', 'A', 'B']) + ->and($ignored->visitors)->toBe([]) + ; +}); + +it('stops propagation of events', function () { + $listenerProvider = new PrioritizedListenerProvider(); + $bus = new EventDispatcher($listenerProvider); + $event = new class() implements StoppableEventInterface { + use EventVisitor; + }; + + // Given + $listenerProvider->listenTo($event::class, fn (object $event) => 
$event->visit('A')); + $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('B'), -1); + $listenerProvider->listenTo($event::class, fn (object $event) => $event->visit('C'), 1); + + // When + $dispatched = $bus->dispatch($event); + + // Then + expect($dispatched) + ->toBe($event) + ->and($event->visitors)->toBe(['C', 'A']) + ; +}); diff --git a/tests/Unit/Extractor/CSVExtractorTest.php b/tests/Unit/Extractor/CSVExtractorTest.php new file mode 100644 index 0000000..7c283e1 --- /dev/null +++ b/tests/Unit/Extractor/CSVExtractorTest.php @@ -0,0 +1,46 @@ +extract($state); +})->throws(ExtractException::class); + +it('iterates over a string containing CSV data', function () { + $state = new EtlState(); + $content = file_get_contents(dirname(__DIR__, 2).'/data/10-biggest-cities.csv'); + $expected = require dirname(__DIR__, 2).'/data/10-biggest-cities.php'; + $extractor = new CSVExtractor($content, ['columns' => 'auto']); + + // When + $extractedItems = [...$extractor->extract($state)]; + + // Then + expect($extractedItems)->toBe($expected); +}); + +it('iterates over a file containing CSV data', function () { + $extractor = new CSVExtractor(options: ['columns' => 'auto']); + + // When + $state = new EtlState(source: 'file://'.dirname(__DIR__, 2).'/data/10-biggest-cities.csv'); + $extractedItems = [...$extractor->extract($state)]; + + // Then + expect($extractedItems)->toHaveCount(10) + ->and($extractedItems[0]['city_english_name'] ?? null)->toBe('New York') + ->and($extractedItems[9]['city_english_name'] ?? 
null)->toBe('London'); +}); diff --git a/tests/Unit/Extractor/CallableExtractorTest.php b/tests/Unit/Extractor/CallableExtractorTest.php new file mode 100644 index 0000000..2455d86 --- /dev/null +++ b/tests/Unit/Extractor/CallableExtractorTest.php @@ -0,0 +1,20 @@ + ['foo', 'bar']; + + // When + $value = (new CallableExtractor($callable))->extract($state); + + // Then + expect($value)->toBe(['foo', 'bar']); +}); diff --git a/tests/Unit/Extractor/IterableExtractorTest.php b/tests/Unit/Extractor/IterableExtractorTest.php new file mode 100644 index 0000000..a575d04 --- /dev/null +++ b/tests/Unit/Extractor/IterableExtractorTest.php @@ -0,0 +1,28 @@ +extract($state)) + ->toBe(['foo', 'bar']); + + $state = new EtlState(source: ['bar', 'baz']); + expect($extractor->extract($state)) + ->toBe(['bar', 'baz']); +}); + +it('yells whenever source is not iterable', function () { + (new IterableExtractor())->extract(new EtlState(source: 'foo')); +}) + ->throws(ExtractException::class); diff --git a/tests/Unit/Extractor/JSONExtractorTest.php b/tests/Unit/Extractor/JSONExtractorTest.php new file mode 100644 index 0000000..161236e --- /dev/null +++ b/tests/Unit/Extractor/JSONExtractorTest.php @@ -0,0 +1,39 @@ +extract($state); + + // Then + expect([...$items])->toBe(null === $source ? 
[] : $expected); +})->with(function () { + $source = dirname(__DIR__, 2).'/data/10-biggest-cities.json'; + $content = file_get_contents($source); + yield ['source' => 'file://'.$source]; + yield ['source' => $content]; + yield ['source' => null]; +})->with(function () { + yield ['useConstructor' => true]; + yield ['useConstructor' => false]; +}); + +it('complains if content cannot be extracted', function () { + [...(new JSONExtractor())->extract(new EtlState(source: new stdClass()))]; +})->throws(ExtractException::class); diff --git a/tests/Unit/Extractor/TextLinesExtractorTest.php b/tests/Unit/Extractor/TextLinesExtractorTest.php new file mode 100644 index 0000000..7d5a73b --- /dev/null +++ b/tests/Unit/Extractor/TextLinesExtractorTest.php @@ -0,0 +1,54 @@ +extract($state); + + // Then + expect([...$items])->toBe($expected); +})->with(function () { + yield [ + 'options' => ['skipEmptyLines' => true], + 'expected' => ['foo', 'bar'], + ]; + yield [ + 'options' => [], + 'expected' => ['foo', 'bar'], + ]; + yield [ + 'options' => ['skipEmptyLines' => false], + 'expected' => ['foo', '', '', 'bar'], + ]; +})->with(function () { + yield ['useConstructor' => true]; + yield ['useConstructor' => false]; +}); + +it('returns an empty iterator when the content is null', function () { + $state = new EtlState(); + $extractor = new TextLinesExtractor(); + + // When + $items = $extractor->extract($state); + + expect([...$items])->toBe([]); +}); diff --git a/tests/Unit/Iterator/CSVIteratorTest.php b/tests/Unit/Iterator/CSVIteratorTest.php new file mode 100644 index 0000000..59faa7e --- /dev/null +++ b/tests/Unit/Iterator/CSVIteratorTest.php @@ -0,0 +1,144 @@ +toHaveCount(11) + ->and($rows[0])->toBe([ + 0 => 'city_english_name', + 1 => 'city_local_name', + 2 => 'country_iso_code', + 3 => 'continent', + 4 => 'population', + ]) + ->and($rows[3])->toBe([ + 0 => 'Tokyo', + 1 => '東京', + 2 => 'JP', + 3 => 'Asia', + 4 => 13929286, + ]); +}); + +it('can make columns automatically', function 
() { + $content = file_get_contents(dirname(__DIR__, 2).'/data/10-biggest-cities.csv'); + $rows = [...new CSVIterator(new StrTokIterator($content), ['columns' => 'auto'])]; + + expect($rows)->toHaveCount(10) + ->and($rows[0])->toBe([ + 'city_english_name' => 'New York', + 'city_local_name' => 'New York', + 'country_iso_code' => 'US', + 'continent' => 'North America', + 'population' => 8537673, + ]) + ->and($rows[2])->toBe([ + 'city_english_name' => 'Tokyo', + 'city_local_name' => '東京', + 'country_iso_code' => 'JP', + 'continent' => 'Asia', + 'population' => 13929286, + ]); +}); + +it('can map user-defined columns', function () { + $content = file_get_contents(dirname(__DIR__, 2).'/data/10-biggest-cities.csv'); + $rows = [ + ...new CSVIterator(new StrTokIterator($content), [ + 'columns' => [ + 'cityEnglishName', + 'cityLocalName', + 'countryIsoCode', + 'continent', + 'population', + ], + ]), + ]; + + expect($rows[1])->toBe([ + 'cityEnglishName' => 'New York', + 'cityLocalName' => 'New York', + 'countryIsoCode' => 'US', + 'continent' => 'North America', + 'population' => 8537673, + ]) + ->and($rows[3])->toBe([ + 'cityEnglishName' => 'Tokyo', + 'cityLocalName' => '東京', + 'countryIsoCode' => 'JP', + 'continent' => 'Asia', + 'population' => 13929286, + ]); +}); + +it('adds fields when the row has not enough columns', function () { + $content = file_get_contents(dirname(__DIR__, 2).'/data/10-biggest-cities.csv'); + $rows = [ + ...new CSVIterator(new StrTokIterator($content), [ + 'columns' => [ + 'cityEnglishName', + 'cityLocalName', + 'countryIsoCode', + 'continent', + 'population', + 'misc', + ], + ]), + ]; + + expect($rows[1])->toBe([ + 'cityEnglishName' => 'New York', + 'cityLocalName' => 'New York', + 'countryIsoCode' => 'US', + 'continent' => 'North America', + 'population' => 8537673, + 'misc' => null, + ]) + ->and($rows[3])->toBe([ + 'cityEnglishName' => 'Tokyo', + 'cityLocalName' => '東京', + 'countryIsoCode' => 'JP', + 'continent' => 'Asia', + 'population' => 
13929286, + 'misc' => null, + ]); +}); + +it('removes extra data whenever there are more fields than columns', function () { + $content = file_get_contents(dirname(__DIR__, 2).'/data/10-biggest-cities.csv'); + $rows = [ + ...new CSVIterator(new StrTokIterator($content), [ + 'columns' => [ + 'cityEnglishName', + 'cityLocalName', + 'countryIsoCode', + 'continent', + ], + ]), + ]; + + expect($rows[1])->toBe([ + 'cityEnglishName' => 'New York', + 'cityLocalName' => 'New York', + 'countryIsoCode' => 'US', + 'continent' => 'North America', + ]) + ->and($rows[3])->toBe([ + 'cityEnglishName' => 'Tokyo', + 'cityLocalName' => '東京', + 'countryIsoCode' => 'JP', + 'continent' => 'Asia', + ]); +}); diff --git a/tests/Unit/Iterator/PregSplitIteratorTest.php b/tests/Unit/Iterator/PregSplitIteratorTest.php new file mode 100644 index 0000000..5c5b25b --- /dev/null +++ b/tests/Unit/Iterator/PregSplitIteratorTest.php @@ -0,0 +1,28 @@ +toBe([ + 'foo', + '', + '', + 'bar', + ]); +}); diff --git a/tests/Unit/Iterator/StrTokIteratorTest.php b/tests/Unit/Iterator/StrTokIteratorTest.php new file mode 100644 index 0000000..34c0318 --- /dev/null +++ b/tests/Unit/Iterator/StrTokIteratorTest.php @@ -0,0 +1,26 @@ +toBe([ + 'foo', + 'bar', + ]); +}); diff --git a/tests/Unit/Loader/CSVLoaderTest.php b/tests/Unit/Loader/CSVLoaderTest.php new file mode 100644 index 0000000..f2bd0ca --- /dev/null +++ b/tests/Unit/Loader/CSVLoaderTest.php @@ -0,0 +1,82 @@ + 'auto'])); + $output = $executor->process($cities)->output; + expect($output)->toBe($destination); + + // @phpstan-ignore-next-line + $writtenContent = implode('', [...new SplFileObject($output, 'r')]); + // @phpstan-ignore-next-line + $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/data/10-biggest-cities.csv', 'r')]); + + expect($writtenContent)->toBe($expectedContent); +}); + +it('loads items to a CSV string', function () { + $cities = require dirname(__DIR__, 2).'/data/10-biggest-cities.php'; + $executor = new 
EtlExecutor(loader: new CSVLoader(options: ['columns' => 'auto'])); + $output = $executor->process($cities)->output; + + // @phpstan-ignore-next-line + $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/data/10-biggest-cities.csv', 'r')]); + + expect($output)->toBe($expectedContent); +}); + +it('can write specific columns', function () { + $cities = require dirname(__DIR__, 2).'/data/10-biggest-cities.php'; + $initialColumns = [ + 'city_english_name', + 'city_local_name', + 'country_iso_code', + 'continent', + 'population', + ]; + $prettyColumns = [ + 'CityEnglishName', + 'CityLocalName', + 'CountryIsoCode', + 'Continent', + 'Population', + ]; + $executor = new EtlExecutor(loader: new CSVLoader(options: ['columns' => $prettyColumns])); + $output = $executor->process($cities)->output; + + $expectedContent = strtr( + implode('', [...new SplFileObject(dirname(__DIR__, 2).'/data/10-biggest-cities.csv', 'r')]), // @phpstan-ignore-line + array_combine($initialColumns, $prettyColumns), + ); + + expect($output)->toBe($expectedContent); +}); + +it('can ignore columns', function () { + $cities = require dirname(__DIR__, 2).'/data/10-biggest-cities.php'; + $executor = new EtlExecutor(loader: new CSVLoader()); + $output = $executor->process($cities)->output; + + $lines = [...new SplFileObject(dirname(__DIR__, 2).'/data/10-biggest-cities.csv', 'r')]; + unset($lines[0]); + $expectedContent = implode('', $lines); // @phpstan-ignore-line + + expect($output)->toBe($expectedContent); +}); diff --git a/tests/Unit/Loader/CallableLoaderTest.php b/tests/Unit/Loader/CallableLoaderTest.php new file mode 100644 index 0000000..169b577 --- /dev/null +++ b/tests/Unit/Loader/CallableLoaderTest.php @@ -0,0 +1,35 @@ +load('foo', $state); + $output = $loader->flush(false, $state); + + // Then + expect($output)->toBe(['foo']); +}); + +it('complains if inner loader is not callable', function () { + // Given + $state = new EtlState(); + $loader = new CallableLoader(); + 
$loader->load('foo', $state); +})->throws(LoadException::class, 'Invalid destination.'); diff --git a/tests/Unit/Loader/Doctrine/Book.php b/tests/Unit/Loader/Doctrine/Book.php new file mode 100644 index 0000000..f7a9de8 --- /dev/null +++ b/tests/Unit/Loader/Doctrine/Book.php @@ -0,0 +1,22 @@ +shouldReceive('getManagerForClass')->andReturn($manager); + $manager->shouldReceive('persist')->twice(); + $manager->shouldReceive('flush')->once(); + + $executor = (new EtlExecutor(options: new EtlConfiguration(flushEvery: 10))) + ->transformWith(function (array $book) { + yield new Book($book['id'], $book['name']); + }) + ->loadInto(new DoctrineORMLoader($registry)); + $executor->process([ + ['id' => 1, 'name' => 'Holy Bible'], + ['id' => 2, 'name' => 'Fifty Shades of Grey'], + ]); +}); + +it('complains if loaded item is not an object', function () { + $loader = new DoctrineORMLoader(Mockery::mock(ManagerRegistry::class)); + $loader->load([], new EtlState()); +})->throws(LoadException::class, 'Expecting object, got array.'); + +it('complains if loaded item is not a mapped Doctrine class', function () { + $registry = Mockery::mock(ManagerRegistry::class); + $registry->shouldReceive('getManagerForClass')->andReturn(null); + $loader = new DoctrineORMLoader($registry); + $loader->load(new stdClass(), new EtlState()); +})->throws(LoadException::class, 'Could not find manager for class stdClass.'); diff --git a/tests/Unit/Loader/JSONLoaderTest.php b/tests/Unit/Loader/JSONLoaderTest.php new file mode 100644 index 0000000..4996588 --- /dev/null +++ b/tests/Unit/Loader/JSONLoaderTest.php @@ -0,0 +1,41 @@ +process($cities)->output; + expect($output)->toBe($destination); + + // @phpstan-ignore-next-line + $writtenContent = implode('', [...new SplFileObject($output, 'r')]); + // @phpstan-ignore-next-line + $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/data/10-biggest-cities.json', 'r')]); + + expect($writtenContent)->toBe($expectedContent); +}); + +it('loads 
items to a JSON string', function () { + $cities = require dirname(__DIR__, 2).'/data/10-biggest-cities.php'; + $executor = new EtlExecutor(loader: new JSONLoader()); + $output = $executor->process($cities)->output; + + // @phpstan-ignore-next-line + $expectedContent = implode('', [...new SplFileObject(dirname(__DIR__, 2).'/data/10-biggest-cities.json', 'r')]); + + expect($output)->toBe($expectedContent); +}); diff --git a/tests/Unit/Normalizer/EmptyStringToNullNormalizerTest.php b/tests/Unit/Normalizer/EmptyStringToNullNormalizerTest.php new file mode 100644 index 0000000..3a7d305 --- /dev/null +++ b/tests/Unit/Normalizer/EmptyStringToNullNormalizerTest.php @@ -0,0 +1,26 @@ + $value = $normalizer->normalize($value)); + + // Then + expect($strings)->toBe(['foo', null]); +}); diff --git a/tests/Unit/Normalizer/NumericStringToNumberNormalizerTest.php b/tests/Unit/Normalizer/NumericStringToNumberNormalizerTest.php new file mode 100644 index 0000000..337de6f --- /dev/null +++ b/tests/Unit/Normalizer/NumericStringToNumberNormalizerTest.php @@ -0,0 +1,28 @@ + $value = $normalizer->normalize($value)); + + // Then + expect($strings)->toBe(['foo', 12345, 12345.67, '']); +}); diff --git a/tests/Unit/Recipe/RecipeTest.php b/tests/Unit/Recipe/RecipeTest.php new file mode 100644 index 0000000..ed158e2 --- /dev/null +++ b/tests/Unit/Recipe/RecipeTest.php @@ -0,0 +1,27 @@ +withRecipe( + function (EtlExecutor $executor) use (&$toggle) { + return $executor->onInit(function () use (&$toggle) { + $toggle = 'on'; + }); + }, + ); + + // When + $executor->process([]); + + // Then + expect($toggle)->toBe('on'); +}); diff --git a/tests/Unit/Transformer/CallableTransformerTest.php b/tests/Unit/Transformer/CallableTransformerTest.php new file mode 100644 index 0000000..eb36845 --- /dev/null +++ b/tests/Unit/Transformer/CallableTransformerTest.php @@ -0,0 +1,22 @@ + yield strtoupper($value)); + + // When + $transformed = $transformer->transform('foo', $state); + + // Then + 
expect([...$transformed])->toBe(['FOO']); +}); diff --git a/tests/Unit/Transformer/NullTransformerTest.php b/tests/Unit/Transformer/NullTransformerTest.php new file mode 100644 index 0000000..fbecd0c --- /dev/null +++ b/tests/Unit/Transformer/NullTransformerTest.php @@ -0,0 +1,23 @@ +transform('foo', $state); + + // Then + expect(iterator_to_array($transformedItems))->toBe(['foo']); +}); diff --git a/tests/data/10-biggest-cities.csv b/tests/data/10-biggest-cities.csv new file mode 100644 index 0000000..45ff34d --- /dev/null +++ b/tests/data/10-biggest-cities.csv @@ -0,0 +1,11 @@ +city_english_name,city_local_name,country_iso_code,continent,population +"New York","New York",US,"North America",8537673 +"Los Angeles","Los Angeles",US,"North America",39776830 +Tokyo,東京,JP,Asia,13929286 +Shanghai,上海,CN,Asia,26317104 +Mumbai,मुंबई,IN,Asia,12442373 +Istanbul,İstanbul,TR,Europe,15469524 +Moscow,Москва,RU,Europe,12615279 +Cairo,القاهرة,EG,Africa,9121514 +Lima,Lima,PE,"South America",10141329 +London,London,GB,Europe,8908081 diff --git a/tests/data/10-biggest-cities.json b/tests/data/10-biggest-cities.json new file mode 100644 index 0000000..5b41fbe --- /dev/null +++ b/tests/data/10-biggest-cities.json @@ -0,0 +1,72 @@ +[ + { + "city_english_name": "New York", + "city_local_name": "New York", + "country_iso_code": "US", + "continent": "North America", + "population": 8537673 + }, + { + "city_english_name": "Los Angeles", + "city_local_name": "Los Angeles", + "country_iso_code": "US", + "continent": "North America", + "population": 39776830 + }, + { + "city_english_name": "Tokyo", + "city_local_name": "東京", + "country_iso_code": "JP", + "continent": "Asia", + "population": 13929286 + }, + { + "city_english_name": "Shanghai", + "city_local_name": "上海", + "country_iso_code": "CN", + "continent": "Asia", + "population": 26317104 + }, + { + "city_english_name": "Mumbai", + "city_local_name": "मुंबई", + "country_iso_code": "IN", + "continent": "Asia", + "population": 12442373 + }, 
+ { + "city_english_name": "Istanbul", + "city_local_name": "İstanbul", + "country_iso_code": "TR", + "continent": "Europe", + "population": 15469524 + }, + { + "city_english_name": "Moscow", + "city_local_name": "Москва", + "country_iso_code": "RU", + "continent": "Europe", + "population": 12615279 + }, + { + "city_english_name": "Cairo", + "city_local_name": "القاهرة", + "country_iso_code": "EG", + "continent": "Africa", + "population": 9121514 + }, + { + "city_english_name": "Lima", + "city_local_name": "Lima", + "country_iso_code": "PE", + "continent": "South America", + "population": 10141329 + }, + { + "city_english_name": "London", + "city_local_name": "London", + "country_iso_code": "GB", + "continent": "Europe", + "population": 8908081 + } +] diff --git a/tests/data/10-biggest-cities.php b/tests/data/10-biggest-cities.php new file mode 100644 index 0000000..ee3c667 --- /dev/null +++ b/tests/data/10-biggest-cities.php @@ -0,0 +1,74 @@ + 'New York', + 'city_local_name' => 'New York', + 'country_iso_code' => 'US', + 'continent' => 'North America', + 'population' => 8537673, + ], + [ + 'city_english_name' => 'Los Angeles', + 'city_local_name' => 'Los Angeles', + 'country_iso_code' => 'US', + 'continent' => 'North America', + 'population' => 39776830, + ], + [ + 'city_english_name' => 'Tokyo', + 'city_local_name' => '東京', + 'country_iso_code' => 'JP', + 'continent' => 'Asia', + 'population' => 13929286, + ], + [ + 'city_english_name' => 'Shanghai', + 'city_local_name' => '上海', + 'country_iso_code' => 'CN', + 'continent' => 'Asia', + 'population' => 26317104, + ], + [ + 'city_english_name' => 'Mumbai', + 'city_local_name' => 'मुंबई', + 'country_iso_code' => 'IN', + 'continent' => 'Asia', + 'population' => 12442373, + ], + [ + 'city_english_name' => 'Istanbul', + 'city_local_name' => 'İstanbul', + 'country_iso_code' => 'TR', + 'continent' => 'Europe', + 'population' => 15469524, + ], + [ + 'city_english_name' => 'Moscow', + 'city_local_name' => 'Москва', + 
'country_iso_code' => 'RU', + 'continent' => 'Europe', + 'population' => 12615279, + ], + [ + 'city_english_name' => 'Cairo', + 'city_local_name' => 'القاهرة', + 'country_iso_code' => 'EG', + 'continent' => 'Africa', + 'population' => 9121514, + ], + [ + 'city_english_name' => 'Lima', + 'city_local_name' => 'Lima', + 'country_iso_code' => 'PE', + 'continent' => 'South America', + 'population' => 10141329, + ], + [ + 'city_english_name' => 'London', + 'city_local_name' => 'London', + 'country_iso_code' => 'GB', + 'continent' => 'Europe', + 'population' => 8908081, + ], +]; diff --git a/tests/data/dictators.csv b/tests/data/dictators.csv deleted file mode 100644 index 97708cf..0000000 --- a/tests/data/dictators.csv +++ /dev/null @@ -1,3 +0,0 @@ -country,name -USA,"Donald Trump" -Russia,"Vladimir Poutine" diff --git a/tests/data/dictators.json b/tests/data/dictators.json deleted file mode 100644 index 705377e..0000000 --- a/tests/data/dictators.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "usa": { - "country": "USA", - "name": "Donald Trump" - }, - "russia": { - "country": "Russia", - "name": "Vladimir Poutine" - } -} \ No newline at end of file diff --git a/tests/data/vat.json b/tests/data/vat.json deleted file mode 100644 index 0e1387e..0000000 --- a/tests/data/vat.json +++ /dev/null @@ -1,489 +0,0 @@ -{ - "details": "http://github.com/adamcooke/vat-rates", - "version": "bd48c1", - "rates": [ - { - "name": "Germany", - "code": "DE", - "country_code": "DE", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced": 7.0, - "standard": 19.0 - } - } - ] - }, - { - "name": "Poland", - "code": "PL", - "country_code": "PL", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 5.0, - "reduced2": 8.0, - "standard": 23.0 - } - } - ] - }, - { - "name": "Hungary", - "code": "HU", - "country_code": "HU", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 5.0, - "reduced2": 18.0, - "standard": 27.0 - } - } - 
] - }, - { - "name": "Slovenia", - "code": "SI", - "country_code": "SI", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced": 9.5, - "standard": 22.0 - } - } - ] - }, - { - "name": "Slovakia", - "code": "SK", - "country_code": "SK", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced": 10.0, - "standard": 20.0 - } - } - ] - }, - { - "name": "Portugal", - "code": "PT", - "country_code": "PT", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 6.0, - "reduced2": 13.0, - "standard": 23.0, - "parking": 13.0 - } - } - ] - }, - { - "name": "France", - "code": "FR", - "country_code": "FR", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "super_reduced": 2.1, - "reduced1": 5.5, - "reduced2": 10.0, - "standard": 20.0 - } - } - ] - }, - { - "name": "Denmark", - "code": "DK", - "country_code": "DK", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "standard": 25.0 - } - } - ] - }, - { - "name": "Romania", - "code": "RO", - "country_code": "RO", - "periods": [ - { - "effective_from": "2017-01-01", - "rates": { - "reduced1": 5.0, - "reduced2": 9.0, - "standard": 19.0 - } - }, - { - "effective_from": "2016-01-01", - "rates": { - "reduced1": 5.0, - "reduced2": 9.0, - "standard": 20.0 - } - }, - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 5.0, - "reduced2": 9.0, - "standard": 24.0 - } - } - ] - }, - { - "name": "United Kingdom", - "code": "UK", - "country_code": "GB", - "periods": [ - { - "effective_from": "2011-01-04", - "rates": { - "standard": 20.0, - "reduced": 5.0 - } - } - ] - }, - { - "name": "Sweden", - "code": "SE", - "country_code": "SE", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 6.0, - "reduced2": 12.0, - "standard": 25.0 - } - } - ] - }, - { - "name": "Croatia", - "code": "HR", - "country_code": "HR", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 5.0, - 
"reduced2": 13.0, - "standard": 25.0 - } - } - ] - }, - { - "name": "Finland", - "code": "FI", - "country_code": "FI", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 10.0, - "reduced2": 14.0, - "standard": 24.0 - } - } - ] - }, - { - "name": "Netherlands", - "code": "NL", - "country_code": "NL", - "periods": [ - { - "effective_from": "2012-10-01", - "rates": { - "reduced": 6.0, - "standard": 21.0 - } - }, - { - "effective_from": "0000-01-01", - "rates": { - "reduced": 6.0, - "standard": 19.0 - } - } - ] - }, - { - "name": "Luxembourg", - "code": "LU", - "country_code": "LU", - "periods": [ - { - "effective_from": "2016-01-01", - "rates": { - "super_reduced": 3.0, - "reduced1": 8.0, - "standard": 17.0, - "parking": 13.0 - } - }, - { - "effective_from": "2015-01-01", - "rates": { - "super_reduced": 3.0, - "reduced1": 8.0, - "reduced2": 14.0, - "standard": 17.0, - "parking": 12.0 - } - }, - { - "effective_from": "0000-01-01", - "rates": { - "super_reduced": 3.0, - "reduced1": 6.0, - "reduced2": 12.0, - "standard": 15.0, - "parking": 12.0 - } - } - ] - }, - { - "name": "Belgium", - "code": "BE", - "country_code": "BE", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 6.0, - "reduced2": 12.0, - "standard": 21.0, - "parking": 12.0 - } - } - ] - }, - { - "name": "Spain", - "code": "ES", - "country_code": "ES", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "super_reduced": 4.0, - "reduced": 10.0, - "standard": 21.0 - } - } - ] - }, - { - "name": "Lithuania", - "code": "LT", - "country_code": "LT", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 5.0, - "reduced2": 9.0, - "standard": 21.0 - } - } - ] - }, - { - "name": "Greece", - "code": "EL", - "country_code": "GR", - "periods": [ - { - "effective_from": "2016-06-01", - "rates": { - "reduced1": 6.0, - "reduced2": 13.5, - "standard": 24.0 - } - }, - { - "effective_from": "2016-01-01", - "rates": { - 
"reduced1": 6.0, - "reduced2": 13.5, - "standard": 23.0 - } - }, - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 6.5, - "reduced2": 13.0, - "standard": 23.0 - } - } - ] - }, - { - "name": "Latvia", - "code": "LV", - "country_code": "LV", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced": 12.0, - "standard": 21.0 - } - } - ] - }, - { - "name": "Czech Republic", - "code": "CZ", - "country_code": "CZ", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced": 15.0, - "standard": 21.0 - } - } - ] - }, - { - "name": "Malta", - "code": "MT", - "country_code": "MT", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 5.0, - "reduced2": 7.0, - "standard": 18.0 - } - } - ] - }, - { - "name": "Italy", - "code": "IT", - "country_code": "IT", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "super_reduced": 4.0, - "reduced": 10.0, - "standard": 22.0 - } - } - ] - }, - { - "name": "Austria", - "code": "AT", - "country_code": "AT", - "periods": [ - { - "effective_from": "2016-01-01", - "rates": { - "reduced1": 10.0, - "reduced2": 13.0, - "standard": 20.0, - "parking": 13.0 - } - }, - { - "effective_from": "0000-01-01", - "rates": { - "reduced": 10.0, - "standard": 20.0, - "parking": 12.0 - } - } - ] - }, - { - "name": "Estonia", - "code": "EE", - "country_code": "EE", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced": 9.0, - "standard": 20.0 - } - } - ] - }, - { - "name": "Bulgaria", - "code": "BG", - "country_code": "BG", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced": 9.0, - "standard": 20.0 - } - } - ] - }, - { - "name": "Cyprus", - "code": "CY", - "country_code": "CY", - "periods": [ - { - "effective_from": "0000-01-01", - "rates": { - "reduced1": 5.0, - "reduced2": 9.0, - "standard": 19.0 - } - } - ] - }, - { - "name": "Ireland", - "code": "IE", - "country_code": "IE", - "periods": [ - { - 
"effective_from": "0000-01-01", - "rates": { - "super_reduced": 4.8, - "reduced1": 9.0, - "reduced2": 13.5, - "standard": 23.0, - "parking": 13.5 - } - } - ] - } - ] -} \ No newline at end of file diff --git a/tests/functions.php b/tests/functions.php deleted file mode 100644 index b70b241..0000000 --- a/tests/functions.php +++ /dev/null @@ -1,26 +0,0 @@ -createEtl(); - } - - /** - * @test - * @expectedException \RuntimeException - * @expectedExceptionMessage The "flushEvery" option must be null or an integer > 0. - */ - public function it_yells_on_invalid_flush_setting() - { - $builder = EtlBuilder::init()->loadInto( - function () { - } - )->flushEvery(0); - $builder->createEtl(); - } - - /** - * @test - */ - public function it_builds_an_etl_object() - { - $builder = EtlBuilder::init( - null, - null, - function () { - } - ); - $etl = $builder->createEtl(); - $this->assertInstanceOf(Etl::class, $etl); - } - - /** - * @test - */ - public function it_correctly_builds_an_etl_object() - { - $extractor = new class implements ExtractorInterface - { - public function extract($input, Etl $etl): iterable - { - return $input['foos']; - } - }; - - $transformer = new CallableTransformer('strtoupper'); - - $loader = new class implements LoaderInterface - { - public $initiated; - public $storage; - public $committed; - public $rollback; - - public function reset() - { - $this->initiated = false; - $this->storage = []; - $this->committed = false; - $this->rollback = false; - return $this; - } - - /** - * @inheritDoc - */ - public function init(): void - { - $this->initiated = true; - } - - /** - * @inheritDoc - */ - public function load(\Generator $items, $key, Etl $etl): void - { - foreach ($items as $item) { - $this->storage[] = $item; - } - } - - /** - * @inheritDoc - */ - public function commit(bool $partial): void - { - $this->committed = true; - } - - /** - * @inheritDoc - */ - public function rollback(): void - { - $this->rollback = true; - } - }; - - $etl = 
EtlBuilder::init($extractor, $transformer, $loader)->createEtl(); - - $data = [ - 'foos' => [ - 'foo', - 'bar', - ], - ]; - - $etl->process($data); - - $this->assertTrue($loader->initiated); - $this->assertTrue($loader->committed); - $this->assertEquals(['FOO', 'BAR'], $loader->storage); - - $loader = $loader->reset(); - $etl = EtlBuilder::init($extractor, function ($item, $key, Etl $etl) { - $etl->stopProcessing(true); - yield; - }, - $loader)->createEtl(); - $etl->process($data); - - $this->assertTrue($loader->rollback); - } - - /** - * @test - */ - public function it_correctly_maps_events() - { - $data = ['foo']; - $calledEvents = []; - $logEvent = function (EtlEvent $event) use (&$calledEvents) { - $calledEvents[] = $event->getName(); - }; - $builder = EtlBuilder::init()->loadInto(new NullLoader()) - ->onStart($logEvent) - ->onExtract($logEvent) - ->onTransform($logEvent) - ->onLoad($logEvent) - ->onFlush($logEvent) - ->onSkip($logEvent) - ->onStop($logEvent) - ->onEnd($logEvent) - ->onRollback($logEvent) - ; - - $etl = $builder->createEtl(); - $etl->process($data); - - $this->assertEquals([ - EtlEvents::START, - EtlEvents::EXTRACT, - EtlEvents::TRANSFORM, - EtlEvents::LOAD, - EtlEvents::FLUSH, - EtlEvents::END, - ], $calledEvents); - } - - /** - * @test - */ - public function it_correctly_maps_skipping_events() - { - $data = ['foo', 'bar']; - $calledEvents = []; - $logEvent = function (EtlEvent $event) use (&$calledEvents) { - $calledEvents[] = $event->getName(); - }; - $builder = EtlBuilder::init()->loadInto(new NullLoader()) - ->onStart($logEvent) - ->onExtract( - function (ItemEvent $event) { - if ('foo' === $event->getItem()) { - $event->getEtl()->skipCurrentItem(); - } - } - ) - ->onExtract($logEvent) - ->onTransform($logEvent) - ->onLoad($logEvent) - ->onFlush($logEvent) - ->onSkip($logEvent) - ->onStop($logEvent) - ->onEnd($logEvent) - ->onRollback($logEvent) - ; - - $etl = $builder->createEtl(); - $etl->process($data); - - $this->assertEquals([ - 
EtlEvents::START, - EtlEvents::EXTRACT, - EtlEvents::SKIP, - EtlEvents::EXTRACT, - EtlEvents::TRANSFORM, - EtlEvents::LOAD, - EtlEvents::FLUSH, - EtlEvents::END, - ], $calledEvents); - } - - /** - * @test - */ - public function it_correctly_maps_stop_events() - { - $data = ['foo', 'bar']; - $calledEvents = []; - $logEvent = function (EtlEvent $event) use (&$calledEvents) { - $calledEvents[] = $event->getName(); - }; - $builder = EtlBuilder::init()->loadInto(new NullLoader()) - ->onStart($logEvent) - ->onExtract( - function (ItemEvent $event) { - if ('foo' === $event->getItem()) { - $event->getEtl()->stopProcessing(); - } - } - ) - ->onExtract($logEvent) - ->onTransform($logEvent) - ->onLoad($logEvent) - ->onFlush($logEvent) - ->onSkip($logEvent) - ->onStop($logEvent) - ->onEnd($logEvent) - ->onRollback($logEvent) - ; - - $etl = $builder->createEtl(); - $etl->process($data); - - $this->assertEquals([ - EtlEvents::START, - EtlEvents::EXTRACT, - EtlEvents::STOP, - EtlEvents::FLUSH, - EtlEvents::END, - ], $calledEvents); - } - - /** - * @test - */ - public function it_correctly_maps_rollback_events() - { - $data = ['foo', 'bar']; - $calledEvents = []; - $logEvent = function (EtlEvent $event) use (&$calledEvents) { - $calledEvents[] = $event->getName(); - }; - $builder = EtlBuilder::init()->loadInto(new NullLoader()) - ->onStart($logEvent) - ->onExtract( - function (ItemEvent $event) { - if ('foo' === $event->getItem()) { - $event->getEtl()->stopProcessing(true); - } - } - ) - ->onExtract($logEvent) - ->onTransform($logEvent) - ->onLoad($logEvent) - ->onFlush($logEvent) - ->onSkip($logEvent) - ->onStop($logEvent) - ->onEnd($logEvent) - ->onRollback($logEvent) - ; - - $etl = $builder->createEtl(); - $etl->process($data); - - $this->assertEquals([ - EtlEvents::START, - EtlEvents::EXTRACT, - EtlEvents::STOP, - EtlEvents::ROLLBACK, - EtlEvents::END, - ], $calledEvents); - } - - /** - * @test - */ - public function loader_init_event_is_called_once() - { - $data = ['foo', 
'bar', 'baz']; - $called_item = null; - $count = 0; - - $builder = EtlBuilder::init()->loadInto(new NullLoader()) - ->onLoaderInit( - function (ItemEvent $event) use (&$called_item, &$count) { - $count++; - $called_item = $event->getItem(); - } - ); - - $etl = $builder->createEtl(); - $etl->process($data); - - $this->assertEquals('foo', $called_item); - $this->assertSame(1, $count); - } - - /** - * @test - */ - public function loader_init_event_is_called_once_even_when_1st_item_is_skipped() - { - $data = ['foo', 'bar', 'baz']; - $called_item = null; - $count = 0; - - $builder = EtlBuilder::init()->loadInto(new NullLoader()) - ->onExtract( - function (ItemEvent $event) { - if ('foo' === $event->getItem()) { - $event->getEtl()->skipCurrentItem(); - } - } - ) - ->onLoaderInit( - function (ItemEvent $event) use (&$called_item, &$count) { - $count++; - $called_item = $event->getItem(); - } - ); - - $etl = $builder->createEtl(); - $etl->process($data); - - $this->assertEquals('bar', $called_item); - $this->assertSame(1, $count); - } -} diff --git a/tests/src/EtlExceptionsTest.php b/tests/src/EtlExceptionsTest.php deleted file mode 100644 index 5e3361f..0000000 --- a/tests/src/EtlExceptionsTest.php +++ /dev/null @@ -1,171 +0,0 @@ -loadInto(new NullLoader()) - ->transformWith( - function ($item) { - if ($item instanceof \RuntimeException) { - throw $item; - } - yield $item; - } - ) - ->createEtl() - ; - - $etl->process($data()); - } - - /** - * @test - */ - public function exception_can_be_processed_on_extract() - { - - $data = [ - 'foo', - new \RuntimeException('Something wrong happened.'), - 'bar', - ]; - - $extractor = function (iterable $items): iterable { - - foreach ($items as $item) { - if ($item instanceof \RuntimeException) { - throw $item; - } - yield $item; - } - }; - - $etl = EtlBuilder::init() - ->loadInto($loader = new ArrayLoader($preserveKeys = false)) - ->extractFrom($extractor) - ->onExtractException( - function (ItemExceptionEvent $event) { - 
$event->ignoreException(); - } - ) - ->onEnd( - function (EndProcessEvent $event) use (&$counter) { - $counter = $event->getCounter(); - } - ) - ->createEtl(); - - $etl->process($data); - $this->assertEquals(['foo'], $loader->getArray()); - $this->assertEquals(1, $counter); - } - - /** - * @test - */ - public function exception_can_be_processed_on_transform() - { - - $data = function () { - yield 'foo'; - yield 'bar'; - yield 'baz'; - }; - $counter = null; - $etl = EtlBuilder::init() - ->loadInto($loader = new ArrayLoader($preserveKeys = false)) - ->transformWith( - function ($item) { - if ('bar' === $item) { - throw new \RuntimeException('I don\'t like bar.'); - } - yield $item; - } - ) - ->onTransformException( - function (ItemExceptionEvent $event) { - $event->ignoreException(); - } - ) - ->onEnd( - function (EndProcessEvent $event) use (&$counter) { - $counter = $event->getCounter(); - } - ) - ->createEtl() - ; - - $etl->process($data()); - - $this->assertEquals(['foo', 'baz'], $loader->getArray()); - $this->assertEquals(2, $counter); - } - - /** - * @test - */ - public function exception_can_be_processed_on_load() - { - - $data = function () { - yield 'foo'; - yield 'bar'; - yield 'baz'; - }; - $counter = null; - $array = []; - $etl = EtlBuilder::init() - ->loadInto( - function (\Generator $items) use (&$array) { - foreach ($items as $item) { - if ('bar' === $item) { - throw new \RuntimeException('I don\'t like bar.'); - } - } - $array[] = $item; - } - ) - ->onLoadException( - function (ItemExceptionEvent $event) { - $event->ignoreException(); - } - ) - ->onEnd( - function (EndProcessEvent $event) use (&$counter) { - $counter = $event->getCounter(); - } - ) - ->createEtl() - ; - - $etl->process($data()); - - $this->assertEquals(['foo', 'baz'], $array); - $this->assertEquals(2, $counter); - } -} diff --git a/tests/src/EventDispatcher/EventDispatcherTest.php b/tests/src/EventDispatcher/EventDispatcherTest.php deleted file mode 100644 index ff1e7fd..0000000 --- 
a/tests/src/EventDispatcher/EventDispatcherTest.php +++ /dev/null @@ -1,128 +0,0 @@ -dispatch($event); - - $this->assertEquals(['foo', 'bar'], $stack); - } - - /** - * @test - */ - public function it_knows_how_to_handle_priorities() - { - $event = new class extends EtlEvent - { - public function __construct() - { - parent::__construct(dummy_etl()); - } - - public function getName(): string - { - return 'foo'; - } - - }; - - $stack = []; - - $dispatcher = new EventDispatcher([ - new EventListener('foo', function () use (&$stack) { - $stack[] = 'foo'; - }, -50), - new EventListener('foo', function () use (&$stack) { - $stack[] = 'bar'; - }, 100), - new EventListener('foo', function () use (&$stack) { - $stack[] = 'baz'; - }), - ]); - - $dispatcher->dispatch($event); - - $this->assertEquals(['bar', 'baz', 'foo'], $stack); - } - - /** - * @test - */ - public function it_stops_propagation_when_asked_to() - { - $event = new class extends EtlEvent - { - public function __construct() - { - parent::__construct(dummy_etl()); - } - - public function getName(): string - { - return 'foo'; - } - - }; - - $stack = []; - - $dispatcher = new EventDispatcher([ - new EventListener('foo', function () use (&$stack) { - $stack[] = 'foo'; - }, -50), - new EventListener('foo', function () use (&$stack) { - $stack[] = 'bar'; - }, 100), - new EventListener('foo', function (EtlEvent $event) use (&$stack) { - $stack[] = 'baz'; - $event->stopPropagation(); - }), - ]); - - $dispatcher->dispatch($event); - - $this->assertEquals(['bar', 'baz'], $stack); - } -} diff --git a/tests/src/Iterator/CsvFileIteratorTest.php b/tests/src/Iterator/CsvFileIteratorTest.php deleted file mode 100644 index 2d36e02..0000000 --- a/tests/src/Iterator/CsvFileIteratorTest.php +++ /dev/null @@ -1,33 +0,0 @@ -assertCount(3, $iterator); - $this->assertEquals([ - [ - 'country', - 'name', - ], - [ - 'USA', - 'Donald Trump', - ], - [ - 'Russia', - 'Vladimir Poutine', - ], - ], array_values(iterator_to_array($iterator))); - 
} -} diff --git a/tests/src/Iterator/CsvStringIteratorTest.php b/tests/src/Iterator/CsvStringIteratorTest.php deleted file mode 100644 index 6619c8b..0000000 --- a/tests/src/Iterator/CsvStringIteratorTest.php +++ /dev/null @@ -1,33 +0,0 @@ -assertEquals([ - [ - 'country', - 'name', - ], - [ - 'USA', - 'Donald Trump', - ], - [ - 'Russia', - 'Vladimir Poutine', - ], - ], array_values(iterator_to_array($iterator))); - } -} diff --git a/tests/src/Iterator/FileLinesIteratorTest.php b/tests/src/Iterator/FileLinesIteratorTest.php deleted file mode 100644 index f6b1c7b..0000000 --- a/tests/src/Iterator/FileLinesIteratorTest.php +++ /dev/null @@ -1,28 +0,0 @@ -fwrite($item.\PHP_EOL); - } - - // Just to be sure - $this->assertSame([' foo '.\PHP_EOL, ' bar '.\PHP_EOL], \iterator_to_array($file)); - - $iterator = new FileLinesIterator($file); - $this->assertSame($data, \iterator_to_array($iterator)); - } -} diff --git a/tests/src/Iterator/KeysAwareCsvIteratorTest.php b/tests/src/Iterator/KeysAwareCsvIteratorTest.php deleted file mode 100644 index 97061d1..0000000 --- a/tests/src/Iterator/KeysAwareCsvIteratorTest.php +++ /dev/null @@ -1,110 +0,0 @@ -assertEquals([ - [ - 'country' => 'USA', - 'name' => 'Donald Trump', - ], - [ - 'country' => 'Russia', - 'name' => 'Vladimir Poutine', - ], - ], $result); - } - - public function testIteratorWithSpecifiedKeys() - { - $file = new \SplFileObject(TestSuite::getDataFile('dictators.csv')); - $iterator = new KeysAwareCsvIterator(new CsvFileIterator($file), ['Country', 'Name']); - $result = iterator_to_array($iterator); - $this->assertEquals([ - [ - 'Country' => 'USA', - 'Name' => 'Donald Trump', - ], - [ - 'Country' => 'Russia', - 'Name' => 'Vladimir Poutine', - ], - ], $result); - } - - public function testIteratorWithoutSkippingFirstRow() - { - $file = new \SplFileObject(TestSuite::getDataFile('dictators.csv')); - $iterator = new KeysAwareCsvIterator(new CsvFileIterator($file), ['Country', 'Name'], false); - $result = 
iterator_to_array($iterator); - $this->assertEquals([ - [ - 'Country' => 'country', - 'Name' => 'name', - ], - [ - 'Country' => 'USA', - 'Name' => 'Donald Trump', - ], - [ - 'Country' => 'Russia', - 'Name' => 'Vladimir Poutine', - ], - ], $result); - } - - public function testCreateFromText() - { - $text = file_get_contents(TestSuite::getDataFile('dictators.csv')); - $iterator = new KeysAwareCsvIterator(CsvStringIterator::createFromText($text)); - $result = iterator_to_array($iterator); - $this->assertEquals([ - [ - 'country' => 'USA', - 'name' => 'Donald Trump', - ], - [ - 'country' => 'Russia', - 'name' => 'Vladimir Poutine', - ], - ], $result); - } - - public function testMoreValuesThanKeys() - { - $csv = <<assertEquals([['name' => 'foo', 'description' => 'bar']], $result); - } - - public function testLessValuesThanKeys() - { - $csv = <<assertEquals([['name' => 'foo', 'description' => null]], $result); - } -} diff --git a/tests/src/Iterator/TextLinesIteratorTest.php b/tests/src/Iterator/TextLinesIteratorTest.php deleted file mode 100644 index e006999..0000000 --- a/tests/src/Iterator/TextLinesIteratorTest.php +++ /dev/null @@ -1,44 +0,0 @@ -text = <<text); - $this->assertEquals([ - 'foo', - 'bar' - ], iterator_to_array($iterator)); - } - - public function testIteratorWithoutSkippingEmptyLines() - { - - $iterator = new TextLinesIterator($this->text, false); - $this->assertEquals([ - 'foo', - '', - '', - 'bar' - ], iterator_to_array($iterator)); - } -} diff --git a/tests/src/Loader/ArrayLoaderTest.php b/tests/src/Loader/ArrayLoaderTest.php deleted file mode 100644 index daa54a5..0000000 --- a/tests/src/Loader/ArrayLoaderTest.php +++ /dev/null @@ -1,26 +0,0 @@ - 'bar', - 'bar' => 'baz' - ]; - - $loader = new ArrayLoader(); - foreach ($items as $key => $value) { - $loader->load(create_generator([$key => $value]), $key, dummy_etl()); - } - $this->assertEquals($items, $loader->getArray()); - } -} diff --git a/tests/src/Loader/CsvFileLoaderTest.php 
b/tests/src/Loader/CsvFileLoaderTest.php deleted file mode 100644 index f7fc1a2..0000000 --- a/tests/src/Loader/CsvFileLoaderTest.php +++ /dev/null @@ -1,33 +0,0 @@ - '|']); - $data = [ - ['Bill', 'Clinton'], - ['Richard', 'Nixon'], - ]; - - $loader->load(create_generator($data), null, dummy_etl()); - - $file->rewind(); - - $expected = [ - 'Bill|Clinton' . PHP_EOL, - 'Richard|Nixon' . PHP_EOL, - ]; - $this->assertEquals($expected, iterator_to_array($file)); - } -} diff --git a/tests/src/Loader/DoctrineORMLoaderTest.php b/tests/src/Loader/DoctrineORMLoaderTest.php deleted file mode 100644 index a138050..0000000 --- a/tests/src/Loader/DoctrineORMLoaderTest.php +++ /dev/null @@ -1,445 +0,0 @@ -id = $id; - $this->name = $name; - } - - public function getId() - { - return $this->id; - } - - public function getName() - { - return $this->name; - } - - }; - } - - /** - * Fakes a Doctrine Entity Repository. - * @param $className - * @return ObjectRepository - */ - private function fakeRepository($className): ObjectRepository - { - $fakeRepository = new class($className) implements ObjectRepository - { - - private $storage = []; - private $className = ''; - - public function __construct($className) - { - $this->className = $className; - } - - public function find($id) - { - return $this->storage[$id] ?? 
null; - } - - public function findAll() - { - return array_values($this->storage); - } - - public function findBy(array $criteria, array $orderBy = null, $limit = null, $offset = null) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function findOneBy(array $criteria) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function getClassName() - { - return $this->className; - } - - public function store($object) - { - $this->storage[$object->getId()] = $object; - } - - public function remove($object) - { - if (false !== $k = array_search($object, $this->storage)) { - unset($this->storage[$k]); - } - } - }; - - return $fakeRepository; - } - - /** - * Fakes a Doctrine Object Manager. - * @param $repositories - * @return ObjectManager - */ - private function fakeObjectManager($repositories): ObjectManager - { - $fakeManager = new class($repositories) implements ObjectManager - { - - private $repositories = []; - private $tmpStorage = []; - - public function __construct(array $repositories) - { - $this->repositories = $repositories; - } - - /** - * @inheritDoc - */ - public function find($className, $id) - { - return $this->getRepository($className)->find($id); - } - - public function persist($object) - { - $this->tmpStorage[] = $object; - } - - public function remove($object) - { - $this->getRepository(get_class($object))->remove($object); - } - - public function merge($object) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function clear($objectName = null) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function detach($object) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function refresh($object) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function flush() - { - foreach 
($this->tmpStorage as $o => $object) { - $this->getRepository(get_class($object))->store($object); - unset($this->tmpStorage[$o]); - } - } - - public function getRepository($className) - { - return $this->repositories[$className] ?? null; - } - - public function getClassMetadata($className) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function getMetadataFactory() - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function initializeObject($obj) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function contains($object) - { - $className = get_class($object); - return null !== $this->find($className, $object->getId()) || in_array($object, $this->tmpStorage); - } - - }; - - return $fakeManager; - } - - /** - * Fakes a Doctrine Manager Registry. - * @param $objectManagers - * @return ManagerRegistry - */ - private function fakeManagerRegistry($objectManagers): ManagerRegistry - { - $fakeRegistry = new class($objectManagers) implements ManagerRegistry - { - - private $objectManagers = []; - - public function __construct(array $objectManagers) - { - $this->objectManagers = $objectManagers; - } - - public function getDefaultConnectionName() - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function getConnection($name = null) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function getConnections() - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function getConnectionNames() - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function getDefaultManagerName() - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function getManager($name = null) - { - return $this->objectManagers[$name] ?? 
null; - } - - public function getManagers() - { - return $this->objectManagers; - } - - public function resetManager($name = null) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function getAliasNamespace($alias) - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function getManagerNames() - { - throw new \LogicException(sprintf('%s is not implemented.', __METHOD__)); - } - - public function getRepository($persistentObject, $persistentManagerName = null) - { - foreach ($this->objectManagers as $manager) { - if (null !== $manager->getRepository(get_class($persistentObject))) { - return $manager; - } - } - return null; - } - - public function getManagerForClass($class) - { - foreach ($this->objectManagers as $manager) { - if (null !== $manager->getRepository($class)) { - return $manager; - } - } - return null; - } - - }; - - return $fakeRegistry; - } - - public function testFakeEntity() - { - $entity = $this->fakeEntity('foo', 'bar'); - $anotherEntity = $this->fakeEntity('bar', 'baz'); - $this->assertEquals(get_class($entity), get_class($anotherEntity)); - $this->assertEquals('foo', $entity->getId()); - $this->assertEquals('bar', $entity->getName()); - $this->assertEquals('bar', $anotherEntity->getId()); - $this->assertEquals('baz', $anotherEntity->getName()); - } - - public function testFakeRepository() - { - - $entity = $this->fakeEntity('foo', 'bar'); - $className = get_class($entity); - - $repository = $this->fakeRepository($className); - $this->assertNull($repository->find($entity->getId())); - - $repository->store($entity); - $this->assertNotNull($repository->find($entity->getId())); - $this->assertSame($repository->find($entity->getId()), $entity); - - $repository->remove($entity); - $this->assertNull($repository->find($entity->getId())); - } - - public function testFakeObjectManager() - { - $entity = $this->fakeEntity('foo', 'bar'); - $className = get_class($entity); - 
$repository = $this->fakeRepository($className); - $em = $this->fakeObjectManager([$className => $repository]); - - $this->assertNull($em->find($className, $entity->getId())); - $this->assertFalse($em->contains($entity)); - - // Test persistence - $em->persist($entity); - $this->assertNull($em->find($className, $entity->getId())); - $this->assertTrue($em->contains($entity)); - - // Test Flush - $em->flush(); - $this->assertNotNull($em->find($className, $entity->getId())); - $this->assertTrue($em->contains($entity)); - } - - public function testFakeRegistry() - { - $entity = $this->fakeEntity('foo', 'bar'); - $className = get_class($entity); - $repository = $this->fakeRepository($className); - $em = $this->fakeObjectManager([$className => $repository]); - $registry = $this->fakeManagerRegistry(['default' => $em]); - $this->assertSame($registry->getManagerForClass($className), $em); - } - - public function testLoaderWithDefaultSettings() - { - $entity = $this->fakeEntity('foo', 'bar'); - $anotherEntity = $this->fakeEntity('bar', 'baz'); - $className = get_class($entity); - - $registry = $this->fakeManagerRegistry( - [ - 'default' => $em = $this->fakeObjectManager([ - $className => $repository = $this->fakeRepository($className) - ]) - ] - ); - - // The storage should be empty - $this->assertFalse($em->contains($entity)); - $this->assertFalse($em->contains($anotherEntity)); - $this->assertNull($repository->find($entity->getId())); - $this->assertNull($repository->find($anotherEntity->getId())); - - // Try to load 1st entity. 
- $loader = new DoctrineORMLoader($registry); - $loader->load(create_generator([$entity->getId() => $entity]), $entity->getId(), dummy_etl()); - $loader->commit(false); - $this->assertTrue($em->contains($entity)); - $this->assertNotNull($repository->find($entity->getId())); - $this->assertTrue($em->contains($entity)); - $this->assertNotNull($repository->find($entity->getId())); - - // Try to load 2nd entity - $loader->load(create_generator([$anotherEntity->getId() => $anotherEntity]), $anotherEntity->getId(), dummy_etl()); - $loader->commit(false); - $this->assertTrue($em->contains($anotherEntity)); - $this->assertNotNull($repository->find($anotherEntity->getId())); - } - - public function testLoaderWithBufferedFlush() - { - $entity = $this->fakeEntity('foo', 'bar'); - $anotherEntity = $this->fakeEntity('bar', 'baz'); - $className = get_class($entity); - - $registry = $this->fakeManagerRegistry( - [ - 'default' => $em = $this->fakeObjectManager([ - $className => $repository = $this->fakeRepository($className) - ]) - ] - ); - - // The storage should be empty - $this->assertFalse($em->contains($entity)); - $this->assertFalse($em->contains($anotherEntity)); - $this->assertNull($repository->find($entity->getId())); - $this->assertNull($repository->find($anotherEntity->getId())); - - $loader = new DoctrineORMLoader($registry); - $loader->load(create_generator([$entity->getId() => $entity]), $entity->getId(), dummy_etl()); - $this->assertTrue($em->contains($entity)); // After load, the entity should be present in the unit of work - $this->assertNull($repository->find($entity->getId())); // But it should not be flushed yet - - - $loader->load(create_generator([$anotherEntity->getId() => $anotherEntity]), $anotherEntity->getId(), dummy_etl()); - $this->assertTrue($em->contains($anotherEntity)); // After load, the entity should be present in the unit of work - $this->assertNull($repository->find($anotherEntity->getId())); // But it should not be flushed yet - - 
$loader->commit(false); - - // Both entities should be flushed now - $this->assertNotNull($repository->find($entity->getId())); - $this->assertNotNull($repository->find($anotherEntity->getId())); - } - - - /** - * @expectedException \InvalidArgumentException - * @expectedExceptionMessageRegExp #The transformed data should return a generator of entities.# - */ - public function testInvalidData() - { - $registry = $this->fakeManagerRegistry([]); - $loader = new DoctrineORMLoader($registry); - $loader->load(create_generator(['foo' => 'bar']), null, dummy_etl()); - } - - /** - * @expectedException \RuntimeException - * @expectedExceptionMessageRegExp #Unable to locate Doctrine manager# - */ - public function testInvalidEntityManager() - { - $registry = $this->fakeManagerRegistry([]); - $loader = new DoctrineORMLoader($registry); - $loader->load(create_generator([new \stdClass()]), null, dummy_etl()); - } -} diff --git a/tests/src/Loader/FileLoaderTest.php b/tests/src/Loader/FileLoaderTest.php deleted file mode 100644 index 6addf96..0000000 --- a/tests/src/Loader/FileLoaderTest.php +++ /dev/null @@ -1,61 +0,0 @@ - 'bar', - 'bar' => 'baz' - ]; - $file = new SplTempFileObject(); - $loader = FileLoader::toFile($file); - - foreach ($items as $key => $value) { - $loader->load(create_generator([$key => $value]), $key, dummy_etl()); - } - - $file->rewind(); - $this->assertEquals(implode(\PHP_EOL, [ - 'bar', - 'baz', - '' - ]), implode('', iterator_to_array($file))); - } - - /** - * @test - */ - public function it_concats_all_strings() - { - $items = [ - 'foo' => 'bar', - 'bar' => 'baz' - ]; - $file = new SplTempFileObject(); - $loader = FileLoader::toFile($file, ['eol' => FileLoader::NO_EOL]); - - foreach ($items as $key => $value) { - $loader->load(create_generator([$key => $value]), $key, dummy_etl()); - } - - $file->rewind(); - $this->assertEquals(implode('', [ - 'bar', - 'baz' - ]), implode('', iterator_to_array($file))); - } -} diff --git 
a/tests/src/Loader/FlushableLoaderTest.php b/tests/src/Loader/FlushableLoaderTest.php deleted file mode 100644 index 51a336b..0000000 --- a/tests/src/Loader/FlushableLoaderTest.php +++ /dev/null @@ -1,159 +0,0 @@ -loadInto($this->loader) - ->onFlush( - function (FlushEvent $event) use (&$numCalls, &$expected) { - $this->assertEquals($expected[$numCalls], $this->loader->getItems()); - $numCalls++; - } - ) - ->createEtl(); - - $etl->process(['foo', 'bar', 'baz']); - $this->assertEquals(1, $numCalls); - } - - public function testItFlushesEveryTime() - { - $numCalls = 0; - $expected = [ - ['foo'], - ['foo', 'bar'], - ['foo', 'bar', 'baz'], - ['foo', 'bar', 'baz'], - ]; - - $etl = EtlBuilder::init() - ->loadInto($this->loader) - ->flushEvery(1) - ->onFlush( - function (FlushEvent $event) use (&$numCalls, &$expected) { - $this->assertEquals($expected[$numCalls], $this->loader->getItems()); - $numCalls++; - } - ) - ->createEtl(); - - $etl->process(['foo', 'bar', 'baz']); - $this->assertEquals(4, $numCalls); - } - - public function testItFlushesEvery2Items() - { - $numCalls = 0; - $expected = [ - ['foo', 'bar'], - ['foo', 'bar', 'baz', 'bat'], - ['foo', 'bar', 'baz', 'bat', 'batman'], - ]; - - $etl = EtlBuilder::init() - ->loadInto($this->loader) - ->flushEvery(2) - ->onFlush( - function (FlushEvent $event) use (&$numCalls, &$expected) { - $this->assertEquals($expected[$numCalls], $this->loader->getItems()); - $numCalls++; - } - ) - ->createEtl(); - - $etl->process(['foo', 'bar', 'baz', 'bat', 'batman']); - $this->assertEquals(3, $numCalls); - } - - public function testEarlyFlushCanBeTriggered() - { - $numCalls = 0; - $expected = [ - ['foo'], - ['foo', 'bar'], - ['foo', 'bar', 'baz', 'bat'], - ['foo', 'bar', 'baz', 'bat', 'batman'], - ]; - - $etl = EtlBuilder::init() - ->loadInto($this->loader) - ->flushEvery(2) - ->onLoad( - function (ItemEvent $event) { - if (0 === $event->getKey()) { - $event->getEtl()->triggerFlush(); - } - } - ) - ->onFlush( - function (FlushEvent 
$event) use (&$numCalls, &$expected) { - $this->assertEquals($expected[$numCalls], $this->loader->getItems()); - $numCalls++; - } - ) - ->createEtl(); - - $etl->process(['foo', 'bar', 'baz', 'bat', 'batman']); - $this->assertEquals(4, $numCalls); - } - - public function setUp() - { - $this->loader = new class implements LoaderInterface - { - private $tmpStorage; - private $storage; - public function init(): void - { - $this->tmpStorage = []; - $this->storage = []; - } - - public function load(\Generator $items, $key, Etl $etl): void - { - foreach ($items as $item) { - $this->tmpStorage[] = $item; - } - } - - public function commit(bool $partial): void - { - while (null !== ($item = \array_shift($this->tmpStorage))) { - $this->storage[] = $item; - } - } - - public function rollback(): void - { - } - - public function getItems() - { - return $this->storage; - } - - }; - } -} diff --git a/tests/src/Loader/JsonFileLoaderTest.php b/tests/src/Loader/JsonFileLoaderTest.php deleted file mode 100644 index 2e3b17f..0000000 --- a/tests/src/Loader/JsonFileLoaderTest.php +++ /dev/null @@ -1,40 +0,0 @@ - $value) { - $loader->load(create_generator([$key => $value]), $key, dummy_etl()); - } - $loader->commit(false); - $file->rewind(); - $content = ''; - while (!$file->eof()) { - $content .= $file->fgets(); - } - $this->assertEquals(json_encode($data), trim($content)); - } -} diff --git a/tests/src/Recipe/LoggerRecipeTest.php b/tests/src/Recipe/LoggerRecipeTest.php deleted file mode 100644 index 34c195c..0000000 --- a/tests/src/Recipe/LoggerRecipeTest.php +++ /dev/null @@ -1,186 +0,0 @@ -createLogger(); - $builder = EtlBuilder::init() - ->loadInto(new NullLoader()) - ->useRecipe(new LoggerRecipe($logger)); - $etl = $builder->createEtl(); - $etl->process([ - 'foo' => 'bar', - 'bar' => 'baz', - ]); - - $expected = [ - 'Starting ETL...', - 'Extracted foo.', - 'Transformed foo.', - 'Initializing loader...', - 'Loaded foo.', - 'Extracted bar.', - 'Transformed bar.', - 'Loaded bar.', - 
'Flushed 2 items.', - 'ETL completed on 2 items.', - ]; - - $this->assertEquals($expected, $logger->stack); - } - - /** - * @test - */ - public function it_also_logs_skipping_items() - { - $logger = $this->createLogger(); - $builder = EtlBuilder::init() - ->loadInto(new NullLoader()) - ->onExtract( - function (ItemEvent $event) { - if ('foo' === $event->getKey()) { - $event->getEtl()->skipCurrentItem(); - } - } - ) - ->useRecipe(new LoggerRecipe($logger)) - ; - $etl = $builder->createEtl(); - $etl->process([ - 'foo' => 'bar', - 'bar' => 'baz', - ]); - - $expected = [ - 'Starting ETL...', - 'Extracted foo.', - 'Skipping item foo.', - 'Extracted bar.', - 'Transformed bar.', - 'Initializing loader...', - 'Loaded bar.', - 'Flushed 1 items.', - 'ETL completed on 1 items.', - ]; - - $this->assertEquals($expected, $logger->stack); - } - - /** - * @test - */ - public function it_also_logs_stop_event() - { - $logger = $this->createLogger(); - $builder = EtlBuilder::init() - ->loadInto(new NullLoader()) - ->onExtract( - function (ItemEvent $event) { - if ('foo' === $event->getKey()) { - $event->getEtl()->stopProcessing(); - } - } - ) - ->useRecipe(new LoggerRecipe($logger)) - ; - $etl = $builder->createEtl(); - $etl->process([ - 'foo' => 'bar', - 'bar' => 'baz', - ]); - - $expected = [ - 'Starting ETL...', - 'Extracted foo.', - 'Stopping on item foo.', - 'Flushed 0 items.', - 'ETL completed on 0 items.', - ]; - - $this->assertEquals($expected, $logger->stack); - } - - /** - * @test - */ - public function it_also_logs_rollback_event() - { - $logger = $this->createLogger(); - $builder = EtlBuilder::init() - ->loadInto(new NullLoader()) - ->onExtract( - function (ItemEvent $event) { - if ('foo' === $event->getKey()) { - $event->getEtl()->stopProcessing(true); - } - } - ) - ->useRecipe(new LoggerRecipe($logger)) - ; - $etl = $builder->createEtl(); - $etl->process([ - 'foo' => 'bar', - 'bar' => 'baz', - ]); - - $expected = [ - 'Starting ETL...', - 'Extracted foo.', - 'Stopping on 
item foo.', - 'Rollback 0 items.', - 'ETL completed on 0 items.', - ]; - - $this->assertEquals($expected, $logger->stack); - } - - private function createLogger(): LoggerInterface - { - return new class implements LoggerInterface - { - public $stack = []; - public function emergency($message, array $context = []) - { - } - public function alert($message, array $context = []) - { - } - public function critical($message, array $context = []) - { - } - public function error($message, array $context = []) - { - } - public function warning($message, array $context = []) - { - } - public function notice($message, array $context = []) - { - } - public function info($message, array $context = []) - { - } - public function debug($message, array $context = []) - { - } - public function log($level, $message, array $context = []) - { - $this->stack[] = $message; - } - }; - } -} diff --git a/tests/src/TestSuite.php b/tests/src/TestSuite.php deleted file mode 100644 index 64f81dc..0000000 --- a/tests/src/TestSuite.php +++ /dev/null @@ -1,18 +0,0 @@ -transform($item, 0, dummy_etl()); - $this->assertSame('caps are hell', \iterator_to_array($transformed)[0]); - } -}