From b408762084b32a2a9015c81a1d5d07e5e40a2a26 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Fri, 21 Oct 2022 23:09:18 +0200 Subject: [PATCH 01/38] Working --- src/JsonDecoder/StringOnlyDecoder.php | 27 +++++++++++++ src/Parser.php | 38 +++++++++++++++++-- src/Tokens.php | 15 ++++++++ .../JsonDecoder/StringOnlyDecoderTest.php | 32 ++++++++++++++++ test/JsonMachineTest/ParserTest.php | 17 +++++++++ 5 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 src/JsonDecoder/StringOnlyDecoder.php create mode 100644 test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php diff --git a/src/JsonDecoder/StringOnlyDecoder.php b/src/JsonDecoder/StringOnlyDecoder.php new file mode 100644 index 0000000..1702008 --- /dev/null +++ b/src/JsonDecoder/StringOnlyDecoder.php @@ -0,0 +1,27 @@ +innerDecoder = $innerDecoder; + } + + public function decode($jsonValue) + { + if (is_string($jsonValue)) { + return $this->innerDecoder->decode($jsonValue); + } + + return new ValidResult($jsonValue); + } +} diff --git a/src/Parser.php b/src/Parser.php index 8a18265..30d7ac2 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -4,6 +4,7 @@ namespace JsonMachine; +use IteratorAggregate; use JsonMachine\Exception\InvalidArgumentException; use JsonMachine\Exception\JsonMachineException; use JsonMachine\Exception\PathNotFoundException; @@ -11,6 +12,7 @@ use JsonMachine\Exception\UnexpectedEndSyntaxErrorException; use JsonMachine\JsonDecoder\ExtJsonDecoder; use JsonMachine\JsonDecoder\ItemDecoder; +use JsonMachine\JsonDecoder\StringOnlyDecoder; use Traversable; class Parser implements \IteratorAggregate, PositionAware @@ -52,21 +54,28 @@ class Parser implements \IteratorAggregate, PositionAware /** @var bool */ private $hasSingleJsonPointer; + /** @var bool */ + private bool $recursive; + /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 * @param ItemDecoder $jsonDecoder * * @throws InvalidArgumentException */ - public function 
__construct(Traversable $tokens, $jsonPointer = '', ItemDecoder $jsonDecoder = null) + public function __construct(Traversable $tokens, $jsonPointer = '', ItemDecoder $jsonDecoder = null, $recursive = false) { $jsonPointers = (new ValidJsonPointers((array) $jsonPointer))->toArray(); $this->tokens = $tokens; $this->jsonDecoder = $jsonDecoder ?: new ExtJsonDecoder(); + if ($recursive) { + $this->jsonDecoder = new StringOnlyDecoder($this->jsonDecoder); + } $this->hasSingleJsonPointer = (count($jsonPointers) === 1); $this->jsonPointers = array_combine($jsonPointers, $jsonPointers); $this->paths = $this->buildPaths($this->jsonPointers); + $this->recursive = $recursive; } private function buildPaths(array $jsonPointers): array @@ -141,7 +150,12 @@ public function getIterator() ) ) ) { - $jsonBuffer .= $token; + if ($this->recursive && ($token == '{' || $token == '[')) { + $jsonBuffer = new self($this->remainingTokens(), '', $this->jsonDecoder, true); + $token = ' '; + } else { + $jsonBuffer .= $token; + } } // todo move this switch to the top just after the syntax check to be a correct FSM switch ($token[0]) { @@ -212,7 +226,7 @@ public function getIterator() $expectedType = 96; // 96 = self::AFTER_ARRAY_VALUE; } } - if ($currentLevel > $iteratorLevel) { + if ($currentLevel > $iteratorLevel && ! $this->recursive) { continue; // a valid json chunk is not completed yet } if ($jsonBuffer !== '') { @@ -244,6 +258,9 @@ public function getIterator() $subtreeEnded = true; break; } + if ($currentLevel < 0) { + break; + } } if ($token === null) { @@ -262,6 +279,19 @@ public function getIterator() $this->currentPath = null; } + /** + * @return void + */ + private function remainingTokens() + { + /** @var \Iterator $iterator */ + $iterator = $this->tokens instanceOf IteratorAggregate ? 
$this->tokens->getIterator() : $this->tokens; + while ($iterator->valid()) { + yield $iterator->current(); + $iterator->next(); + } + } + private function tokenTypes() { $allBytes = []; @@ -346,7 +376,7 @@ public function getMatchedJsonPointer(): string */ private function error($msg, $token, $exception = SyntaxErrorException::class) { - throw new $exception($msg." '".$token."'", $this->tokens->getPosition()); + throw new $exception($msg." '".$token."'", method_exists($this->tokens, 'getPosition') ? $this->tokens->getPosition() : ''); } /** diff --git a/src/Tokens.php b/src/Tokens.php index 2ba96c8..8ac8a31 100644 --- a/src/Tokens.php +++ b/src/Tokens.php @@ -11,6 +11,9 @@ class Tokens implements \IteratorAggregate, PositionAware /** @var iterable */ private $jsonChunks; + /** @var Generator */ + private $generator; + /** * @param iterable $jsonChunks */ @@ -24,6 +27,18 @@ public function __construct($jsonChunks) */ #[\ReturnTypeWillChange] public function getIterator() + { + if ( ! $this->generator) { + $this->generator = $this->innerGenerator(); + } + + return $this->generator; + } + + /** + * @return Generator + */ + public function innerGenerator() { $insignificantBytes = $this->insignificantBytes(); $tokenBoundaries = $this->tokenBoundaries(); diff --git a/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php b/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php new file mode 100644 index 0000000..35946de --- /dev/null +++ b/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php @@ -0,0 +1,32 @@ +assertSame('value', $decoder->decode('"value"')->getValue()); + } + + public function testDoesNotPassParserIntoInnerDecoder() + { + $innerDecoder = new ExtJsonDecoder(); + $decoder = new StringOnlyDecoder($innerDecoder); + $parser = new Parser(new \ArrayObject(['[]'])); + + $this->assertSame($parser, $decoder->decode($parser)->getValue()); + } +} diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index 
6065450..b3ba233 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -4,6 +4,7 @@ namespace JsonMachineTest; +use Generator; use JsonMachine\Exception\JsonMachineException; use JsonMachine\Exception\PathNotFoundException; use JsonMachine\Exception\SyntaxErrorException; @@ -526,4 +527,20 @@ public function testThrowsMeaningfulErrorOnIncorrectTokens() foreach ($parser as $index => $item) { } } + + public function testRecursiveIteration() + { + $parser = new Parser(new Tokens(['[{"numbers": [42]}]']), '', null, true); + + foreach ($parser as $object) { + $this->assertInstanceOf(Parser::class, $object); + foreach ($object as $key => $values) { + $this->assertInstanceOf(Parser::class, $values); + $this->assertSame("numbers", $key); + foreach ($values as $fortyTwo) { + $this->assertSame(42, $fortyTwo); + } + } + } + } } From 1466ddbf020c32f81e17caabcc52dbd6592c63c6 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 11:48:27 +0200 Subject: [PATCH 02/38] \JsonMachineTest\ParserTest::testZigZagRecursiveIteration --- test/JsonMachineTest/ParserTest.php | 33 +++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index b3ba233..38ead32 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -14,6 +14,7 @@ use JsonMachine\StringChunks; use JsonMachine\Tokens; use JsonMachine\TokensWithDebugging; +use Traversable; /** * @covers \JsonMachine\Parser @@ -530,12 +531,12 @@ public function testThrowsMeaningfulErrorOnIncorrectTokens() public function testRecursiveIteration() { - $parser = new Parser(new Tokens(['[{"numbers": [42]}]']), '', null, true); + $array = new Parser(new Tokens(['[{"numbers": [42]}]']), '', null, true); - foreach ($parser as $object) { - $this->assertInstanceOf(Parser::class, $object); + foreach ($array as $object) { + 
$this->assertInstanceOf(Traversable::class, $object); foreach ($object as $key => $values) { - $this->assertInstanceOf(Parser::class, $values); + $this->assertInstanceOf(Traversable::class, $values); $this->assertSame("numbers", $key); foreach ($values as $fortyTwo) { $this->assertSame(42, $fortyTwo); @@ -543,4 +544,28 @@ public function testRecursiveIteration() } } } + + public function testZigZagRecursiveIteration() + { + $objectKeysToVisit = ['numbers', 'string', 'more numbers']; + $objectKeysVisited = []; + $valuesToVisit = [41, 42, 'text', 43]; + $valuesVisited = []; + + $array = new Parser(new Tokens(['[{"numbers": [41, 42], "string": ["text"], "more numbers": [43]}]']), '', null, true); + + foreach ($array as $object) { + $this->assertInstanceOf(Traversable::class, $object); + foreach ($object as $key => $values) { + $objectKeysVisited[] = $key; + $this->assertInstanceOf(Traversable::class, $values); + foreach ($values as $value) { + $valuesVisited[] = $value; + } + } + } + + $this->assertSame($objectKeysToVisit, $objectKeysVisited); + $this->assertSame($valuesToVisit, $valuesVisited); + } } From e21abf9342c597c969cea416f95d9f8b6c8fb419 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 12:17:52 +0200 Subject: [PATCH 03/38] Added 'recursive' option --- src/Items.php | 3 ++- src/ItemsOptions.php | 6 ++++++ test/JsonMachineTest/ItemsOptionsTest.php | 1 + test/JsonMachineTest/ItemsTest.php | 13 +++++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/Items.php b/src/Items.php index 1dfb958..6c932ad 100644 --- a/src/Items.php +++ b/src/Items.php @@ -63,7 +63,8 @@ public function __construct($bytesIterator, array $options = []) $this->chunks ), $this->jsonPointer, - $this->jsonDecoder ?: new ExtJsonDecoder() + $this->jsonDecoder ?: new ExtJsonDecoder(), + $options['recursive'] ); } diff --git a/src/ItemsOptions.php b/src/ItemsOptions.php index 0e528b9..8dc9c0c 100644 --- a/src/ItemsOptions.php +++ b/src/ItemsOptions.php @@ 
-66,12 +66,18 @@ private function opt_debug(bool $debug) return $debug; } + private function opt_recursive(bool $recursive) + { + return $recursive; + } + public static function defaultOptions(): array { return [ 'pointer' => '', 'decoder' => new ExtJsonDecoder(), 'debug' => false, + 'recursive' => false, ]; } } diff --git a/test/JsonMachineTest/ItemsOptionsTest.php b/test/JsonMachineTest/ItemsOptionsTest.php index 3dd1cf4..3a4434e 100644 --- a/test/JsonMachineTest/ItemsOptionsTest.php +++ b/test/JsonMachineTest/ItemsOptionsTest.php @@ -53,6 +53,7 @@ private function defaultOptions() 'pointer' => '', 'decoder' => new ExtJsonDecoder(), 'debug' => false, + 'recursive' => false, ]; } diff --git a/test/JsonMachineTest/ItemsTest.php b/test/JsonMachineTest/ItemsTest.php index d0d06d7..6ecb0ca 100644 --- a/test/JsonMachineTest/ItemsTest.php +++ b/test/JsonMachineTest/ItemsTest.php @@ -6,6 +6,7 @@ use JsonMachine\Items; use JsonMachine\JsonDecoder\PassThruDecoder; +use Traversable; /** * @covers \JsonMachine\Items @@ -139,4 +140,16 @@ public function testGetJsonPointers() $this->assertSame(['/one', '/two'], $items->getJsonPointers()); } + + public function testRecursiveIteration() + { + $items = Items::fromString('[[":)"]]', ['recursive' => true]); + + foreach ($items as $emojis) { + $this->assertInstanceOf(Traversable::class, $emojis); + foreach ($emojis as $emoji) { + $this->assertSame(":)", $emoji); + } + } + } } From 64b5f2315763ed299195c97f28acb06fe37ae8c8 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 20:09:14 +0200 Subject: [PATCH 04/38] Documentation --- CHANGELOG.md | 3 ++- README.md | 61 ++++++++++++++++++++++++++++++++++------------------ 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a275597..50130f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## master -Nothing yet +### Added +- Recursive iteration via `recursive` option.
diff --git a/README.md b/README.md index 2a42cca..9b3957a 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ for PHP >=7.0. See [TL;DR](#tl-dr). No dependencies in production except optiona + [Parsing nested values in arrays](#parsing-nested-values) + [Parsing a single scalar value](#getting-scalar-values) + [Parsing multiple subtrees](#parsing-multiple-subtrees) + + [Recursive iteration](#recursive) + [What is JSON Pointer anyway?](#json-pointer) * [Options](#options) * [Parsing streaming responses from a JSON API](#parsing-json-stream-api-responses) @@ -320,6 +321,39 @@ foreach ($fruits as $key => $value) { } ``` + +### Recursive iteration (BETA) +Recursive iteration can be enabled via `recursive` option set to `true`. +Every JSON iterable that JSON Machine encounters will then be yielded as a lazy instance of `Traversable`. +No JSON vector will be materialized and kept in memory. +The only PHP values you get materialized will be scalar values. +Let's see an example with many, many users with many, many friends + +```php + true]); +foreach ($users as $user) { // $user instanceof Traversable, not an array/object + foreach ($user as $userField => $userValue) { + if ($userField == 'friends') { + foreach ($userValue as $friend) { // $userValue instanceof Traversable, not an array/object + foreach ($user as $friendField => $friendValue) { // $friend instanceof Traversable, not an array/object + // do whatever you want here + // maybe rather use PHP's Recursive*Iterators + } + } + } + } +} +``` + +> You **MUST** iterate such lazy `Traversable`s in real time. +> **NEVER** skip an iteration of such `Traversable` and +> **NEVER** keep references to such past `Traversable`s to iterate them later +> or you end up (almost) like [this guy](https://xkcd.com/292/). + ### What is JSON Pointer anyway? It's a way of addressing one item in JSON document. See the [JSON Pointer RFC 6901](https://tools.ietf.org/html/rfc6901). 
@@ -347,6 +381,7 @@ Some examples: Options may change how a JSON is parsed. Array of options is the second parameter of all `Items::from*` functions. Available options are: - `pointer` - A JSON Pointer string that tells which part of the document you want to iterate. +- `recursive` - Bool. Any JSON array/object the parser hits will not be decoded but served lazily as a `Traversable`. Default `false`. - `decoder` - An instance of `ItemDecoder` interface. - `debug` - `true` or `false` to enable or disable the debug mode. When the debug mode is enabled, data such as line, column and position in the document are available during parsing or in exceptions. Keeping debug disabled adds slight @@ -518,30 +553,14 @@ but you forgot to specify a JSON Pointer. See [Parsing a subtree](#parsing-a-sub ### "That didn't help" The other reason may be, that one of the items you iterate is itself so huge it cannot be decoded at once. For example, you iterate over users and one of them has thousands of "friend" objects in it. -Use `PassThruDecoder` which does not decode an item, get the json string of the user -and parse it iteratively yourself using `Items::fromString()`. - -```php - new PassThruDecoder]); -foreach ($users as $user) { - foreach (Items::fromString($user, ['pointer' => "/friends"]) as $friend) { - // process friends one by one - } -} -``` +The most efficient solution is to set `recursive` option to `true`. +See [Recursive iteration](#recursive). ### "I am still out of luck" -It probably means that the JSON string `$user` itself or one of the friends are too big and do not fit in memory. -However, you can try this approach recursively. Parse `"/friends"` with `PassThruDecoder` getting one `$friend` -json string at a time and then parse that using `Items::fromString()`... If even that does not help, -there's probably no solution yet via JSON Machine. 
A feature is planned which will enable you to iterate -any structure fully recursively and strings will be served as streams. +It probably means that a single JSON string itself is too big to fit in memory. +For example very big file encoded as base64. +In that case you will probably be still out of luck until JSON Machine supports yielding of scalar values as PHP streams. ## Installation From aba5205393029e002d81dbafef4a0022d17157e6 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 20:20:17 +0200 Subject: [PATCH 05/38] Build fixed --- src/JsonDecoder/StringOnlyDecoder.php | 2 -- src/Parser.php | 4 ++-- test/JsonMachineTest/ItemsTest.php | 2 +- test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php | 3 ++- test/JsonMachineTest/ParserTest.php | 3 +-- 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/JsonDecoder/StringOnlyDecoder.php b/src/JsonDecoder/StringOnlyDecoder.php index 1702008..ebdf544 100644 --- a/src/JsonDecoder/StringOnlyDecoder.php +++ b/src/JsonDecoder/StringOnlyDecoder.php @@ -4,8 +4,6 @@ namespace JsonMachine\JsonDecoder; -use JsonMachine\Parser; - class StringOnlyDecoder implements ItemDecoder { /** @var ItemDecoder */ diff --git a/src/Parser.php b/src/Parser.php index 30d7ac2..89f15ad 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -55,7 +55,7 @@ class Parser implements \IteratorAggregate, PositionAware private $hasSingleJsonPointer; /** @var bool */ - private bool $recursive; + private $recursive; /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 @@ -285,7 +285,7 @@ public function getIterator() private function remainingTokens() { /** @var \Iterator $iterator */ - $iterator = $this->tokens instanceOf IteratorAggregate ? $this->tokens->getIterator() : $this->tokens; + $iterator = $this->tokens instanceof IteratorAggregate ? 
$this->tokens->getIterator() : $this->tokens; while ($iterator->valid()) { yield $iterator->current(); $iterator->next(); diff --git a/test/JsonMachineTest/ItemsTest.php b/test/JsonMachineTest/ItemsTest.php index 6ecb0ca..842c13c 100644 --- a/test/JsonMachineTest/ItemsTest.php +++ b/test/JsonMachineTest/ItemsTest.php @@ -148,7 +148,7 @@ public function testRecursiveIteration() foreach ($items as $emojis) { $this->assertInstanceOf(Traversable::class, $emojis); foreach ($emojis as $emoji) { - $this->assertSame(":)", $emoji); + $this->assertSame(':)', $emoji); } } } diff --git a/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php b/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php index 35946de..5d7812e 100644 --- a/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php +++ b/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php @@ -1,9 +1,10 @@ assertInstanceOf(Traversable::class, $object); foreach ($object as $key => $values) { $this->assertInstanceOf(Traversable::class, $values); - $this->assertSame("numbers", $key); + $this->assertSame('numbers', $key); foreach ($values as $fortyTwo) { $this->assertSame(42, $fortyTwo); } From ccd4a7d39084e485ccb611e5a4a42319b44f6a8f Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 22:05:47 +0200 Subject: [PATCH 06/38] Tokens reverted. 
Iterator memoization moved from Tokens to Parser --- src/Parser.php | 23 ++++++++++++++++------- src/Tokens.php | 15 --------------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 89f15ad..fb7f5b1 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -4,6 +4,8 @@ namespace JsonMachine; +use Generator; +use Iterator; use IteratorAggregate; use JsonMachine\Exception\InvalidArgumentException; use JsonMachine\Exception\JsonMachineException; @@ -36,6 +38,9 @@ class Parser implements \IteratorAggregate, PositionAware /** @var Traversable */ private $tokens; + /** @var Iterator */ + private $tokensIterator; + /** @var ItemDecoder */ private $jsonDecoder; @@ -59,7 +64,7 @@ class Parser implements \IteratorAggregate, PositionAware /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 - * @param ItemDecoder $jsonDecoder + * @param bool $recursive * * @throws InvalidArgumentException */ @@ -68,6 +73,8 @@ public function __construct(Traversable $tokens, $jsonPointer = '', ItemDecoder $jsonPointers = (new ValidJsonPointers((array) $jsonPointer))->toArray(); $this->tokens = $tokens; + $this->tokensIterator = $tokens instanceof IteratorAggregate ? 
$tokens->getIterator() : $tokens; + $this->jsonDecoder = $jsonDecoder ?: new ExtJsonDecoder(); if ($recursive) { $this->jsonDecoder = new StringOnlyDecoder($this->jsonDecoder); @@ -86,7 +93,7 @@ private function buildPaths(array $jsonPointers): array } /** - * @return \Generator + * @return Generator * * @throws PathNotFoundException */ @@ -114,7 +121,7 @@ public function getIterator() $iteratorLevel = 0; // local variables for faster name lookups - $tokens = $this->tokens; + $tokens = $this->tokensIterator; foreach ($tokens as $token) { if ($currentPathChanged) { @@ -280,12 +287,11 @@ public function getIterator() } /** - * @return void + * @return Generator */ private function remainingTokens() { - /** @var \Iterator $iterator */ - $iterator = $this->tokens instanceof IteratorAggregate ? $this->tokens->getIterator() : $this->tokens; + $iterator = $this->tokensIterator; while ($iterator->valid()) { yield $iterator->current(); $iterator->next(); @@ -376,7 +382,10 @@ public function getMatchedJsonPointer(): string */ private function error($msg, $token, $exception = SyntaxErrorException::class) { - throw new $exception($msg." '".$token."'", method_exists($this->tokens, 'getPosition') ? $this->tokens->getPosition() : ''); + throw new $exception( + $msg." '".$token."'", + $this->tokens instanceof PositionAware ? $this->tokens->getPosition() : '' + ); } /** diff --git a/src/Tokens.php b/src/Tokens.php index 8ac8a31..2ba96c8 100644 --- a/src/Tokens.php +++ b/src/Tokens.php @@ -11,9 +11,6 @@ class Tokens implements \IteratorAggregate, PositionAware /** @var iterable */ private $jsonChunks; - /** @var Generator */ - private $generator; - /** * @param iterable $jsonChunks */ @@ -27,18 +24,6 @@ public function __construct($jsonChunks) */ #[\ReturnTypeWillChange] public function getIterator() - { - if ( ! 
$this->generator) { - $this->generator = $this->innerGenerator(); - } - - return $this->generator; - } - - /** - * @return Generator - */ - public function innerGenerator() { $insignificantBytes = $this->insignificantBytes(); $tokenBoundaries = $this->tokenBoundaries(); From adcfcde790e83a5399251ddac042e3fbb4a2cac4 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 22:19:24 +0200 Subject: [PATCH 07/38] Removed useless condition --- src/Parser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parser.php b/src/Parser.php index fb7f5b1..e3e00fd 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -233,7 +233,7 @@ public function getIterator() $expectedType = 96; // 96 = self::AFTER_ARRAY_VALUE; } } - if ($currentLevel > $iteratorLevel && ! $this->recursive) { + if ($currentLevel > $iteratorLevel) { continue; // a valid json chunk is not completed yet } if ($jsonBuffer !== '') { From d40b0408e6295f6a656f91ed7278e5e26818e33c Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 22:22:56 +0200 Subject: [PATCH 08/38] $jsonBuffer -> $jsonValue --- src/Parser.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index e3e00fd..f1814a8 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -109,7 +109,7 @@ public function getIterator() $pointersFound = []; $currentLevel = -1; $stack = [$currentLevel => null]; - $jsonBuffer = ''; + $jsonValue = ''; $key = null; $objectKeyExpected = false; $inObject = true; // hack to make "!$inObject" in first iteration work. Better code structure? 
@@ -158,10 +158,10 @@ public function getIterator() ) ) { if ($this->recursive && ($token == '{' || $token == '[')) { - $jsonBuffer = new self($this->remainingTokens(), '', $this->jsonDecoder, true); + $jsonValue = new self($this->remainingTokens(), '', $this->jsonDecoder, true); $token = ' '; } else { - $jsonBuffer .= $token; + $jsonValue .= $token; } } // todo move this switch to the top just after the syntax check to be a correct FSM @@ -236,9 +236,9 @@ public function getIterator() if ($currentLevel > $iteratorLevel) { continue; // a valid json chunk is not completed yet } - if ($jsonBuffer !== '') { - $valueResult = $this->jsonDecoder->decode($jsonBuffer); - $jsonBuffer = ''; + if ($jsonValue !== '') { + $valueResult = $this->jsonDecoder->decode($jsonValue); + $jsonValue = ''; if ( ! $valueResult->isOk()) { $this->error($valueResult->getErrorMessage(), $token); } From bda567afdedfa4a4acc04a12302a30c00281892d Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sun, 23 Oct 2022 22:11:25 +0200 Subject: [PATCH 09/38] Finishing of an unfinished sub-iterator for convenience --- README.md | 7 +++---- src/Parser.php | 20 +++++++++++++++++--- test/JsonMachineTest/ParserTest.php | 17 +++++++++++++++++ 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 9b3957a..75e084e 100644 --- a/README.md +++ b/README.md @@ -349,10 +349,9 @@ foreach ($users as $user) { // $user instanceof Traversable, not an array/object } ``` -> You **MUST** iterate such lazy `Traversable`s in real time. -> **NEVER** skip an iteration of such `Traversable` and -> **NEVER** keep references to such past `Traversable`s to iterate them later -> or you end up (almost) like [this guy](https://xkcd.com/292/). +> If you skip iteration of such lazy deeper-level `Traversable` and advance to a next value, you will not be able to iterate it later. +> JSON Machine must iterate it the background to be able to read next value. 
+> Such an attempt will result in closed generator exception. ### What is JSON Pointer anyway? diff --git a/src/Parser.php b/src/Parser.php index f1814a8..8b1bef0 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -158,7 +158,7 @@ public function getIterator() ) ) { if ($this->recursive && ($token == '{' || $token == '[')) { - $jsonValue = new self($this->remainingTokens(), '', $this->jsonDecoder, true); + $jsonValue = (new self($this->remainingTokens(), '', $this->jsonDecoder, true))->getIterator(); $token = ' '; } else { $jsonValue .= $token; @@ -242,17 +242,22 @@ public function getIterator() if ( ! $valueResult->isOk()) { $this->error($valueResult->getErrorMessage(), $token); } + $value = $valueResult->getValue(); if ($iteratorStruct == '[') { - yield $valueResult->getValue(); + yield $value; } else { $keyResult = $this->jsonDecoder->decode($key); if ( ! $keyResult->isOk()) { $this->error($keyResult->getErrorMessage(), $key); } - yield $keyResult->getValue() => $valueResult->getValue(); + yield $keyResult->getValue() => $value; unset($keyResult); } + if ($value instanceof Iterator && $value->valid()) { + $this->eatAllRemainingTokens($value); + } unset($valueResult); + unset($value); } if ( ! 
array_diff($jsonPointerPath, $currentPath) @@ -298,6 +303,15 @@ private function remainingTokens() } } + private function eatAllRemainingTokens(Iterator $iterator) + { + foreach ($iterator as $item) { + if ($item instanceof Iterator) { + $this->eatAllRemainingTokens($item); + } + } + } + private function tokenTypes() { $allBytes = []; diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index fb7e782..a0d7722 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -567,4 +567,21 @@ public function testZigZagRecursiveIteration() $this->assertSame($objectKeysToVisit, $objectKeysVisited); $this->assertSame($valuesToVisit, $valuesVisited); } + + public function testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEndByUser() + { + $iterator = new Parser(new Tokens(['[1,[{},2,3],4]']), '', null, true); + $array = []; + + foreach ($iterator as $item) { + $array[] = $item; + } + + $this->assertSame(1, $array[0]); + $this->assertInstanceOf(Traversable::class, $array[1]); + $this->assertSame(4, $array[2]); + + $this->expectExceptionMessage('generator'); + iterator_to_array($array[1]); + } } From da88e4da50ae9d2b6f809b5b942c10ff498ed3a3 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Mon, 24 Oct 2022 13:29:32 +0200 Subject: [PATCH 10/38] Readme fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 75e084e..c252163 100644 --- a/README.md +++ b/README.md @@ -339,7 +339,7 @@ foreach ($users as $user) { // $user instanceof Traversable, not an array/object foreach ($user as $userField => $userValue) { if ($userField == 'friends') { foreach ($userValue as $friend) { // $userValue instanceof Traversable, not an array/object - foreach ($user as $friendField => $friendValue) { // $friend instanceof Traversable, not an array/object + foreach ($friend as $friendField => $friendValue) { // $friend instanceof Traversable, not an array/object // do 
whatever you want here // maybe rather use PHP's Recursive*Iterators } From b22c24d33055fb5e26416758fc1b1b42badfebcb Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Mon, 24 Oct 2022 22:45:47 +0200 Subject: [PATCH 11/38] NestedIterator skeleton --- src/NestedIterator.php | 68 +++++++++++++++++++ test/JsonMachineTest/NestedIteratorTest.php | 75 +++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 src/NestedIterator.php create mode 100644 test/JsonMachineTest/NestedIteratorTest.php diff --git a/src/NestedIterator.php b/src/NestedIterator.php new file mode 100644 index 0000000..ce81754 --- /dev/null +++ b/src/NestedIterator.php @@ -0,0 +1,68 @@ +iterator = $iterator; + } + + #[\ReturnTypeWillChange] + public function current() + { + return $this->iterator->current(); + } + + #[\ReturnTypeWillChange] + public function next() + { + return $this->iterator->next(); + } + + #[\ReturnTypeWillChange] + public function key() + { + return $this->iterator->key(); + } + + #[\ReturnTypeWillChange] + public function valid() + { + return $this->iterator->valid(); + } + + #[\ReturnTypeWillChange] + public function rewind() + { + $this->iterator->rewind(); + } + + #[\ReturnTypeWillChange] + public function hasChildren() + { + return $this->iterator->current() instanceof Iterator; + } + + #[\ReturnTypeWillChange] + public function getChildren() + { + return $this->hasChildren() ? 
new self($this->current()) : null; + } + + public function advanceTo($key) + { + } + + public function materialize() + { + } +} diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php new file mode 100644 index 0000000..1fcbd0c --- /dev/null +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -0,0 +1,75 @@ +assertSame([1, 2, 3], $result); + } + + public function testHasChildrenIgnoresArrays() + { + $generator = function () {yield from [1, [2], 3]; }; + $iterator = new NestedIterator($generator()); + + foreach ($iterator as $item) { + $this->assertFalse($iterator->hasChildren()); + } + } + + public function testHasChildrenFollowsIterators() + { + $generator = function () {yield from [1, new \ArrayIterator([]), 3]; }; + $iterator = new NestedIterator($generator()); + + $result = []; + foreach ($iterator as $item) { + $result[] = $iterator->hasChildren(); + } + + $this->assertSame([false, true, false], $result); + } + + public function testGetChildrenReturnsNestedIterator() + { + $generator = function () {yield from [1, new \ArrayIterator([]), 3]; }; + $iterator = new NestedIterator($generator()); + + $result = []; + foreach ($iterator as $item) { + $result[] = $iterator->getChildren(); + } + + $this->assertSame(null, $result[0]); + $this->assertInstanceOf(NestedIterator::class, $result[1]); + $this->assertSame(null, $result[2]); + } + + public function testGetChildrenReturnsCorrectItems() + { + $generator = function () {yield from [1, new \ArrayIterator([2]), 3]; }; + $iterator = new RecursiveIteratorIterator( + new NestedIterator($generator()) + ); + + $result = iterator_to_array($iterator, false); + + $this->assertSame([1, 2, 3], $result); + } +} From 3b3129bf14b330f08a81dda005237dc095684296 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Wed, 26 Oct 2022 23:21:02 +0200 Subject: [PATCH 12/38] advanceToKey() --- src/NestedIterator.php | 16 ++++++++++++++-- test/JsonMachineTest/NestedIteratorTest.php | 20 
++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/NestedIterator.php b/src/NestedIterator.php index ce81754..59f7165 100644 --- a/src/NestedIterator.php +++ b/src/NestedIterator.php @@ -5,6 +5,7 @@ namespace JsonMachine; use Iterator; +use JsonMachine\Exception\JsonMachineException; class NestedIterator implements \RecursiveIterator { @@ -58,11 +59,22 @@ public function getChildren() return $this->hasChildren() ? new self($this->current()) : null; } - public function advanceTo($key) + public function advanceToKey($key) { + $iterator = $this->iterator; + + while ($key !== $iterator->key() && $iterator->valid()) { + $iterator->next(); + } + + if ($key !== $iterator->key()) { + throw new JsonMachineException("Key '$key' was not found."); + } + + return $iterator->current(); } - public function materialize() + public function toArray() { } } diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php index 1fcbd0c..69ffbd4 100644 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -72,4 +72,24 @@ public function testGetChildrenReturnsCorrectItems() $this->assertSame([1, 2, 3], $result); } + + public function testAdvanceToKeyWorks() + { + $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; + $iterator = new NestedIterator($generator()); + + $this->assertSame(1, $iterator->advanceToKey('one')); + $this->assertSame(1, $iterator->advanceToKey('one')); + $this->assertSame(2, $iterator->advanceToKey('two')); + $this->assertSame(3, $iterator->advanceToKey('three')); + } + + public function testAdvanceToKeyThrows() + { + $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; + $iterator = new NestedIterator($generator()); + + $this->expectExceptionMessage('not found'); + $iterator->advanceToKey('four'); + } } From 55b26de19d7276c37320faed24984b50a0bee74b Mon Sep 17 00:00:00 2001 From: Filip 
Halaxa Date: Thu, 27 Oct 2022 14:56:12 +0200 Subject: [PATCH 13/38] toArray() --- src/NestedIterator.php | 16 ++++++++++++++- test/JsonMachineTest/NestedIteratorTest.php | 22 +++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/NestedIterator.php b/src/NestedIterator.php index 59f7165..b132a4b 100644 --- a/src/NestedIterator.php +++ b/src/NestedIterator.php @@ -74,7 +74,21 @@ public function advanceToKey($key) return $iterator->current(); } - public function toArray() + public function toArray(): array { + return self::toArrayRecursive($this); + } + + private static function toArrayRecursive(\Traversable $traversable): array + { + $array = []; + foreach ($traversable as $key => $value) { + if ($value instanceof \Traversable) { + $value = self::toArrayRecursive($value); + } + $array[$key] = $value; + } + + return $array; } } diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php index 69ffbd4..891a4e3 100644 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -4,6 +4,8 @@ namespace JsonMachineTest; +use Generator; +use Iterator; use JsonMachine\NestedIterator; use PHPUnit\Framework\TestCase; use RecursiveIteratorIterator; @@ -92,4 +94,24 @@ public function testAdvanceToKeyThrows() $this->expectExceptionMessage('not found'); $iterator->advanceToKey('four'); } + + public function testToArray() + { + $generator = function ($iterable) {yield from ['one' => 1, 'two' => 2, 'i' => $iterable, 'three' => 3]; }; + $iterator = new NestedIterator($generator($generator(['42']))); + + $expected = [ + 'one' => 1, + 'two' => 2, + 'i' => [ + 'one' => 1, + 'two' => 2, + 'i' => ['42'], + 'three' => 3 + ], + 'three' => 3 + ]; + + $this->assertSame($expected, $iterator->toArray()); + } } From fd46d467bd2a54e2a3e00c55a7be6434dae56f50 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Thu, 30 Nov 2023 17:41:20 +0100 Subject: [PATCH 14/38] PHPStan fixes + 
testRecursiveIterationYieldsNestedIterator --- src/NestedIterator.php | 2 +- src/Parser.php | 20 ++++++++++++++++---- test/JsonMachineTest/NestedIteratorTest.php | 6 ++---- test/JsonMachineTest/ParserTest.php | 10 ++++++++++ 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/NestedIterator.php b/src/NestedIterator.php index b132a4b..a40b76f 100644 --- a/src/NestedIterator.php +++ b/src/NestedIterator.php @@ -26,7 +26,7 @@ public function current() #[\ReturnTypeWillChange] public function next() { - return $this->iterator->next(); + $this->iterator->next(); } #[\ReturnTypeWillChange] diff --git a/src/Parser.php b/src/Parser.php index fbc3595..d93085d 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -38,7 +38,7 @@ class Parser implements \IteratorAggregate, PositionAware /** @var Traversable */ private $tokens; - /** @var Iterator */ + /** @var Iterator */ private $tokensIterator; /** @var ItemDecoder */ @@ -64,7 +64,6 @@ class Parser implements \IteratorAggregate, PositionAware /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 - * @param bool $recursive * * @throws InvalidArgumentException */ @@ -73,7 +72,13 @@ public function __construct(Traversable $tokens, $jsonPointer = '', ItemDecoder $jsonPointers = (new ValidJsonPointers((array) $jsonPointer))->toArray(); $this->tokens = $tokens; - $this->tokensIterator = $tokens instanceof IteratorAggregate ? 
$tokens->getIterator() : $tokens; + if ($tokens instanceof IteratorAggregate) { + $this->tokensIterator = $tokens->getIterator(); + } elseif ($tokens instanceof Iterator) { + $this->tokensIterator = $tokens; + } else { + throw new InvalidArgumentException('$tokens must be either an instance of Iterator or IteratorAggregate.'); + } $this->jsonDecoder = $jsonDecoder ?: new ExtJsonDecoder(); if ($recursive) { @@ -158,7 +163,14 @@ public function getIterator() ) ) { if ($this->recursive && ($token == '{' || $token == '[')) { - $jsonValue = (new self($this->remainingTokens(), '', $this->jsonDecoder, true))->getIterator(); + $jsonValue = new NestedIterator( + (new self( + $this->remainingTokens(), + '', + $this->jsonDecoder, + true + ))->getIterator() + ); $token = ' '; } else { $jsonValue .= $token; diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php index 891a4e3..faeffa9 100644 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -4,8 +4,6 @@ namespace JsonMachineTest; -use Generator; -use Iterator; use JsonMachine\NestedIterator; use PHPUnit\Framework\TestCase; use RecursiveIteratorIterator; @@ -107,9 +105,9 @@ public function testToArray() 'one' => 1, 'two' => 2, 'i' => ['42'], - 'three' => 3 + 'three' => 3, ], - 'three' => 3 + 'three' => 3, ]; $this->assertSame($expected, $iterator->toArray()); diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index 62983c4..b14b342 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -9,6 +9,7 @@ use JsonMachine\Exception\SyntaxErrorException; use JsonMachine\Exception\UnexpectedEndSyntaxErrorException; use JsonMachine\JsonDecoder\ExtJsonDecoder; +use JsonMachine\NestedIterator; use JsonMachine\Parser; use JsonMachine\StringChunks; use JsonMachine\Tokens; @@ -589,4 +590,13 @@ public function testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEnd 
$this->expectExceptionMessage('generator'); iterator_to_array($array[1]); } + + public function testRecursiveIterationYieldsNestedIterator() + { + $iterator = new Parser(new Tokens(['[[1]]']), '', null, true); + + foreach ($iterator as $item) { + $this->assertInstanceOf(NestedIterator::class, $item); + } + } } From 01fc4347027493cf7bf5ac4ae908a1fb60d985e7 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Thu, 30 Nov 2023 18:47:04 +0100 Subject: [PATCH 15/38] Readme update --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 65168d3..ee57db5 100644 --- a/README.md +++ b/README.md @@ -322,8 +322,8 @@ foreach ($fruits as $key => $value) { ### Recursive iteration (BETA) Recursive iteration can be enabled via `recursive` option set to `true`. -Every JSON iterable that JSON Machine encounters will then be yielded as a lazy instance of `Traversable`. -No JSON vector will be materialized and kept in memory. +Every JSON iterable that JSON Machine encounters will then be yielded as an instance of `NestedIterator`. +No JSON array or object will be materialized and kept in memory. The only PHP values you get materialized will be scalar values. 
Let's see an example with many, many users with many, many friends @@ -335,11 +335,10 @@ use JsonMachine\Items; $users = Items::fromFile('users.json', ['recursive' => true]); foreach ($users as $user) { // $user instanceof Traversable, not an array/object foreach ($user as $userField => $userValue) { - if ($userField == 'friends') { + if ($userField === 'friends') { foreach ($userValue as $friend) { // $userValue instanceof Traversable, not an array/object foreach ($friend as $friendField => $friendValue) { // $friend instanceof Traversable, not an array/object // do whatever you want here - // maybe rather use PHP's Recursive*Iterators } } } @@ -347,7 +346,8 @@ foreach ($users as $user) { // $user instanceof Traversable, not an array/object } ``` -> If you skip iteration of such lazy deeper-level `Traversable` and advance to a next value, you will not be able to iterate it later. +> If you break an iteration of such a lazy deeper-level iterator (e.g. you skip some `"friends"` via `break`) +> and advance to the next value (e.g. the next `user`), you will not be able to iterate it later. > JSON Machine must iterate it the background to be able to read next value. > Such an attempt will result in closed generator exception. @@ -555,8 +555,8 @@ See [Recursive iteration](#recursive). ### "I am still out of luck" -It probably means that a single JSON string itself is too big to fit in memory. -For example very big file encoded as base64. +It probably means that a single JSON scalar string itself is too big to fit in memory. +For example, a very big base64-encoded file. In that case you will probably be still out of luck until JSON Machine supports yielding of scalar values as PHP streams.
From 754d3609c08de7c9a2d09df3187ed4454dde9eee Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Fri, 1 Dec 2023 13:21:20 +0100 Subject: [PATCH 16/38] RecursiveItems facade --- src/FacadeTrait.php | 168 +++++++++++++++++++++++++++++ src/Items.php | 154 +------------------------- src/RecursiveItems.php | 18 ++++ test/JsonMachineTest/ItemsTest.php | 14 +-- 4 files changed, 190 insertions(+), 164 deletions(-) create mode 100644 src/FacadeTrait.php create mode 100644 src/RecursiveItems.php diff --git a/src/FacadeTrait.php b/src/FacadeTrait.php new file mode 100644 index 0000000..dbd8057 --- /dev/null +++ b/src/FacadeTrait.php @@ -0,0 +1,168 @@ +chunks = $bytesIterator; + $this->jsonPointer = $options['pointer']; + $this->jsonDecoder = $options['decoder']; + $this->debugEnabled = $options['debug']; + + if ($this->debugEnabled) { + $tokensClass = TokensWithDebugging::class; + } else { + $tokensClass = Tokens::class; + } + + $this->parser = new Parser( + new $tokensClass( + $this->chunks + ), + $this->jsonPointer, + $this->jsonDecoder ?: new ExtJsonDecoder(), + $this->recursive() + ); + } + + /** + * @param string $string + * + * @return self + * + * @throws InvalidArgumentException + */ + public static function fromString($string, array $options = []) + { + return new self(new StringChunks($string), $options); + } + + /** + * @param string $file + * + * @return self + * + * @throws Exception\InvalidArgumentException + */ + public static function fromFile($file, array $options = []) + { + return new self(new FileChunks($file), $options); + } + + /** + * @param resource $stream + * + * @return self + * + * @throws Exception\InvalidArgumentException + */ + public static function fromStream($stream, array $options = []) + { + return new self(new StreamChunks($stream), $options); + } + + /** + * @param iterable $iterable + * + * @return self + * + * @throws Exception\InvalidArgumentException + */ + public static function fromIterable($iterable, array $options = []) + { + 
return new self($iterable, $options); + } + + /** + * @return \Generator + * + * @throws Exception\PathNotFoundException + */ + #[\ReturnTypeWillChange] + public function getIterator() + { + return $this->parser->getIterator(); + } + + /** + * @throws Exception\JsonMachineException + */ + public function getPosition() + { + return $this->parser->getPosition(); + } + + public function getJsonPointers(): array + { + return $this->parser->getJsonPointers(); + } + + /** + * @throws Exception\JsonMachineException + */ + public function getCurrentJsonPointer(): string + { + return $this->parser->getCurrentJsonPointer(); + } + + /** + * @throws Exception\JsonMachineException + */ + public function getMatchedJsonPointer(): string + { + return $this->parser->getMatchedJsonPointer(); + } + + /** + * @return bool + */ + public function isDebugEnabled() + { + return $this->debugEnabled; + } +} diff --git a/src/Items.php b/src/Items.php index 37f2d10..5749f8a 100644 --- a/src/Items.php +++ b/src/Items.php @@ -4,163 +4,15 @@ namespace JsonMachine; -use JsonMachine\Exception\InvalidArgumentException; -use JsonMachine\JsonDecoder\ExtJsonDecoder; -use JsonMachine\JsonDecoder\ItemDecoder; - /** * Entry-point facade for JSON Machine. 
*/ final class Items implements \IteratorAggregate, PositionAware { - /** - * @var iterable - */ - private $chunks; - - /** - * @var string - */ - private $jsonPointer; - - /** - * @var ItemDecoder|null - */ - private $jsonDecoder; - - /** - * @var Parser - */ - private $parser; - - /** - * @var bool - */ - private $debugEnabled; - - /** - * @param iterable $bytesIterator - * - * @throws InvalidArgumentException - */ - public function __construct($bytesIterator, array $options = []) - { - $options = new ItemsOptions($options); - - $this->chunks = $bytesIterator; - $this->jsonPointer = $options['pointer']; - $this->jsonDecoder = $options['decoder']; - $this->debugEnabled = $options['debug']; - - if ($this->debugEnabled) { - $tokensClass = TokensWithDebugging::class; - } else { - $tokensClass = Tokens::class; - } - - $this->parser = new Parser( - new $tokensClass( - $this->chunks - ), - $this->jsonPointer, - $this->jsonDecoder ?: new ExtJsonDecoder(), - $options['recursive'] - ); - } - - /** - * @param string $string - * - * @return self - * - * @throws InvalidArgumentException - */ - public static function fromString($string, array $options = []) - { - return new self(new StringChunks($string), $options); - } - - /** - * @param string $file - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromFile($file, array $options = []) - { - return new self(new FileChunks($file), $options); - } - - /** - * @param resource $stream - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromStream($stream, array $options = []) - { - return new self(new StreamChunks($stream), $options); - } - - /** - * @param iterable $iterable - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromIterable($iterable, array $options = []) - { - return new self($iterable, $options); - } - - /** - * @return \Generator - * - * @throws 
Exception\PathNotFoundException - */ - #[\ReturnTypeWillChange] - public function getIterator() - { - return $this->parser->getIterator(); - } - - /** - * @throws Exception\JsonMachineException - */ - public function getPosition() - { - return $this->parser->getPosition(); - } - - public function getJsonPointers(): array - { - return $this->parser->getJsonPointers(); - } - - /** - * @throws Exception\JsonMachineException - */ - public function getCurrentJsonPointer(): string - { - return $this->parser->getCurrentJsonPointer(); - } - - /** - * @throws Exception\JsonMachineException - */ - public function getMatchedJsonPointer(): string - { - return $this->parser->getMatchedJsonPointer(); - } + use FacadeTrait; - /** - * @return bool - */ - public function isDebugEnabled() + protected function recursive(): bool { - return $this->debugEnabled; + return false; } } diff --git a/src/RecursiveItems.php b/src/RecursiveItems.php new file mode 100644 index 0000000..3d470ce --- /dev/null +++ b/src/RecursiveItems.php @@ -0,0 +1,18 @@ +assertSame(3, iterator_count($items)); } - - public function testRecursiveIteration() - { - $items = Items::fromString('[[":)"]]', ['recursive' => true]); - - foreach ($items as $emojis) { - $this->assertInstanceOf(Traversable::class, $emojis); - foreach ($emojis as $emoji) { - $this->assertSame(':)', $emoji); - } - } - } } From cf83311f16867ee198ba28f5727e38a46abdb009 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Mon, 26 Aug 2024 21:43:32 +0200 Subject: [PATCH 17/38] NestedIterator replaced with RecursiveItems --- src/FacadeTrait.php | 126 +++++-------------- src/Items.php | 65 +++++++++- src/Parser.php | 16 ++- src/RecursiveItems.php | 114 ++++++++++++++++- test/JsonMachineTest/ItemsTest.php | 1 - test/JsonMachineTest/NestedIteratorTest.php | 15 --- test/JsonMachineTest/ParserTest.php | 9 -- test/JsonMachineTest/RecursiveItemsTest.json | 1 + test/JsonMachineTest/RecursiveItemsTest.php | 75 +++++++++++ 9 files changed, 286 insertions(+), 136 
deletions(-) create mode 100644 test/JsonMachineTest/RecursiveItemsTest.json create mode 100644 test/JsonMachineTest/RecursiveItemsTest.php diff --git a/src/FacadeTrait.php b/src/FacadeTrait.php index dbd8057..412e984 100644 --- a/src/FacadeTrait.php +++ b/src/FacadeTrait.php @@ -10,21 +10,6 @@ trait FacadeTrait { - /** - * @var iterable - */ - private $chunks; - - /** - * @var string - */ - private $jsonPointer; - - /** - * @var ItemDecoder|null - */ - private $jsonDecoder; - /** * @var Parser */ @@ -35,100 +20,34 @@ trait FacadeTrait */ private $debugEnabled; - /** - * @todo Make private when PHP 7 stops being supported - */ - protected abstract function recursive(): bool; + public function isDebugEnabled(): bool + { + return $this->debugEnabled; + } /** * @param iterable $bytesIterator * * @throws InvalidArgumentException */ - public function __construct($bytesIterator, array $options = []) + private static function createParser($bytesIterator, ItemsOptions $options, bool $recursive): Parser { - $options = new ItemsOptions($options); - - $this->chunks = $bytesIterator; - $this->jsonPointer = $options['pointer']; - $this->jsonDecoder = $options['decoder']; - $this->debugEnabled = $options['debug']; - - if ($this->debugEnabled) { + if ($options['debug']) { $tokensClass = TokensWithDebugging::class; } else { $tokensClass = Tokens::class; } - $this->parser = new Parser( + return new Parser( new $tokensClass( - $this->chunks + $bytesIterator ), - $this->jsonPointer, - $this->jsonDecoder ?: new ExtJsonDecoder(), - $this->recursive() + $options['pointer'], + $options['decoder'] ?: new ExtJsonDecoder(), + $recursive ); } - /** - * @param string $string - * - * @return self - * - * @throws InvalidArgumentException - */ - public static function fromString($string, array $options = []) - { - return new self(new StringChunks($string), $options); - } - - /** - * @param string $file - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static 
function fromFile($file, array $options = []) - { - return new self(new FileChunks($file), $options); - } - - /** - * @param resource $stream - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromStream($stream, array $options = []) - { - return new self(new StreamChunks($stream), $options); - } - - /** - * @param iterable $iterable - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromIterable($iterable, array $options = []) - { - return new self($iterable, $options); - } - - /** - * @return \Generator - * - * @throws Exception\PathNotFoundException - */ - #[\ReturnTypeWillChange] - public function getIterator() - { - return $this->parser->getIterator(); - } - /** * @throws Exception\JsonMachineException */ @@ -159,10 +78,23 @@ public function getMatchedJsonPointer(): string } /** - * @return bool + * @param string $string */ - public function isDebugEnabled() - { - return $this->debugEnabled; - } + abstract public static function fromString($string, array $options = []): self; + + /** + * @param string $file + */ + abstract public static function fromFile($file, array $options = []): self; + + /** + * @param resource $stream + */ + abstract public static function fromStream($stream, array $options = []): self; + + /** + * @param iterable $iterable + */ + abstract public static function fromIterable($iterable, array $options = []): self; + } diff --git a/src/Items.php b/src/Items.php index 5749f8a..2136223 100644 --- a/src/Items.php +++ b/src/Items.php @@ -4,6 +4,8 @@ namespace JsonMachine; +use JsonMachine\Exception\InvalidArgumentException; + /** * Entry-point facade for JSON Machine. 
*/ @@ -11,8 +13,67 @@ final class Items implements \IteratorAggregate, PositionAware { use FacadeTrait; - protected function recursive(): bool + /** + * @param iterable $bytesIterator + * + * @throws InvalidArgumentException + */ + public function __construct($bytesIterator, array $options = []) + { + $options = new ItemsOptions($options); + $this->debugEnabled = $options['debug']; + + $this->parser = $this->createParser($bytesIterator, $options, false); + } + + /** + * @param string $string + * + * @throws InvalidArgumentException + */ + public static function fromString($string, array $options = []): self + { + return new self(new StringChunks($string), $options); + } + + /** + * @param string $file + * + * @throws Exception\InvalidArgumentException + */ + public static function fromFile($file, array $options = []): self + { + return new self(new FileChunks($file), $options); + } + + /** + * @param resource $stream + * + * @throws Exception\InvalidArgumentException + */ + public static function fromStream($stream, array $options = []): self + { + return new self(new StreamChunks($stream), $options); + } + + /** + * @param iterable $iterable + * + * @throws Exception\InvalidArgumentException + */ + public static function fromIterable($iterable, array $options = []): self + { + return new self($iterable, $options); + } + + /** + * @return \Generator + * + * @throws Exception\PathNotFoundException + */ + #[\ReturnTypeWillChange] + public function getIterator() { - return false; + return $this->parser->getIterator(); } } diff --git a/src/Parser.php b/src/Parser.php index d93085d..80691aa 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -163,14 +163,12 @@ public function getIterator() ) ) { if ($this->recursive && ($token == '{' || $token == '[')) { - $jsonValue = new NestedIterator( - (new self( - $this->remainingTokens(), - '', - $this->jsonDecoder, - true - ))->getIterator() - ); + $jsonValue = (new self( + $this->remainingTokens(), + '', + $this->jsonDecoder, + 
true + ))->$this->getIterator(); $token = ' '; } else { $jsonValue .= $token; @@ -400,7 +398,7 @@ public function getCurrentJsonPointer(): string */ public function getMatchedJsonPointer(): string { - if ($this->matchedJsonPointer === null) { + if ($this->isOutsideGenerator()) { throw new JsonMachineException(__METHOD__.' must be called inside a loop'); } diff --git a/src/RecursiveItems.php b/src/RecursiveItems.php index 3d470ce..f8e1325 100644 --- a/src/RecursiveItems.php +++ b/src/RecursiveItems.php @@ -4,15 +4,123 @@ namespace JsonMachine; +use Iterator; +use JsonMachine\Exception\InvalidArgumentException; + /** * Entry-point facade for recursive iteration. */ -final class RecursiveItems implements \IteratorAggregate, PositionAware +final class RecursiveItems implements \RecursiveIterator, PositionAware { use FacadeTrait; - protected function recursive(): bool + /** @var Parser */ + private $parser; + + /** @var ItemsOptions */ + private $options; + + /** @var Iterator */ + private $parserIterator; + + public function __construct(Parser $parser, ItemsOptions $options) + { + $this->parser = $parser; + $this->options = $options; + $this->debugEnabled = $options['debug']; + } + + /** + * @throws InvalidArgumentException + */ + public static function fromString($string, array $options = []): self + { + $options = new ItemsOptions($options); + return new self( + self::createParser(new StringChunks($string), $options, true), + $options + ); + } + + /** + * @throws InvalidArgumentException + */ + public static function fromFile($file, array $options = []): self { - return true; + $options = new ItemsOptions($options); + return new self( + self::createParser(new FileChunks($file), $options, true), + $options + ); + } + + /** + * @throws InvalidArgumentException + */ + public static function fromStream($stream, array $options = []): self + { + $options = new ItemsOptions($options); + return new self( + self::createParser(new StreamChunks($stream), $options, true), + 
$options + ); + } + + /** + * @throws InvalidArgumentException + */ + public static function fromIterable($iterable, array $options = []): self + { + $options = new ItemsOptions($options); + return new self( + self::createParser($iterable, $options, true), + $options + ); + } + + public function current() + { + $current = $this->parserIterator->current(); + if ($current instanceof Parser) { + return new self($current, $this->options); + } + + return $current; + } + + public function next() + { + $this->parserIterator->next(); + } + + public function key() + { + return $this->parserIterator->key(); + } + + public function valid(): bool + { + return $this->parserIterator->valid(); + } + + public function rewind() + { + $this->parserIterator = $this->parser->getIterator(); + $this->parserIterator->rewind(); + } + + public function hasChildren(): bool + { + return $this->current() instanceof self; + } + + public function getChildren() + { + $current = $this->current(); + if ($current instanceof self) { + return $current; + } + + return null; } } diff --git a/test/JsonMachineTest/ItemsTest.php b/test/JsonMachineTest/ItemsTest.php index b68668a..9c4ab18 100644 --- a/test/JsonMachineTest/ItemsTest.php +++ b/test/JsonMachineTest/ItemsTest.php @@ -9,7 +9,6 @@ /** * @covers \JsonMachine\Items - * @covers \JsonMachine\RecursiveItems */ class ItemsTest extends \PHPUnit_Framework_TestCase { diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php index faeffa9..e053505 100644 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -46,21 +46,6 @@ public function testHasChildrenFollowsIterators() $this->assertSame([false, true, false], $result); } - public function testGetChildrenReturnsNestedIterator() - { - $generator = function () {yield from [1, new \ArrayIterator([]), 3]; }; - $iterator = new NestedIterator($generator()); - - $result = []; - foreach ($iterator as $item) { - 
$result[] = $iterator->getChildren(); - } - - $this->assertSame(null, $result[0]); - $this->assertInstanceOf(NestedIterator::class, $result[1]); - $this->assertSame(null, $result[2]); - } - public function testGetChildrenReturnsCorrectItems() { $generator = function () {yield from [1, new \ArrayIterator([2]), 3]; }; diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index b14b342..6a7d117 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -590,13 +590,4 @@ public function testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEnd $this->expectExceptionMessage('generator'); iterator_to_array($array[1]); } - - public function testRecursiveIterationYieldsNestedIterator() - { - $iterator = new Parser(new Tokens(['[[1]]']), '', null, true); - - foreach ($iterator as $item) { - $this->assertInstanceOf(NestedIterator::class, $item); - } - } } diff --git a/test/JsonMachineTest/RecursiveItemsTest.json b/test/JsonMachineTest/RecursiveItemsTest.json new file mode 100644 index 0000000..bfb8d7b --- /dev/null +++ b/test/JsonMachineTest/RecursiveItemsTest.json @@ -0,0 +1 @@ +{"path": {"key":["value"]}} diff --git a/test/JsonMachineTest/RecursiveItemsTest.php b/test/JsonMachineTest/RecursiveItemsTest.php new file mode 100644 index 0000000..2cf678c --- /dev/null +++ b/test/JsonMachineTest/RecursiveItemsTest.php @@ -0,0 +1,75 @@ + $args[1], + 'decoder' => $args[2], + 'debug' => $args[3], + ], + ]); + $this->assertSame($expected, iterator_to_array($iterator)); + } + + public function data_testFactories() + { + foreach ([true, false] as $debug) { + foreach ([ + [RecursiveItems::class, 'fromStream', fopen('data://text/plain,{"path": {"key":["value"]}}', 'r'), '/path', null, $debug], + [RecursiveItems::class, 'fromString', '{"path": {"key":["value"]}}', '/path', null, $debug], + [RecursiveItems::class, 'fromFile', __DIR__.'/RecursiveItemsTest.json', '/path', null, $debug], + [RecursiveItems::class, 
'fromIterable', ['{"path": {"key', '":["value"]}}'], '/path', null, $debug], + [RecursiveItems::class, 'fromIterable', new \ArrayIterator(['{"path": {"key', '":["value"]}}']), '/path', null, $debug], + ] as $case) { + yield $case; + } + } + } + + public function testRecursiveIteration() + { + $items = RecursiveItems::fromString('[[":)"]]'); + + foreach ($items as $emojis) { + $this->assertInstanceOf(RecursiveItems::class, $emojis); + foreach ($emojis as $emoji) { + $this->assertSame(':)', $emoji); + } + } + } + + public function testGetChildrenReturnsNestedIterator() + { + $iterator = RecursiveItems::fromString("[1,[],1]"); + + $result = []; + foreach ($iterator as $item) { + $result[] = $iterator->getChildren(); + } + + $this->assertSame(null, $result[0]); + $this->assertInstanceOf(RecursiveItems::class, $result[1]); + $this->assertSame(null, $result[2]); + } + + public function testCurrentReturnsSameInstanceOfParser() + { + + } +} From ef546c57a8b9651d99cbdb64108527c1307d2164 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Tue, 19 Nov 2024 14:33:05 +0100 Subject: [PATCH 18/38] Merged master into recursive --- CHANGELOG.md | 25 +++++++---- Makefile | 5 +-- README.md | 2 +- build/build-image.sh | 2 +- build/composer-lt-7.2.json | 20 --------- build/composer-update.sh | 10 +---- composer.json | 4 +- src/Exception/UnexpectedValueException.php | 9 ++++ src/FacadeTrait.php | 2 - src/ItemsOptions.php | 41 ++++++++++++++++++- src/Parser.php | 13 +++--- src/RecursiveItems.php | 4 ++ .../Exception/SyntaxErrorExceptionTest.php | 10 ++++- test/JsonMachineTest/ItemsOptionsTest.php | 6 +++ test/JsonMachineTest/ParserTest.php | 39 +++++++++++++++++- test/JsonMachineTest/RecursiveItemsTest.php | 17 ++++---- 16 files changed, 144 insertions(+), 65 deletions(-) delete mode 100644 build/composer-lt-7.2.json create mode 100644 src/Exception/UnexpectedValueException.php diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d42b4b..f643e6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-9,7 +9,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## master ### Added +- Support for PHP 8.4 +- Exception on misspelled option name suggests a correct one. - Recursive iteration via `recursive` option. +### Fixed +- Wrong key when combining list and scalar value pointers (#110). Thanks [@daniel-sc](https://github.com/daniel-sc) +### Removed +- Removed support for PHP 7.0, 7.1 +### Added
@@ -19,13 +26,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for PHP 8.3 - Added PHPStan to build pipeline ### Fixed -- Fixed the case when non-intersecting pointers were considered intersecting (#106). Thanks @XedinUnknown +- Fixed the case when non-intersecting pointers were considered intersecting (#106). Thanks [@XedinUnknown](https://github.com/XedinUnknown)
## 1.1.3 - 2022-10-12 ### Fixed -- Fix the parsing of nested sub-trees that use wildcards (#83). Thanks @cerbero90 +- Fix the parsing of nested sub-trees that use wildcards (#83). Thanks [@cerbero90](https://github.com/cerbero90)
@@ -47,7 +54,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## 1.1.0 - 2022-02-19 ### Added -- Autoloading without Composer. Thanks @a-sync. +- Autoloading without Composer. Thanks [@a-sync](https://github.com/a-sync).
@@ -89,9 +96,9 @@ They are not used anymore as described in previous point. - `DebugLexer` renamed to `TokensWithDebugging` ### Added -- Multiple JSON Pointers can be specified as an array in `pointer` option. See README. Thanks @fwolfsjaeger. +- Multiple JSON Pointers can be specified as an array in `pointer` option. See README. Thanks [@fwolfsjaeger](https://github.com/fwolfsjaeger). - New methods available during iteration: `Items::getCurrentJsonPointer()` and `Items::getMatchedJsonPointer()` -to track where you are. See README. Thanks @fwolfsjaeger. +to track where you are. See README. Thanks [@fwolfsjaeger](https://github.com/fwolfsjaeger). ### Fixed - Incorrect position information of `TokensWithDebugging::getPosition()`. Was constantly off by 1-2 bytes. @@ -128,7 +135,7 @@ https://stackoverflow.com/questions/63706550 ## 0.7.0 ### New features -- Use a `-` in json pointer as a wildcard for an array index. Example: `/users/-/id`. Thanks @cerbero90 +- Use a `-` in json pointer as a wildcard for an array index. Example: `/users/-/id`. Thanks [@cerbero90](https://github.com/cerbero90)
@@ -174,12 +181,12 @@ unless you use them directly for some reason. ## 0.4.0 ### New features - [Decoders](README.md#decoders) -- PHP 8 support (thanks @snapshotpl) +- PHP 8 support (thanks [@snapshotpl](https://github.com/snapshotpl)) ### BC breaks - `ext-json` is not required in `composer.json` anymore, because custom decoder might not need it. However **built-in decoders depend on it** so it must be present if you use them. -- All exceptions now extend `JsonMachineException` (thanks @gabimem) -- Throws `UnexpectedEndSyntaxErrorException` on an unexpected end of JSON structure (thanks @gabimem) +- All exceptions now extend `JsonMachineException` (thanks [@gabimem](https://github.com/gabimem)) +- Throws `UnexpectedEndSyntaxErrorException` on an unexpected end of JSON structure (thanks [@gabimem](https://github.com/gabimem)) - Function `httpClientChunks()` is **deprecated** so that compatibility with Symfony HttpClient is not on the shoulders of JSON Machine maintainer. The code is simple and everyone can make their own function and maintain it. The code was moved to [examples](examples/symfonyHttpClient.php). diff --git a/Makefile b/Makefile index a4f6d5d..27afa1d 100644 --- a/Makefile +++ b/Makefile @@ -5,15 +5,14 @@ LATEST_PHP := 8.0 3.1.1 COVERAGE_PHP := 7.4 3.1.1 define PHP_VERSIONS -"7.0 2.7.2"\ -"7.1 2.9.8"\ "7.2 3.1.1"\ "7.3 3.1.1"\ "7.4 3.1.1"\ "8.0 3.1.1"\ "8.1 3.1.1"\ "8.2 3.2.0"\ -"8.3-rc 3.3.0alpha3" +"8.3 3.3.2"\ +"8.4-rc 3.4.0beta1" endef define DOCKER_RUN diff --git a/README.md b/README.md index ee57db5..64c4b4e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # JSON Machine Very easy to use and memory efficient drop-in replacement for inefficient iteration of big JSON files or streams -for PHP >=7.0. See [TL;DR](#tl-dr). No dependencies in production except optional `ext-json`. README in sync with the code +for PHP >=7.2. See [TL;DR](#tl-dr). No dependencies in production except optional `ext-json`. 
README in sync with the code [![Build Status](https://github.com/halaxa/json-machine/actions/workflows/makefile.yml/badge.svg)](https://github.com/halaxa/json-machine/actions) [![codecov](https://img.shields.io/codecov/c/gh/halaxa/json-machine?label=phpunit%20%40covers)](https://codecov.io/gh/halaxa/json-machine) diff --git a/build/build-image.sh b/build/build-image.sh index bef1785..f541f8f 100755 --- a/build/build-image.sh +++ b/build/build-image.sh @@ -37,7 +37,7 @@ printf " && pecl install xdebug-$XDEBUG_VERSION \ && docker-php-ext-enable xdebug \ && docker-php-ext-enable opcache \ - && wget https://getcomposer.org/download/2.2.18/composer.phar -O /usr/local/bin/composer \ + && wget https://getcomposer.org/download/2.8.1/composer.phar -O /usr/local/bin/composer \ && chmod +x /usr/local/bin/composer " | docker build --quiet --tag "$CONTAINER_NAME" - > /dev/null diff --git a/build/composer-lt-7.2.json b/build/composer-lt-7.2.json deleted file mode 100644 index 6d6ba1f..0000000 --- a/build/composer-lt-7.2.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "name": "halaxa/json-machine", - "config": { - "lock": false, - "sort-packages": true - }, - "require": { - "php": "<7.2" - }, - "require-dev": { - "ext-json": "*", - "phpunit/phpunit": "^5.0" - }, - "autoload": { - "psr-4": {"JsonMachine\\": "src/"} - }, - "autoload-dev": { - "psr-4": {"JsonMachineTest\\": "test/JsonMachineTest"} - } -} diff --git a/build/composer-update.sh b/build/composer-update.sh index 77da018..fe46c0b 100755 --- a/build/composer-update.sh +++ b/build/composer-update.sh @@ -1,10 +1,4 @@ #!/usr/bin/env sh -if [ $(php -r "echo PHP_VERSION_ID;") -lt 70200 ] -then - set -x - COMPOSER=build/composer-lt-7.2.json composer --quiet update -else - set -x - composer --quiet update -fi +set -x +composer --quiet update diff --git a/composer.json b/composer.json index aba62e7..03002ea 100644 --- a/composer.json +++ b/composer.json @@ -21,12 +21,12 @@ "sort-packages": true }, "require": { - "php": "7.0 - 8.3" + 
"php": "7.2 - 8.4" }, "require-dev": { "ext-json": "*", "friendsofphp/php-cs-fixer": "^3.0", - "phpstan/phpstan": "^1.10", + "phpstan/phpstan": "^2.0", "phpunit/phpunit": "^8.0" }, "suggest": { diff --git a/src/Exception/UnexpectedValueException.php b/src/Exception/UnexpectedValueException.php new file mode 100644 index 0000000..8c23554 --- /dev/null +++ b/src/Exception/UnexpectedValueException.php @@ -0,0 +1,9 @@ + $optionValue) { if ( ! isset(self::defaultOptions()[$optionName])) { - throw new InvalidArgumentException("Option '$optionName' does not exist."); + $exceptionMessage = "Option '$optionName' does not exist."; + $suggestion = self::getSuggestion(array_keys(self::defaultOptions()), $optionName); + if ($suggestion) { + $exceptionMessage .= " Did you mean '$suggestion'?"; + } + throw new InvalidArgumentException($exceptionMessage); } $this->options[$optionName] = $this->{"opt_$optionName"}($optionValue); } @@ -51,15 +56,17 @@ private function validateOptions(array $options) private function opt_pointer($pointer) { if (is_array($pointer)) { + /** @phpstan-ignore expr.resultUnused */ (function (string ...$p) {})(...$pointer); } else { + /** @phpstan-ignore expr.resultUnused */ (function (string $p) {})($pointer); } return $pointer; } - private function opt_decoder(ItemDecoder $decoder = null) + private function opt_decoder(?ItemDecoder $decoder = null) { return $decoder; } @@ -83,4 +90,34 @@ public static function defaultOptions(): array 'recursive' => false, ]; } + + /** + * From Nette ObjectHelpers. + * + * @see https://github.com/nette/utils/blob/master/src/Utils/ObjectHelpers.php + * + * Finds the best suggestion (for 8-bit encoding). 
+ * + * @param (\ReflectionFunctionAbstract|\ReflectionParameter|\ReflectionClass|\ReflectionProperty|string)[] $possibilities + * + * @internal + */ + private static function getSuggestion(array $possibilities, string $value): ?string + { + $norm = preg_replace($re = '#^(get|set|has|is|add)(?=[A-Z])#', '+', $value); + $best = null; + $min = (strlen($value) / 4 + 1) * 10 + .1; + foreach (array_unique($possibilities, SORT_REGULAR) as $item) { + $item = $item instanceof \Reflector ? $item->name : $item; + if ($item !== $value && ( + ($len = levenshtein($item, $value, 10, 11, 10)) < $min + || ($len = levenshtein(preg_replace($re, '*', $item), $norm, 10, 11, 10)) < $min + )) { + $min = $len; + $best = $item; + } + } + + return $best; + } } diff --git a/src/Parser.php b/src/Parser.php index 80691aa..dd7cff0 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -64,10 +64,11 @@ class Parser implements \IteratorAggregate, PositionAware /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 + * @param ?ItemDecoder $jsonDecoder * * @throws InvalidArgumentException */ - public function __construct(Traversable $tokens, $jsonPointer = '', ItemDecoder $jsonDecoder = null, $recursive = false) + public function __construct(Traversable $tokens, $jsonPointer = '', ?ItemDecoder $jsonDecoder = null, $recursive = false) { $jsonPointers = (new ValidJsonPointers((array) $jsonPointer))->toArray(); @@ -133,9 +134,11 @@ public function getIterator() $currentPathChanged = false; $jsonPointerPath = $this->getMatchingJsonPointerPath(); $iteratorLevel = count($jsonPointerPath); + $iteratorStruct = null; } $tokenType = $tokenTypes[$token[0]]; if (0 == ($tokenType & $expectedType)) { + var_dump($expectedType); $this->error('Unexpected symbol', $token); } $isValue = ($tokenType | 23) == 23; // 23 = self::ANY_VALUE @@ -163,13 +166,13 @@ public function getIterator() ) ) { if ($this->recursive && ($token == '{' || $token == '[')) { - $jsonValue = (new 
self( + $jsonValue = new self( $this->remainingTokens(), '', $this->jsonDecoder, true - ))->$this->getIterator(); - $token = ' '; + ); +// $token = ' '; } else { $jsonValue .= $token; } @@ -398,7 +401,7 @@ public function getCurrentJsonPointer(): string */ public function getMatchedJsonPointer(): string { - if ($this->isOutsideGenerator()) { + if ($this->matchedJsonPointer === null) { throw new JsonMachineException(__METHOD__.' must be called inside a loop'); } diff --git a/src/RecursiveItems.php b/src/RecursiveItems.php index f8e1325..446a4c7 100644 --- a/src/RecursiveItems.php +++ b/src/RecursiveItems.php @@ -36,6 +36,7 @@ public function __construct(Parser $parser, ItemsOptions $options) public static function fromString($string, array $options = []): self { $options = new ItemsOptions($options); + return new self( self::createParser(new StringChunks($string), $options, true), $options @@ -48,6 +49,7 @@ public static function fromString($string, array $options = []): self public static function fromFile($file, array $options = []): self { $options = new ItemsOptions($options); + return new self( self::createParser(new FileChunks($file), $options, true), $options @@ -60,6 +62,7 @@ public static function fromFile($file, array $options = []): self public static function fromStream($stream, array $options = []): self { $options = new ItemsOptions($options); + return new self( self::createParser(new StreamChunks($stream), $options, true), $options @@ -72,6 +75,7 @@ public static function fromStream($stream, array $options = []): self public static function fromIterable($iterable, array $options = []): self { $options = new ItemsOptions($options); + return new self( self::createParser($iterable, $options, true), $options diff --git a/test/JsonMachineTest/Exception/SyntaxErrorExceptionTest.php b/test/JsonMachineTest/Exception/SyntaxErrorExceptionTest.php index 2be2902..681f150 100644 --- a/test/JsonMachineTest/Exception/SyntaxErrorExceptionTest.php +++ 
b/test/JsonMachineTest/Exception/SyntaxErrorExceptionTest.php @@ -16,7 +16,13 @@ public function testMessageContainsDataFromConstructor() { $exception = new SyntaxErrorException('msg 42', 24); - $this->assertContains('msg 42', $exception->getMessage()); - $this->assertContains('24', $exception->getMessage()); + $assertMethod = 'assertContains'; + /* @phpstan-ignore function.alreadyNarrowedType */ + if (method_exists($this, 'assertStringContainsString')) { + $assertMethod = 'assertStringContainsString'; + } + + $this->$assertMethod('msg 42', $exception->getMessage()); + $this->$assertMethod('24', $exception->getMessage()); } } diff --git a/test/JsonMachineTest/ItemsOptionsTest.php b/test/JsonMachineTest/ItemsOptionsTest.php index 3a4434e..3d110c6 100644 --- a/test/JsonMachineTest/ItemsOptionsTest.php +++ b/test/JsonMachineTest/ItemsOptionsTest.php @@ -63,6 +63,12 @@ public function testThrowsOnUnknownOption() new ItemsOptions(['invalid_option_name' => 'value']); } + + public function testSuggestsCorrectOption() + { + $this->expectExceptionMessage("'debug'"); + new ItemsOptions(['degub' => true]); + } } class InvalidValue diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index 6a7d117..ccfd67f 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -9,7 +9,6 @@ use JsonMachine\Exception\SyntaxErrorException; use JsonMachine\Exception\UnexpectedEndSyntaxErrorException; use JsonMachine\JsonDecoder\ExtJsonDecoder; -use JsonMachine\NestedIterator; use JsonMachine\Parser; use JsonMachine\StringChunks; use JsonMachine\Tokens; @@ -109,6 +108,44 @@ public function data_testSyntax() ['id' => '2'], ], ], + 'ISSUE-110-vector-first' => [ + ['/items', '/total'], + '{ + "items": [ + ["test1"], + ["test2"] + ], + "total": 2 + }', + [ + [0 => ['test1']], + [1 => ['test2']], + ['total' => 2], + ], + ], + 'ISSUE-110-scalar-first' => [ + ['/items', '/total'], + '{ + "total": 2, + "items": [ + ["test1"], + 
["test2"] + ] + }', + [ + ['total' => 2], + [0 => ['test1']], + [1 => ['test2']], + ], + ], + 'ISSUE-100' => [ + ['/results/-/color'], + '{"results":[{"name":"apple","color":"red"},{"name":"pear","color":"yellow"}]}', + [ + ['color' => 'red'], + ['color' => 'yellow'], + ], + ], ]; } diff --git a/test/JsonMachineTest/RecursiveItemsTest.php b/test/JsonMachineTest/RecursiveItemsTest.php index 2cf678c..5a11498 100644 --- a/test/JsonMachineTest/RecursiveItemsTest.php +++ b/test/JsonMachineTest/RecursiveItemsTest.php @@ -14,7 +14,7 @@ class RecursiveItemsTest extends \PHPUnit_Framework_TestCase /** * @dataProvider data_testFactories */ - public function testFactories($expected, $methodName, ...$args) + public function testFactories($methodName, ...$args) { $iterator = call_user_func_array(RecursiveItems::class."::$methodName", [ $args[0], @@ -24,18 +24,18 @@ public function testFactories($expected, $methodName, ...$args) 'debug' => $args[3], ], ]); - $this->assertSame($expected, iterator_to_array($iterator)); + $this->assertInstanceOf(RecursiveItems::class, $iterator); } public function data_testFactories() { foreach ([true, false] as $debug) { foreach ([ - [RecursiveItems::class, 'fromStream', fopen('data://text/plain,{"path": {"key":["value"]}}', 'r'), '/path', null, $debug], - [RecursiveItems::class, 'fromString', '{"path": {"key":["value"]}}', '/path', null, $debug], - [RecursiveItems::class, 'fromFile', __DIR__.'/RecursiveItemsTest.json', '/path', null, $debug], - [RecursiveItems::class, 'fromIterable', ['{"path": {"key', '":["value"]}}'], '/path', null, $debug], - [RecursiveItems::class, 'fromIterable', new \ArrayIterator(['{"path": {"key', '":["value"]}}']), '/path', null, $debug], + ['fromStream', fopen('data://text/plain,{"path": {"key":["value"]}}', 'r'), '/path', null, $debug], + ['fromString', '{"path": {"key":["value"]}}', '/path', null, $debug], + ['fromFile', __DIR__.'/RecursiveItemsTest.json', '/path', null, $debug], + ['fromIterable', ['{"path": {"key', 
'":["value"]}}'], '/path', null, $debug], + ['fromIterable', new \ArrayIterator(['{"path": {"key', '":["value"]}}']), '/path', null, $debug], ] as $case) { yield $case; } @@ -56,7 +56,7 @@ public function testRecursiveIteration() public function testGetChildrenReturnsNestedIterator() { - $iterator = RecursiveItems::fromString("[1,[],1]"); + $iterator = RecursiveItems::fromString('[1,[],1]'); $result = []; foreach ($iterator as $item) { @@ -70,6 +70,5 @@ public function testGetChildrenReturnsNestedIterator() public function testCurrentReturnsSameInstanceOfParser() { - } } From 8da949f2ad31efeb6a00ff93372a0ca7b5716c2c Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Wed, 20 Nov 2024 11:43:19 +0100 Subject: [PATCH 19/38] Fixed failing testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEndByUser --- src/Parser.php | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index dd7cff0..b9c37b0 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -41,6 +41,9 @@ class Parser implements \IteratorAggregate, PositionAware /** @var Iterator */ private $tokensIterator; + /** @var Generator */ + private $generator; + /** @var ItemDecoder */ private $jsonDecoder; @@ -98,13 +101,21 @@ private function buildPaths(array $jsonPointers): array }, $jsonPointers); } + #[\ReturnTypeWillChange] + public function getIterator(): Generator + { + if ( ! 
$this->generator) { + $this->generator = $this->createGenerator(); + } + + return $this->generator; + } + /** - * @return Generator - * + * @throws InvalidArgumentException * @throws PathNotFoundException */ - #[\ReturnTypeWillChange] - public function getIterator() + private function createGenerator(): Generator { $tokenTypes = $this->tokenTypes(); @@ -138,7 +149,6 @@ public function getIterator() } $tokenType = $tokenTypes[$token[0]]; if (0 == ($tokenType & $expectedType)) { - var_dump($expectedType); $this->error('Unexpected symbol', $token); } $isValue = ($tokenType | 23) == 23; // 23 = self::ANY_VALUE @@ -172,7 +182,7 @@ public function getIterator() $this->jsonDecoder, true ); -// $token = ' '; + $token = ' '; } else { $jsonValue .= $token; } @@ -266,8 +276,8 @@ public function getIterator() yield $keyResult->getValue() => $value; unset($keyResult); } - if ($value instanceof Iterator && $value->valid()) { - $this->eatAllRemainingTokens($value); + if ($value instanceof Parser) { + $value->ensureIterationComplete(); } unset($valueResult); unset($value); @@ -316,12 +326,14 @@ private function remainingTokens() } } - private function eatAllRemainingTokens(Iterator $iterator) + public function ensureIterationComplete(): void { - foreach ($iterator as $item) { - if ($item instanceof Iterator) { - $this->eatAllRemainingTokens($item); - } + $generator = $this->getIterator(); + + while ($generator->valid()) { + $generator->key(); + $generator->current(); + $generator->next(); } } From 610a127390a0e6a0c5d00df8d5f1a6c75662c310 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Wed, 20 Nov 2024 13:03:43 +0100 Subject: [PATCH 20/38] Removed empty test --- test/JsonMachineTest/ParserTest.php | 17 +++++++++++++++-- test/JsonMachineTest/RecursiveItemsTest.php | 4 ---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index ccfd67f..c79dfe7 100644 --- a/test/JsonMachineTest/ParserTest.php 
+++ b/test/JsonMachineTest/ParserTest.php @@ -611,9 +611,12 @@ public function testZigZagRecursiveIteration() $this->assertSame($valuesToVisit, $valuesVisited); } - public function testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEndByUser() + /** + * @dataProvider data_testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEndByUser + */ + public function testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEndByUser(string $json) { - $iterator = new Parser(new Tokens(['[1,[{},2,3],4]']), '', null, true); + $iterator = new Parser(new Tokens([$json]), '', null, true); $array = []; foreach ($iterator as $item) { @@ -627,4 +630,14 @@ public function testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEnd $this->expectExceptionMessage('generator'); iterator_to_array($array[1]); } + + public function data_testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEndByUser() + { + return [ + ['[1,[{},2,3],4]'], + ['[1,[[],2,3],4]'], + ['[1,[{"key": "value"},2,3],4]'], + ['[1,[[null, true, "string"],2,3],4]'], + ]; + } } diff --git a/test/JsonMachineTest/RecursiveItemsTest.php b/test/JsonMachineTest/RecursiveItemsTest.php index 5a11498..7bd1d1c 100644 --- a/test/JsonMachineTest/RecursiveItemsTest.php +++ b/test/JsonMachineTest/RecursiveItemsTest.php @@ -67,8 +67,4 @@ public function testGetChildrenReturnsNestedIterator() $this->assertInstanceOf(RecursiveItems::class, $result[1]); $this->assertSame(null, $result[2]); } - - public function testCurrentReturnsSameInstanceOfParser() - { - } } From 8dd061cfb310da4ec387318ba9c64c7f62081f88 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Wed, 20 Nov 2024 13:16:45 +0100 Subject: [PATCH 21/38] Code hack fixed --- src/Parser.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index b9c37b0..3c51669 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -129,7 +129,7 @@ private function createGenerator(): Generator $jsonValue = ''; $key = 
null; $objectKeyExpected = false; - $inObject = true; // hack to make "!$inObject" in first iteration work. Better code structure? + $inObject = null; $expectedType = self::OBJECT_START | self::ARRAY_START; $subtreeEnded = false; $token = null; @@ -152,7 +152,7 @@ private function createGenerator(): Generator $this->error('Unexpected symbol', $token); } $isValue = ($tokenType | 23) == 23; // 23 = self::ANY_VALUE - if ( ! $inObject && $isValue && $currentLevel < $iteratorLevel) { + if ($inObject === false && $isValue && $currentLevel < $iteratorLevel) { $currentPathChanged = ! $this->hasSingleJsonPointer; $currentPath[$currentLevel] = isset($currentPath[$currentLevel]) ? $currentPath[$currentLevel] + 1 : 0; $currentPathWildcard[$currentLevel] = preg_match('/^(?:\d+|-)$/S', $jsonPointerPath[$currentLevel]) ? '-' : $currentPath[$currentLevel]; From 85aeb9ae91b5e228d37255451ff77949bb2fdb2d Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Wed, 20 Nov 2024 21:55:21 +0100 Subject: [PATCH 22/38] Parser::getPosition() works inside nested collections --- src/Parser.php | 10 +++---- src/ResumableIteratorAggregateProxy.php | 37 +++++++++++++++++++++++++ src/Tokens.php | 15 ++++++++++ test/JsonMachineTest/ParserTest.php | 18 +++++++++++- 4 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 src/ResumableIteratorAggregateProxy.php diff --git a/src/Parser.php b/src/Parser.php index 3c51669..0684080 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -177,7 +177,7 @@ private function createGenerator(): Generator ) { if ($this->recursive && ($token == '{' || $token == '[')) { $jsonValue = new self( - $this->remainingTokens(), + new ResumableIteratorAggregateProxy($this->tokens), // could single shared instance work? 
'', $this->jsonDecoder, true @@ -434,17 +434,15 @@ private function error($msg, $token, $exception = SyntaxErrorException::class) } /** + * Returns JSON bytes read so far + * * @return int * * @throws JsonMachineException */ public function getPosition() { - if ($this->tokens instanceof PositionAware) { - return $this->tokens->getPosition(); - } - - throw new JsonMachineException('Provided tokens iterable must implement PositionAware to call getPosition on it.'); + return $this->tokens->getPosition(); } private static function jsonPointerToPath(string $jsonPointer): array diff --git a/src/ResumableIteratorAggregateProxy.php b/src/ResumableIteratorAggregateProxy.php new file mode 100644 index 0000000..844e2b6 --- /dev/null +++ b/src/ResumableIteratorAggregateProxy.php @@ -0,0 +1,37 @@ +iteratorAggregate = $iteratorAggregate; + } + + public function getIterator() + { + $iterator = $this->iteratorAggregate->getIterator(); + while ($iterator->valid()) { + yield $iterator->key() => $iterator->current(); + $iterator->next(); + } + } + + public function __call($name, $arguments) + { + return $this->iteratorAggregate->$name(...$arguments); + } +} diff --git a/src/Tokens.php b/src/Tokens.php index 2ba96c8..74057cd 100644 --- a/src/Tokens.php +++ b/src/Tokens.php @@ -5,12 +5,17 @@ namespace JsonMachine; use Generator; +use JsonMachine\Exception\InvalidArgumentException; +use JsonMachine\Exception\PathNotFoundException; class Tokens implements \IteratorAggregate, PositionAware { /** @var iterable */ private $jsonChunks; + /** @var Generator */ + private $generator; + /** * @param iterable $jsonChunks */ @@ -24,6 +29,16 @@ public function __construct($jsonChunks) */ #[\ReturnTypeWillChange] public function getIterator() + { + if ( ! 
$this->generator) { + $this->generator = $this->createGenerator(); + } + + return $this->generator; + } + + + private function createGenerator(): Generator { $insignificantBytes = $this->insignificantBytes(); $tokenBoundaries = $this->tokenBoundaries(); diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index c79dfe7..524ad52 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -4,6 +4,7 @@ namespace JsonMachineTest; +use Error; use JsonMachine\Exception\JsonMachineException; use JsonMachine\Exception\PathNotFoundException; use JsonMachine\Exception\SyntaxErrorException; @@ -557,7 +558,7 @@ public function testGetPositionThrowsIfTokensDoNotSupportGetPosition() { $parser = new Parser(new \ArrayObject()); - $this->expectException(JsonMachineException::class); + $this->expectException(Error::class); $parser->getPosition(); } @@ -640,4 +641,19 @@ public function data_testRecursiveParserDoesNotRequireChildParserToBeIteratedToT ['[1,[[null, true, "string"],2,3],4]'], ]; } + + public function testGetPositionWorksInsideRecursion() + { + $parser = new Parser( + new Tokens(new \ArrayIterator(["[[11,12]]"])), + "", + null, + true + ); + + foreach ($parser as $item) { + /** @var $item Parser */ + $this->assertSame(0, $item->getPosition()); + } + } } From f8fad15425a80da300134de62c8aeda7b5c87e0f Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Thu, 21 Nov 2024 12:18:18 +0100 Subject: [PATCH 23/38] wip --- src/FacadeTrait.php | 23 +----- src/Items.php | 21 +++++ src/RecursiveItems.php | 71 +++++++++++++++- test/JsonMachineTest/NestedIteratorTest.php | 51 ------------ test/JsonMachineTest/RecursiveItemsTest.php | 90 +++++++++++++++++++++ 5 files changed, 180 insertions(+), 76 deletions(-) diff --git a/src/FacadeTrait.php b/src/FacadeTrait.php index 81c504d..2ab05a9 100644 --- a/src/FacadeTrait.php +++ b/src/FacadeTrait.php @@ -29,7 +29,7 @@ public function isDebugEnabled(): bool * * @throws 
InvalidArgumentException */ - private static function createParser($bytesIterator, ItemsOptions $options, bool $recursive): Parser + private static function createParser(iterable $bytesIterator, ItemsOptions $options, bool $recursive): Parser { if ($options['debug']) { $tokensClass = TokensWithDebugging::class; @@ -55,27 +55,6 @@ public function getPosition() return $this->parser->getPosition(); } - public function getJsonPointers(): array - { - return $this->parser->getJsonPointers(); - } - - /** - * @throws Exception\JsonMachineException - */ - public function getCurrentJsonPointer(): string - { - return $this->parser->getCurrentJsonPointer(); - } - - /** - * @throws Exception\JsonMachineException - */ - public function getMatchedJsonPointer(): string - { - return $this->parser->getMatchedJsonPointer(); - } - /** * @param string $string */ diff --git a/src/Items.php b/src/Items.php index 2136223..20cd832 100644 --- a/src/Items.php +++ b/src/Items.php @@ -76,4 +76,25 @@ public function getIterator() { return $this->parser->getIterator(); } + + public function getJsonPointers(): array + { + return $this->parser->getJsonPointers(); + } + + /** + * @throws Exception\JsonMachineException + */ + public function getCurrentJsonPointer(): string + { + return $this->parser->getCurrentJsonPointer(); + } + + /** + * @throws Exception\JsonMachineException + */ + public function getMatchedJsonPointer(): string + { + return $this->parser->getMatchedJsonPointer(); + } } diff --git a/src/RecursiveItems.php b/src/RecursiveItems.php index 446a4c7..6c98f88 100644 --- a/src/RecursiveItems.php +++ b/src/RecursiveItems.php @@ -5,7 +5,10 @@ namespace JsonMachine; use Iterator; +use IteratorAggregate; use JsonMachine\Exception\InvalidArgumentException; +use JsonMachine\Exception\JsonMachineException; +use LogicException; /** * Entry-point facade for recursive iteration. 
@@ -14,7 +17,7 @@ final class RecursiveItems implements \RecursiveIterator, PositionAware { use FacadeTrait; - /** @var Parser */ + /** @var IteratorAggregate */ private $parser; /** @var ItemsOptions */ @@ -23,8 +26,12 @@ final class RecursiveItems implements \RecursiveIterator, PositionAware /** @var Iterator */ private $parserIterator; - public function __construct(Parser $parser, ItemsOptions $options) + public function __construct(IteratorAggregate $parser, ?ItemsOptions $options = null) { + if ( ! $options) { + $options = new ItemsOptions(); + } + $this->parser = $parser; $this->options = $options; $this->debugEnabled = $options['debug']; @@ -85,8 +92,16 @@ public static function fromIterable($iterable, array $options = []): self public function current() { $current = $this->parserIterator->current(); - if ($current instanceof Parser) { + if ($current instanceof IteratorAggregate) { return new self($current, $this->options); + } elseif ( ! is_scalar($current)) { + throw new JsonMachineException( + sprintf( + '%s only accepts scalar or IteratorAggregate values. %s given.', + self::class, + is_object($current) ? get_class($current) : gettype($current) + ) + ); } return $current; @@ -127,4 +142,54 @@ public function getChildren() return null; } + + /** + * Finds the desired key on this level and returns its value. + * It moves the internal cursor to it so subsequent calls to self::current() returns the same value. + * + * @param $key + * @return mixed + * @throws JsonMachineException When the key is not found on this level. + */ + public function advanceToKey($key) + { + if ( ! $this->parserIterator) { + $this->rewind(); + } + $iterator = $this->parserIterator; + + while ($key !== $iterator->key() && $iterator->valid()) { + $iterator->next(); + } + + if ($key !== $iterator->key()) { + throw new JsonMachineException("Key '$key' was not found."); + } + + return $iterator->current(); + } + + /** + * Recursively materializes this iterator level to array. 
+ * Moves its internal pointer to the end. + * + * @return array + */ + public function toArray(): array + { + return self::toArrayRecursive($this); + } + + private static function toArrayRecursive(\Traversable $traversable): array + { + $array = []; + foreach ($traversable as $key => $value) { + if ($value instanceof \Traversable) { + $value = self::toArrayRecursive($value); + } + $array[$key] = $value; + } + + return $array; + } } diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php index e053505..f54fb6f 100644 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -33,18 +33,7 @@ public function testHasChildrenIgnoresArrays() } } - public function testHasChildrenFollowsIterators() - { - $generator = function () {yield from [1, new \ArrayIterator([]), 3]; }; - $iterator = new NestedIterator($generator()); - - $result = []; - foreach ($iterator as $item) { - $result[] = $iterator->hasChildren(); - } - $this->assertSame([false, true, false], $result); - } public function testGetChildrenReturnsCorrectItems() { @@ -57,44 +46,4 @@ public function testGetChildrenReturnsCorrectItems() $this->assertSame([1, 2, 3], $result); } - - public function testAdvanceToKeyWorks() - { - $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; - $iterator = new NestedIterator($generator()); - - $this->assertSame(1, $iterator->advanceToKey('one')); - $this->assertSame(1, $iterator->advanceToKey('one')); - $this->assertSame(2, $iterator->advanceToKey('two')); - $this->assertSame(3, $iterator->advanceToKey('three')); - } - - public function testAdvanceToKeyThrows() - { - $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; - $iterator = new NestedIterator($generator()); - - $this->expectExceptionMessage('not found'); - $iterator->advanceToKey('four'); - } - - public function testToArray() - { - $generator = function ($iterable) {yield from 
['one' => 1, 'two' => 2, 'i' => $iterable, 'three' => 3]; }; - $iterator = new NestedIterator($generator($generator(['42']))); - - $expected = [ - 'one' => 1, - 'two' => 2, - 'i' => [ - 'one' => 1, - 'two' => 2, - 'i' => ['42'], - 'three' => 3, - ], - 'three' => 3, - ]; - - $this->assertSame($expected, $iterator->toArray()); - } } diff --git a/test/JsonMachineTest/RecursiveItemsTest.php b/test/JsonMachineTest/RecursiveItemsTest.php index 7bd1d1c..c5975f9 100644 --- a/test/JsonMachineTest/RecursiveItemsTest.php +++ b/test/JsonMachineTest/RecursiveItemsTest.php @@ -4,6 +4,9 @@ namespace JsonMachineTest; +use Iterator; +use IteratorAggregate; +use JsonMachine\ItemsOptions; use JsonMachine\RecursiveItems; /** @@ -67,4 +70,91 @@ public function testGetChildrenReturnsNestedIterator() $this->assertInstanceOf(RecursiveItems::class, $result[1]); $this->assertSame(null, $result[2]); } + + public function testAdvanceToKeyWorks() + { + $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; + $iterator = new RecursiveItems(toIteratorAggregate($generator())); + + $this->assertSame(1, $iterator->advanceToKey('one')); + $this->assertSame(1, $iterator->advanceToKey('one')); + $this->assertSame(2, $iterator->advanceToKey('two')); + $this->assertSame(3, $iterator->advanceToKey('three')); + } + + public function testAdvanceToKeyThrows() + { + $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; + $iterator = new RecursiveItems(toIteratorAggregate($generator())); + + $this->expectExceptionMessage('not found'); + $iterator->advanceToKey('four'); + } + + public function testToArray() + { + $generator = function ($iterable) {yield from ['one' => 1, 'two' => 2, 'i' => $iterable, 'three' => 3]; }; + $iterator = new RecursiveItems( + toIteratorAggregate($generator( + toIteratorAggregate($generator( + toIteratorAggregate(new \ArrayIterator(['42'])) + )) + )) + ); + + $expected = [ + 'one' => 1, + 'two' => 2, + 'i' => [ + 'one' => 1, + 'two' 
=> 2, + 'i' => ['42'], + 'three' => 3, + ], + 'three' => 3, + ]; + + $this->assertSame($expected, $iterator->toArray()); + } + + public function testHasChildrenFollowsIterators() + { + $generator = function () {yield from [1, toIteratorAggregate(new \ArrayIterator([])), 3]; }; + $iterator = new RecursiveItems(toIteratorAggregate($generator())); + + $result = []; + foreach ($iterator as $item) { + $result[] = $iterator->hasChildren(); + } + + $this->assertSame([false, true, false], $result); + } + + public function testToArrayThrowsMeaningfulErrorWhenIteratorIsAlreadyOpen() + { + $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; + $iterator = new RecursiveItems(toIteratorAggregate($generator())); + + $iterator->rewind(); + $iterator->next(); + $iterator->rewind(); + $iterator->next(); + + var_dump($iterator->toArray()); + } +} + +function toIteratorAggregate(Iterator $iterator): IteratorAggregate +{ + return new class($iterator) implements IteratorAggregate { + private $iterator; + public function __construct(Iterator $iterator) + { + $this->iterator = $iterator; + } + public function getIterator() + { + return $this->iterator; + } + }; } From 8f4507a57a61a4517bb4d940494cf0810df6ef50 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Thu, 21 Nov 2024 17:22:47 +0100 Subject: [PATCH 24/38] testToArrayThrowsMeaningfulErrorWhenIteratorIsAlreadyOpen --- composer.json | 2 +- src/RecursiveItems.php | 28 +++++++++++++++++---- src/ResumableIteratorAggregateProxy.php | 2 +- test/JsonMachineTest/RecursiveItemsTest.php | 9 +++---- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/composer.json b/composer.json index 03002ea..9ed9f28 100644 --- a/composer.json +++ b/composer.json @@ -26,7 +26,7 @@ "require-dev": { "ext-json": "*", "friendsofphp/php-cs-fixer": "^3.0", - "phpstan/phpstan": "^2.0", + "phpstan/phpstan": "^1.0", "phpunit/phpunit": "^8.0" }, "suggest": { diff --git a/src/RecursiveItems.php b/src/RecursiveItems.php index 
6c98f88..c981557 100644 --- a/src/RecursiveItems.php +++ b/src/RecursiveItems.php @@ -89,6 +89,10 @@ public static function fromIterable($iterable, array $options = []): self ); } + /** + * @return mixed Move to return type when PHP 7 support is dropped + */ + #[\ReturnTypeWillChange] public function current() { $current = $this->parserIterator->current(); @@ -107,11 +111,15 @@ public function current() return $current; } - public function next() + public function next(): void { $this->parserIterator->next(); } + /** + * @return mixed Move to return type when PHP 7 support is dropped + */ + #[\ReturnTypeWillChange] public function key() { return $this->parserIterator->key(); @@ -122,7 +130,7 @@ public function valid(): bool return $this->parserIterator->valid(); } - public function rewind() + public function rewind(): void { $this->parserIterator = $this->parser->getIterator(); $this->parserIterator->rewind(); @@ -133,7 +141,7 @@ public function hasChildren(): bool return $this->current() instanceof self; } - public function getChildren() + public function getChildren(): ?\RecursiveIterator { $current = $this->current(); if ($current instanceof self) { @@ -177,14 +185,24 @@ public function advanceToKey($key) */ public function toArray(): array { + try { + $this->rewind(); + } catch (\Exception $e) { + if (false !== strpos($e->getMessage(), 'generator')){ + throw new JsonMachineException( + 'Method toArray() can only be called before any items in the collection have been accessed.' 
+ ); + } + } + return self::toArrayRecursive($this); } - private static function toArrayRecursive(\Traversable $traversable): array + private static function toArrayRecursive(self $traversable): array { $array = []; foreach ($traversable as $key => $value) { - if ($value instanceof \Traversable) { + if ($value instanceof self) { $value = self::toArrayRecursive($value); } $array[$key] = $value; diff --git a/src/ResumableIteratorAggregateProxy.php b/src/ResumableIteratorAggregateProxy.php index 844e2b6..0923132 100644 --- a/src/ResumableIteratorAggregateProxy.php +++ b/src/ResumableIteratorAggregateProxy.php @@ -21,7 +21,7 @@ public function __construct(IteratorAggregate $iteratorAggregate) $this->iteratorAggregate = $iteratorAggregate; } - public function getIterator() + public function getIterator(): \Traversable { $iterator = $this->iteratorAggregate->getIterator(); while ($iterator->valid()) { diff --git a/test/JsonMachineTest/RecursiveItemsTest.php b/test/JsonMachineTest/RecursiveItemsTest.php index c5975f9..9b7216e 100644 --- a/test/JsonMachineTest/RecursiveItemsTest.php +++ b/test/JsonMachineTest/RecursiveItemsTest.php @@ -132,15 +132,14 @@ public function testHasChildrenFollowsIterators() public function testToArrayThrowsMeaningfulErrorWhenIteratorIsAlreadyOpen() { - $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; + $generator = function () {yield 'one' => 1; yield 'two' => 2; yield 'three' => 3; }; $iterator = new RecursiveItems(toIteratorAggregate($generator())); - $iterator->rewind(); - $iterator->next(); $iterator->rewind(); $iterator->next(); - var_dump($iterator->toArray()); + $this->expectExceptionMessage('toArray()'); + $iterator->toArray(); } } @@ -152,7 +151,7 @@ public function __construct(Iterator $iterator) { $this->iterator = $iterator; } - public function getIterator() + public function getIterator(): \Traversable { return $this->iterator; } From e96be16d0c8c885e71dfe9b8a062753a4092cb88 Mon Sep 17 00:00:00 2001 
From: Filip Halaxa Date: Thu, 21 Nov 2024 17:51:39 +0100 Subject: [PATCH 25/38] cs-fix --- src/FacadeTrait.php | 2 -- src/Parser.php | 2 +- src/RecursiveItems.php | 9 +++++---- src/ResumableIteratorAggregateProxy.php | 8 ++++---- src/Tokens.php | 3 --- test/JsonMachineTest/NestedIteratorTest.php | 2 -- test/JsonMachineTest/ParserTest.php | 6 +++--- test/JsonMachineTest/RecursiveItemsTest.php | 9 +++++++-- 8 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/FacadeTrait.php b/src/FacadeTrait.php index 2ab05a9..c6e2b4f 100644 --- a/src/FacadeTrait.php +++ b/src/FacadeTrait.php @@ -25,8 +25,6 @@ public function isDebugEnabled(): bool } /** - * @param iterable $bytesIterator - * * @throws InvalidArgumentException */ private static function createParser(iterable $bytesIterator, ItemsOptions $options, bool $recursive): Parser diff --git a/src/Parser.php b/src/Parser.php index 0684080..d99d744 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -434,7 +434,7 @@ private function error($msg, $token, $exception = SyntaxErrorException::class) } /** - * Returns JSON bytes read so far + * Returns JSON bytes read so far. * * @return int * diff --git a/src/RecursiveItems.php b/src/RecursiveItems.php index c981557..9c1381c 100644 --- a/src/RecursiveItems.php +++ b/src/RecursiveItems.php @@ -8,7 +8,6 @@ use IteratorAggregate; use JsonMachine\Exception\InvalidArgumentException; use JsonMachine\Exception\JsonMachineException; -use LogicException; /** * Entry-point facade for recursive iteration. @@ -156,8 +155,10 @@ public function getChildren(): ?\RecursiveIterator * It moves the internal cursor to it so subsequent calls to self::current() returns the same value. * * @param $key + * * @return mixed - * @throws JsonMachineException When the key is not found on this level. 
+ * + * @throws JsonMachineException when the key is not found on this level */ public function advanceToKey($key) { @@ -181,14 +182,14 @@ public function advanceToKey($key) * Recursively materializes this iterator level to array. * Moves its internal pointer to the end. * - * @return array + * @throws JsonMachineException */ public function toArray(): array { try { $this->rewind(); } catch (\Exception $e) { - if (false !== strpos($e->getMessage(), 'generator')){ + if (false !== strpos($e->getMessage(), 'generator')) { throw new JsonMachineException( 'Method toArray() can only be called before any items in the collection have been accessed.' ); diff --git a/src/ResumableIteratorAggregateProxy.php b/src/ResumableIteratorAggregateProxy.php index 0923132..607068f 100644 --- a/src/ResumableIteratorAggregateProxy.php +++ b/src/ResumableIteratorAggregateProxy.php @@ -1,19 +1,19 @@ -generator; } - private function createGenerator(): Generator { $insignificantBytes = $this->insignificantBytes(); diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php index f54fb6f..cc88fe2 100644 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -33,8 +33,6 @@ public function testHasChildrenIgnoresArrays() } } - - public function testGetChildrenReturnsCorrectItems() { $generator = function () {yield from [1, new \ArrayIterator([2]), 3]; }; diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index 524ad52..c11adcd 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -645,14 +645,14 @@ public function data_testRecursiveParserDoesNotRequireChildParserToBeIteratedToT public function testGetPositionWorksInsideRecursion() { $parser = new Parser( - new Tokens(new \ArrayIterator(["[[11,12]]"])), - "", + new Tokens(new \ArrayIterator(['[[11,12]]'])), + '', null, true ); foreach ($parser as $item) { - /** @var $item Parser */ 
+ /* @var $item Parser */ $this->assertSame(0, $item->getPosition()); } } diff --git a/test/JsonMachineTest/RecursiveItemsTest.php b/test/JsonMachineTest/RecursiveItemsTest.php index 9b7216e..49e151a 100644 --- a/test/JsonMachineTest/RecursiveItemsTest.php +++ b/test/JsonMachineTest/RecursiveItemsTest.php @@ -6,7 +6,6 @@ use Iterator; use IteratorAggregate; -use JsonMachine\ItemsOptions; use JsonMachine\RecursiveItems; /** @@ -132,7 +131,11 @@ public function testHasChildrenFollowsIterators() public function testToArrayThrowsMeaningfulErrorWhenIteratorIsAlreadyOpen() { - $generator = function () {yield 'one' => 1; yield 'two' => 2; yield 'three' => 3; }; + $generator = function () { + yield 'one' => 1; + yield 'two' => 2; + yield 'three' => 3; + }; $iterator = new RecursiveItems(toIteratorAggregate($generator())); $iterator->rewind(); @@ -147,10 +150,12 @@ function toIteratorAggregate(Iterator $iterator): IteratorAggregate { return new class($iterator) implements IteratorAggregate { private $iterator; + public function __construct(Iterator $iterator) { $this->iterator = $iterator; } + public function getIterator(): \Traversable { return $this->iterator; From 99e219c3ba0539ad1216d0a8de66fccc623089c2 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Thu, 21 Nov 2024 19:05:34 +0100 Subject: [PATCH 26/38] composer update on build --- CHANGELOG.md | 2 +- Makefile | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f643e6d..e524571 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,8 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## master ### Added - Support for PHP 8.4 +- Recursive iteration via new facade `RecursiveItems`. - Exception on misspelled option name suggests a correct one. -- Recursive iteration via `recursive` option. ### Fixed - Wrong key when combining list and scalar value pointers (#110). 
Thanks [@daniel-sc](https://github.com/daniel-sc) ### Removed diff --git a/Makefile b/Makefile index 27afa1d..6505723 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ help: @grep -E '^[-a-zA-Z0-9_\.\/]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[32m%-15s\033[0m\t%s\n", $$1, $$2}' -build: composer-validate cs-check phpstan tests-all ## Run all necessary stuff before commit. +build: composer-update cs-check phpstan tests-all ## Run all necessary stuff before commit. tests: ## Run tests on recent PHP version. Pass args to phpunit via ARGS="" @@ -66,8 +66,8 @@ performance-tests: ## Run performance tests @$(call DOCKER_RUN,$(LATEST_PHP),composer performance-tests) -composer-validate: ## Validate composer.json contents - @$(call DOCKER_RUN,$(LATEST_PHP),composer validate) +composer-update: ## Validate composer.json contents + @$(call DOCKER_RUN,$(LATEST_PHP),composer update) release: .env build From 6b2b6e2320a4da1c8872c92a5c4d4553248fdfe1 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Fri, 22 Nov 2024 13:07:55 +0100 Subject: [PATCH 27/38] Readme merge fix --- CHANGELOG.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 126ad47..cd54e85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,14 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## master ### Added -- Support for PHP 8.4 - Recursive iteration via new facade `RecursiveItems`. -- Exception on misspelled option name suggests a correct one. -### Fixed -- Wrong key when combining list and scalar value pointers (#110). Thanks [@daniel-sc](https://github.com/daniel-sc) -### Removed -- Removed support for PHP 7.0, 7.1 -### Added
From 05dc2eb0fd7d5c2ec7546770f3a288fd6f0ab2b3 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Fri, 22 Nov 2024 13:13:38 +0100 Subject: [PATCH 28/38] dropped compatibility with older phpunit --- .../Exception/SyntaxErrorExceptionTest.php | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/test/JsonMachineTest/Exception/SyntaxErrorExceptionTest.php b/test/JsonMachineTest/Exception/SyntaxErrorExceptionTest.php index 681f150..b98ff30 100644 --- a/test/JsonMachineTest/Exception/SyntaxErrorExceptionTest.php +++ b/test/JsonMachineTest/Exception/SyntaxErrorExceptionTest.php @@ -16,13 +16,7 @@ public function testMessageContainsDataFromConstructor() { $exception = new SyntaxErrorException('msg 42', 24); - $assertMethod = 'assertContains'; - /* @phpstan-ignore function.alreadyNarrowedType */ - if (method_exists($this, 'assertStringContainsString')) { - $assertMethod = 'assertStringContainsString'; - } - - $this->$assertMethod('msg 42', $exception->getMessage()); - $this->$assertMethod('24', $exception->getMessage()); + $this->assertStringContainsString('msg 42', $exception->getMessage()); + $this->assertStringContainsString('24', $exception->getMessage()); } } From 644fe90245352c51a606d4a27cad0be6a5b262b3 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Fri, 22 Nov 2024 19:26:04 +0100 Subject: [PATCH 29/38] Fix build --- .php-cs-fixer.dist.php | 2 ++ composer.json | 3 ++- src/FacadeTrait.php | 9 +++++++-- src/Items.php | 2 -- src/ItemsOptions.php | 2 -- src/Parser.php | 25 +++++++------------------ src/RecursiveItems.php | 9 ++++++--- src/ResumableIteratorAggregateProxy.php | 25 ++++++++++++++++++++++--- src/functions.php | 16 ++++++++++++++++ test/JsonMachineTest/ParserTest.php | 4 ++-- 10 files changed, 64 insertions(+), 33 deletions(-) create mode 100644 src/functions.php diff --git a/.php-cs-fixer.dist.php b/.php-cs-fixer.dist.php index 410916d..b3eba16 100644 --- a/.php-cs-fixer.dist.php +++ b/.php-cs-fixer.dist.php @@ -15,6 +15,8 @@ 
'visibility_required' => false, 'php_unit_test_class_requires_covers' => true, 'declare_strict_types' => true, + 'phpdoc_to_comment' => false, // todo remove when we move to GeneratorAggregate + ]) ->setFinder($finder) ; diff --git a/composer.json b/composer.json index 9ed9f28..ce27b67 100644 --- a/composer.json +++ b/composer.json @@ -35,7 +35,8 @@ }, "autoload": { "psr-4": {"JsonMachine\\": "src/"}, - "exclude-from-classmap": ["src/autoloader.php"] + "exclude-from-classmap": ["src/autoloader.php"], + "files": ["src/functions.php"] }, "autoload-dev": { "psr-4": {"JsonMachineTest\\": "test/JsonMachineTest"} diff --git a/src/FacadeTrait.php b/src/FacadeTrait.php index c6e2b4f..ac50177 100644 --- a/src/FacadeTrait.php +++ b/src/FacadeTrait.php @@ -6,6 +6,7 @@ use JsonMachine\Exception\InvalidArgumentException; use JsonMachine\JsonDecoder\ExtJsonDecoder; +use LogicException; trait FacadeTrait { @@ -46,11 +47,15 @@ private static function createParser(iterable $bytesIterator, ItemsOptions $opti } /** - * @throws Exception\JsonMachineException + * Returns JSON bytes read so far. 
*/ public function getPosition() { - return $this->parser->getPosition(); + if ($this->parser instanceof PositionAware) { + return $this->parser->getPosition(); + } + + throw new LogicException('getPosition() may only be called on PositionAware'); } /** diff --git a/src/Items.php b/src/Items.php index 20cd832..ca183c6 100644 --- a/src/Items.php +++ b/src/Items.php @@ -68,8 +68,6 @@ public static function fromIterable($iterable, array $options = []): self /** * @return \Generator - * - * @throws Exception\PathNotFoundException */ #[\ReturnTypeWillChange] public function getIterator() diff --git a/src/ItemsOptions.php b/src/ItemsOptions.php index db4698c..e21a86c 100644 --- a/src/ItemsOptions.php +++ b/src/ItemsOptions.php @@ -56,10 +56,8 @@ private function validateOptions(array $options) private function opt_pointer($pointer) { if (is_array($pointer)) { - /** @phpstan-ignore expr.resultUnused */ (function (string ...$p) {})(...$pointer); } else { - /** @phpstan-ignore expr.resultUnused */ (function (string $p) {})($pointer); } diff --git a/src/Parser.php b/src/Parser.php index d99d744..e0315d1 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -15,6 +15,7 @@ use JsonMachine\JsonDecoder\ExtJsonDecoder; use JsonMachine\JsonDecoder\ItemDecoder; use JsonMachine\JsonDecoder\StringOnlyDecoder; +use LogicException; use Traversable; class Parser implements \IteratorAggregate, PositionAware @@ -314,18 +315,6 @@ private function createGenerator(): Generator $this->currentPath = null; } - /** - * @return Generator - */ - private function remainingTokens() - { - $iterator = $this->tokensIterator; - while ($iterator->valid()) { - yield $iterator->current(); - $iterator->next(); - } - } - public function ensureIterationComplete(): void { $generator = $this->getIterator(); @@ -429,20 +418,20 @@ private function error($msg, $token, $exception = SyntaxErrorException::class) { throw new $exception( $msg." '".$token."'", - $this->tokens instanceof PositionAware ? 
$this->tokens->getPosition() : '' + $this->getPosition() ); } /** * Returns JSON bytes read so far. - * - * @return int - * - * @throws JsonMachineException */ public function getPosition() { - return $this->tokens->getPosition(); + if ($this->tokens instanceof PositionAware) { + return $this->tokens->getPosition(); + } + + throw new LogicException('getPosition() may only be called on PositionAware'); } private static function jsonPointerToPath(string $jsonPointer): array diff --git a/src/RecursiveItems.php b/src/RecursiveItems.php index 9c1381c..bee99df 100644 --- a/src/RecursiveItems.php +++ b/src/RecursiveItems.php @@ -4,6 +4,8 @@ namespace JsonMachine; +use Exception; +use Generator; use Iterator; use IteratorAggregate; use JsonMachine\Exception\InvalidArgumentException; @@ -22,7 +24,7 @@ final class RecursiveItems implements \RecursiveIterator, PositionAware /** @var ItemsOptions */ private $options; - /** @var Iterator */ + /** @var Generator|Iterator */ private $parserIterator; public function __construct(IteratorAggregate $parser, ?ItemsOptions $options = null) @@ -131,7 +133,7 @@ public function valid(): bool public function rewind(): void { - $this->parserIterator = $this->parser->getIterator(); + $this->parserIterator = toIterator($this->parser->getIterator()); $this->parserIterator->rewind(); } @@ -187,8 +189,9 @@ public function advanceToKey($key) public function toArray(): array { try { + /** @throws Exception */ $this->rewind(); - } catch (\Exception $e) { + } catch (Exception $e) { if (false !== strpos($e->getMessage(), 'generator')) { throw new JsonMachineException( 'Method toArray() can only be called before any items in the collection have been accessed.' 
diff --git a/src/ResumableIteratorAggregateProxy.php b/src/ResumableIteratorAggregateProxy.php index 607068f..dee5f7b 100644 --- a/src/ResumableIteratorAggregateProxy.php +++ b/src/ResumableIteratorAggregateProxy.php @@ -4,26 +4,33 @@ namespace JsonMachine; +use InvalidArgumentException; use IteratorAggregate; +use LogicException; /** * Allows to resume iteration of the inner IteratorAggregate via foreach, which would be otherwise impossible as * foreach implicitly calls reset(). This Iterator does not pass the reset() call to the inner Iterator thus enabling * to follow up on a previous iteation. */ -class ResumableIteratorAggregateProxy implements IteratorAggregate +class ResumableIteratorAggregateProxy implements IteratorAggregate, PositionAware { /** @var IteratorAggregate */ private $iteratorAggregate; - public function __construct(IteratorAggregate $iteratorAggregate) + public function __construct(\Traversable $iteratorAggregate) { + // todo remove when the whole system moves to GeneratorAggregate + if ( ! $iteratorAggregate instanceof IteratorAggregate) { + throw new InvalidArgumentException('$iteratorAggregate must be an instance of IteratorAggregate'); + } + $this->iteratorAggregate = $iteratorAggregate; } public function getIterator(): \Traversable { - $iterator = $this->iteratorAggregate->getIterator(); + $iterator = toIterator($this->iteratorAggregate->getIterator()); while ($iterator->valid()) { yield $iterator->key() => $iterator->current(); $iterator->next(); @@ -34,4 +41,16 @@ public function __call($name, $arguments) { return $this->iteratorAggregate->$name(...$arguments); } + + /** + * Returns JSON bytes read so far. 
+ */ + public function getPosition() + { + if ($this->iteratorAggregate instanceof PositionAware) { + return $this->iteratorAggregate->getPosition(); + } + + throw new LogicException('getPosition() may only be called on PositionAware'); + } } diff --git a/src/functions.php b/src/functions.php new file mode 100644 index 0000000..dd45953 --- /dev/null +++ b/src/functions.php @@ -0,0 +1,16 @@ +getIterator()); + } + + if ($traversable instanceof Iterator) { + return $traversable; + } + + throw new \LogicException('Cannot turn Traversable into Iterator'); +} diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index c11adcd..29666ae 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -4,7 +4,6 @@ namespace JsonMachineTest; -use Error; use JsonMachine\Exception\JsonMachineException; use JsonMachine\Exception\PathNotFoundException; use JsonMachine\Exception\SyntaxErrorException; @@ -14,6 +13,7 @@ use JsonMachine\StringChunks; use JsonMachine\Tokens; use JsonMachine\TokensWithDebugging; +use LogicException; use Traversable; /** @@ -558,7 +558,7 @@ public function testGetPositionThrowsIfTokensDoNotSupportGetPosition() { $parser = new Parser(new \ArrayObject()); - $this->expectException(Error::class); + $this->expectException(LogicException::class); $parser->getPosition(); } From 085785773a6ff0e4673de43a84b6afee5f0e15f8 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 23 Nov 2024 11:31:17 +0100 Subject: [PATCH 30/38] phpstan version fix --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index ce27b67..ba79156 100644 --- a/composer.json +++ b/composer.json @@ -26,7 +26,7 @@ "require-dev": { "ext-json": "*", "friendsofphp/php-cs-fixer": "^3.0", - "phpstan/phpstan": "^1.0", + "phpstan/phpstan": "^1.10", "phpunit/phpunit": "^8.0" }, "suggest": { From f051ff57c0e932817625cf5be5ee3b526c69e433 Mon Sep 17 00:00:00 2001 From: Filip Halaxa 
Date: Sat, 23 Nov 2024 12:57:14 +0100 Subject: [PATCH 31/38] Readme update --- README.md | 93 ++++++++++++++++++++++++++++++++++-------- src/RecursiveItems.php | 4 +- 2 files changed, 79 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 64c4b4e..78799a6 100644 --- a/README.md +++ b/README.md @@ -320,25 +320,51 @@ foreach ($fruits as $key => $value) { ``` -### Recursive iteration (BETA) -Recursive iteration can be enabled via `recursive` option set to `true`. -Every JSON iterable that JSON Machine encounters will then be yielded as an instance of `NestedIterator`. -No JSON array or object will be materialized and kept in memory. -The only PHP values you get materialized will be scalar values. -Let's see an example with many, many users with many, many friends +### Recursive iteration +Use `RecursiveItems` instead of `Items`. +When `RecursiveItems` encounters a list or dict in the JSON, it returns a new instance of `RecursiveItems` +which can then be iterated over and the cycle repeats. +Thus, it never returns a PHP array or object, but only either scalar values or `RecursiveItems`. +No JSON vector will ever be fully loaded into memory at once. +This feature is advantageous when the JSON has a complex structure +that is difficult or even impossible to iterate over with just `Items` and JSON pointers. 
+ +Let's see an example with many, many users with many, many friends: +```json +[ + { + "username": "user", + "e-mail": "user@example.com", + "friends": [ + { + "username": "friend1", + "e-mail": "friend1@example.com" + }, + { + "username": "friend2", + "e-mail": "friend2@example.com" + } + ] + } +] +``` ```php true]); -foreach ($users as $user) { // $user instanceof Traversable, not an array/object - foreach ($user as $userField => $userValue) { - if ($userField === 'friends') { - foreach ($userValue as $friend) { // $userValue instanceof Traversable, not an array/object - foreach ($friend as $friendField => $friendValue) { // $friend instanceof Traversable, not an array/object - // do whatever you want here +use JsonMachine\RecursiveItems + +$users = RecursiveItems::fromFile('users.json'); +foreach ($users as $user) { + /** @var $user RecursiveItems */ + foreach ($user as $field => $value) { + if ($field === 'friends') { + /** @var $value RecursiveItems */ + foreach ($value as $friend) { + /** @var $friend RecursiveItems */ + foreach ($friend as $friendField => $friendValue) { + $friendField == 'username'; + $friendValue == 'friend1'; } } } @@ -351,6 +377,42 @@ foreach ($users as $user) { // $user instanceof Traversable, not an array/object > JSON Machine must iterate it the background to be able to read next value. > Such an attempt will result in closed generator exception. +#### Convenience methods of `RecursiveItems` +- `toArray(): array` +If you are sure that a certain instance of RecursiveItems is pointing to a memory-manageable data structure +(for example, $friend), you can call `$friend->toArray()`, and the item will materialize into a PHP array. + +- `advanceToKey(int|string $key): scalar|RecursiveItems` +When searching for a specific key in a collection (for example, `$user["friends"]`), +you do not need to use a loop and a condition to search for it. +Instead, you can simply call `$user->advanceToKey("friends")`. 
+It will iterate for you and return the value at this key. + +The previous example could thus be simplified as follows: +```php +advanceToKey('friends') as $friend) { + /** @var $friend RecursiveItems */ + $friendArray = $friend->toArray(); + $friendArray['username'] == 'friend1'; + } +} +``` + +#### Also `RecursiveItems implements \RecursiveIterator` +So you can use for example PHP's builtin tools to work over `\RecursiveIterator` like those: + +- [RecursiveCallbackFilterIterator](https://www.php.net/manual/en/class.recursivecallbackfilteriterator.php) +- [RecursiveFilterIterator](https://www.php.net/manual/en/class.recursivefilteriterator.php) +- [RecursiveRegexIterator](https://www.php.net/manual/en/class.recursiveregexiterator.php) +- [RecursiveTreeIterator](https://www.php.net/manual/en/class.recursivetreeiterator.php) + ### What is JSON Pointer anyway? It's a way of addressing one item in JSON document. See the [JSON Pointer RFC 6901](https://tools.ietf.org/html/rfc6901). @@ -378,7 +440,6 @@ Some examples: Options may change how a JSON is parsed. Array of options is the second parameter of all `Items::from*` functions. Available options are: - `pointer` - A JSON Pointer string that tells which part of the document you want to iterate. -- `recursive` - Bool. Any JSON array/object the parser hits will not be decoded but served lazily as a `Traversable`. Default `false`. - `decoder` - An instance of `ItemDecoder` interface. - `debug` - `true` or `false` to enable or disable the debug mode. When the debug mode is enabled, data such as line, column and position in the document are available during parsing or in exceptions. Keeping debug disabled adds slight diff --git a/src/RecursiveItems.php b/src/RecursiveItems.php index bee99df..e8895be 100644 --- a/src/RecursiveItems.php +++ b/src/RecursiveItems.php @@ -156,9 +156,9 @@ public function getChildren(): ?\RecursiveIterator * Finds the desired key on this level and returns its value. 
* It moves the internal cursor to it so subsequent calls to self::current() returns the same value. * - * @param $key + * @param string|int $key * - * @return mixed + * @return scalar|self * * @throws JsonMachineException when the key is not found on this level */ From da15ab2467b73e944c7bdc2b0844a775fb618587 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 23 Nov 2024 13:09:45 +0100 Subject: [PATCH 32/38] NestedIterator remnants deleted --- src/NestedIterator.php | 94 --------------------- test/JsonMachineTest/NestedIteratorTest.php | 47 ----------- 2 files changed, 141 deletions(-) delete mode 100644 src/NestedIterator.php delete mode 100644 test/JsonMachineTest/NestedIteratorTest.php diff --git a/src/NestedIterator.php b/src/NestedIterator.php deleted file mode 100644 index a40b76f..0000000 --- a/src/NestedIterator.php +++ /dev/null @@ -1,94 +0,0 @@ -iterator = $iterator; - } - - #[\ReturnTypeWillChange] - public function current() - { - return $this->iterator->current(); - } - - #[\ReturnTypeWillChange] - public function next() - { - $this->iterator->next(); - } - - #[\ReturnTypeWillChange] - public function key() - { - return $this->iterator->key(); - } - - #[\ReturnTypeWillChange] - public function valid() - { - return $this->iterator->valid(); - } - - #[\ReturnTypeWillChange] - public function rewind() - { - $this->iterator->rewind(); - } - - #[\ReturnTypeWillChange] - public function hasChildren() - { - return $this->iterator->current() instanceof Iterator; - } - - #[\ReturnTypeWillChange] - public function getChildren() - { - return $this->hasChildren() ? 
new self($this->current()) : null; - } - - public function advanceToKey($key) - { - $iterator = $this->iterator; - - while ($key !== $iterator->key() && $iterator->valid()) { - $iterator->next(); - } - - if ($key !== $iterator->key()) { - throw new JsonMachineException("Key '$key' was not found."); - } - - return $iterator->current(); - } - - public function toArray(): array - { - return self::toArrayRecursive($this); - } - - private static function toArrayRecursive(\Traversable $traversable): array - { - $array = []; - foreach ($traversable as $key => $value) { - if ($value instanceof \Traversable) { - $value = self::toArrayRecursive($value); - } - $array[$key] = $value; - } - - return $array; - } -} diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php deleted file mode 100644 index cc88fe2..0000000 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ /dev/null @@ -1,47 +0,0 @@ -assertSame([1, 2, 3], $result); - } - - public function testHasChildrenIgnoresArrays() - { - $generator = function () {yield from [1, [2], 3]; }; - $iterator = new NestedIterator($generator()); - - foreach ($iterator as $item) { - $this->assertFalse($iterator->hasChildren()); - } - } - - public function testGetChildrenReturnsCorrectItems() - { - $generator = function () {yield from [1, new \ArrayIterator([2]), 3]; }; - $iterator = new RecursiveIteratorIterator( - new NestedIterator($generator()) - ); - - $result = iterator_to_array($iterator, false); - - $this->assertSame([1, 2, 3], $result); - } -} From 7c62a01c72b7955e86c8d005ca8f525cacf09881 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 23 Nov 2024 13:30:02 +0100 Subject: [PATCH 33/38] Performance improvements --- composer.json | 2 +- src/Parser.php | 28 +++++++++++++++++++++------- test/performance/testPerformance.php | 19 +++++++++++++++++++ 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/composer.json b/composer.json index ba79156..e93a746 100644 --- 
a/composer.json +++ b/composer.json @@ -13,7 +13,7 @@ "tests-coverage": "build/composer-update.sh && XDEBUG_MODE=coverage vendor/bin/phpunit --coverage-clover clover.xml", "cs-check": "build/composer-update.sh && vendor/bin/php-cs-fixer fix --dry-run --verbose --allow-risky=yes", "cs-fix": "build/composer-update.sh && vendor/bin/php-cs-fixer fix --verbose --allow-risky=yes", - "phpstan": "build/composer-update.sh && vendor/bin/phpstan analyse", + "phpstan": "build/composer-update.sh && vendor/bin/phpstan --memory-limit=-1 analyse", "performance-tests": "php -d xdebug.mode=off -d opcache.enable_cli=1 -d opcache.jit_buffer_size=100M test/performance/testPerformance.php" }, "config": { diff --git a/src/Parser.php b/src/Parser.php index e0315d1..57b6ac4 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -66,6 +66,9 @@ class Parser implements \IteratorAggregate, PositionAware /** @var bool */ private $recursive; + /** @var array */ + private static $allBytes; + /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 * @param ?ItemDecoder $jsonDecoder @@ -74,7 +77,16 @@ class Parser implements \IteratorAggregate, PositionAware */ public function __construct(Traversable $tokens, $jsonPointer = '', ?ItemDecoder $jsonDecoder = null, $recursive = false) { - $jsonPointers = (new ValidJsonPointers((array) $jsonPointer))->toArray(); + if ($jsonPointer) { + $jsonPointers = (new ValidJsonPointers((array) $jsonPointer))->toArray(); + $this->hasSingleJsonPointer = (count($jsonPointers) === 1); + $this->jsonPointers = array_combine($jsonPointers, $jsonPointers); + $this->paths = $this->buildPaths($this->jsonPointers); + } else { + $this->hasSingleJsonPointer = true; + $this->jsonPointers = ['' => '']; + $this->paths = ['' => []]; + } $this->tokens = $tokens; if ($tokens instanceof IteratorAggregate) { @@ -89,9 +101,6 @@ public function __construct(Traversable $tokens, $jsonPointer = '', ?ItemDecoder if ($recursive) { $this->jsonDecoder = 
new StringOnlyDecoder($this->jsonDecoder); } - $this->hasSingleJsonPointer = (count($jsonPointers) === 1); - $this->jsonPointers = array_combine($jsonPointers, $jsonPointers); - $this->paths = $this->buildPaths($this->jsonPointers); $this->recursive = $recursive; } @@ -118,7 +127,11 @@ public function getIterator(): Generator */ private function createGenerator(): Generator { - $tokenTypes = $this->tokenTypes(); + if ( ! self::$allBytes) { + self::$allBytes = $this->tokenTypes(); + } + + $tokenTypes = self::$allBytes; $iteratorStruct = null; $currentPath = &$this->currentPath; @@ -320,8 +333,9 @@ public function ensureIterationComplete(): void $generator = $this->getIterator(); while ($generator->valid()) { - $generator->key(); - $generator->current(); +// var_dump(is_object($generator->current()) ? get_class($generator->current()) : $generator->current()); +// $generator->key(); +// $generator->current(); $generator->next(); } } diff --git a/test/performance/testPerformance.php b/test/performance/testPerformance.php index fc7c50f..5e540d5 100644 --- a/test/performance/testPerformance.php +++ b/test/performance/testPerformance.php @@ -2,7 +2,12 @@ declare(strict_types=1); +use JsonMachine\FileChunks; use JsonMachine\Items; +use JsonMachine\JsonDecoder\ExtJsonDecoder; +use JsonMachine\Parser; +use JsonMachine\RecursiveItems; +use JsonMachine\Tokens; require_once __DIR__.'/../../vendor/autoload.php'; @@ -27,6 +32,19 @@ ini_set('memory_limit', '-1'); // for json_decode use case $decoders = [ + 'RecursiveItems::fromFile()' => function ($file) { + return RecursiveItems::fromFile($file); + }, + 'Parser recursive' => function ($file) { + return new Parser( + new Tokens( + new FileChunks($file) + ), + '', + new ExtJsonDecoder(), + true + ); + }, 'Items::fromFile()' => function ($file) { return Items::fromFile($file); }, @@ -69,6 +87,7 @@ function createBigJsonFile() $f = fopen($tmpJson, 'w'); $separator = ''; fputs($f, '['); +// for ($i = 0; $i < 1; ++$i) { for ($i = 0; $i 
< 6000; ++$i) { fputs($f, $separator); fputs($f, file_get_contents(__DIR__.'/twitter_example_'.($i % 2).'.json')); From 1df75dd78247ec0c7d3cea1eae4670b0f1e473df Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 23 Nov 2024 16:17:14 +0100 Subject: [PATCH 34/38] Dead code removal --- src/Exception/UnexpectedValueException.php | 9 --------- src/ItemsOptions.php | 6 ------ test/JsonMachineTest/ItemsOptionsTest.php | 1 - 3 files changed, 16 deletions(-) delete mode 100644 src/Exception/UnexpectedValueException.php diff --git a/src/Exception/UnexpectedValueException.php b/src/Exception/UnexpectedValueException.php deleted file mode 100644 index 8c23554..0000000 --- a/src/Exception/UnexpectedValueException.php +++ /dev/null @@ -1,9 +0,0 @@ - '', 'decoder' => new ExtJsonDecoder(), 'debug' => false, - 'recursive' => false, ]; } diff --git a/test/JsonMachineTest/ItemsOptionsTest.php b/test/JsonMachineTest/ItemsOptionsTest.php index 3d110c6..a317a9a 100644 --- a/test/JsonMachineTest/ItemsOptionsTest.php +++ b/test/JsonMachineTest/ItemsOptionsTest.php @@ -53,7 +53,6 @@ private function defaultOptions() 'pointer' => '', 'decoder' => new ExtJsonDecoder(), 'debug' => false, - 'recursive' => false, ]; } From 21bea751d0108e48de35408ecdfe189ea6ed0539 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 23 Nov 2024 22:18:44 +0100 Subject: [PATCH 35/38] Recursive focused performance optimizations.
Ops outside the main foreach in Parser now matter as new Parser is created for each level --- src/JsonDecoder/ExtJsonDecoder.php | 14 ++++++++++++++ src/JsonDecoder/StringOnlyDecoder.php | 14 ++++++++++++++ src/Parser.php | 22 ++++++++++++---------- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/JsonDecoder/ExtJsonDecoder.php b/src/JsonDecoder/ExtJsonDecoder.php index 7a3df20..31ec068 100644 --- a/src/JsonDecoder/ExtJsonDecoder.php +++ b/src/JsonDecoder/ExtJsonDecoder.php @@ -21,6 +21,11 @@ class ExtJsonDecoder implements ItemDecoder */ private $options; + /** + * @var self + */ + private static $instance; + public function __construct($assoc = false, $depth = 512, $options = 0) { $this->assoc = $assoc; @@ -37,4 +42,13 @@ public function decode($jsonValue) return new ValidResult($decoded); } + + public static function instance(): self + { + if ( ! self::$instance) { + self::$instance = new self(); + } + + return self::$instance; + } } diff --git a/src/JsonDecoder/StringOnlyDecoder.php b/src/JsonDecoder/StringOnlyDecoder.php index ebdf544..2858379 100644 --- a/src/JsonDecoder/StringOnlyDecoder.php +++ b/src/JsonDecoder/StringOnlyDecoder.php @@ -9,6 +9,11 @@ class StringOnlyDecoder implements ItemDecoder /** @var ItemDecoder */ private $innerDecoder; + /** + * @var self + */ + private static $instance; + public function __construct(ItemDecoder $innerDecoder) { $this->innerDecoder = $innerDecoder; @@ -22,4 +27,13 @@ public function decode($jsonValue) return new ValidResult($jsonValue); } + + public static function instance(ItemDecoder $innerDecoder): self + { + if ( ! 
self::$instance) { + self::$instance = new self($innerDecoder); + } + + return self::$instance; + } } diff --git a/src/Parser.php b/src/Parser.php index 57b6ac4..d6f62e5 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -67,7 +67,7 @@ class Parser implements \IteratorAggregate, PositionAware private $recursive; /** @var array */ - private static $allBytes; + private static $tokenTypes; /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 @@ -97,10 +97,15 @@ public function __construct(Traversable $tokens, $jsonPointer = '', ?ItemDecoder throw new InvalidArgumentException('$tokens must be either an instance of Iterator or IteratorAggregate.'); } - $this->jsonDecoder = $jsonDecoder ?: new ExtJsonDecoder(); - if ($recursive) { - $this->jsonDecoder = new StringOnlyDecoder($this->jsonDecoder); + if ($jsonDecoder instanceof StringOnlyDecoder) { + $this->jsonDecoder = $jsonDecoder; + } else { + $this->jsonDecoder = $jsonDecoder ?: new ExtJsonDecoder(); + if ($recursive) { + $this->jsonDecoder = new StringOnlyDecoder($this->jsonDecoder); + } } + $this->recursive = $recursive; } @@ -127,11 +132,11 @@ public function getIterator(): Generator */ private function createGenerator(): Generator { - if ( ! self::$allBytes) { - self::$allBytes = $this->tokenTypes(); + if ( ! self::$tokenTypes) { + self::$tokenTypes = $this->tokenTypes(); } - $tokenTypes = self::$allBytes; + $tokenTypes = self::$tokenTypes; $iteratorStruct = null; $currentPath = &$this->currentPath; @@ -333,9 +338,6 @@ public function ensureIterationComplete(): void $generator = $this->getIterator(); while ($generator->valid()) { -// var_dump(is_object($generator->current()) ? 
get_class($generator->current()) : $generator->current()); -// $generator->key(); -// $generator->current(); $generator->next(); } } From 4275f1866bc40e573f19d5c147cf7b89cc27e03f Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sun, 24 Nov 2024 10:13:45 +0100 Subject: [PATCH 36/38] Readme updates --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 78799a6..c9cebd4 100644 --- a/README.md +++ b/README.md @@ -52,10 +52,12 @@ for PHP >=7.2. See [TL;DR](#tl-dr). No dependencies in production except optiona use \JsonMachine\Items; -// this often causes Allowed Memory Size Exhausted +// this often causes Allowed Memory Size Exhausted, +// because it loads all the items in the JSON into memory - $users = json_decode(file_get_contents('500MB-users.json')); -// this usually takes few kB of memory no matter the file size +// this has very small memory footprint no matter the file size +// because it loads items into memory one by one + $users = Items::fromFile('500MB-users.json'); foreach ($users as $id => $user) { @@ -68,9 +70,10 @@ Random access like `$users[42]` is not yet possible. Use above-mentioned `foreach` and find the item or use [JSON Pointer](#parsing-a-subtree). Count the items via [`iterator_count($users)`](https://www.php.net/manual/en/function.iterator-count.php). -Remember it will still have to internally iterate the whole thing to get the count and thus will take about the same time. +Remember it will still have to internally iterate the whole thing to get the count and thus will take about the same time +as iterating it and counting by hand. -Requires `ext-json` if used out of the box. See [Decoders](#decoders). +Requires `ext-json` if used out of the box but doesn't if a custom decoder is used. See [Decoders](#decoders). Follow [CHANGELOG](CHANGELOG.md). 
From 81db7bc10faf96287b2535efa980b159898a82ce Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sun, 24 Nov 2024 10:35:21 +0100 Subject: [PATCH 37/38] RecursiveItems::advanceToKey() chaining + array access --- CHANGELOG.md | 2 +- README.md | 28 ++++++++-- src/Exception/BadMethodCallException.php | 9 +++ src/Exception/OutOfBoundsException.php | 9 +++ src/RecursiveItems.php | 54 ++++++++++++++++-- test/JsonMachineTest/RecursiveItemsTest.php | 61 ++++++++++++++++++++- 6 files changed, 151 insertions(+), 12 deletions(-) create mode 100644 src/Exception/BadMethodCallException.php create mode 100644 src/Exception/OutOfBoundsException.php diff --git a/CHANGELOG.md b/CHANGELOG.md index cd54e85..c2209f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## master ### Added -- Recursive iteration via new facade `RecursiveItems`. +- Recursive iteration via new facade `RecursiveItems`. See **Recursive iteration** in README.
diff --git a/README.md b/README.md index c9cebd4..6af3966 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,9 @@ for PHP >=7.2. See [TL;DR](#tl-dr). No dependencies in production except optiona [![Latest Stable Version](https://img.shields.io/github/v/release/halaxa/json-machine?color=blueviolet&include_prereleases&logoColor=white)](https://packagist.org/packages/halaxa/json-machine) [![Monthly Downloads](https://img.shields.io/packagist/dt/halaxa/json-machine?color=%23f28d1a)](https://packagist.org/packages/halaxa/json-machine) +--- +NEW in version `1.2.0` - [Recursive iteration](#recursive) + --- * [TL;DR](#tl-dr) @@ -383,13 +386,18 @@ foreach ($users as $user) { #### Convenience methods of `RecursiveItems` - `toArray(): array` If you are sure that a certain instance of RecursiveItems is pointing to a memory-manageable data structure -(for example, $friend), you can call `$friend->toArray()`, and the item will materialize into a PHP array. +(for example, $friend), you can call `$friend->toArray()`, and the item will materialize into a plain PHP array. - `advanceToKey(int|string $key): scalar|RecursiveItems` -When searching for a specific key in a collection (for example, `$user["friends"]`), +When searching for a specific key in a collection (for example, `'friends'` in `$user`), you do not need to use a loop and a condition to search for it. Instead, you can simply call `$user->advanceToKey("friends")`. -It will iterate for you and return the value at this key. +It will iterate for you and return the value at this key. Calls can be chained. +It also supports **array like syntax** for advancing to and getting following indices. +So `$user['friends']` would be an alias for `$user->advanceToKey('friends')`. Calls can be chained. +Keep in min that it's just an alias - **you won't be able to random-access previous indices** +after using this directly on `RecursiveItems`. It's just a syntax sugar. +Use `toArray()` if you need random access to indices on a record/item. 
The previous example could thus be simplified as follows: ```php @@ -400,12 +408,22 @@ use JsonMachine\RecursiveItems $users = RecursiveItems::fromFile('users.json'); foreach ($users as $user) { /** @var $user RecursiveItems */ - foreach ($user->advanceToKey('friends') as $friend) { + foreach ($user['friends'] as $friend) { // or $user->advanceToKey('friends') /** @var $friend RecursiveItems */ $friendArray = $friend->toArray(); - $friendArray['username'] == 'friend1'; + $friendArray['username'] === 'friend1'; } } +``` +Chaining allows you to do something like this: +```php +options); } elseif ( ! is_scalar($current)) { - throw new JsonMachineException( + throw new InvalidArgumentException( sprintf( '%s only accepts scalar or IteratorAggregate values. %s given.', self::class, @@ -160,21 +162,21 @@ public function getChildren(): ?\RecursiveIterator * * @return scalar|self * - * @throws JsonMachineException when the key is not found on this level + * @throws OutOfBoundsException when the key is not found on this level */ public function advanceToKey($key) { if ( ! 
$this->parserIterator) { $this->rewind(); } - $iterator = $this->parserIterator; + $iterator = $this; while ($key !== $iterator->key() && $iterator->valid()) { $iterator->next(); } if ($key !== $iterator->key()) { - throw new JsonMachineException("Key '$key' was not found."); + throw new OutOfBoundsException("Key '$key' was not found."); } return $iterator->current(); @@ -214,4 +216,46 @@ private static function toArrayRecursive(self $traversable): array return $array; } + + public function offsetExists($offset): bool + { + try { + $this->advanceToKey($offset); + + return true; + } catch (JsonMachineException $e) { + return false; + } + } + + #[\ReturnTypeWillChange] + public function offsetGet($offset) + { + return $this->advanceToKey($offset); + } + + /** + * @param $offset + * @param $value + * + * @throws BadMethodCallException + * + * @deprecated + */ + public function offsetSet($offset, $value): void + { + throw new BadMethodCallException('Unsupported: Cannot set a value on a JSON stream'); + } + + /** + * @param $offset + * + * @throws BadMethodCallException + * + * @deprecated + */ + public function offsetUnset($offset): void + { + throw new BadMethodCallException('Unsupported: Cannot unset a value from a JSON stream'); + } } diff --git a/test/JsonMachineTest/RecursiveItemsTest.php b/test/JsonMachineTest/RecursiveItemsTest.php index 49e151a..fcb0b40 100644 --- a/test/JsonMachineTest/RecursiveItemsTest.php +++ b/test/JsonMachineTest/RecursiveItemsTest.php @@ -70,7 +70,7 @@ public function testGetChildrenReturnsNestedIterator() $this->assertSame(null, $result[2]); } - public function testAdvanceToKeyWorks() + public function testAdvanceToKeyWorksOnScalars() { $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; $iterator = new RecursiveItems(toIteratorAggregate($generator())); @@ -81,6 +81,24 @@ public function testAdvanceToKeyWorks() $this->assertSame(3, $iterator->advanceToKey('three')); } + public function 
testArrayAccessIsASyntaxSugarToAdvanceToKey() + { + $generator = function () { + yield 'one' => 1; + yield 'two' => 2; + yield 'three' => 3; + }; + $iterator = new RecursiveItems(toIteratorAggregate($generator())); + + $this->assertTrue(isset($iterator['two'])); + $this->assertTrue(isset($iterator['two'])); + + $this->assertSame(2, $iterator['two']); + $this->assertSame(3, $iterator['three']); + + $this->assertFalse(isset($iterator['four'])); + } + public function testAdvanceToKeyThrows() { $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; @@ -90,6 +108,47 @@ public function testAdvanceToKeyThrows() $iterator->advanceToKey('four'); } + public function testAdvanceToKeyCanBeChained() + { + $generator = function ($iterable) {yield from ['one' => 1, 'two' => 2, 'i' => $iterable, 'three' => 3]; }; + $iterator = new RecursiveItems( + toIteratorAggregate($generator( + toIteratorAggregate($generator( + toIteratorAggregate(new \ArrayIterator(['42'])) + )) + )) + ); + + $this->assertSame( + '42', + $iterator + ->advanceToKey('i') + ->advanceToKey('i') + ->advanceToKey(0) + ); + } + + public function testAdvanceToKeyArraySyntaxCanBeChained() + { + $generator = function ($iterable) {yield from ['one' => 1, 'two' => 2, 'i' => $iterable, 'three' => 3]; }; + $iterator = new RecursiveItems( + toIteratorAggregate($generator( + toIteratorAggregate($generator( + toIteratorAggregate(new \ArrayIterator(['42'])) + )) + )) + ); + + $this->assertSame('42', $iterator['i']['i'][0]); + } + + public function testAdvanceToKeyArraySyntaxCanBeChainedE2E() + { + $iterator = RecursiveItems::fromString('[[[42]]]'); + + $this->assertSame(42, $iterator[0][0][0]); + } + public function testToArray() { $generator = function ($iterable) {yield from ['one' => 1, 'two' => 2, 'i' => $iterable, 'three' => 3]; }; From d5024dfd49cfb35b5907bf533db7b638e3fd3c29 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sun, 24 Nov 2024 12:57:00 +0100 Subject: [PATCH 38/38] Readme updates 
--- README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 6af3966..469c421 100644 --- a/README.md +++ b/README.md @@ -327,16 +327,18 @@ foreach ($fruits as $key => $value) { ### Recursive iteration -Use `RecursiveItems` instead of `Items`. -When `RecursiveItems` encounters a list or dict in the JSON, it returns a new instance of `RecursiveItems` +Use `RecursiveItems` instead of `Items` when the JSON structure is difficult or even impossible to handle with `Items` +and JSON pointers or the individual items you iterate are too big to handle. +On the other hand it's notably slower than `Items`, so bear that in mind. + +When `RecursiveItems` encounters a list or dict in the JSON, it returns a new instance of itself which can then be iterated over and the cycle repeats. Thus, it never returns a PHP array or object, but only either scalar values or `RecursiveItems`. -No JSON vector will ever be fully loaded into memory at once. -This feature is advantageous when the JSON has a complex structure -that is difficult or even impossible to iterate over with just `Items` and JSON pointers. +No JSON dict nor list will ever be fully loaded into memory at once. Let's see an example with many, many users with many, many friends: ```json +// users.json [ { "username": "user", @@ -380,7 +382,7 @@ foreach ($users as $user) { > If you break an iteration of such lazy deeper-level (i.e. you skip some `"friends"` via `break`) > and advance to a next value (i.e. next `user`), you will not be able to iterate it later. -> JSON Machine must iterate it the background to be able to read next value. +> JSON Machine must iterate it in the background to be able to read next value. > Such an attempt will result in closed generator exception. #### Convenience methods of `RecursiveItems` @@ -395,7 +397,7 @@ Instead, you can simply call `$user->advanceToKey("friends")`. It will iterate for you and return the value at this key. 
Calls can be chained. It also supports **array like syntax** for advancing to and getting following indices. So `$user['friends']` would be an alias for `$user->advanceToKey('friends')`. Calls can be chained. -Keep in min that it's just an alias - **you won't be able to random-access previous indices** +Keep in mind that it's just an alias - **you won't be able to random-access previous indices** after using this directly on `RecursiveItems`. It's just a syntax sugar. Use `toArray()` if you need random access to indices on a record/item.