diff --git a/CHANGELOG.md b/CHANGELOG.md index 04ce2a256..728da8c90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ You can find and compare releases at the [GitHub release page](https://github.co - Throw if `Introspection::fromSchema()` returns no data - Reorganize abstract class `ASTValidationContext` to interface `ValidationContext` - Reorganize AST interfaces related to schema and type extensions +- Align `Utils::suggestionList()` with the reference implementation (#1075) ### Added diff --git a/src/Utils/LexicalDistance.php b/src/Utils/LexicalDistance.php new file mode 100644 index 000000000..33f0f23db --- /dev/null +++ b/src/Utils/LexicalDistance.php @@ -0,0 +1,129 @@ + + */ + private array $inputArray; + + public function __construct(string $input) + { + $this->input = $input; + $this->inputLowerCase = \strtolower($input); + $this->inputArray = self::stringToArray($this->inputLowerCase); + } + + public function measure(string $option, float $threshold): ?int + { + if ($this->input === $option) { + return 0; + } + + $optionLowerCase = \strtolower($option); + + // Any case change counts as a single edit + if ($this->inputLowerCase === $optionLowerCase) { + return 1; + } + + $a = self::stringToArray($optionLowerCase); + $b = $this->inputArray; + + if (\count($a) < \count($b)) { + $tmp = $a; + $a = $b; + $b = $tmp; + } + + $aLength = \count($a); + $bLength = \count($b); + + if ($aLength - $bLength > $threshold) { + return null; + } + + /** @var array> $rows */ + $rows = []; + for ($i = 0; $i <= $bLength; ++$i) { + $rows[0][$i] = $i; + } + + for ($i = 1; $i <= $aLength; ++$i) { + $upRow = &$rows[($i - 1) % 3]; + $currentRow = &$rows[$i % 3]; + + $smallestCell = ($currentRow[0] = $i); + for ($j = 1; $j <= $bLength; ++$j) { + $cost = $a[$i - 1] === $b[$j - 1] ? 0 : 1; + + $currentCell = \min( + $upRow[$j] + 1, // delete + $currentRow[$j - 1] + 1, // insert + $upRow[$j - 1] + $cost, // substitute + ); + + if ($i > 1 && $j > 1 && $a[$i - 1] === $b[$j - 2] && $a[$i - 2] === $b[$j - 1]) { + // transposition + $doubleDiagonalCell = $rows[($i - 2) % 3][$j - 2]; + $currentCell = \min($currentCell, $doubleDiagonalCell + 1); + } + + if ($currentCell < $smallestCell) { + $smallestCell = $currentCell; + } + + $currentRow[$j] = $currentCell; + } + + // Early exit, since distance can't go smaller than smallest element of the previous row. + if ($smallestCell > $threshold) { + return null; + } + } + + $distance = $rows[$aLength % 3][$bLength]; + + return $distance <= $threshold ? $distance : null; + } + + /** + * Returns a list of char codes in the given string. + * + * @return array + */ + private static function stringToArray(string $str): array + { + $array = []; + foreach (\mb_str_split($str) as $char) { + $array[] = \mb_ord($char); + } + + return $array; + } +} diff --git a/src/Utils/Utils.php b/src/Utils/Utils.php index f8d133e92..d413e14c9 100644 --- a/src/Utils/Utils.php +++ b/src/Utils/Utils.php @@ -6,7 +6,6 @@ use function array_map; use function array_reduce; use function array_slice; -use function asort; use function count; use function dechex; use function get_class; @@ -20,7 +19,6 @@ use function is_scalar; use function is_string; use function json_encode; -use function levenshtein; use function mb_convert_encoding; use function mb_strlen; use function mb_substr; @@ -31,7 +29,6 @@ use function property_exists; use function range; use stdClass; -use function strtolower; use function unpack; class Utils @@ -284,34 +281,30 @@ static function ($list, $index) use ($selected, $selectedLength): string { * Given an invalid input string and a list of valid options, returns a filtered * list of valid options sorted based on their similarity with the input. * - * Includes a custom alteration from Damerau-Levenshtein to treat case changes - * as a single edit which helps identify mis-cased values with an edit distance - * of 1 - * * @param array $options * * @return array */ public static function suggestionList(string $input, array $options): array { + /** @var array $optionsByDistance */ $optionsByDistance = []; + $lexicalDistance = new LexicalDistance($input); $threshold = mb_strlen($input) * 0.4 + 1; foreach ($options as $option) { - if ($input === $option) { - $distance = 0; - } else { - $distance = (strtolower($input) === strtolower($option) - ? 1 - : levenshtein($input, $option)); - } + $distance = $lexicalDistance->measure($option, $threshold); - if ($distance <= $threshold) { + if ($distance !== null) { $optionsByDistance[$option] = $distance; } } - asort($optionsByDistance); + \uksort($optionsByDistance, static function (string $a, string $b) use ($optionsByDistance) { + $distanceDiff = $optionsByDistance[$a] - $optionsByDistance[$b]; + + return $distanceDiff !== 0 ? $distanceDiff : \strnatcmp($a, $b); + }); - return array_keys($optionsByDistance); + return array_map('strval', array_keys($optionsByDistance)); } } diff --git a/tests/Error/ErrorTest.php b/tests/Error/ErrorTest.php index 21e964ad6..db2340108 100644 --- a/tests/Error/ErrorTest.php +++ b/tests/Error/ErrorTest.php @@ -201,7 +201,7 @@ public function getNodes(): ?array self::assertEquals([1 => 2], $locatedError->getPositions()); self::assertNotNull($locatedError->getSource()); - $error = new class('msg', new NullValueNode([]), null, [], ) extends Error { + $error = new class('msg', new NullValueNode([]), null, []) extends Error { public function getNodes(): ?array { return [new NullValueNode([])]; diff --git a/tests/Executor/DeferredFieldsTest.php b/tests/Executor/DeferredFieldsTest.php index 22d2b0315..62c12b0c7 100644 --- a/tests/Executor/DeferredFieldsTest.php +++ b/tests/Executor/DeferredFieldsTest.php @@ -592,10 +592,10 @@ public function testDeferredChaining(): void } '); - $author1 = ['name' => 'John'/*, 'bestFriend' => ['name' => 'Dirk']*/]; - $author2 = ['name' => 'Jane'/*, 'bestFriend' => ['name' => 'Joe']*/]; - $author3 = ['name' => 'Joe'/*, 'bestFriend' => ['name' => 'Jane']*/]; - $author4 = ['name' => 'Dirk'/*, 'bestFriend' => ['name' => 'John']*/]; + $author1 = ['name' => 'John'/* , 'bestFriend' => ['name' => 'Dirk'] */]; + $author2 = ['name' => 'Jane'/* , 'bestFriend' => ['name' => 'Joe'] */]; + $author3 = ['name' => 'Joe'/* , 'bestFriend' => ['name' => 'Jane'] */]; + $author4 = ['name' => 'Dirk'/* , 'bestFriend' => ['name' => 'John'] */]; $story1 = ['title' => 'Story #8', 'author' => $author1]; $story2 = ['title' => 'Story #3', 'author' => $author3]; diff --git a/tests/Type/EnumTypeTest.php b/tests/Type/EnumTypeTest.php index 276b1eba1..a541f3b35 100644 --- a/tests/Type/EnumTypeTest.php +++ b/tests/Type/EnumTypeTest.php @@ -348,8 +348,7 @@ public function testDoesNotAcceptValuesWithIncorrectCasing(): void '{ colorEnum(fromEnum: green) }', null, [ - // Improves upon the reference implementation - 'message' => 'Value "green" does not exist in "Color" enum. Did you mean the enum value "GREEN"?', + 'message' => 'Value "green" does not exist in "Color" enum. Did you mean the enum value "GREEN" or "RED"?', 'locations' => [new SourceLocation(1, 23)], ] ); diff --git a/tests/Type/ResolveInfoTest.php b/tests/Type/ResolveInfoTest.php index 5b40d8c75..188a67100 100644 --- a/tests/Type/ResolveInfoTest.php +++ b/tests/Type/ResolveInfoTest.php @@ -322,7 +322,7 @@ public function testMergedFragmentsFieldSelection(): void 'url' => true, ], 'replies' => [ - 'body' => true, //this would be missing if not for the fix https://github.com/webonyx/graphql-php/pull/98 + 'body' => true, // this would be missing if not for the fix https://github.com/webonyx/graphql-php/pull/98 'author' => [ 'id' => true, 'name' => true, diff --git a/tests/Utils/BreakingChangesFinderTest.php b/tests/Utils/BreakingChangesFinderTest.php index 9f30b35d4..853f3521a 100644 --- a/tests/Utils/BreakingChangesFinderTest.php +++ b/tests/Utils/BreakingChangesFinderTest.php @@ -30,7 +30,7 @@ public function setUp(): void ]); } - //DESCRIBE: findBreakingChanges + // DESCRIBE: findBreakingChanges /** * @see it('should detect if a type was removed or not') @@ -1769,7 +1769,7 @@ public function testShouldDetectIfATypeWasAddedToAUnionType(): void ], ]); // logially equivalent to type1; findTypesRemovedFromUnions should not - //treat this as different than type1 + // treat this as different than type1 $type1a = new ObjectType([ 'name' => 'Type1', 'fields' => [ diff --git a/tests/Utils/SuggestionListTest.php b/tests/Utils/SuggestionListTest.php index b0221c488..2988001c3 100644 --- a/tests/Utils/SuggestionListTest.php +++ b/tests/Utils/SuggestionListTest.php @@ -7,16 +7,15 @@ class SuggestionListTest extends TestCase { - // DESCRIBE: suggestionList - /** + * @see describe('suggestionList') * @see it('Returns results when input is empty') */ - public function testResturnsResultsWhenInputIsEmpty(): void + public function testReturnsResultsWhenInputIsEmpty(): void { - self::assertEquals( + self::assertSame( + ['a'], Utils::suggestionList('', ['a']), - ['a'] ); } @@ -25,20 +24,111 @@ public function testResturnsResultsWhenInputIsEmpty(): void */ public function testReturnsEmptyArrayWhenThereAreNoOptions(): void { - self::assertEquals( + self::assertSame( + [], Utils::suggestionList('input', []), - [] ); } /** - * @see it('Returns options sorted based on similarity') + * @see it('Returns options with small lexical distance') + */ + public function testReturnsOptionsWithSmallLexicalDistance(): void + { + self::assertSame( + ['green'], + Utils::suggestionList('greenish', ['green']), + ); + self::assertSame( + ['greenish'], + Utils::suggestionList('green', ['greenish']), + ); + } + + /** + * @see it('Rejects options with distance that exceeds threshold') + */ + public function testRejectsOptionsWithDistanceThatExceedsThreshold(): void + { + self::assertSame( + ['aaab'], + Utils::suggestionList('aaaa', ['aaab']), + ); + self::assertSame( + ['aabb'], + Utils::suggestionList('aaaa', ['aabb']), + ); + self::assertSame( + [], + Utils::suggestionList('aaaa', ['abbb']), + ); + self::assertSame( + [], + Utils::suggestionList('ab', ['ca']), + ); + } + + /** + * @see it('Returns options with different case') + */ + public function testReturnsOptionsWithDifferentCase(): void + { + self::assertSame( + ['VERYLONGSTRING'], + Utils::suggestionList('verylongstring', ['VERYLONGSTRING']), + ); + self::assertSame( + ['verylongstring'], + Utils::suggestionList('VERYLONGSTRING', ['verylongstring']), + ); + self::assertSame( + ['VeryLongString'], + Utils::suggestionList('VERYLONGSTRING', ['VeryLongString']), + ); + } + + /** + * @see it('Returns options with transpositions') + */ + public function testReturnsOptionsWithTranspositions(): void + { + self::assertSame( + ['arg'], + Utils::suggestionList('agr', ['arg']), + ); + self::assertSame( + ['123456789'], + Utils::suggestionList('214365879', ['123456789']), + ); + } + + /** + * @see it('Returns options sorted based on lexical distance') */ - public function testReturnsOptionsSortedBasedOnSimilarity(): void + public function testReturnsOptionsSortedBasedOnLexicalDistance(): void { - self::assertEquals( + self::assertSame( + ['abc', 'ab', 'a'], Utils::suggestionList('abc', ['a', 'ab', 'abc']), - ['abc', 'ab', 'a'] + ); + } + + /** + * @see it('Returns options with the same lexical distance sorted lexicographically') + */ + public function testReturnsOptionsWithTheSameLexicalDistanceSortedLexicographically(): void + { + self::assertSame( + ['ax', 'ay', 'az'], + Utils::suggestionList('a', ['az', 'ax', 'ay']), + ); + } + + public function testReturnsNumericStringOptionsAsStrings(): void + { + self::assertSame( + ['12', '13', '14'], + Utils::suggestionList('1', ['12', '13', '14']), ); } }