diff --git a/README.md b/README.md index c3d3122..832aaad 100644 --- a/README.md +++ b/README.md @@ -123,12 +123,13 @@ via `getFullName()`: echo $name->getFullName(); // J. Peter M. Schluter ``` -### Setting Languages +### Setting Definitions (Languages) ```php $parser = new TheIconic\NameParser\Parser([ - new TheIconic\NameParser\Language\English(), //default - new TheIconic\NameParser\Language\German(), -]) + new TheIconic\NameParser\Definition\English\Basics(), //default + new TheIconic\NameParser\Definition\German\Basics(), + new TheIconic\NameParser\Definition\Configurable([...], [...], [...]), +]); ``` ### Setting nickname delimiters @@ -201,9 +202,9 @@ occur with different salutations, last name prefixes, suffixes etc. or in some cases even with the parsing order. To solve problems with salutations, last name prefixes and suffixes -you can create a separate language definition file and inject it when -instantiating the parser, see 'Setting Languages' above and compare -the existing language files as examples. +you can create a separate definition file and inject it when +instantiating the parser, see 'Setting Definitions' above and compare +the existing definition files as examples. To deal with parsing order you may want to reformat the input string, e.g. by simply splitting it into words and reversing their order. @@ -235,7 +236,7 @@ nick names from the input string and then use these to implement gender detection using another package (e.g. [this one](https://github.com/tuqqu/gender-detector)) or service. ### Having fun with normalisation -Writing different language files can not only be useful for parsing, +Writing different definition files can not only be useful for parsing, but you can remap the normalised versions of salutations, prefixes and suffixes to transform them into something totally different. @@ -248,6 +249,12 @@ gist. Of course this can also be used in more useful ways, e.g. to spell out abbreviated titles, like `Prof.` as `Professor` etc. . +### Dynamic definition classes +As the `Configurable` definition class shows, your custom definitions +do not have to be defined in fixed constants. You can implement definition +files that load them from a configuration file or from a database or from +an API. And of course you can combine multiple of those. + ## License THE ICONIC Name Parser library for PHP is released under the MIT License. diff --git a/src/Definition/Configurable.php b/src/Definition/Configurable.php new file mode 100644 index 0000000..5b286ae --- /dev/null +++ b/src/Definition/Configurable.php @@ -0,0 +1,72 @@ +salutations = $this->sanitize($salutations); + $this->suffixes = $this->sanitize($suffixes); + $this->lastnamePrefixes = $this->sanitize($lastnamePrefixes); + } + + public function addSalutations(array $salutations): void + { + $this->salutations = array_merge( + $this->salutations, + $this->sanitize($salutations) + ); + } + + public function addSuffixes(array $suffixes): void + { + $this->suffixes = array_merge( + $this->suffixes, + $this->sanitize($suffixes) + ); + } + + public function addLastnamePrefixes(array $lastnamePrefixes): void + { + $this->lastnamePrefixes = array_merge( + $this->lastnamePrefixes, + $this->sanitize($lastnamePrefixes) + ); + } + + public function getSalutations(): array + { + return $this->salutations; + } + + public function getSuffixes(): array + { + return $this->suffixes; + } + + public function getLastnamePrefixes(): array + { + return $this->lastnamePrefixes; + } + + private function sanitize(array $mappings): array + { + $sanitized = []; + + foreach ($mappings as $alias => $normalized) { + $sanitized[strtolower($alias)] = $normalized; + } + + return $sanitized; + } +} diff --git a/src/Language/English.php b/src/Definition/English/Basics.php similarity index 82% rename from src/Language/English.php rename to src/Definition/English/Basics.php index 08d9585..4647839 100644 --- a/src/Language/English.php +++ b/src/Definition/English/Basics.php @@ -1,10 +1,10 @@ '1st', @@ -35,8 +35,11 @@ class English implements LanguageInterface ]; const SALUTATIONS = [ + 'dame' => 'Dame', 'dr' => 'Dr.', 'fr' => 'Fr.', + 'lady' => 'Lady', + 'lord' => 'Lord', 'madam' => 'Madam', 'master' => 'Mr.', 'miss' => 'Miss', @@ -45,9 +48,14 @@ class English implements LanguageInterface 'mrs' => 'Mrs.', 'ms' => 'Ms.', 'mx' => 'Mx.', + 'pastor' => 'Pr.', + 'pr' => 'Pr.', 'rev' => 'Rev.', + 'reverend' => 'Rev.', + 'rt hon' => 'Rt. Hon.', 'sir' => 'Sir', 'prof' => 'Prof.', + 'professor' => 'Prof.', 'his honour' => 'His Honour', 'her honour' => 'Her Honour' ]; diff --git a/src/Definition/English/MilitaryRanks.php b/src/Definition/English/MilitaryRanks.php new file mode 100644 index 0000000..183eb0c --- /dev/null +++ b/src/Definition/English/MilitaryRanks.php @@ -0,0 +1,155 @@ + '1stSgt.', + '1stsgt' => '1stSgt.', + 'a1c' => 'A1C', + 'ab' => 'AB', + 'adm' => 'Adm.', + 'amn' => 'Amn.', + 'ccm' => 'CCM', + 'cdt' => 'Cdt.', + 'cmc' => 'CMC', + 'cmd' => 'Cmd.', + 'cmsaf' => 'CMSAF', + 'cmsgt' => 'CMSgt', + 'cpl' => 'Cpl.', + 'cpo' => 'CPO', + 'cpt' => 'Cpt.', + 'cptn' => 'Cpt.', + 'csm' => 'CSM', + 'ens' => 'Ens.', + 'esn' => 'Ens.', + 'fadm' => 'FAdm.', + 'flt' => '1stLt.', + 'fltmc' => 'FLTMC', + 'formc' => 'FORMC', + 'gen' => 'Gen.', + 'gysgt' => 'GySgt.', + 'lcpl' => 'LCpl.', + 'ltcmd' => 'LtCmd.', + 'ltgen' => 'LtGen.', + 'maj' => 'Maj.', + 'majgen' => 'MajGen.', + 'mcpo' => 'MCPO', + 'mcpo-cg' => 'MCPO-CG', + 'mcpon' => 'MCPON', + 'mgysgt' => 'MGySgt.', + 'msg' => 'MSgt.', + 'msgt' => 'MSgt.', + 'ocdt' => 'OCdt.', + 'pfc' => 'PFC', + 'po1' => 'PO1', + 'po2' => 'PO2', + 'po3' => 'PO3', + 'pv1' => 'Pvt.', + 'pv2' => 'Pvt.', + 'pvt' => 'Pvt.', + 'radm' => 'RAdm.', + 'sa' => 'SA', + 'scpo' => 'SCPO', + 'sfc' => 'SFC', + 'sgm' => 'SgtMaj.', + 'sgt' => 'Sgt.', + 'sgtmaj' => 'SgtMaj.', + 'sgtmajmc' => 'SgtMajMC', + 'slt' => '2ndLt.', + 'sma' => 'SMA', + 'smsgt' => 'SMSgt.', + 'sn' => 'Sn.', + 'spc' => 'Spc.', + 'sra' => 'SrA', + 'ssg' => 'SSgt.', + 'ssgt' => 'SSgt.', + 'tsgt' => 'TSgt.', + 'vadm' => 'VAdm.', + ]; + + const SUFFIXES = [ + '1sg' => '1SG', + '1stsgt' => '1SG', + 'a1c' => 'A1C', + 'ab' => 'AB', + 'adm' => 'ADM', + 'amn' => 'AMN', + 'ccm' => 'CCM', + 'cdt' => 'CDT', + 'cmc' => 'CMC', + 'cmd' => 'CMD', + 'cmsaf' => 'CMSAF', + 'cmsgt' => 'CMSGT', + 'cpl' => 'CPL', + 'cpo' => 'CPO', + 'cpt' => 'CPT', + 'cptn' => 'CPT', + 'csm' => 'CSM', + 'ens' => 'ENS', + 'esn' => 'ENS', + 'fadm' => 'FADM', + 'flt' => '1LT', + 'fltmc' => 'FLTMC', + 'formc' => 'FORMC', + 'gen' => 'GEN', + 'gysgt' => 'GYSGT', + 'lcpl' => 'LCPL', + 'ltcmd' => 'LTCMD', + 'ltgen' => 'LTGEN', + 'maj' => 'MAJ', + 'majgen' => 'MAJGEN', + 'mcpo' => 'MCPO', + 'mcpo-cg' => 'MCPO-CG', + 'mcpon' => 'MCPON', + 'mgysgt' => 'MGYSGT', + 'msg' => 'MSGT', + 'msgt' => 'MSGT', + 'ocdt' => 'OCDT', + 'pfc' => 'PFC', + 'po1' => 'PO1', + 'po2' => 'PO2', + 'po3' => 'PO3', + 'pv1' => '1PV', + 'pv2' => '2PV', + 'pvt' => 'PVT', + 'radm' => 'RADM', + 'sa' => 'SA', + 'scpo' => 'SCPO', + 'sfc' => 'SFC', + 'sgm' => 'SGTMAJ', + 'sgt' => 'SGT', + 'sgtmaj' => 'SGTMAJ', + 'sgtmajmc' => 'SGTMAJMC', + 'slt' => '2LT', + 'sma' => 'SMA', + 'smsgt' => 'SMSGT', + 'sn' => 'SN', + 'spc' => 'SPC', + 'sra' => 'SRA', + 'ssg' => 'SSGT', + 'ssgt' => 'SSGT', + 'tsgt' => 'TSGT', + 'vadm' => 'VADM', + ]; + + const LASTNAME_PREFIXES = []; + + public function getSuffixes(): array + { + return self::SUFFIXES; + } + + public function getSalutations(): array + { + return self::SALUTATIONS; + } + + public function getLastnamePrefixes(): array + { + return self::LASTNAME_PREFIXES; + } +} diff --git a/src/Language/German.php b/src/Definition/German/Basics.php similarity index 84% rename from src/Language/German.php rename to src/Definition/German/Basics.php index 491bb4c..d1b39f4 100644 --- a/src/Language/German.php +++ b/src/Definition/German/Basics.php @@ -1,10 +1,10 @@ '1.', diff --git a/src/LanguageInterface.php b/src/DefinitionInterface.php similarity index 85% rename from src/LanguageInterface.php rename to src/DefinitionInterface.php index d4d8bd8..f533430 100644 --- a/src/LanguageInterface.php +++ b/src/DefinitionInterface.php @@ -2,7 +2,7 @@ namespace TheIconic\NameParser; -interface LanguageInterface +interface DefinitionInterface { public function getSuffixes(): array; diff --git a/src/Mapper/AbstractMapper.php b/src/Mapper/AbstractMapper.php index 68811ca..a326033 100644 --- a/src/Mapper/AbstractMapper.php +++ b/src/Mapper/AbstractMapper.php @@ -3,7 +3,6 @@ namespace TheIconic\NameParser\Mapper; use TheIconic\NameParser\Part\AbstractPart; -use TheIconic\NameParser\Part\Nickname; abstract class AbstractMapper { diff --git a/src/Mapper/LastnameMapper.php b/src/Mapper/LastnameMapper.php index 43cfdc7..6afe427 100644 --- a/src/Mapper/LastnameMapper.php +++ b/src/Mapper/LastnameMapper.php @@ -2,7 +2,6 @@ namespace TheIconic\NameParser\Mapper; -use TheIconic\NameParser\LanguageInterface; use TheIconic\NameParser\Part\AbstractPart; use TheIconic\NameParser\Part\Lastname; use TheIconic\NameParser\Part\LastnamePrefix; diff --git a/src/Parser.php b/src/Parser.php index f2040b0..471b6f2 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,7 +2,7 @@ namespace TheIconic\NameParser; -use TheIconic\NameParser\Language\English; +use TheIconic\NameParser\Definition\English\Basics; use TheIconic\NameParser\Mapper\NicknameMapper; use TheIconic\NameParser\Mapper\SalutationMapper; use TheIconic\NameParser\Mapper\SuffixMapper; @@ -26,7 +26,7 @@ class Parser /** * @var array */ - protected $languages = []; + protected $definitions = []; /** * @var array @@ -43,13 +43,13 @@ class Parser */ protected $maxCombinedInitials = 2; - public function __construct(array $languages = []) + public function __construct(array $definition = []) { - if (empty($languages)) { - $languages = [new English()]; + if (empty($definition)) { + $definition = [new Basics()]; } - $this->languages = $languages; + $this->definitions = $definition; } /** @@ -229,9 +229,9 @@ protected function getPrefixes() { $prefixes = []; - /** @var LanguageInterface $language */ - foreach ($this->languages as $language) { - $prefixes += $language->getLastnamePrefixes(); + /** @var DefinitionInterface $definition */ + foreach ($this->definitions as $definition) { + $prefixes += $definition->getLastnamePrefixes(); } return $prefixes; @@ -244,9 +244,9 @@ protected function getSuffixes() { $suffixes = []; - /** @var LanguageInterface $language */ - foreach ($this->languages as $language) { - $suffixes += $language->getSuffixes(); + /** @var DefinitionInterface $definition */ + foreach ($this->definitions as $definition) { + $suffixes += $definition->getSuffixes(); } return $suffixes; @@ -259,9 +259,9 @@ protected function getSalutations() { $salutations = []; - /** @var LanguageInterface $language */ - foreach ($this->languages as $language) { - $salutations += $language->getSalutations(); + /** @var DefinitionInterface $definition */ + foreach ($this->definitions as $definition) { + $salutations += $definition->getSalutations(); } return $salutations; diff --git a/tests/Definition/ConfigurableTest.php b/tests/Definition/ConfigurableTest.php new file mode 100644 index 0000000..f2838da --- /dev/null +++ b/tests/Definition/ConfigurableTest.php @@ -0,0 +1,63 @@ + 'Mr.', + 'Mrs' => 'Mrs.', + ], + [ + '1st' => '1st', + '2nd' => '2nd', + '3rd' => '3rd', + ], + [ + 'de' => 'de', + 'del' => 'del', + ] + ); + + $definition->addSalutations([ + 'Mrs' => 'Mrs.', + 'MS' => 'Ms.', + ]); + + $definition->addSuffixes([ + '3rd' => '3rd', + '4th' => '4th', + '5th' => '5th', + ]); + + $definition->addLastnamePrefixes([ + 'Del' => 'del', + 'du' => 'du', + ]); + + $this->assertEquals([ + 'mr' => 'Mr.', + 'mrs' => 'Mrs.', + 'ms' => 'Ms.', + ], $definition->getSalutations()); + + $this->assertEquals([ + '1st' => '1st', + '2nd' => '2nd', + '3rd' => '3rd', + '4th' => '4th', + '5th' => '5th', + ], $definition->getSuffixes()); + + $this->assertEquals([ + 'de' => 'de', + 'del' => 'del', + 'du' => 'du', + ], $definition->getLastnamePrefixes()); + } +} diff --git a/tests/GermanParserTest.php b/tests/GermanParserTest.php index 8e6641d..4c82b2a 100644 --- a/tests/GermanParserTest.php +++ b/tests/GermanParserTest.php @@ -3,7 +3,7 @@ namespace TheIconic\NameParser; use PHPUnit\Framework\TestCase; -use TheIconic\NameParser\Language\German; +use TheIconic\NameParser\Definition\German\Basics; class GermanParserTest extends TestCase { @@ -56,7 +56,7 @@ public function provider() public function testParse($input, $expectation) { $parser = new Parser([ - new German() + new Basics() ]); $name = $parser->parse($input); diff --git a/tests/Mapper/InitialMapperTest.php b/tests/Mapper/InitialMapperTest.php index 92448f6..8d57dba 100644 --- a/tests/Mapper/InitialMapperTest.php +++ b/tests/Mapper/InitialMapperTest.php @@ -2,7 +2,6 @@ namespace TheIconic\NameParser\Mapper; -use TheIconic\NameParser\Language\English; use TheIconic\NameParser\Part\Initial; use TheIconic\NameParser\Part\Salutation; use TheIconic\NameParser\Part\Lastname; diff --git a/tests/Mapper/LastnameMapperTest.php b/tests/Mapper/LastnameMapperTest.php index 4c9b746..fa53032 100644 --- a/tests/Mapper/LastnameMapperTest.php +++ b/tests/Mapper/LastnameMapperTest.php @@ -2,7 +2,7 @@ namespace TheIconic\NameParser\Mapper; -use TheIconic\NameParser\Language\English; +use TheIconic\NameParser\Definition\English\Basics; use TheIconic\NameParser\Part\Salutation; use TheIconic\NameParser\Part\Firstname; use TheIconic\NameParser\Part\Lastname; @@ -120,7 +120,7 @@ public function provider() protected function getMapper($matchSingle = false) { - $english = new English(); + $english = new Basics(); return new LastnameMapper($english->getLastnamePrefixes(), $matchSingle); } diff --git a/tests/Mapper/SalutationMapperTest.php b/tests/Mapper/SalutationMapperTest.php index fd35c84..ba0a61c 100644 --- a/tests/Mapper/SalutationMapperTest.php +++ b/tests/Mapper/SalutationMapperTest.php @@ -2,10 +2,9 @@ namespace TheIconic\NameParser\Mapper; -use TheIconic\NameParser\Language\English; +use TheIconic\NameParser\Definition\English\Basics; use TheIconic\NameParser\Part\Salutation; use TheIconic\NameParser\Part\Firstname; -use TheIconic\NameParser\Part\Lastname; class SalutationMapperTest extends AbstractMapperTest { @@ -54,7 +53,7 @@ public function provider() protected function getMapper() { - $english = new English(); + $english = new Basics(); return new SalutationMapper($english->getSalutations()); } diff --git a/tests/Mapper/SuffixMapperTest.php b/tests/Mapper/SuffixMapperTest.php index 629aa64..45c911c 100644 --- a/tests/Mapper/SuffixMapperTest.php +++ b/tests/Mapper/SuffixMapperTest.php @@ -2,7 +2,7 @@ namespace TheIconic\NameParser\Mapper; -use TheIconic\NameParser\Language\English; +use TheIconic\NameParser\Definition\English\Basics; use TheIconic\NameParser\Part\Lastname; use TheIconic\NameParser\Part\Firstname; use TheIconic\NameParser\Part\Suffix; @@ -157,7 +157,7 @@ public function provider() protected function getMapper($matchSinglePart = false, $reservedParts = 2) { - $english = new English(); + $english = new Basics(); return new SuffixMapper($english->getSuffixes(), $matchSinglePart, $reservedParts); } diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 05e79ca..543ef8f 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -3,8 +3,9 @@ namespace TheIconic\NameParser; use PHPUnit\Framework\TestCase; -use TheIconic\NameParser\Language\English; -use TheIconic\NameParser\Language\German; +use TheIconic\NameParser\Definition\English\Basics as BasicEnglish; +use TheIconic\NameParser\Definition\English\MilitaryRanks as MilitaryEnglish; +use TheIconic\NameParser\Definition\German\Basics as German; class ParserTest extends TestCase { @@ -637,4 +638,15 @@ public function testParserAndSubparsersProperlyHandleLanguages() $this->assertSame('Herr', $parser->parse('Herr Schmidt')->getSalutation()); $this->assertSame('Herr', $parser->parse('Herr Schmidt, Bernd')->getSalutation()); } + + public function testParserCombinesMultipleDefinitions(): void + { + $parser = new Parser([ + new BasicEnglish(), + new MilitaryEnglish() + ]); + + $this->assertSame('Mr.', $parser->parse('Mr Brown')->getSalutation()); + $this->assertSame('Sgt.', $parser->parse('Sgt Montgomery Scott')->getSalutation()); + } }