feature #26890 [Inflector] Support pluralization in the inflector (mbabker)

This PR was merged into the 4.3-dev branch.

Discussion
----------

[Inflector] Support pluralization in the inflector

| Q             | A
| ------------- | ---
| Branch?       | master
| Bug fix?      | no
| New feature?  | yes
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | not yet
| Fixed tickets | N/A
| License       | MIT
| Doc PR        | Not Yet

At present the inflector only supports singularizing plural words; this PR adds the capability to pluralize singular words.

Commits
-------

06920a79c4 Support pluralization in the inflector
This commit is contained in:
Fabien Potencier 2019-03-31 19:29:55 +02:00
commit dad5b01b9c
2 changed files with 407 additions and 0 deletions

View File

@ -139,6 +139,179 @@ final class Inflector
['elpoep', 6, true, true, 'person'],
];
/**
 * Map English singular to plural suffixes.
 *
 * Entries are tried in order by pluralize() and the first entry that
 * matches wins, so more specific suffixes must be listed before the
 * generic ones they would otherwise be caught by.
 *
 * @var array
 *
 * @see http://english-zone.com/spelling/plurals.html
 */
private static $singularMap = [
    // First entry: singular suffix, reversed
    // Second entry: length of singular suffix
    // Third entry: Whether the suffix may succeed a vowel
    // Fourth entry: Whether the suffix may succeed a consonant
    // Fifth entry: plural suffix, normal (a list when several plurals are possible)

    // criterion (criteria)
    // NOTE(review): this entry matches the plural "criteria" and yields the
    // singular "criterion"; it looks copied from the plural map - confirm.
    ['airetirc', 8, false, false, 'criterion'],

    // nebulae (nebula)
    ['aluben', 6, false, false, 'nebulae'],

    // children (child)
    ['dlihc', 5, true, true, 'children'],

    // prices (price)
    ['eci', 3, false, true, 'ices'],

    // services (service)
    ['ecivres', 7, true, true, 'services'],

    // lives (life), wives (wife)
    ['efi', 3, false, true, 'ives'],

    // selfies (selfie)
    ['eifles', 6, true, true, 'selfies'],

    // movies (movie)
    ['eivom', 5, true, true, 'movies'],

    // lice (louse)
    ['esuol', 5, false, true, 'lice'],

    // mice (mouse)
    ['esuom', 5, false, true, 'mice'],

    // geese (goose)
    ['esoo', 4, false, true, 'eese'],

    // houses (house), bases (base)
    ['es', 2, true, true, 'ses'],

    // geese (goose)
    ['esoog', 5, true, true, 'geese'],

    // caves (cave)
    ['ev', 2, true, true, 'ves'],

    // drives (drive)
    ['evird', 5, false, true, 'drives'],

    // objectives (objective), alternatives (alternative)
    ['evit', 4, true, true, 'tives'],

    // moves (move)
    ['evom', 4, true, true, 'moves'],

    // staves (staff)
    ['ffats', 5, true, true, 'staves'],

    // cliffs (cliff), sheriffs (sheriff)
    ['ff', 2, true, true, 'ffs'],

    // hooves (hoof), dwarves (dwarf), elves (elf), leaves (leaf)
    ['f', 1, true, true, ['fs', 'ves']],

    // arches (arch)
    ['hc', 2, true, true, 'ches'],

    // bushes (bush)
    ['hs', 2, true, true, 'shes'],

    // teeth (tooth)
    ['htoot', 5, true, true, 'teeth'],

    // bacteria (bacterium), strata (stratum)
    ['mu', 2, true, true, 'a'],

    // echoes (echo)
    ['ohce', 4, true, true, 'echoes'],

    // men (man), women (woman)
    ['nam', 3, true, true, 'men'],

    // people (person)
    ['nosrep', 6, true, true, ['persons', 'people']],

    // ions (ion)
    ['noi', 3, true, true, 'ions'],

    // criteria (criterion), phenomena (phenomenon)
    ['no', 2, true, true, 'a'],

    // atlases (atlas)
    ['salta', 5, true, true, 'atlases'],

    // irises (iris)
    ['siri', 4, true, true, 'irises'],

    // analyses (analysis), ellipses (ellipsis), neuroses (neurosis)
    // theses (thesis), emphases (emphasis), oases (oasis),
    // crises (crisis)
    ['sis', 3, true, true, 'ses'],

    // accesses (access), addresses (address), kisses (kiss)
    ['ss', 2, true, false, 'sses'],

    // syllabi (syllabus)
    ['suballys', 8, true, true, 'syllabi'],

    // buses (bus)
    ['sub', 3, true, true, 'buses'],

    // fungi (fungus), alumni (alumnus), syllabi (syllabus), radii (radius)
    ['su', 2, true, true, 'i'],

    // news (news)
    ['swen', 4, true, true, 'news'],

    // feet (foot)
    ['toof', 4, true, true, 'feet'],

    // chateaux (chateau), bureaus (bureau)
    ['uae', 3, false, true, ['eaus', 'eaux']],

    // oxen (ox)
    ['xo', 2, false, false, 'oxen'],

    // hoaxes (hoax)
    ['xaoh', 4, true, false, 'hoaxes'],

    // indices (index)
    ['xedni', 5, false, true, ['indicies', 'indexes']],

    // indexes (index), matrixes (matrix)
    ['x', 1, true, false, ['cies', 'xes']],

    // appendices (appendix)
    ['xi', 2, false, true, 'ices'],

    // babies (baby)
    ['y', 1, false, true, 'ies'],

    // quizzes (quiz)
    ['ziuq', 4, true, false, 'quizzes'],

    // waltzes (waltz)
    ['z', 1, true, false, 'zes'],
];
/**
 * A list of words which should not be inflected.
 *
 * Entries are lowercase: callers lowercase the word before looking it up
 * here, and return the word unchanged when it is found.
 *
 * @var array
 */
private static $uninflected = [
    'data',
    'deer',
    'feedback',
    'fish',
    'moose',
    'series',
    'sheep',
];
/**
* This class should not be instantiated.
*/
@ -165,6 +338,11 @@ final class Inflector
$lowerPluralRev = strtolower($pluralRev);
$pluralLength = \strlen($lowerPluralRev);
// Check if the word is one which is not inflected, return early if so
if (in_array(strtolower($plural), self::$uninflected, true)) {
return $plural;
}
// The outer loop iterates over the entries of the plural table
// The inner loop $j iterates over the characters of the plural suffix
// in the plural table to compare them with the characters of the actual
@ -229,4 +407,94 @@ final class Inflector
// Assume that plural and singular is identical
return $plural;
}
/**
 * Returns the plural form of a word.
 *
 * If the method can't determine the form with certainty, an array of the
 * possible plurals is returned.
 *
 * Words listed in the uninflected list are returned unchanged. When no
 * entry of the singular map applies, a trailing `s` is appended.
 *
 * @param string $singular A word in singular form
 *
 * @return string|array The plural form or an array of possible plural
 *                      forms
 *
 * @internal
 */
public static function pluralize(string $singular)
{
    // Check if the word is one which is not inflected, return early if so
    if (\in_array(strtolower($singular), self::$uninflected, true)) {
        return $singular;
    }

    // Suffixes in the singular table are stored reversed, so the word is
    // reversed too and the suffix becomes a simple prefix comparison
    $singularRev = strrev($singular);
    $lowerSingularRev = strtolower($singularRev);
    $singularLength = \strlen($lowerSingularRev);

    // Iterate over the entries of the singular table; the first entry whose
    // suffix and vowel/consonant constraints match decides the result
    foreach (self::$singularMap as $map) {
        $suffix = $map[0];
        $suffixLength = $map[1];

        // Skip entries whose suffix is longer than the word or does not
        // match its ending. Checking the length first means no string
        // offset outside the word is ever read (e.g. for an empty word)
        if ($suffixLength > $singularLength || 0 !== strncmp($lowerSingularRev, $suffix, $suffixLength)) {
            continue;
        }

        // Is there any character preceding the suffix in the singular string?
        if ($suffixLength < $singularLength) {
            $nextIsVocal = false !== strpos('aeiou', $lowerSingularRev[$suffixLength]);

            if (!$map[2] && $nextIsVocal) {
                // suffix may not succeed a vowel but the preceding char is one
                continue;
            }

            if (!$map[3] && !$nextIsVocal) {
                // suffix may not succeed a consonant but the preceding char is one
                continue;
            }
        }

        $newBase = substr($singular, 0, $singularLength - $suffixLength);
        $newSuffix = $map[4];

        // Check whether the first character in the singular suffix
        // is uppercased. If yes, uppercase the first character in
        // the plural suffix too
        $firstUpper = ctype_upper($singularRev[$suffixLength - 1]);

        if (\is_array($newSuffix)) {
            $plurals = [];

            foreach ($newSuffix as $newSuffixEntry) {
                $plurals[] = $newBase.($firstUpper ? ucfirst($newSuffixEntry) : $newSuffixEntry);
            }

            return $plurals;
        }

        return $newBase.($firstUpper ? ucfirst($newSuffix) : $newSuffix);
    }

    // Assume that plural is singular with a trailing `s`
    return $singular.'s';
}
}

View File

@ -155,6 +155,130 @@ class InflectorTest extends TestCase
];
}
/**
 * Data sets for testPluralize().
 *
 * Each data set is a pair: the word passed to Inflector::pluralize() and
 * the expected result, which is either a single string or a list of the
 * possible plural forms in the order the inflector returns them.
 */
public function pluralizeProvider()
{
    // see http://english-zone.com/spelling/plurals.html
    // see http://www.scribd.com/doc/3271143/List-of-100-Irregular-Plural-Nouns-in-English
    return [
        ['access', 'accesses'],
        ['address', 'addresses'],
        ['agenda', 'agendas'],
        ['alumnus', 'alumni'],
        ['analysis', 'analyses'],
        ['antenna', 'antennas'], // antennae
        ['appendix', ['appendicies', 'appendixes']],
        ['arch', 'arches'],
        ['atlas', 'atlases'],
        ['axe', 'axes'],
        ['baby', 'babies'],
        ['bacterium', 'bacteria'],
        ['base', 'bases'],
        ['batch', 'batches'],
        ['beau', ['beaus', 'beaux']],
        ['bee', 'bees'],
        ['box', ['bocies', 'boxes']],
        ['boy', 'boys'],
        ['bureau', ['bureaus', 'bureaux']],
        ['bus', 'buses'],
        ['bush', 'bushes'],
        ['calf', ['calfs', 'calves']],
        ['car', 'cars'],
        ['cassette', 'cassettes'],
        ['cave', 'caves'],
        ['chateau', ['chateaus', 'chateaux']],
        ['cheese', 'cheeses'],
        ['child', 'children'],
        ['circus', 'circuses'],
        ['cliff', 'cliffs'],
        ['committee', 'committees'],
        ['crisis', 'crises'],
        // NOTE(review): the input here is the plural and the expectation the
        // singular, mirroring the inflector's reversed 'airetirc' map entry
        ['criteria', 'criterion'],
        ['cup', 'cups'],
        ['data', 'data'],
        ['day', 'days'],
        ['disco', 'discos'],
        ['device', 'devices'],
        ['drive', 'drives'],
        ['driver', 'drivers'],
        ['dwarf', ['dwarfs', 'dwarves']],
        ['echo', 'echoes'],
        ['elf', ['elfs', 'elves']],
        ['emphasis', 'emphases'],
        ['fax', ['facies', 'faxes']],
        ['feedback', 'feedback'],
        ['focus', 'foci'],
        ['foot', 'feet'],
        ['formula', 'formulas'], // formulae
        ['fungus', 'fungi'],
        ['garage', 'garages'],
        ['goose', 'geese'],
        ['half', ['halfs', 'halves']],
        ['hat', 'hats'],
        ['hero', 'heroes'],
        ['hippopotamus', 'hippopotami'], // hippopotamuses
        ['hoax', 'hoaxes'],
        ['hoof', ['hoofs', 'hooves']],
        ['house', 'houses'],
        ['index', ['indicies', 'indexes']],
        ['ion', 'ions'],
        ['iris', 'irises'],
        ['kiss', 'kisses'],
        ['knife', 'knives'],
        ['lamp', 'lamps'],
        ['leaf', ['leafs', 'leaves']],
        ['life', 'lives'],
        ['louse', 'lice'],
        ['man', 'men'],
        ['matrix', ['matricies', 'matrixes']],
        ['mouse', 'mice'],
        ['move', 'moves'],
        ['movie', 'movies'],
        ['nebula', 'nebulae'],
        ['neurosis', 'neuroses'],
        ['news', 'news'],
        ['oasis', 'oases'],
        ['objective', 'objectives'],
        ['ox', 'oxen'],
        ['party', 'parties'],
        ['person', ['persons', 'people']],
        ['phenomenon', 'phenomena'],
        ['photo', 'photos'],
        ['piano', 'pianos'],
        ['plateau', ['plateaus', 'plateaux']],
        ['poppy', 'poppies'],
        ['price', 'prices'],
        ['quiz', 'quizzes'],
        ['radius', 'radii'],
        ['roof', ['roofs', 'rooves']],
        ['rose', 'roses'],
        ['sandwich', 'sandwiches'],
        ['scarf', ['scarfs', 'scarves']],
        ['schema', 'schemas'], // schemata
        ['selfie', 'selfies'],
        ['series', 'series'],
        ['service', 'services'],
        ['sheriff', 'sheriffs'],
        ['shoe', 'shoes'],
        ['spy', 'spies'],
        ['staff', 'staves'],
        ['story', 'stories'],
        ['stratum', 'strata'],
        ['suitcase', 'suitcases'],
        ['syllabus', 'syllabi'],
        ['tag', 'tags'],
        ['thief', ['thiefs', 'thieves']],
        ['tooth', 'teeth'],
        ['tree', 'trees'],
        ['waltz', 'waltzes'],
        ['wife', 'wives'],

        // test casing: if the first letter was uppercase, it should remain so
        ['Man', 'Men'],
        ['GrandChild', 'GrandChildren'],
        ['SubTree', 'SubTrees'],
    ];
}
/**
* @dataProvider singularizeProvider
*/
@ -169,4 +293,19 @@ class InflectorTest extends TestCase
$this->assertEquals($singular, $single);
}
/**
 * Checks that Inflector::pluralize() turns the given word into the
 * expected plural, or into the expected list of possible plurals.
 *
 * @dataProvider pluralizeProvider
 */
public function testPluralize($singular, $plural)
{
    $actual = Inflector::pluralize($singular);

    // Report a string/array mismatch explicitly, with both values spelled
    // out, before running the generic equality assertion
    if (is_string($plural) && is_array($actual)) {
        $this->fail("--- Expected\n`string`: ".$plural."\n+++ Actual\n`array`: ".implode(', ', $actual));
    } elseif (is_array($plural) && is_string($actual)) {
        $this->fail("--- Expected\n`array`: ".implode(', ', $plural)."\n+++ Actual\n`string`: ".$actual);
    }

    $this->assertEquals($plural, $actual);
}
}