Support pluralization in the inflector
This commit is contained in:
parent
9a999553a5
commit
06920a79c4
@ -82,9 +82,6 @@ final class Inflector
|
||||
// news (news)
|
||||
array('swen', 4, true, true, 'news'),
|
||||
|
||||
// series (series)
|
||||
array('seires', 6, true, true, 'series'),
|
||||
|
||||
// babies (baby)
|
||||
array('sei', 3, false, true, 'y'),
|
||||
|
||||
@ -139,6 +136,179 @@ final class Inflector
|
||||
array('elpoep', 6, true, true, 'person'),
|
||||
);
|
||||
|
||||
/**
|
||||
* Map English singular to plural suffixes.
|
||||
*
|
||||
* @var array
|
||||
*
|
||||
* @see http://english-zone.com/spelling/plurals.html
|
||||
*/
|
||||
private static $singularMap = array(
|
||||
// First entry: singular suffix, reversed
|
||||
// Second entry: length of singular suffix
|
||||
// Third entry: Whether the suffix may succeed a vowel
|
||||
// Fourth entry: Whether the suffix may succeed a consonant
|
||||
// Fifth entry: plural suffix, normal
|
||||
|
||||
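// criterion (criteria)
// NOTE(review): this entry turns the plural "criteria" into "criterion"; the
// singular "criterion" is matched by the "noi" => "ions" entry below instead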
|
||||
array('airetirc', 8, false, false, 'criterion'),
|
||||
|
||||
// nebulae (nebula)
|
||||
array('aluben', 6, false, false, 'nebulae'),
|
||||
|
||||
// children (child)
|
||||
array('dlihc', 5, true, true, 'children'),
|
||||
|
||||
// prices (price)
|
||||
array('eci', 3, false, true, 'ices'),
|
||||
|
||||
// services (service)
|
||||
array('ecivres', 7, true, true, 'services'),
|
||||
|
||||
// lives (life), wives (wife)
|
||||
array('efi', 3, false, true, 'ives'),
|
||||
|
||||
// selfies (selfie)
|
||||
array('eifles', 6, true, true, 'selfies'),
|
||||
|
||||
// movies (movie)
|
||||
array('eivom', 5, true, true, 'movies'),
|
||||
|
||||
// lice (louse)
|
||||
array('esuol', 5, false, true, 'lice'),
|
||||
|
||||
// mice (mouse)
|
||||
array('esuom', 5, false, true, 'mice'),
|
||||
|
||||
// geese (goose)
|
||||
array('esoo', 4, false, true, 'eese'),
|
||||
|
||||
// houses (house), bases (base)
|
||||
array('es', 2, true, true, 'ses'),
|
||||
|
||||
// geese (goose)
|
||||
array('esoog', 5, true, true, 'geese'),
|
||||
|
||||
// caves (cave)
|
||||
array('ev', 2, true, true, 'ves'),
|
||||
|
||||
// drives (drive)
|
||||
array('evird', 5, false, true, 'drives'),
|
||||
|
||||
// objectives (objective), alternatives (alternative)
|
||||
array('evit', 4, true, true, 'tives'),
|
||||
|
||||
// moves (move)
|
||||
array('evom', 4, true, true, 'moves'),
|
||||
|
||||
// staves (staff)
|
||||
array('ffats', 5, true, true, 'staves'),
|
||||
|
||||
// cliffs (cliff), sheriffs (sheriff)
|
||||
array('ff', 2, true, true, 'ffs'),
|
||||
|
||||
// hooves (hoof), dwarves (dwarf), elves (elf), leaves (leaf)
|
||||
array('f', 1, true, true, array('fs', 'ves')),
|
||||
|
||||
// arches (arch)
|
||||
array('hc', 2, true, true, 'ches'),
|
||||
|
||||
// bushes (bush)
|
||||
array('hs', 2, true, true, 'shes'),
|
||||
|
||||
// teeth (tooth)
|
||||
array('htoot', 5, true, true, 'teeth'),
|
||||
|
||||
// bacteria (bacterium), strata (stratum)
|
||||
array('mu', 2, true, true, 'a'),
|
||||
|
||||
// echoes (echo)
|
||||
array('ohce', 4, true, true, 'echoes'),
|
||||
|
||||
// men (man), women (woman)
|
||||
array('nam', 3, true, true, 'men'),
|
||||
|
||||
// people (person)
|
||||
array('nosrep', 6, true, true, array('persons', 'people')),
|
||||
|
||||
// ions (ion)
|
||||
array('noi', 3, true, true, 'ions'),
|
||||
|
||||
// phenomena (phenomenon)
|
||||
array('no', 2, true, true, 'a'),
|
||||
|
||||
// atlases (atlas)
|
||||
array('salta', 5, true, true, 'atlases'),
|
||||
|
||||
// irises (iris)
|
||||
array('siri', 4, true, true, 'irises'),
|
||||
|
||||
// analyses (analysis), ellipses (ellipsis), neuroses (neurosis)
|
||||
// theses (thesis), emphases (emphasis), oases (oasis),
|
||||
// crises (crisis)
|
||||
array('sis', 3, true, true, 'ses'),
|
||||
|
||||
// accesses (access), addresses (address), kisses (kiss)
|
||||
array('ss', 2, true, false, 'sses'),
|
||||
|
||||
// syllabi (syllabus)
|
||||
array('suballys', 8, true, true, 'syllabi'),
|
||||
|
||||
// buses (bus)
|
||||
array('sub', 3, true, true, 'buses'),
|
||||
|
||||
// fungi (fungus), alumni (alumnus), syllabi (syllabus), radii (radius)
|
||||
array('su', 2, true, true, 'i'),
|
||||
|
||||
// news (news)
|
||||
array('swen', 4, true, true, 'news'),
|
||||
|
||||
// feet (foot)
|
||||
array('toof', 4, true, true, 'feet'),
|
||||
|
||||
// chateaux (chateau), bureaus (bureau)
|
||||
array('uae', 3, false, true, array('eaus', 'eaux')),
|
||||
|
||||
// oxen (ox)
|
||||
array('xo', 2, false, false, 'oxen'),
|
||||
|
||||
// hoaxes (hoax)
|
||||
array('xaoh', 4, true, false, 'hoaxes'),
|
||||
|
||||
// indices (index)
|
||||
array('xedni', 5, false, true, array('indicies', 'indexes')),
|
||||
|
||||
// indexes (index), matrixes (matrix)
|
||||
array('x', 1, true, false, array('cies', 'xes')),
|
||||
|
||||
// appendices (appendix)
|
||||
array('xi', 2, false, true, 'ices'),
|
||||
|
||||
// babies (baby)
|
||||
array('y', 1, false, true, 'ies'),
|
||||
|
||||
// quizzes (quiz)
|
||||
array('ziuq', 4, true, false, 'quizzes'),
|
||||
|
||||
// waltzes (waltz)
|
||||
array('z', 1, true, false, 'zes'),
|
||||
);
|
||||
|
||||
/**
|
||||
* A list of words which should not be inflected. Entries must be lowercase,
* because lookups compare them against the lowercased input word.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private static $uninflected = array(
|
||||
'data',
|
||||
'deer',
|
||||
'feedback',
|
||||
'fish',
|
||||
'moose',
|
||||
'series',
|
||||
'sheep',
|
||||
);
|
||||
|
||||
/**
|
||||
* This class should not be instantiated.
|
||||
*/
|
||||
@ -165,6 +335,11 @@ final class Inflector
|
||||
$lowerPluralRev = strtolower($pluralRev);
|
||||
$pluralLength = strlen($lowerPluralRev);
|
||||
|
||||
// Check if the word is one which is not inflected, return early if so
|
||||
if (in_array(strtolower($plural), self::$uninflected, true)) {
|
||||
return $plural;
|
||||
}
|
||||
|
||||
// The outer loop iterates over the entries of the plural table
|
||||
// The inner loop $j iterates over the characters of the plural suffix
|
||||
// in the plural table to compare them with the characters of the actual
|
||||
@ -229,4 +404,94 @@ final class Inflector
|
||||
// Assume that plural and singular is identical
|
||||
return $plural;
|
||||
}
|
||||
|
||||
/**
 * Returns the plural form of a word.
 *
 * If the method can't determine the form with certainty, an array of the
 * possible plurals is returned.
 *
 * Words listed in self::$uninflected are returned unchanged. If no entry of
 * self::$singularMap matches, the plural is assumed to be the singular with a
 * trailing "s" (this also applies to the empty string).
 *
 * @param string $singular A word in singular form
 *
 * @return string|array The plural form or an array of possible plural
 *                      forms
 *
 * @internal
 */
public static function pluralize(string $singular)
{
    $singularRev = strrev($singular);
    $lowerSingularRev = strtolower($singularRev);
    $singularLength = strlen($lowerSingularRev);

    // Check if the word is one which is not inflected, return early if so
    if (in_array(strtolower($singular), self::$uninflected, true)) {
        return $singular;
    }

    // Entries are tried in table order; the first entry whose suffix and
    // vowel/consonant constraints both match decides the result
    foreach (self::$singularMap as $map) {
        $suffix = $map[0];
        $suffixLength = $map[1];

        // A suffix longer than the word can never match. Checking this up
        // front also guarantees that no string offset below is out of range,
        // which previously happened for the empty string.
        if ($suffixLength > $singularLength) {
            continue;
        }

        // Word and suffix are both reversed and lowercased, so the word ends
        // with the suffix exactly when the reversed word starts with it
        if (0 !== strncmp($lowerSingularRev, $suffix, $suffixLength)) {
            continue;
        }

        // Is there any character preceding the suffix in the singular string?
        if ($suffixLength < $singularLength) {
            $nextIsVocal = false !== strpos('aeiou', $lowerSingularRev[$suffixLength]);

            if (!$map[2] && $nextIsVocal) {
                // suffix may not succeed a vowel but the preceding char is one
                continue;
            }

            if (!$map[3] && !$nextIsVocal) {
                // suffix may not succeed a consonant but the preceding char is one
                continue;
            }
        }

        $newBase = substr($singular, 0, $singularLength - $suffixLength);
        $newSuffix = $map[4];

        // Check whether the first character in the singular suffix
        // is uppercased. If yes, uppercase the first character in
        // the plural suffix too
        $firstUpper = ctype_upper($singularRev[$suffixLength - 1]);

        if (is_array($newSuffix)) {
            $plurals = array();

            foreach ($newSuffix as $newSuffixEntry) {
                $plurals[] = $newBase.($firstUpper ? ucfirst($newSuffixEntry) : $newSuffixEntry);
            }

            return $plurals;
        }

        return $newBase.($firstUpper ? ucfirst($newSuffix) : $newSuffix);
    }

    // Assume that plural is singular with a trailing `s`
    return $singular.'s';
}
|
||||
}
|
||||
|
@ -155,6 +155,130 @@ class InflectorTest extends TestCase
|
||||
);
|
||||
}
|
||||
|
||||
public function pluralizeProvider()
|
||||
{
|
||||
// see http://english-zone.com/spelling/plurals.html
|
||||
// see http://www.scribd.com/doc/3271143/List-of-100-Irregular-Plural-Nouns-in-English
|
||||
return array(
|
||||
array('access', 'accesses'),
|
||||
array('address', 'addresses'),
|
||||
array('agenda', 'agendas'),
|
||||
array('alumnus', 'alumni'),
|
||||
array('analysis', 'analyses'),
|
||||
array('antenna', 'antennas'), //antennae
|
||||
array('appendix', array('appendicies', 'appendixes')),
|
||||
array('arch', 'arches'),
|
||||
array('atlas', 'atlases'),
|
||||
array('axe', 'axes'),
|
||||
array('baby', 'babies'),
|
||||
array('bacterium', 'bacteria'),
|
||||
array('base', 'bases'),
|
||||
array('batch', 'batches'),
|
||||
array('beau', array('beaus', 'beaux')),
|
||||
array('bee', 'bees'),
|
||||
array('box', array('bocies', 'boxes')),
|
||||
array('boy', 'boys'),
|
||||
array('bureau', array('bureaus', 'bureaux')),
|
||||
array('bus', 'buses'),
|
||||
array('bush', 'bushes'),
|
||||
array('calf', array('calfs', 'calves')),
|
||||
array('car', 'cars'),
|
||||
array('cassette', 'cassettes'),
|
||||
array('cave', 'caves'),
|
||||
array('chateau', array('chateaus', 'chateaux')),
|
||||
array('cheese', 'cheeses'),
|
||||
array('child', 'children'),
|
||||
array('circus', 'circuses'),
|
||||
array('cliff', 'cliffs'),
|
||||
array('committee', 'committees'),
|
||||
array('crisis', 'crises'),
|
||||
array('criteria', 'criterion'),
|
||||
array('cup', 'cups'),
|
||||
array('data', 'data'),
|
||||
array('day', 'days'),
|
||||
array('disco', 'discos'),
|
||||
array('device', 'devices'),
|
||||
array('drive', 'drives'),
|
||||
array('driver', 'drivers'),
|
||||
array('dwarf', array('dwarfs', 'dwarves')),
|
||||
array('echo', 'echoes'),
|
||||
array('elf', array('elfs', 'elves')),
|
||||
array('emphasis', 'emphases'),
|
||||
array('fax', array('facies', 'faxes')),
|
||||
array('feedback', 'feedback'),
|
||||
array('focus', 'foci'),
|
||||
array('foot', 'feet'),
|
||||
array('formula', 'formulas'), //formulae
|
||||
array('fungus', 'fungi'),
|
||||
array('garage', 'garages'),
|
||||
array('goose', 'geese'),
|
||||
array('half', array('halfs', 'halves')),
|
||||
array('hat', 'hats'),
|
||||
array('hero', 'heroes'),
|
||||
array('hippopotamus', 'hippopotami'), //hippopotamuses
|
||||
array('hoax', 'hoaxes'),
|
||||
array('hoof', array('hoofs', 'hooves')),
|
||||
array('house', 'houses'),
|
||||
array('index', array('indicies', 'indexes')),
|
||||
array('ion', 'ions'),
|
||||
array('iris', 'irises'),
|
||||
array('kiss', 'kisses'),
|
||||
array('knife', 'knives'),
|
||||
array('lamp', 'lamps'),
|
||||
array('leaf', array('leafs', 'leaves')),
|
||||
array('life', 'lives'),
|
||||
array('louse', 'lice'),
|
||||
array('man', 'men'),
|
||||
array('matrix', array('matricies', 'matrixes')),
|
||||
array('mouse', 'mice'),
|
||||
array('move', 'moves'),
|
||||
array('movie', 'movies'),
|
||||
array('nebula', 'nebulae'),
|
||||
array('neurosis', 'neuroses'),
|
||||
array('news', 'news'),
|
||||
array('oasis', 'oases'),
|
||||
array('objective', 'objectives'),
|
||||
array('ox', 'oxen'),
|
||||
array('party', 'parties'),
|
||||
array('person', array('persons', 'people')),
|
||||
array('phenomenon', 'phenomena'),
|
||||
array('photo', 'photos'),
|
||||
array('piano', 'pianos'),
|
||||
array('plateau', array('plateaus', 'plateaux')),
|
||||
array('poppy', 'poppies'),
|
||||
array('price', 'prices'),
|
||||
array('quiz', 'quizzes'),
|
||||
array('radius', 'radii'),
|
||||
array('roof', array('roofs', 'rooves')),
|
||||
array('rose', 'roses'),
|
||||
array('sandwich', 'sandwiches'),
|
||||
array('scarf', array('scarfs', 'scarves')),
|
||||
array('schema', 'schemas'), //schemata
|
||||
array('selfie', 'selfies'),
|
||||
array('series', 'series'),
|
||||
array('service', 'services'),
|
||||
array('sheriff', 'sheriffs'),
|
||||
array('shoe', 'shoes'),
|
||||
array('spy', 'spies'),
|
||||
array('staff', 'staves'),
|
||||
array('story', 'stories'),
|
||||
array('stratum', 'strata'),
|
||||
array('suitcase', 'suitcases'),
|
||||
array('syllabus', 'syllabi'),
|
||||
array('tag', 'tags'),
|
||||
array('thief', array('thiefs', 'thieves')),
|
||||
array('tooth', 'teeth'),
|
||||
array('tree', 'trees'),
|
||||
array('waltz', 'waltzes'),
|
||||
array('wife', 'wives'),
|
||||
|
||||
// test casing: if the first letter was uppercase, it should remain so
|
||||
array('Man', 'Men'),
|
||||
array('GrandChild', 'GrandChildren'),
|
||||
array('SubTree', 'SubTrees'),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @dataProvider singularizeProvider
|
||||
*/
|
||||
@ -169,4 +293,19 @@ class InflectorTest extends TestCase
|
||||
|
||||
$this->assertEquals($singular, $single);
|
||||
}
|
||||
|
||||
/**
 * Checks that Inflector::pluralize() returns the expected plural form(s).
 *
 * A type mismatch between expectation and result (string vs. array) is
 * reported with a dedicated message before the values are compared.
 *
 * @dataProvider pluralizeProvider
 *
 * @param string       $singular       The word handed to the inflector
 * @param string|array $expectedPlural The expected plural, or the list of
 *                                     expected plural candidates
 */
public function testPluralize($singular, $expectedPlural)
{
    $actualPlural = Inflector::pluralize($singular);

    if (is_string($expectedPlural) && is_array($actualPlural)) {
        $this->fail("--- Expected\n`string`: ".$expectedPlural."\n+++ Actual\n`array`: ".implode(', ', $actualPlural));
    } elseif (is_array($expectedPlural) && is_string($actualPlural)) {
        $this->fail("--- Expected\n`array`: ".implode(', ', $expectedPlural)."\n+++ Actual\n`string`: ".$actualPlural);
    }

    $this->assertEquals($expectedPlural, $actualPlural);
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user