diff --git a/src/Symfony/Component/Form/Util/FormUtil.php b/src/Symfony/Component/Form/Util/FormUtil.php
index d4f6b087f8..284f02e1e7 100644
--- a/src/Symfony/Component/Form/Util/FormUtil.php
+++ b/src/Symfony/Component/Form/Util/FormUtil.php
@@ -16,6 +16,197 @@ namespace Symfony\Component\Form\Util;
  */
 abstract class FormUtil
 {
+    /**
+     * Column indexes of the entries in self::$pluralMap
+     */
+    const PLURAL_SUFFIX = 0;
+
+    const PLURAL_SUFFIX_LENGTH = 1;
+
+    const PLURAL_SUFFIX_AFTER_VOCAL = 2;
+
+    const PLURAL_SUFFIX_AFTER_CONS = 3;
+
+    const SINGULAR_SUFFIX = 4;
+
+    /**
+     * Map English plural to singular suffixes
+     *
+     * Entries are tried in order and the first match wins, so a longer, more
+     * specific suffix must be listed before any shorter suffix it ends with.
+     *
+     * @var array
+     *
+     * @see http://english-zone.com/spelling/plurals.html
+     * @see http://www.scribd.com/doc/3271143/List-of-100-Irregular-Plural-Nouns-in-English
+     */
+    static private $pluralMap = array(
+        // First entry: plural suffix, reversed
+        // Second entry: length of plural suffix
+        // Third entry: Whether the suffix may follow a vowel
+        // Fourth entry: Whether the suffix may follow a consonant
+        // Fifth entry: singular suffix, normal
+
+        // bacteria (bacterium), criteria (criterion), phenomena (phenomenon)
+        array('a', 1, true, true, array('on', 'um')),
+
+        // nebulae (nebula)
+        array('ea', 2, true, true, 'a'),
+
+        // mice (mouse), lice (louse)
+        array('eci', 3, false, true, 'ouse'),
+
+        // geese (goose)
+        array('esee', 4, false, true, 'oose'),
+
+        // teeth (tooth)
+        array('hteet', 5, true, true, 'tooth'),
+
+        // fungi (fungus), alumni (alumnus), syllabi (syllabus), radii (radius)
+        array('i', 1, true, true, 'us'),
+
+        // men (man), women (woman)
+        array('nem', 3, true, true, 'man'),
+
+        // children (child)
+        array('nerdlihc', 8, true, true, 'child'),
+
+        // oxen (ox)
+        array('nexo', 4, false, false, 'ox'),
+
+        // indices (index), appendices (appendix)
+        array('seci', 4, false, true, array('ex', 'ix')),
+
+        // babies (baby)
+        array('sei', 3, false, true, 'y'),
+
+        // analyses (analysis), ellipses (ellipsis), funguses (fungus),
+        // neuroses (neurosis), theses (thesis), emphases (emphasis),
+        // oases (oasis), crises (crisis), houses (house), bases (base),
+        // atlases (atlas), kisses (kiss)
+        array('ses', 3, true, true, array('s', 'se', 'sis')),
+
+        // lives (life), wives (wife)
+        array('sevi', 4, false, true, 'ife'),
+
+        // hooves (hoof), dwarves (dwarf), elves (elf), leaves (leaf)
+        array('sev', 3, true, true, 'f'),
+
+        // axes (axis)
+        array('sexa', 4, false, false, 'axis'),
+
+        // indexes (index), matrixes (matrix)
+        array('sex', 3, true, false, 'x'),
+
+        // quizzes (quiz)
+        array('sezz', 4, true, false, 'z'),
+
+        // bureaus (bureau)
+        array('suae', 4, false, true, 'eau'),
+
+        // roses (rose), garages (garage), cassettes (cassette),
+        // waltzes (waltz), heroes (hero), bushes (bush), arches (arch),
+        // shoes (shoe)
+        array('se', 2, true, true, array('', 'e')),
+
+        // tags (tag)
+        array('s', 1, true, true, ''),
+
+        // feet (foot)
+        array('teef', 4, true, true, 'foot'),
+
+        // chateaux (chateau)
+        array('xuae', 4, false, true, 'eau'),
+    );
+
+    /**
+     * Returns the singular form of a word
+     *
+     * The word is compared case-insensitively against the suffixes of
+     * self::$pluralMap. The first entry that matches determines the result.
+     * If the method can't determine the form with certainty, an array of the
+     * possible singulars is returned. Words that match no entry, including
+     * the empty string, are returned unchanged.
+     *
+     * @param string $plural A word in plural form
+     * @return string|array The singular form or an array of possible singular
+     *                      forms
+     */
+    static public function singularify($plural)
+    {
+        $pluralRev = strrev($plural);
+        $lowerPluralRev = strtolower($pluralRev);
+        $pluralLength = strlen($lowerPluralRev);
+
+        // An empty word cannot match any suffix. Returning early also avoids
+        // reading offset 0 of an empty string in the comparison loop below
+        if (0 === $pluralLength) {
+            return $plural;
+        }
+
+        // The outer loop $i iterates over the entries of the plural table
+        // The inner loop $j iterates over the characters of the plural suffix
+        // in the plural table to compare them with the characters of the actual
+        // given plural suffix
+        for ($i = 0, $numPlurals = count(self::$pluralMap); $i < $numPlurals; ++$i) {
+            $suffix = self::$pluralMap[$i][self::PLURAL_SUFFIX];
+            $suffixLength = self::$pluralMap[$i][self::PLURAL_SUFFIX_LENGTH];
+            $j = 0;
+
+            // Compare characters in the plural table and of the suffix of the
+            // given plural one by one
+            while ($suffix[$j] === $lowerPluralRev[$j]) {
+                // Let $j point to the next character
+                ++$j;
+
+                // Successfully compared the last character
+                // Add an entry with the singular suffix to the singular array
+                if ($j === $suffixLength) {
+                    // Is there any character preceding the suffix in the plural string?
+                    if ($j < $pluralLength) {
+                        $nextIsVocal = false !== strpos('aeiou', $lowerPluralRev[$j]);
+
+                        if (!self::$pluralMap[$i][self::PLURAL_SUFFIX_AFTER_VOCAL] && $nextIsVocal) {
+                            break;
+                        }
+
+                        if (!self::$pluralMap[$i][self::PLURAL_SUFFIX_AFTER_CONS] && !$nextIsVocal) {
+                            break;
+                        }
+                    }
+
+                    $newBase = substr($plural, 0, $pluralLength - $suffixLength);
+                    $newSuffix = self::$pluralMap[$i][self::SINGULAR_SUFFIX];
+
+                    // Check whether the first character in the plural suffix
+                    // is uppercased. If yes, uppercase the first character in
+                    // the singular suffix too
+                    $firstUpper = ctype_upper($pluralRev[$j - 1]);
+
+                    if (is_array($newSuffix)) {
+                        $singulars = array();
+
+                        foreach ($newSuffix as $newSuffixEntry) {
+                            $singulars[] = $newBase . ($firstUpper ? ucfirst($newSuffixEntry) : $newSuffixEntry);
+                        }
+
+                        return $singulars;
+                    }
+
+                    return $newBase . ($firstUpper ? ucfirst($newSuffix) : $newSuffix);
+                }
+
+                // Suffix is longer than word
+                if ($j === $pluralLength) {
+                    break;
+                }
+            }
+        }
+
+        // No table entry matched: assume that plural and singular are identical
+        return $plural;
+    }
+
     /**
      * Returns whether the given choice is a group.
      *
diff --git a/tests/Symfony/Tests/Component/Form/Util/FormUtilTest.php b/tests/Symfony/Tests/Component/Form/Util/FormUtilTest.php
index 20e110a791..5fc1d0736c 100644
--- a/tests/Symfony/Tests/Component/Form/Util/FormUtilTest.php
+++ b/tests/Symfony/Tests/Component/Form/Util/FormUtilTest.php
@@ -77,4 +77,129 @@ class FormUtilTest extends \PHPUnit_Framework_TestCase
     {
         $this->assertSame($expected, FormUtil::isChoiceSelected($choice, $value));
     }
+
+    public function singularifyProvider()
+    {
+        // see http://english-zone.com/spelling/plurals.html
+        // see http://www.scribd.com/doc/3271143/List-of-100-Irregular-Plural-Nouns-in-English
+        return array(
+            array('tags', 'tag'),
+            array('alumni', 'alumnus'),
+            array('funguses', array('fungus', 'funguse', 'fungusis')),
+            array('fungi', 'fungus'),
+            array('axes', 'axis'),
+            array('appendices', array('appendex', 'appendix')),
+            array('indices', array('index', 'indix')),
+            array('indexes', 'index'),
+            array('children', 'child'),
+            array('men', 'man'),
+            array('women', 'woman'),
+            array('oxen', 'ox'),
+            array('bacteria', array('bacterion', 'bacterium')),
+            array('criteria', array('criterion', 'criterium')),
+            array('feet', 'foot'),
+            array('nebulae', 'nebula'),
+            array('babies', 'baby'),
+            array('hooves', 'hoof'),
+            array('chateaux', 'chateau'),
+            array('echoes', array('echo', 'echoe')),
+            array('analyses', array('analys', 'analyse', 'analysis')),
+            array('theses', array('thes', 'these', 'thesis')),
+            array('foci', 'focus'),
+            array('focuses', array('focus', 'focuse', 'focusis')),
+            array('oases', array('oas', 'oase', 'oasis')),
+            array('matrices', array('matrex', 'matrix')),
+            array('matrixes', 'matrix'),
+            array('bureaus', 'bureau'),
+            array('bureaux', 'bureau'),
+            array('beaux', 'beau'),
+            array('data', array('daton', 'datum')),
+            array('phenomena', array('phenomenon', 'phenomenum')),
+            array('strata', array('straton', 'stratum')),
+            array('geese', 'goose'),
+            array('teeth', 'tooth'),
+            array('antennae', 'antenna'),
+            array('antennas', 'antenna'),
+            array('houses', array('hous', 'house', 'housis')),
+            array('arches', array('arch', 'arche')),
+            array('atlases', array('atlas', 'atlase', 'atlasis')),
+            // 'axes' is ambiguous (axis/axe); the suffix table resolves it to
+            // 'axis' only, so array('axes', 'axe') cannot be asserted here
+            array('batches', array('batch', 'batche')),
+            array('bushes', array('bush', 'bushe')),
+            array('buses', array('bus', 'buse', 'busis')),
+            array('calves', 'calf'),
+            array('circuses', array('circus', 'circuse', 'circusis')),
+            array('crises', array('cris', 'crise', 'crisis')),
+            array('dwarves', 'dwarf'),
+            array('elves', 'elf'),
+            array('emphases', array('emphas', 'emphase', 'emphasis')),
+            array('faxes', 'fax'),
+            array('halves', 'half'),
+            array('heroes', array('hero', 'heroe')),
+            array('hoaxes', 'hoax'),
+            array('irises', array('iris', 'irise', 'irisis')),
+            array('kisses', array('kiss', 'kisse', 'kissis')),
+            array('knives', 'knife'),
+            array('lives', 'life'),
+            array('lice', 'louse'),
+            array('mice', 'mouse'),
+            array('neuroses', array('neuros', 'neurose', 'neurosis')),
+            array('plateaux', 'plateau'),
+            array('poppies', 'poppy'),
+            array('quizzes', 'quiz'),
+            array('scarves', 'scarf'),
+            array('spies', 'spy'),
+            array('stories', 'story'),
+            array('syllabi', 'syllabus'),
+            array('thieves', 'thief'),
+            array('waltzes', array('waltz', 'waltze')),
+            array('wharves', 'wharf'),
+            array('wives', 'wife'),
+            array('ions', 'ion'),
+            array('bases', array('bas', 'base', 'basis')),
+            array('cars', 'car'),
+            array('cassettes', array('cassett', 'cassette')),
+            array('lamps', 'lamp'),
+            array('hats', 'hat'),
+            array('cups', 'cup'),
+            array('boxes', 'box'),
+            array('sandwiches', array('sandwich', 'sandwiche')),
+            array('suitcases', array('suitcas', 'suitcase', 'suitcasis')),
+            array('roses', array('ros', 'rose', 'rosis')),
+            array('garages', array('garag', 'garage')),
+            array('shoes', array('sho', 'shoe')),
+            array('days', 'day'),
+            array('boys', 'boy'),
+            array('roofs', 'roof'),
+            array('cliffs', 'cliff'),
+            array('sheriffs', 'sheriff'),
+            array('discos', 'disco'),
+            array('pianos', 'piano'),
+            array('photos', 'photo'),
+            array('trees', array('tre', 'tree')),
+            array('bees', array('be', 'bee')),
+            array('cheeses', array('chees', 'cheese', 'cheesis')),
+            array('radii', 'radius'),
+
+            // words without a known plural suffix are returned unchanged
+            array('sheep', 'sheep'),
+            array('deer', 'deer'),
+            array('', ''),
+
+            // test casing: if the first letter was uppercase, it should remain so
+            array('Men', 'Man'),
+            array('GrandChildren', 'GrandChild'),
+            array('SubTrees', array('SubTre', 'SubTree')),
+            array('Feet', 'Foot'),
+        );
+    }
+
+    /**
+     * @dataProvider singularifyProvider
+     */
+    public function testSingularify($plural, $singular)
+    {
+        $this->assertSame($singular, FormUtil::singularify($plural));
+    }
 }