[Form] Added FormUtil::singularify()

This commit is contained in:
Bernhard Schussek 2012-02-01 17:16:33 +01:00
parent 8245bf13d5
commit 7837f50c73
2 changed files with 293 additions and 0 deletions

View File

@ -16,6 +16,181 @@ namespace Symfony\Component\Form\Util;
*/
abstract class FormUtil
{
// The constants below are the positions of the fields inside each entry of
// self::$pluralMap.

/**
 * Index of the plural suffix, stored reversed and in lower case.
 */
const PLURAL_SUFFIX = 0;
/**
 * Index of the length of the plural suffix.
 */
const PLURAL_SUFFIX_LENGTH = 1;
/**
 * Index of the flag telling whether the suffix may follow a vowel.
 */
const PLURAL_SUFFIX_AFTER_VOCAL = 2;
/**
 * Index of the flag telling whether the suffix may follow a consonant.
 */
const PLURAL_SUFFIX_AFTER_CONS = 3;
/**
 * Index of the singular suffix (a string, or an array of candidate strings).
 */
const SINGULAR_SUFFIX = 4;
/**
 * Maps English plural suffixes to singular suffixes.
 *
 * Each entry is a list indexed by the PLURAL_SUFFIX* and SINGULAR_SUFFIX
 * class constants. singularify() walks the entries in order and returns on
 * the first one that applies, so a longer, more specific suffix must be
 * listed before a shorter suffix it ends with (e.g. "sevi" before "sev",
 * "ses" before "se" before "s").
 *
 * @var array
 *
 * @see http://english-zone.com/spelling/plurals.html
 * @see http://www.scribd.com/doc/3271143/List-of-100-Irregular-Plural-Nouns-in-English
 */
static private $pluralMap = array(
// First entry: plural suffix, reversed
// Second entry: length of plural suffix
// Third entry: whether the suffix may follow a vowel
// Fourth entry: whether the suffix may follow a consonant
// Fifth entry: singular suffix (not reversed); an array lists several
// candidate suffixes
// bacteria (bacterium), criteria (criterion), phenomena (phenomenon)
array('a', 1, true, true, array('on', 'um')),
// nebulae (nebula)
array('ea', 2, true, true, 'a'),
// mice (mouse), lice (louse)
array('eci', 3, false, true, 'ouse'),
// geese (goose)
array('esee', 4, false, true, 'oose'),
// fungi (fungus), alumni (alumnus), syllabi (syllabus), radii (radius)
array('i', 1, true, true, 'us'),
// men (man), women (woman)
array('nem', 3, true, true, 'man'),
// children (child)
array('nerdlihc', 8, true, true, 'child'),
// oxen (ox)
array('nexo', 4, false, false, 'ox'),
// indices (index), appendices (appendix)
array('seci', 4, false, true, array('ex', 'ix')),
// babies (baby)
array('sei', 3, false, true, 'y'),
// analyses (analysis), ellipses (ellipsis), funguses (fungus),
// neuroses (neurosis), theses (thesis), emphases (emphasis),
// oases (oasis), crises (crisis), houses (house), bases (base),
// atlases (atlas), kisses (kiss)
array('ses', 3, true, true, array('s', 'se', 'sis')),
// lives (life), wives (wife)
array('sevi', 4, false, true, 'ife'),
// hooves (hoof), dwarves (dwarf), elves (elf), leaves (leaf)
array('sev', 3, true, true, 'f'),
// axes (axis)
array('sexa', 4, false, false, 'axis'),
// indexes (index), matrixes (matrix)
array('sex', 3, true, false, 'x'),
// quizzes (quiz)
array('sezz', 4, true, false, 'z'),
// bureaus (bureau)
array('suae', 4, false, true, 'eau'),
// roses (rose), garages (garage), cassettes (cassette),
// waltzes (waltz), heroes (hero), bushes (bush), arches (arch),
// shoes (shoe)
array('se', 2, true, true, array('', 'e')),
// tags (tag)
array('s', 1, true, true, ''),
// chateaux (chateau)
array('xuae', 4, false, true, 'eau'),
);
/**
 * Returns the singular form of a word.
 *
 * The end of the word is compared against the plural suffixes listed in
 * self::$pluralMap, in table order; the first applicable entry decides the
 * result. If that entry maps to several singular suffixes, the method can't
 * determine the form with certainty and an array of the possible singulars
 * is returned.
 *
 * Words that match no suffix are returned unchanged, except for words
 * ending in "feet" or "teeth", which are converted to "foot" and "tooth".
 *
 * @param string $plural A word in plural form
 * @return string|array The singular form or an array of possible singular
 *                      forms
 */
static public function singularify($plural)
{
    $pluralLength = strlen($plural);

    // An empty word has no suffix to inspect. Returning early also avoids
    // reading offset 0 of an empty string further down.
    if (0 === $pluralLength) {
        return $plural;
    }

    // The suffixes in the plural table are stored reversed, so the word is
    // reversed once and then compared from its start
    $pluralRev = strrev($plural);
    $lowerPluralRev = strtolower($pluralRev);

    foreach (self::$pluralMap as $entry) {
        $suffix = $entry[self::PLURAL_SUFFIX];
        $suffixLength = $entry[self::PLURAL_SUFFIX_LENGTH];

        // Suffix is longer than word
        if ($suffixLength > $pluralLength) {
            continue;
        }

        // The word does not end with this plural suffix
        if (0 !== strncmp($lowerPluralRev, $suffix, $suffixLength)) {
            continue;
        }

        // Is there any character preceding the suffix in the plural string?
        if ($suffixLength < $pluralLength) {
            $nextIsVocal = false !== strpos('aeiou', $lowerPluralRev[$suffixLength]);

            if ($nextIsVocal && !$entry[self::PLURAL_SUFFIX_AFTER_VOCAL]) {
                continue;
            }

            if (!$nextIsVocal && !$entry[self::PLURAL_SUFFIX_AFTER_CONS]) {
                continue;
            }
        }

        $newBase = substr($plural, 0, $pluralLength - $suffixLength);
        $newSuffix = $entry[self::SINGULAR_SUFFIX];

        // Check whether the first character in the plural suffix
        // is uppercased. If yes, uppercase the first character in
        // the singular suffix too
        $firstUpper = ctype_upper($pluralRev[$suffixLength - 1]);

        if (is_array($newSuffix)) {
            $singulars = array();

            foreach ($newSuffix as $newSuffixEntry) {
                $singulars[] = $newBase . ($firstUpper ? ucfirst($newSuffixEntry) : $newSuffixEntry);
            }

            return $singulars;
        }

        return $newBase . ($firstUpper ? ucfirst($newSuffix) : $newSuffix);
    }

    // Convert teeth to tooth, feet to foot. Only these two endings are
    // rewritten: replacing every "ee" would corrupt unrelated words that
    // reach this point (e.g. "sheep" or "feedback").
    $replacements = 0;
    $singular = preg_replace('/(?<=[fF])ee(?=t\z)|(?<=[tT])ee(?=th\z)/', 'oo', $plural, 1, $replacements);

    if ($replacements > 0) {
        return $singular;
    }

    // Assume that plural and singular is identical
    return $plural;
}
/**
* Returns whether the given choice is a group.
*

View File

@ -77,4 +77,122 @@ class FormUtilTest extends \PHPUnit_Framework_TestCase
{
$this->assertSame($expected, FormUtil::isChoiceSelected($choice, $value));
}
/**
 * Provides the data sets for testSingularify().
 *
 * @return array A list of array(plural, expected) pairs, where expected is
 *               either a string or a list of candidate singulars
 */
public function singularifyProvider()
{
    // see http://english-zone.com/spelling/plurals.html
    // see http://www.scribd.com/doc/3271143/List-of-100-Irregular-Plural-Nouns-in-English
    $expectations = array(
        'tags' => 'tag',
        'alumni' => 'alumnus',
        'funguses' => array('fungus', 'funguse', 'fungusis'),
        'fungi' => 'fungus',
        // "axes" is listed with the singular "axis" only; "axe" is not covered
        'axes' => 'axis',
        'appendices' => array('appendex', 'appendix'),
        'indices' => array('index', 'indix'),
        'indexes' => 'index',
        'children' => 'child',
        'men' => 'man',
        'women' => 'woman',
        'oxen' => 'ox',
        'bacteria' => array('bacterion', 'bacterium'),
        'criteria' => array('criterion', 'criterium'),
        'feet' => 'foot',
        'nebulae' => 'nebula',
        'babies' => 'baby',
        'hooves' => 'hoof',
        'chateaux' => 'chateau',
        'echoes' => array('echo', 'echoe'),
        'analyses' => array('analys', 'analyse', 'analysis'),
        'theses' => array('thes', 'these', 'thesis'),
        'foci' => 'focus',
        'focuses' => array('focus', 'focuse', 'focusis'),
        'oases' => array('oas', 'oase', 'oasis'),
        'matrices' => array('matrex', 'matrix'),
        'matrixes' => 'matrix',
        'bureaus' => 'bureau',
        'bureaux' => 'bureau',
        'beaux' => 'beau',
        'data' => array('daton', 'datum'),
        'phenomena' => array('phenomenon', 'phenomenum'),
        'strata' => array('straton', 'stratum'),
        'geese' => 'goose',
        'teeth' => 'tooth',
        'antennae' => 'antenna',
        'antennas' => 'antenna',
        'houses' => array('hous', 'house', 'housis'),
        'arches' => array('arch', 'arche'),
        'atlases' => array('atlas', 'atlase', 'atlasis'),
        'batches' => array('batch', 'batche'),
        'bushes' => array('bush', 'bushe'),
        'buses' => array('bus', 'buse', 'busis'),
        'calves' => 'calf',
        'circuses' => array('circus', 'circuse', 'circusis'),
        'crises' => array('cris', 'crise', 'crisis'),
        'dwarves' => 'dwarf',
        'elves' => 'elf',
        'emphases' => array('emphas', 'emphase', 'emphasis'),
        'faxes' => 'fax',
        'halves' => 'half',
        'heroes' => array('hero', 'heroe'),
        'hoaxes' => 'hoax',
        'irises' => array('iris', 'irise', 'irisis'),
        'kisses' => array('kiss', 'kisse', 'kissis'),
        'knives' => 'knife',
        'lives' => 'life',
        'lice' => 'louse',
        'mice' => 'mouse',
        'neuroses' => array('neuros', 'neurose', 'neurosis'),
        'plateaux' => 'plateau',
        'poppies' => 'poppy',
        'quizzes' => 'quiz',
        'scarves' => 'scarf',
        'spies' => 'spy',
        'stories' => 'story',
        'syllabi' => 'syllabus',
        'thieves' => 'thief',
        'waltzes' => array('waltz', 'waltze'),
        'wharves' => 'wharf',
        'wives' => 'wife',
        'ions' => 'ion',
        'bases' => array('bas', 'base', 'basis'),
        'cars' => 'car',
        'cassettes' => array('cassett', 'cassette'),
        'lamps' => 'lamp',
        'hats' => 'hat',
        'cups' => 'cup',
        'boxes' => 'box',
        'sandwiches' => array('sandwich', 'sandwiche'),
        'suitcases' => array('suitcas', 'suitcase', 'suitcasis'),
        'roses' => array('ros', 'rose', 'rosis'),
        'garages' => array('garag', 'garage'),
        'shoes' => array('sho', 'shoe'),
        'days' => 'day',
        'boys' => 'boy',
        'roofs' => 'roof',
        'cliffs' => 'cliff',
        'sheriffs' => 'sheriff',
        'discos' => 'disco',
        'pianos' => 'piano',
        'photos' => 'photo',
        'trees' => array('tre', 'tree'),
        'bees' => array('be', 'bee'),
        'cheeses' => array('chees', 'cheese', 'cheesis'),
        'radii' => 'radius',
        // test casing: if the first letter was uppercase, it should remain so
        'Men' => 'Man',
        'GrandChildren' => 'GrandChild',
        'SubTrees' => array('SubTre', 'SubTree'),
    );

    // Turn the map into the list of argument lists PHPUnit expects
    $dataSets = array();

    foreach ($expectations as $pluralForm => $expected) {
        $dataSets[] = array($pluralForm, $expected);
    }

    return $dataSets;
}
/**
 * Checks that FormUtil::singularify() returns the expected singular form(s).
 *
 * @dataProvider singularifyProvider
 *
 * @param string       $plural   The plural word passed to singularify()
 * @param string|array $singular The expected singular, or the expected list
 *                               of candidate singulars in order
 */
public function testSingularify($plural, $singular)
{
    // Strict comparison: the result must have the expected type and, for
    // arrays, the same candidates in the same order
    $this->assertSame($singular, FormUtil::singularify($plural));
}
}