feature #36016 [Translation] Add a pseudo localization translator (fancyweb)

This PR was merged into the 5.2-dev branch.

Discussion
----------

[Translation] Add a pseudo localization translator

| Q             | A
| ------------- | ---
| Branch?       | master
| Bug fix?      | no
| New feature?  | yes
| Deprecations? | no
| Tickets       | https://github.com/symfony/symfony/issues/35666
| License       | MIT
| Doc PR        | TODO

This PR introduces a new translator to be able to test apps with pseudo localization (check the related issue).

The `PseudoLocalizationTranslator` decorates another translator and then alters the translated string. There are 5 options:

- accents:
     - type: boolean
     - default: true
     - description: replace ASCII characters of the translated string with accented versions or similar characters
     - example: if true, `foo` => `ƒöö`

- expansion_factor:
     - type: float
     - default: 1
     - validation: it must be greater than or equal to 1
     - description: expand the translated string by the given factor with spaces and tildes
     - example: if 2 (and brackets disabled), `foo` => `foo ~~`

- brackets:
     - type: boolean
     - default: true
     - description: wrap the translated string with brackets
     - example: if true, `foo` => `[foo]`

- parse_html:
     - type: boolean
     - default: false
     - description: parse the translated string as HTML - looking for HTML tags has a performance impact but allows to preserve them from alterations - it also allows to compute the visible translated string length which is useful to correctly expand it when it contains HTML
     - warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - eg, `foo <div>bar` => `foo <div>bar</div>`

- localizable_html_attributes:
     - type: string[]
     - default: []
     - description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
     - example: if ["title"], and with the "accents" option set to true, `<a href="#" title="Go to your profile">Profile</a>` => `<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>` - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.

Here is a screenshot on a Symfony demo page:
<img width="1374" alt="Screenshot 2020-03-26 at 14 31 20" src="https://user-images.githubusercontent.com/3658119/77652489-9380d100-6f6e-11ea-9bca-142faeee750b.png">

TODO:
- [x] Update FWB XSD

Commits
-------

4d6a41a415 [Translation] Add a pseudo localization translator
This commit is contained in:
Fabien Potencier 2020-08-18 16:11:58 +02:00
commit 27d84dbe57
7 changed files with 496 additions and 0 deletions

View File

@ -784,6 +784,22 @@ class Configuration implements ConfigurationInterface
->prototype('scalar')->end()
->defaultValue([])
->end()
->arrayNode('pseudo_localization')
->canBeEnabled()
->fixXmlConfig('localizable_html_attribute')
->children()
->booleanNode('accents')->defaultTrue()->end()
->floatNode('expansion_factor')
->min(1.0)
->defaultValue(1.0)
->end()
->booleanNode('brackets')->defaultTrue()->end()
->booleanNode('parse_html')->defaultFalse()->end()
->arrayNode('localizable_html_attributes')
->prototype('scalar')->end()
->end()
->end()
->end()
->end()
->end()
->end()

View File

@ -134,6 +134,7 @@ use Symfony\Component\Stopwatch\Stopwatch;
use Symfony\Component\String\LazyString;
use Symfony\Component\String\Slugger\SluggerInterface;
use Symfony\Component\Translation\Command\XliffLintCommand as BaseXliffLintCommand;
use Symfony\Component\Translation\PseudoLocalizationTranslator;
use Symfony\Component\Translation\Translator;
use Symfony\Component\Validator\ConstraintValidatorInterface;
use Symfony\Component\Validator\Mapping\Loader\PropertyInfoLoader;
@ -1219,6 +1220,19 @@ class FrameworkExtension extends Extension
$translator->replaceArgument(4, $options);
}
if ($config['pseudo_localization']['enabled']) {
$options = $config['pseudo_localization'];
unset($options['enabled']);
$container
->register('translator.pseudo', PseudoLocalizationTranslator::class)
->setDecoratedService('translator', null, -1) // Lower priority than "translator.data_collector"
->setArguments([
new Reference('translator.pseudo.inner'),
$options,
]);
}
}
private function registerValidationConfiguration(array $config, ContainerBuilder $container, PhpFileLoader $loader, bool $propertyInfoEnabled)

View File

@ -170,6 +170,7 @@
<xsd:element name="fallback" type="xsd:string" minOccurs="0" maxOccurs="unbounded" />
<xsd:element name="path" type="xsd:string" minOccurs="0" maxOccurs="unbounded" />
<xsd:element name="enabled-locale" type="xsd:string" minOccurs="0" maxOccurs="unbounded" />
<xsd:element name="pseudo_localization" type="pseudo_localization" minOccurs="0" maxOccurs="1" />
</xsd:sequence>
<xsd:attribute name="enabled" type="xsd:boolean" />
<xsd:attribute name="fallback" type="xsd:string" />
@ -178,6 +179,17 @@
<xsd:attribute name="cache-dir" type="xsd:string" />
</xsd:complexType>
<xsd:complexType name="pseudo_localization">
<xsd:sequence>
<xsd:element name="localizable_html_attribute" type="xsd:string" minOccurs="0" maxOccurs="unbounded" />
</xsd:sequence>
<xsd:attribute name="enabled" type="xsd:boolean" />
<xsd:attribute name="accents" type="xsd:boolean" />
<xsd:attribute name="expansion_factor" type="xsd:float" />
<xsd:attribute name="brackets" type="xsd:boolean" />
<xsd:attribute name="parse_html" type="xsd:boolean" />
</xsd:complexType>
<xsd:complexType name="validation">
<xsd:choice minOccurs="0" maxOccurs="unbounded">
<xsd:element name="static-method" type="xsd:string" />

View File

@ -379,6 +379,14 @@ class ConfigurationTest extends TestCase
'paths' => [],
'default_path' => '%kernel.project_dir%/translations',
'enabled_locales' => [],
'pseudo_localization' => [
'enabled' => false,
'accents' => true,
'expansion_factor' => 1.0,
'brackets' => true,
'parse_html' => false,
'localizable_html_attributes' => [],
],
],
'validation' => [
'enabled' => !class_exists(FullStack::class),

View File

@ -1,6 +1,11 @@
CHANGELOG
=========
5.2.0
-----
* added `PseudoLocalizationTranslator`
5.1.0
-----

View File

@ -0,0 +1,359 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\Component\Translation;
use Symfony\Contracts\Translation\TranslatorInterface;
/**
 * Decorates another translator and alters the strings it returns (accents,
 * expansion, brackets) so that localization issues become visible.
 *
 * This translator should only be used in a development environment.
 */
final class PseudoLocalizationTranslator implements TranslatorInterface
{
    private const EXPANSION_CHARACTER = '~';

    private $translator;
    private $accents;
    private $expansionFactor;
    private $brackets;
    private $parseHTML;
    private $localizableHTMLAttributes;

    /**
     * Available options:
     *  * accents:
     *      type: boolean
     *      default: true
     *      description: replace ASCII characters of the translated string with accented versions or similar characters
     *      example: if true, "foo" => "ƒöö".
     *
     *  * expansion_factor:
     *      type: float
     *      default: 1
     *      validation: it must be greater than or equal to 1
     *      description: expand the translated string by the given factor with spaces and tildes
     *      example: if 2 (and brackets disabled), "foo" => "foo ~~"
     *
     *  * brackets:
     *      type: boolean
     *      default: true
     *      description: wrap the translated string with brackets
     *      example: if true, "foo" => "[foo]"
     *
     *  * parse_html:
     *      type: boolean
     *      default: false
     *      description: parse the translated string as HTML - looking for HTML tags has a performance impact but allows to preserve them from alterations - it also allows to compute the visible translated string length which is useful to correctly expand it when it contains HTML
     *      warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - eg, "foo <div>bar" => "foo <div>bar</div>"
     *
     *  * localizable_html_attributes:
     *      type: string[]
     *      default: []
     *      description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
     *      example: if ["title"], and with the "accents" option set to true, "<a href="#" title="Go to your profile">Profile</a>" => "<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>" - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.
     *
     * @throws \InvalidArgumentException When the expansion factor is lower than 1
     */
    public function __construct(TranslatorInterface $translator, array $options = [])
    {
        $this->translator = $translator;
        $this->accents = $options['accents'] ?? true;

        // Normalize to float so that an integer factor (e.g. 1) satisfies the strict comparisons below.
        $this->expansionFactor = (float) ($options['expansion_factor'] ?? 1.0);
        if (1.0 > $this->expansionFactor) {
            throw new \InvalidArgumentException('The expansion factor must be greater than or equal to 1.');
        }

        $this->brackets = $options['brackets'] ?? true;

        $this->parseHTML = $options['parse_html'] ?? false;
        // Without accents and without expansion nothing depends on the HTML structure, so skip the costly parsing.
        if ($this->parseHTML && !$this->accents && 1.0 === $this->expansionFactor) {
            $this->parseHTML = false;
        }

        $this->localizableHTMLAttributes = $options['localizable_html_attributes'] ?? [];
    }

    /**
     * {@inheritdoc}
     */
    public function trans(string $id, array $parameters = [], string $domain = null, string $locale = null)
    {
        $trans = '';
        $visibleText = '';

        foreach ($this->getParts($this->translator->trans($id, $parameters, $domain, $locale)) as [$visible, $localizable, $text]) {
            // The visible text is collected untouched: it only serves to compute the expansion length.
            if ($visible) {
                $visibleText .= $text;
            }

            if (!$localizable) {
                $trans .= $text;

                continue;
            }

            $this->addAccents($trans, $text);
        }

        $this->expand($trans, $visibleText);

        $this->addBrackets($trans);

        return $trans;
    }

    /**
     * Splits the translated string into a list of [visible, localizable, text] triples.
     *
     * When HTML parsing is disabled the whole string is one visible and localizable part.
     */
    private function getParts(string $originalTrans): array
    {
        if (!$this->parseHTML) {
            return [[true, true, $originalTrans]];
        }

        // Encode every non-ASCII character as a numeric entity so that DOMDocument::loadHTML()
        // decodes it correctly. The "HTML-ENTITIES" pseudo-encoding of mb_convert_encoding()
        // is deprecated as of PHP 8.2.
        $html = mb_encode_numericentity($originalTrans, [0x80, 0x10FFFF, 0, 0x1FFFFF], mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8');

        // Silence libxml warnings while parsing, then restore the previous error handling mode.
        $useInternalErrors = libxml_use_internal_errors(true);

        $dom = new \DOMDocument();
        $dom->loadHTML('<trans>'.$html.'</trans>');

        libxml_clear_errors();
        libxml_use_internal_errors($useInternalErrors);

        // Walk down to the node created for the <trans> wrapper passed to loadHTML().
        return $this->parseNode($dom->childNodes->item(1)->childNodes->item(0)->childNodes->item(0));
    }

    /**
     * Recursively converts the children of a DOM node into [visible, localizable, text] triples.
     *
     * Markup (tag names, attribute names, quotes) is neither visible nor localizable;
     * attribute values are localizable only when the attribute is listed in the
     * "localizable_html_attributes" option.
     */
    private function parseNode(\DOMNode $node): array
    {
        $parts = [];

        foreach ($node->childNodes as $childNode) {
            if (!$childNode instanceof \DOMElement) {
                $parts[] = [true, true, $childNode->nodeValue];

                continue;
            }

            $parts[] = [false, false, '<'.$childNode->tagName];

            /** @var \DOMAttr $attribute */
            foreach ($childNode->attributes as $attribute) {
                $parts[] = [false, false, ' '.$attribute->nodeName.'="'];

                $localizableAttribute = \in_array($attribute->nodeName, $this->localizableHTMLAttributes, true);
                // With PREG_SPLIT_DELIM_CAPTURE, even indexes hold plain text and odd indexes hold the captured
                // HTML entities, which must be kept as is.
                foreach (preg_split('/(&(?:amp|quot|#039|lt|gt);+)/', htmlspecialchars($attribute->nodeValue, ENT_QUOTES, 'UTF-8'), -1, PREG_SPLIT_DELIM_CAPTURE) as $i => $match) {
                    if ('' === $match) {
                        continue;
                    }

                    $parts[] = [false, $localizableAttribute && 0 === $i % 2, $match];
                }

                $parts[] = [false, false, '"'];
            }

            $parts[] = [false, false, '>'];

            $parts = array_merge($parts, $this->parseNode($childNode));

            $parts[] = [false, false, '</'.$childNode->tagName.'>'];
        }

        return $parts;
    }

    /**
     * Appends $text to $trans, replacing printable ASCII characters by look-alike
     * characters when the "accents" option is enabled.
     *
     * Every replacement is a non-empty string so that no character is ever dropped.
     */
    private function addAccents(string &$trans, string $text): void
    {
        $trans .= $this->accents ? strtr($text, [
            // NOTE(review): EM SPACE follows the usual pseudo-localization accent table - confirm against the test suite
            ' ' => "\u{2003}",
            '!' => '¡',
            '"' => '″',
            '#' => '♯',
            '$' => '€',
            '%' => '‰',
            '&' => '⅋',
            '\'' => '´',
            '(' => '{',
            ')' => '}',
            '*' => "\u{204E}", // ⁎ LOW ASTERISK
            '+' => '⁺',
            ',' => '،',
            '-' => "\u{2010}", // ‐ HYPHEN
            '.' => '·',
            '/' => "\u{2044}", // ⁄ FRACTION SLASH
            '0' => '⓪',
            '1' => '①',
            '2' => '②',
            '3' => '③',
            '4' => '④',
            '5' => '⑤',
            '6' => '⑥',
            '7' => '⑦',
            '8' => '⑧',
            '9' => '⑨',
            ':' => "\u{2236}", // ∶ RATIO
            ';' => '⁏',
            '<' => '≤',
            '=' => '≂',
            '>' => '≥',
            '?' => '¿',
            '@' => '՞',
            'A' => 'Å',
            'B' => 'Ɓ',
            'C' => 'Ç',
            'D' => 'Ð',
            'E' => 'É',
            'F' => 'Ƒ',
            'G' => 'Ĝ',
            'H' => 'Ĥ',
            'I' => 'Î',
            'J' => 'Ĵ',
            'K' => 'Ķ',
            'L' => 'Ļ',
            'M' => 'Ṁ',
            'N' => 'Ñ',
            'O' => 'Ö',
            'P' => 'Þ',
            'Q' => 'Ǫ',
            'R' => 'Ŕ',
            'S' => 'Š',
            'T' => 'Ţ',
            'U' => 'Û',
            'V' => 'Ṽ',
            'W' => 'Ŵ',
            'X' => 'Ẋ',
            'Y' => 'Ý',
            'Z' => 'Ž',
            '[' => '⁅',
            '\\' => "\u{2216}", // ∖ SET MINUS
            ']' => '⁆',
            '^' => '˄',
            '_' => '‿',
            '`' => "\u{2035}", // ‵ REVERSED PRIME
            'a' => 'å',
            'b' => 'ƀ',
            'c' => 'ç',
            'd' => 'ð',
            'e' => 'é',
            'f' => 'ƒ',
            'g' => 'ĝ',
            'h' => 'ĥ',
            'i' => 'î',
            'j' => 'ĵ',
            'k' => 'ķ',
            'l' => 'ļ',
            'm' => 'ɱ',
            'n' => 'ñ',
            'o' => 'ö',
            'p' => 'þ',
            'q' => 'ǫ',
            'r' => 'ŕ',
            's' => 'š',
            't' => 'ţ',
            'u' => 'û',
            'v' => 'ṽ',
            'w' => 'ŵ',
            'x' => 'ẋ',
            'y' => 'ý',
            'z' => 'ž',
            '{' => '(',
            '|' => '¦',
            '}' => ')',
            '~' => '˞',
        ]) : $text;
    }

    /**
     * Appends filler "words" made of the expansion character to $trans until the visible
     * length multiplied by the expansion factor (rounded up) is reached.
     *
     * Filler word lengths are drawn at random among the lengths of the visible words,
     * weighted by how often each length occurs. The two brackets count towards the target
     * length when the "brackets" option is enabled.
     */
    private function expand(string &$trans, string $visibleText): void
    {
        if (1.0 >= $this->expansionFactor) {
            return;
        }

        $visibleLength = $this->strlen($visibleText);
        $missingLength = (int) (ceil($visibleLength * $this->expansionFactor)) - $visibleLength;
        if ($this->brackets) {
            $missingLength -= 2;
        }

        if (0 >= $missingLength) {
            return;
        }

        // Map each usable word length to the number of visible words having that length.
        $words = [];
        $wordsCount = 0;
        foreach (preg_split('/ +/', $visibleText, -1, PREG_SPLIT_NO_EMPTY) as $word) {
            $wordLength = $this->strlen($word);

            // A filler word also needs a leading space, so it must be strictly shorter than what is missing.
            if ($wordLength >= $missingLength) {
                continue;
            }

            if (!isset($words[$wordLength])) {
                $words[$wordLength] = 0;
            }

            ++$words[$wordLength];
            ++$wordsCount;
        }

        if (!$words) {
            $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);

            return;
        }

        arsort($words, SORT_NUMERIC);

        $longestWordLength = max(array_keys($words));

        while (true) {
            // Weighted random pick: $length keeps the key at which the loop stopped.
            $r = mt_rand(1, $wordsCount);

            foreach ($words as $length => $count) {
                $r -= $count;
                if ($r <= 0) {
                    break;
                }
            }

            $trans .= ' '.str_repeat(self::EXPANSION_CHARACTER, $length);

            $missingLength -= $length + 1;

            if (0 === $missingLength) {
                return;
            }

            // Discard the lengths that no longer fit; when none is left, fill the remainder in one go.
            while ($longestWordLength >= $missingLength) {
                $wordsCount -= $words[$longestWordLength];
                unset($words[$longestWordLength]);

                if (!$words) {
                    $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);

                    return;
                }

                $longestWordLength = max(array_keys($words));
            }
        }
    }

    /**
     * Wraps $trans with square brackets when the "brackets" option is enabled.
     */
    private function addBrackets(string &$trans): void
    {
        if (!$this->brackets) {
            return;
        }

        $trans = '['.$trans.']';
    }

    /**
     * Returns the length of $s in characters, falling back to the byte length
     * when its encoding cannot be detected.
     */
    private function strlen(string $s): int
    {
        return false === ($encoding = mb_detect_encoding($s, null, true)) ? \strlen($s) : mb_strlen($s, $encoding);
    }
}

View File

@ -0,0 +1,82 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\Component\Translation\Tests;
use PHPUnit\Framework\TestCase;
use Symfony\Component\Translation\IdentityTranslator;
use Symfony\Component\Translation\PseudoLocalizationTranslator;
final class PseudoLocalizationTranslatorTest extends TestCase
{
    /**
     * Checks the pseudo-localized output of a string for a given set of options.
     *
     * @dataProvider provideTrans
     */
    public function testTrans(string $expected, string $input, array $options = []): void
    {
        // The expansion draws filler word lengths at random: seed the generator to get stable results.
        mt_srand(987);

        $translator = new PseudoLocalizationTranslator(new IdentityTranslator(), $options);

        $this->assertSame($expected, $translator->trans($input));
    }

    /**
     * Rows are [expected output, input, options (optional)].
     */
    public function provideTrans(): array
    {
        return [
            // Test defaults
            [
                '[ƒöö⭐ ≤þ≥ƁÅŔ≤⁄þ≥]',
                'foo⭐ <p>BAR</p>',
            ],
            [
                'before <div data-label="fcy"><a href="#" title="bar" data-content="ccc">foo</a></div> after',
                'before <div data-label="fcy"><a href="#" title="bar" data-content="ccc">foo</a></div> after',
                $this->getIsolatedOptions(['parse_html' => true]),
            ],
            [
                'ƀéƒöŕé <div data-label="fcyéé"><a href="#" title="bar" data-content="ccc">ƒöö éé</a></div> åƒţéŕ',
                'before <div data-label="fcyéé"><a href="#" title="bar" data-content="ccc">foo éé</a></div> after',
                $this->getIsolatedOptions(['parse_html' => true, 'accents' => true]),
            ],
            [
                'ƀéƒöŕé <div data-label="ƒçý"><a href="#" title="ƀåŕ" data-content="ccc">ƒöö</a></div> åƒţéŕ',
                'before <div data-label="fcy"><a href="#" title="bar" data-content="ccc">foo</a></div> after',
                $this->getIsolatedOptions(['parse_html' => true, 'localizable_html_attributes' => ['data-label', 'title'], 'accents' => true]),
            ],
            [
                ' ¡″♯€‰⅋´{}⁎⁺،‐·⁄⓪①②③④⑤⑥⑦⑧⑨∶⁏≤≂≥¿՞ÅƁÇÐÉƑĜĤÎĴĶĻṀÑÖÞǪŔŠŢÛṼŴẊÝŽ⁅∖⁆˄‿‵åƀçðéƒĝĥîĵķļɱñöþǫŕšţûṽŵẋýž(¦)˞',
                ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
                $this->getIsolatedOptions(['accents' => true]),
            ],
            [
                'foo <p>bar</p> ~~~~~~~~~~ ~~',
                'foo <p>bar</p>',
                $this->getIsolatedOptions(['expansion_factor' => 2.0]),
            ],
            // Only the visible text length is expanded
            [
                'foo <p>bar</p> ~~~ ~~',
                'foo <p>bar</p>',
                $this->getIsolatedOptions(['parse_html' => true, 'expansion_factor' => 2.0]),
            ],
            // 6*1.35 = 8.1 but we round up to 9
            [
                'foobar ~~',
                'foobar',
                $this->getIsolatedOptions(['expansion_factor' => 1.35]),
            ],
            [
                '[foobar]',
                'foobar',
                $this->getIsolatedOptions(['brackets' => true]),
            ],
            // The added brackets are taken into account in the expansion
            [
                '[foobar ~~~]',
                'foobar',
                $this->getIsolatedOptions(['expansion_factor' => 2.0, 'brackets' => true]),
            ],
            [
                '<p data-foo="&quot;ççç&lt;å">ƀåŕ</p>',
                '<p data-foo="&quot;ccc&lt;a">bar</p>',
                $this->getIsolatedOptions(['parse_html' => true, 'localizable_html_attributes' => ['data-foo'], 'accents' => true]),
            ],
            [
                '<p data-foo="ççéç&quot;&quot;">ƀåéŕ</p>',
                '<p data-foo="ccéc&quot;&quot;">baér</p>',
                $this->getIsolatedOptions(['parse_html' => true, 'localizable_html_attributes' => ['data-foo'], 'accents' => true]),
            ],
            [
                '<p data-foo="ccc&quot;&quot;">ƀåŕ</p>',
                '<p data-foo="ccc&quot;&quot;">bar</p>',
                $this->getIsolatedOptions(['parse_html' => true, 'accents' => true]),
            ],
            [
                '<p>″≤″</p>',
                '<p>&quot;&lt;&quot;</p>',
                $this->getIsolatedOptions(['parse_html' => true, 'accents' => true]),
            ],
            [
                'Symfony is an Open Source, community-driven project with thousands of contributors. ~~~~~~~ ~~ ~~~~ ~~~~~~~ ~~~~~~~ ~~ ~~~~ ~~~~~~~~~~~~~ ~~~~~~~~~~~~~ ~~~~~~~ ~~ ~~~',
                'Symfony is an Open Source, community-driven project with thousands of contributors.',
                $this->getIsolatedOptions(['expansion_factor' => 2.0]),
            ],
        ];
    }

    /**
     * The constructor must reject any expansion factor lower than 1.
     *
     * @dataProvider provideInvalidExpansionFactor
     */
    public function testInvalidExpansionFactor(float $expansionFactor): void
    {
        $this->expectException(\InvalidArgumentException::class);
        $this->expectExceptionMessage('The expansion factor must be greater than or equal to 1.');

        $invalidOptions = ['expansion_factor' => $expansionFactor];

        new PseudoLocalizationTranslator(new IdentityTranslator(), $invalidOptions);
    }

    public function provideInvalidExpansionFactor(): array
    {
        return [[0], [0.99], [-1]];
    }

    /**
     * Returns options where every alteration is switched off, overridden by the given ones,
     * so that each data set exercises only the features it enables.
     */
    private function getIsolatedOptions(array $options): array
    {
        $everythingDisabled = [
            'parse_html' => false,
            'localizable_html_attributes' => [],
            'accents' => false,
            'expansion_factor' => 1.0,
            'brackets' => false,
        ];

        return array_replace($everythingDisabled, $options);
    }
}