feature #19604 [Routing] Add seamless support for unicode requirements (nicolas-grekas)
This PR was merged into the 3.2-dev branch.
Discussion
----------
[Routing] Add seamless support for unicode requirements
| Q | A
| ------------- | ---
| Branch? | master
| Bug fix? | no
| New feature? | yes
| BC breaks? | no
| Deprecations? | yes
| Tests pass? | yes
| Fixed tickets | #3629, #5236, #19562
| License | MIT
| Doc PR | https://github.com/symfony/symfony-docs/pull/6890
This PR adds unicode support to route matching and generation by automatically adding the `u` modifier to regexps that use either unicode characters or unicode enabled character classes (e.g. `\p...` `\x{...}` `\X`).
As a side note, if one wants to match a single unicode character (vs a single byte), one should use `\PM` or `\X` instead of `.` *or* set the `unicode` parameter to true.
Commits
-------
a829d34
[Routing] Add seamless support for unicode requirements
This commit is contained in:
commit
98051e909f
@ -5,6 +5,7 @@ CHANGELOG
|
||||
-----
|
||||
|
||||
* Added support for `bool`, `int`, `float`, `string`, `list` and `map` defaults in XML configurations.
|
||||
* Added support for UTF-8 requirements
|
||||
|
||||
2.8.0
|
||||
-----
|
||||
|
@ -158,7 +158,7 @@ class UrlGenerator implements UrlGeneratorInterface, ConfigurableRequirementsInt
|
||||
if ('variable' === $token[0]) {
|
||||
if (!$optional || !array_key_exists($token[3], $defaults) || null !== $mergedParams[$token[3]] && (string) $mergedParams[$token[3]] !== (string) $defaults[$token[3]]) {
|
||||
// check requirement
|
||||
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#', $mergedParams[$token[3]])) {
|
||||
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#'.(empty($token[4]) ? '' : 'u'), $mergedParams[$token[3]])) {
|
||||
if ($this->strictRequirements) {
|
||||
throw new InvalidParameterException(strtr($message, array('{parameter}' => $token[3], '{route}' => $name, '{expected}' => $token[2], '{given}' => $mergedParams[$token[3]])));
|
||||
}
|
||||
@ -212,7 +212,7 @@ class UrlGenerator implements UrlGeneratorInterface, ConfigurableRequirementsInt
|
||||
$routeHost = '';
|
||||
foreach ($hostTokens as $token) {
|
||||
if ('variable' === $token[0]) {
|
||||
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#i', $mergedParams[$token[3]])) {
|
||||
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#i'.(empty($token[4]) ? '' : 'u'), $mergedParams[$token[3]])) {
|
||||
if ($this->strictRequirements) {
|
||||
throw new InvalidParameterException(strtr($message, array('{parameter}' => $token[3], '{route}' => $name, '{expected}' => $token[2], '{given}' => $mergedParams[$token[3]])));
|
||||
}
|
||||
|
@ -223,8 +223,9 @@ EOF;
|
||||
}
|
||||
|
||||
$supportsTrailingSlash = $supportsRedirections && (!$methods || in_array('HEAD', $methods));
|
||||
$regex = $compiledRoute->getRegex();
|
||||
|
||||
if (!count($compiledRoute->getPathVariables()) && false !== preg_match('#^(.)\^(?P<url>.*?)\$\1#', $compiledRoute->getRegex(), $m)) {
|
||||
if (!count($compiledRoute->getPathVariables()) && false !== preg_match('#^(.)\^(?P<url>.*?)\$\1#'.(substr($regex, -1) === 'u' ? 'u' : ''), $regex, $m)) {
|
||||
if ($supportsTrailingSlash && substr($m['url'], -1) === '/') {
|
||||
$conditions[] = sprintf("rtrim(\$pathinfo, '/') === %s", var_export(rtrim(str_replace('\\', '', $m['url']), '/'), true));
|
||||
$hasTrailingSlash = true;
|
||||
@ -236,7 +237,6 @@ EOF;
|
||||
$conditions[] = sprintf('0 === strpos($pathinfo, %s)', var_export($compiledRoute->getStaticPrefix(), true));
|
||||
}
|
||||
|
||||
$regex = $compiledRoute->getRegex();
|
||||
if ($supportsTrailingSlash && $pos = strpos($regex, '/$')) {
|
||||
$regex = substr($regex, 0, $pos).'/?$'.substr($regex, $pos + 2);
|
||||
$hasTrailingSlash = true;
|
||||
|
@ -70,6 +70,7 @@ class Route implements \Serializable
|
||||
* Available options:
|
||||
*
|
||||
* * compiler_class: A class name able to compile this route instance (RouteCompiler by default)
|
||||
* * utf8: Whether UTF-8 matching is enforced ot not
|
||||
*
|
||||
* @param string $path The path pattern to match
|
||||
* @param array $defaults An array of default parameter values
|
||||
|
@ -91,6 +91,16 @@ class RouteCompiler implements RouteCompilerInterface
|
||||
$matches = array();
|
||||
$pos = 0;
|
||||
$defaultSeparator = $isHost ? '.' : '/';
|
||||
$useUtf8 = preg_match('//u', $pattern);
|
||||
$needsUtf8 = $route->getOption('utf8');
|
||||
|
||||
if (!$needsUtf8 && $useUtf8 && preg_match('/[\x80-\xFF]/', $pattern)) {
|
||||
$needsUtf8 = true;
|
||||
@trigger_error(sprintf('Using UTF-8 route patterns without setting the "utf8" option is deprecated since Symfony 3.2 and will throw a LogicException in 4.0. Turn on the "utf8" route option for pattern "%s".', $pattern), E_USER_DEPRECATED);
|
||||
}
|
||||
if (!$useUtf8 && $needsUtf8) {
|
||||
throw new \LogicException(sprintf('Cannot mix UTF-8 requirements with non-UTF-8 pattern "%s".', $pattern));
|
||||
}
|
||||
|
||||
// Match all variables enclosed in "{}" and iterate over them. But we only want to match the innermost variable
|
||||
// in case of nested "{}", e.g. {foo{bar}}. This in ensured because \w does not match "{" or "}" itself.
|
||||
@ -100,7 +110,15 @@ class RouteCompiler implements RouteCompilerInterface
|
||||
// get all static text preceding the current variable
|
||||
$precedingText = substr($pattern, $pos, $match[0][1] - $pos);
|
||||
$pos = $match[0][1] + strlen($match[0][0]);
|
||||
$precedingChar = strlen($precedingText) > 0 ? substr($precedingText, -1) : '';
|
||||
|
||||
if (!strlen($precedingText)) {
|
||||
$precedingChar = '';
|
||||
} elseif ($useUtf8) {
|
||||
preg_match('/.$/u', $precedingText, $precedingChar);
|
||||
$precedingChar = $precedingChar[0];
|
||||
} else {
|
||||
$precedingChar = substr($precedingText, -1);
|
||||
}
|
||||
$isSeparator = '' !== $precedingChar && false !== strpos(static::SEPARATORS, $precedingChar);
|
||||
|
||||
if (is_numeric($varName)) {
|
||||
@ -110,8 +128,8 @@ class RouteCompiler implements RouteCompilerInterface
|
||||
throw new \LogicException(sprintf('Route pattern "%s" cannot reference variable name "%s" more than once.', $pattern, $varName));
|
||||
}
|
||||
|
||||
if ($isSeparator && strlen($precedingText) > 1) {
|
||||
$tokens[] = array('text', substr($precedingText, 0, -1));
|
||||
if ($isSeparator && $precedingText !== $precedingChar) {
|
||||
$tokens[] = array('text', substr($precedingText, 0, -strlen($precedingChar)));
|
||||
} elseif (!$isSeparator && strlen($precedingText) > 0) {
|
||||
$tokens[] = array('text', $precedingText);
|
||||
}
|
||||
@ -126,7 +144,7 @@ class RouteCompiler implements RouteCompilerInterface
|
||||
// If {page} would also match the separating dot, {_format} would never match as {page} will eagerly consume everything.
|
||||
// Also even if {_format} was not optional the requirement prevents that {page} matches something that was originally
|
||||
// part of {_format} when generating the URL, e.g. _format = 'mobile.html'.
|
||||
$nextSeparator = self::findNextSeparator($followingPattern);
|
||||
$nextSeparator = self::findNextSeparator($followingPattern, $useUtf8);
|
||||
$regexp = sprintf(
|
||||
'[^%s%s]+',
|
||||
preg_quote($defaultSeparator, self::REGEX_DELIMITER),
|
||||
@ -140,6 +158,16 @@ class RouteCompiler implements RouteCompilerInterface
|
||||
// directly adjacent, e.g. '/{x}{y}'.
|
||||
$regexp .= '+';
|
||||
}
|
||||
} else {
|
||||
if (!preg_match('//u', $regexp)) {
|
||||
$useUtf8 = false;
|
||||
} elseif (!$needsUtf8 && preg_match('/[\x80-\xFF]|(?<!\\\\)\\\\(?:\\\\\\\\)*+(?-i:X|[pP][\{CLMNPSZ]|x\{[A-Fa-f0-9]{3})/', $regexp)) {
|
||||
$needsUtf8 = true;
|
||||
@trigger_error(sprintf('Using UTF-8 route requirements without setting the "utf8" option is deprecated since Symfony 3.2 and will throw a LogicException in 4.0. Turn on the "utf8" route option for variable "%s" in pattern "%s".', $varName, $pattern), E_USER_DEPRECATED);
|
||||
}
|
||||
if (!$useUtf8 && $needsUtf8) {
|
||||
throw new \LogicException(sprintf('Cannot mix UTF-8 requirement with non-UTF-8 charset for variable "%s" in pattern "%s".', $varName, $pattern));
|
||||
}
|
||||
}
|
||||
|
||||
$tokens[] = array('variable', $isSeparator ? $precedingChar : '', $regexp, $varName);
|
||||
@ -168,10 +196,21 @@ class RouteCompiler implements RouteCompilerInterface
|
||||
for ($i = 0, $nbToken = count($tokens); $i < $nbToken; ++$i) {
|
||||
$regexp .= self::computeRegexp($tokens, $i, $firstOptional);
|
||||
}
|
||||
$regexp = self::REGEX_DELIMITER.'^'.$regexp.'$'.self::REGEX_DELIMITER.'s'.($isHost ? 'i' : '');
|
||||
|
||||
// enable Utf8 matching if really required
|
||||
if ($needsUtf8) {
|
||||
$regexp .= 'u';
|
||||
for ($i = 0, $nbToken = count($tokens); $i < $nbToken; ++$i) {
|
||||
if ('variable' === $tokens[$i][0]) {
|
||||
$tokens[$i][] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return array(
|
||||
'staticPrefix' => 'text' === $tokens[0][0] ? $tokens[0][1] : '',
|
||||
'regex' => self::REGEX_DELIMITER.'^'.$regexp.'$'.self::REGEX_DELIMITER.'s'.($isHost ? 'i' : ''),
|
||||
'regex' => $regexp,
|
||||
'tokens' => array_reverse($tokens),
|
||||
'variables' => $variables,
|
||||
);
|
||||
@ -181,19 +220,25 @@ class RouteCompiler implements RouteCompilerInterface
|
||||
* Returns the next static character in the Route pattern that will serve as a separator.
|
||||
*
|
||||
* @param string $pattern The route pattern
|
||||
* @param bool $useUtf8 Whether the character is encoded in UTF-8 or not
|
||||
*
|
||||
* @return string The next static character that functions as separator (or empty string when none available)
|
||||
*/
|
||||
private static function findNextSeparator($pattern)
|
||||
private static function findNextSeparator($pattern, $useUtf8)
|
||||
{
|
||||
if ('' == $pattern) {
|
||||
// return empty string if pattern is empty or false (false which can be returned by substr)
|
||||
return '';
|
||||
}
|
||||
// first remove all placeholders from the pattern so we can find the next real static character
|
||||
$pattern = preg_replace('#\{\w+\}#', '', $pattern);
|
||||
if ('' === $pattern = preg_replace('#\{\w+\}#', '', $pattern)) {
|
||||
return '';
|
||||
}
|
||||
if ($useUtf8) {
|
||||
preg_match('/^./u', $pattern, $pattern);
|
||||
}
|
||||
|
||||
return isset($pattern[0]) && false !== strpos(static::SEPARATORS, $pattern[0]) ? $pattern[0] : '';
|
||||
return false !== strpos(static::SEPARATORS, $pattern[0]) ? $pattern[0] : '';
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -233,6 +233,15 @@ class UrlGeneratorTest extends \PHPUnit_Framework_TestCase
|
||||
$this->getGenerator($routes)->generate('test', array('foo' => 'bar'), UrlGeneratorInterface::ABSOLUTE_URL);
|
||||
}
|
||||
|
||||
/**
|
||||
* @expectedException \Symfony\Component\Routing\Exception\InvalidParameterException
|
||||
*/
|
||||
public function testGenerateForRouteWithInvalidUtf8Parameter()
|
||||
{
|
||||
$routes = $this->getRoutes('test', new Route('/testing/{foo}', array(), array('foo' => '\pL+'), array('utf8' => true)));
|
||||
$this->getGenerator($routes)->generate('test', array('foo' => 'abc123'), UrlGeneratorInterface::ABSOLUTE_URL);
|
||||
}
|
||||
|
||||
/**
|
||||
* @expectedException \Symfony\Component\Routing\Exception\InvalidParameterException
|
||||
*/
|
||||
|
@ -165,7 +165,7 @@ class UrlMatcherTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
$collection = new RouteCollection();
|
||||
$chars = '!"$%éà &\'()*+,./:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\[]^_`abcdefghijklmnopqrstuvwxyz{|}~-';
|
||||
$collection->add('foo', new Route('/{foo}/bar', array(), array('foo' => '['.preg_quote($chars).']+')));
|
||||
$collection->add('foo', new Route('/{foo}/bar', array(), array('foo' => '['.preg_quote($chars).']+'), array('utf8' => true)));
|
||||
|
||||
$matcher = new UrlMatcher($collection, new RequestContext());
|
||||
$this->assertEquals(array('_route' => 'foo', 'foo' => $chars), $matcher->match('/'.rawurlencode($chars).'/bar'));
|
||||
|
@ -11,7 +11,9 @@
|
||||
|
||||
namespace Symfony\Component\Routing\Tests;
|
||||
|
||||
use Symfony\Bridge\PhpUnit\ErrorAssert;
|
||||
use Symfony\Component\Routing\Route;
|
||||
use Symfony\Component\Routing\RouteCompiler;
|
||||
|
||||
class RouteCompilerTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
@ -162,6 +164,87 @@ class RouteCompilerTest extends \PHPUnit_Framework_TestCase
|
||||
array('text', '/foo'),
|
||||
),
|
||||
),
|
||||
|
||||
array(
|
||||
'Static non UTF-8 route',
|
||||
array("/fo\xE9"),
|
||||
"/fo\xE9", "#^/fo\xE9$#s", array(), array(
|
||||
array('text', "/fo\xE9"),
|
||||
),
|
||||
),
|
||||
|
||||
array(
|
||||
'Route with an explicit UTF-8 requirement',
|
||||
array('/{bar}', array('bar' => null), array('bar' => '.'), array('utf8' => true)),
|
||||
'', '#^/(?P<bar>.)?$#su', array('bar'), array(
|
||||
array('variable', '/', '.', 'bar', true),
|
||||
),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @dataProvider provideCompileImplicitUtf8Data
|
||||
* @requires function Symfony\Bridge\PhpUnit\ErrorAssert::assertDeprecationsAreTriggered
|
||||
*/
|
||||
public function testCompileImplicitUtf8Data($name, $arguments, $prefix, $regex, $variables, $tokens, $deprecationType)
|
||||
{
|
||||
$deprecations = array(
|
||||
sprintf('Using UTF-8 route %s without setting the "utf8" option is deprecated', $deprecationType),
|
||||
);
|
||||
|
||||
ErrorAssert::assertDeprecationsAreTriggered($deprecations, function () use ($name, $arguments, $prefix, $regex, $variables, $tokens) {
|
||||
$r = new \ReflectionClass('Symfony\\Component\\Routing\\Route');
|
||||
$route = $r->newInstanceArgs($arguments);
|
||||
|
||||
$compiled = $route->compile();
|
||||
$this->assertEquals($prefix, $compiled->getStaticPrefix(), $name.' (static prefix)');
|
||||
$this->assertEquals($regex, $compiled->getRegex(), $name.' (regex)');
|
||||
$this->assertEquals($variables, $compiled->getVariables(), $name.' (variables)');
|
||||
$this->assertEquals($tokens, $compiled->getTokens(), $name.' (tokens)');
|
||||
});
|
||||
}
|
||||
|
||||
public function provideCompileImplicitUtf8Data()
|
||||
{
|
||||
return array(
|
||||
array(
|
||||
'Static UTF-8 route',
|
||||
array('/foé'),
|
||||
'/foé', '#^/foé$#su', array(), array(
|
||||
array('text', '/foé'),
|
||||
),
|
||||
'patterns',
|
||||
),
|
||||
|
||||
array(
|
||||
'Route with an implicit UTF-8 requirement',
|
||||
array('/{bar}', array('bar' => null), array('bar' => 'é')),
|
||||
'', '#^/(?P<bar>é)?$#su', array('bar'), array(
|
||||
array('variable', '/', 'é', 'bar', true),
|
||||
),
|
||||
'requirements',
|
||||
),
|
||||
|
||||
array(
|
||||
'Route with a UTF-8 class requirement',
|
||||
array('/{bar}', array('bar' => null), array('bar' => '\pM')),
|
||||
'', '#^/(?P<bar>\pM)?$#su', array('bar'), array(
|
||||
array('variable', '/', '\pM', 'bar', true),
|
||||
),
|
||||
'requirements',
|
||||
),
|
||||
|
||||
array(
|
||||
'Route with a UTF-8 separator',
|
||||
array('/foo/{bar}§{_format}', array(), array(), array('compiler_class' => Utf8RouteCompiler::class)),
|
||||
'/foo', '#^/foo/(?P<bar>[^/§]++)§(?P<_format>[^/]++)$#su', array('bar', '_format'), array(
|
||||
array('variable', '§', '[^/]++', '_format', true),
|
||||
array('variable', '/', '[^/§]++', 'bar', true),
|
||||
array('text', '/foo'),
|
||||
),
|
||||
'patterns',
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
@ -175,6 +258,26 @@ class RouteCompilerTest extends \PHPUnit_Framework_TestCase
|
||||
$compiled = $route->compile();
|
||||
}
|
||||
|
||||
/**
|
||||
* @expectedException \LogicException
|
||||
*/
|
||||
public function testRouteCharsetMismatch()
|
||||
{
|
||||
$route = new Route("/\xE9/{bar}", array(), array('bar' => '.'), array('utf8' => true));
|
||||
|
||||
$compiled = $route->compile();
|
||||
}
|
||||
|
||||
/**
|
||||
* @expectedException \LogicException
|
||||
*/
|
||||
public function testRequirementCharsetMismatch()
|
||||
{
|
||||
$route = new Route('/foo/{bar}', array(), array('bar' => "\xE9"), array('utf8' => true));
|
||||
|
||||
$compiled = $route->compile();
|
||||
}
|
||||
|
||||
/**
|
||||
* @expectedException \InvalidArgumentException
|
||||
*/
|
||||
@ -275,3 +378,8 @@ class RouteCompilerTest extends \PHPUnit_Framework_TestCase
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
class Utf8RouteCompiler extends RouteCompiler
|
||||
{
|
||||
const SEPARATORS = '/§';
|
||||
}
|
||||
|
Reference in New Issue
Block a user