[String] Made AbstractString::width() follow POSIX.1-2001
Co-authored-by: Nicolas Grekas <nicolas.grekas@gmail.com>
This commit is contained in:
parent
07818f2747
commit
347d8252fb
2
src/Symfony/Component/String/.gitattributes
vendored
2
src/Symfony/Component/String/.gitattributes
vendored
@ -1,3 +1,5 @@
|
|||||||
|
/Resources/bin/update-data.php export-ignore
|
||||||
|
/Resources/WcswidthDataGenerator.php export-ignore
|
||||||
/Tests export-ignore
|
/Tests export-ignore
|
||||||
/phpunit.xml.dist export-ignore
|
/phpunit.xml.dist export-ignore
|
||||||
/.gitignore export-ignore
|
/.gitignore export-ignore
|
||||||
|
@ -646,6 +646,9 @@ abstract class AbstractString implements \JsonSerializable
|
|||||||
*/
|
*/
|
||||||
abstract public function upper(): self;
|
abstract public function upper(): self;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the printable length on a terminal.
|
||||||
|
*/
|
||||||
abstract public function width(bool $ignoreAnsiDecoration = true): int;
|
abstract public function width(bool $ignoreAnsiDecoration = true): int;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -352,9 +352,6 @@ abstract class AbstractUnicodeString extends AbstractString
|
|||||||
return $str;
|
return $str;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritdoc}
|
|
||||||
*/
|
|
||||||
public function reverse(): parent
|
public function reverse(): parent
|
||||||
{
|
{
|
||||||
$str = clone $this;
|
$str = clone $this;
|
||||||
@ -444,22 +441,21 @@ abstract class AbstractUnicodeString extends AbstractString
|
|||||||
$s = str_replace(["\r\n", "\r"], "\n", $s);
|
$s = str_replace(["\r\n", "\r"], "\n", $s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!$ignoreAnsiDecoration) {
|
||||||
|
$s = preg_replace('/[\p{Cc}\x7F]++/u', '', $s);
|
||||||
|
}
|
||||||
|
|
||||||
foreach (explode("\n", $s) as $s) {
|
foreach (explode("\n", $s) as $s) {
|
||||||
if ($ignoreAnsiDecoration) {
|
if ($ignoreAnsiDecoration) {
|
||||||
$s = preg_replace('/\x1B(?:
|
$s = preg_replace('/(?:\x1B(?:
|
||||||
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
|
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
|
||||||
| [P\]X^_] .*? \x1B\\\\
|
| [P\]X^_] .*? \x1B\\\\
|
||||||
| [\x41-\x7E]
|
| [\x41-\x7E]
|
||||||
)/x', '', $s);
|
)|[\p{Cc}\x7F]++)/xu', '', $s);
|
||||||
}
|
}
|
||||||
|
|
||||||
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
|
// Non printable characters have been dropped, so wcswidth cannot logically return -1.
|
||||||
$s = preg_replace('/[\x00\x05\x07\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11FF}\x{200B}]+/u', '', $s);
|
$width += $this->wcswidth($s);
|
||||||
$s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);
|
|
||||||
|
|
||||||
if ($width < $w += mb_strlen($s, 'UTF-8') + ($wide << 1)) {
|
|
||||||
$width = $w;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return $width;
|
return $width;
|
||||||
@ -503,4 +499,80 @@ abstract class AbstractUnicodeString extends AbstractString
|
|||||||
throw new InvalidArgumentException('Invalid padding type.');
|
throw new InvalidArgumentException('Invalid padding type.');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the number of terminal columns needed to display the given string.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 *
 * @return int The accumulated width, or -1 as soon as a C0/C1 control character or DEL is met
 */
private function wcswidth(string $string): int
{
    // Both range tables are loaded lazily and kept in static variables afterwards.
    static $tableZero, $tableWide;

    $width = 0;

    foreach (preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) as $c) {
        $codePoint = mb_ord($c, 'UTF-8');

        // Code points that never add to the width
        if (0 === $codePoint // NULL
            || 0x034F === $codePoint // COMBINING GRAPHEME JOINER
            || (0x200B <= $codePoint && 0x200F >= $codePoint) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || 0x2028 === $codePoint // LINE SEPARATOR
            || 0x2029 === $codePoint // PARAGRAPH SEPARATOR
            || (0x202A <= $codePoint && 0x202E >= $codePoint) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
            || (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
        ) {
            continue;
        }

        // Non printable characters
        if (32 > $codePoint // C0 control characters
            || (0x07F <= $codePoint && 0x0A0 > $codePoint) // C1 control characters and DEL
        ) {
            return -1;
        }

        if (null === $tableZero) {
            $tableZero = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
        }

        // Code points listed in the "zero" table do not add to the width.
        if (self::isInWcswidthTable($codePoint, $tableZero)) {
            continue;
        }

        if (null === $tableWide) {
            $tableWide = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
        }

        // Code points listed in the "wide" table take two columns, anything else takes one.
        $width += self::isInWcswidthTable($codePoint, $tableWide) ? 2 : 1;
    }

    return $width;
}

/**
 * Tells whether a code point belongs to one of the [start, end] ranges of a table.
 *
 * The lookup is a binary search, so it assumes the ranges are sorted in ascending order.
 *
 * @param int[][] $table A list of [start, end] code point pairs (both bounds inclusive)
 */
private static function isInWcswidthTable(int $codePoint, array $table): bool
{
    $lbound = 0;
    $ubound = \count($table) - 1;

    // Quick rejection: empty table, or code point outside the overall span of the table.
    if (0 > $ubound || $codePoint < $table[0][0] || $codePoint > $table[$ubound][1]) {
        return false;
    }

    while ($ubound >= $lbound) {
        // Integer midpoint, so the array offset is never a float.
        $mid = ($lbound + $ubound) >> 1;

        if ($codePoint > $table[$mid][1]) {
            $lbound = $mid + 1;
        } elseif ($codePoint < $table[$mid][0]) {
            $ubound = $mid - 1;
        } else {
            return true;
        }
    }

    return false;
}
|
||||||
}
|
}
|
||||||
|
@ -303,9 +303,6 @@ class ByteString extends AbstractString
|
|||||||
return $str;
|
return $str;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritdoc}
|
|
||||||
*/
|
|
||||||
public function reverse(): parent
|
public function reverse(): parent
|
||||||
{
|
{
|
||||||
$str = clone $this;
|
$str = clone $this;
|
||||||
@ -460,29 +457,8 @@ class ByteString extends AbstractString
|
|||||||
|
|
||||||
public function width(bool $ignoreAnsiDecoration = true): int
|
public function width(bool $ignoreAnsiDecoration = true): int
|
||||||
{
|
{
|
||||||
$width = 0;
|
$string = preg_match('//u', $this->string) ? $this->string : preg_replace('/[\x80-\xFF]/', '?', $this->string);
|
||||||
$s = str_replace(["\x00", "\x05", "\x07"], '', $this->string);
|
|
||||||
|
|
||||||
if (false !== strpos($s, "\r")) {
|
return (new CodePointString($string))->width($ignoreAnsiDecoration);
|
||||||
$s = str_replace(["\r\n", "\r"], "\n", $s);
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach (explode("\n", $s) as $s) {
|
|
||||||
if ($ignoreAnsiDecoration) {
|
|
||||||
$s = preg_replace('/\x1B(?:
|
|
||||||
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
|
|
||||||
| [P\]X^_] .*? \x1B\\\\
|
|
||||||
| [\x41-\x7E]
|
|
||||||
)/x', '', $s);
|
|
||||||
}
|
|
||||||
|
|
||||||
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
|
|
||||||
|
|
||||||
if ($width < $w += \strlen($s)) {
|
|
||||||
$width = $w;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $width;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@ CHANGELOG
|
|||||||
-----
|
-----
|
||||||
|
|
||||||
* Added the `AbstractString::reverse()` method.
|
* Added the `AbstractString::reverse()` method.
|
||||||
|
* Made `AbstractString::width()` follow POSIX.1-2001.
|
||||||
|
|
||||||
5.0.0
|
5.0.0
|
||||||
-----
|
-----
|
||||||
|
113
src/Symfony/Component/String/Resources/WcswidthDataGenerator.php
Normal file
113
src/Symfony/Component/String/Resources/WcswidthDataGenerator.php
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file is part of the Symfony package.
|
||||||
|
*
|
||||||
|
* (c) Fabien Potencier <fabien@symfony.com>
|
||||||
|
*
|
||||||
|
* For the full copyright and license information, please view the LICENSE
|
||||||
|
* file that was distributed with this source code.
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Symfony\Component\String\Resources;
|
||||||
|
|
||||||
|
use Symfony\Component\HttpClient\HttpClient;
|
||||||
|
use Symfony\Component\String\Exception\RuntimeException;
|
||||||
|
use Symfony\Component\VarExporter\VarExporter;
|
||||||
|
|
||||||
|
/**
 * Downloads Unicode data files from unicode.org and dumps, as PHP files,
 * the code point range tables "wcswidth_table_wide.php" and "wcswidth_table_zero.php".
 *
 * @internal
 */
final class WcswidthDataGenerator
{
    /** Directory the generated PHP files are written to. */
    private $outDir;

    /** HTTP client scoped to the Unicode data base URI. */
    private $client;

    public function __construct(string $outDir)
    {
        $this->outDir = $outDir;

        $this->client = HttpClient::createForBaseUri('https://www.unicode.org/Public/UNIDATA/');
    }

    /**
     * Generates both tables.
     *
     * @throws RuntimeException When a data file cannot be parsed or a table cannot be written
     */
    public function generate(): void
    {
        $this->writeWideWidthData();

        $this->writeZeroWidthData();
    }

    /**
     * Extracts the ranges whose East Asian Width property is W (wide) or F (fullwidth).
     */
    private function writeWideWidthData(): void
    {
        $content = $this->client->request('GET', 'EastAsianWidth.txt')->getContent();

        if (!preg_match('/^# EastAsianWidth-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        // Code points are hexadecimal ([A-F\d]); optional spaces around ";" are tolerated
        // so that both the compact ("1100..115F;W") and the padded ("1100..115F ; W") layouts match.
        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; *[WF]/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The wide width pattern did not match anything.');
        }

        $this->write('wcswidth_table_wide.php', $version, $matches);
    }

    /**
     * Extracts the ranges whose general category is Me or Mn.
     */
    private function writeZeroWidthData(): void
    {
        $content = $this->client->request('GET', 'extracted/DerivedGeneralCategory.txt')->getContent();

        if (!preg_match('/^# DerivedGeneralCategory-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; *(?:Me|Mn)/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The zero width pattern did not match anything.');
        }

        $this->write('wcswidth_table_zero.php', $version, $matches);
    }

    /**
     * Dumps the formatted ranges as a PHP file returning an array.
     *
     * @param array $rawData The PREG_SET_ORDER matches of one of the range patterns
     */
    private function write(string $fileName, string $version, array $rawData): void
    {
        $content = $this->getHeader($version).'return '.VarExporter::export($this->format($rawData)).";\n";

        // Only a strict false signals a failure; the number of written bytes is not an error indicator.
        if (false === file_put_contents($this->outDir.'/'.$fileName, $content)) {
            throw new RuntimeException(sprintf('The "%s" file could not be written.', $fileName));
        }
    }

    /**
     * Builds the opening tag and comment banner of a generated file.
     */
    private function getHeader(string $version): string
    {
        $date = (new \DateTimeImmutable())->format('c');

        return <<<EOT
<?php

/*
 * This file has been auto-generated by the Symfony String Component for internal use.
 *
 * Unicode version: $version
 * Date: $date
 */


EOT;
    }

    /**
     * Converts regex matches into a list of [start, end] integer pairs sorted by start.
     *
     * @param array $rawData Rows shaped like [full match, start (hex), end (hex, optional)]
     *
     * @return int[][]
     */
    private function format(array $rawData): array
    {
        $data = array_map(static function (array $row): array {
            $start = $row[1];
            // Single code point lines have no end bound: the range is then [start, start].
            $end = $row[2] ?? $start;

            return [hexdec($start), hexdec($end)];
        }, $rawData);

        usort($data, static function (array $a, array $b): int {
            return $a[0] <=> $b[0];
        });

        return $data;
    }
}
|
55
src/Symfony/Component/String/Resources/bin/update-data.php
Normal file
55
src/Symfony/Component/String/Resources/bin/update-data.php
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file is part of the Symfony package.
|
||||||
|
*
|
||||||
|
* (c) Fabien Potencier <fabien@symfony.com>
|
||||||
|
*
|
||||||
|
* For the full copyright and license information, please view the LICENSE
|
||||||
|
* file that was distributed with this source code.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use Symfony\Component\String\Resources\WcswidthDataGenerator;
|
||||||
|
|
||||||
|
// Report every kind of PHP error...
error_reporting(E_ALL);

// ...and convert each of them into an ErrorException.
set_error_handler(static function (int $severity, string $message, string $path, int $lineNumber): void {
    throw new \ErrorException($message, 0, $severity, $path, $lineNumber);
});

// Prints an uncaught exception followed by each of its previous exceptions.
set_exception_handler(static function (\Throwable $exception): void {
    echo "\n";

    for ($current = $exception; null !== $current; $current = $current->getPrevious()) {
        if ($current !== $exception) {
            echo "Caused by\n";
        }

        echo get_class($current).': '.$current->getMessage()."\n\n";
        echo $current->getFile().':'.$current->getLine()."\n";
        echo $current->getTraceAsString()."\n";
    }
});

$autoload = __DIR__.'/../../vendor/autoload.php';

// The component's own dependencies must be installed before the tables can be generated.
if (!file_exists($autoload)) {
    $hint = wordwrap('You should run "composer install" in the component before running this script.', 75);

    echo $hint." Aborting.\n";

    exit(1);
}

require_once $autoload;

echo "Generating wcswidth tables data...\n";

// Generated files go to the "data" directory located next to this script's directory.
$generator = new WcswidthDataGenerator(dirname(__DIR__).'/data');
$generator->generate();

echo "Done.\n";
|
1095
src/Symfony/Component/String/Resources/data/wcswidth_table_wide.php
Normal file
1095
src/Symfony/Component/String/Resources/data/wcswidth_table_wide.php
Normal file
File diff suppressed because it is too large
Load Diff
1303
src/Symfony/Component/String/Resources/data/wcswidth_table_zero.php
Normal file
1303
src/Symfony/Component/String/Resources/data/wcswidth_table_zero.php
Normal file
File diff suppressed because it is too large
Load Diff
@ -1451,4 +1451,35 @@ abstract class AbstractAsciiTestCase extends TestCase
|
|||||||
["\n!!!\tTAERG SI ynofmyS ", " Symfony IS GREAT\t!!!\n"],
|
["\n!!!\tTAERG SI ynofmyS ", " Symfony IS GREAT\t!!!\n"],
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Checks that width() returns the expected number of columns for a given input.
 *
 * @dataProvider provideWidth
 */
public function testWidth(int $expected, string $origin, bool $ignoreAnsiDecoration = true)
{
    $instance = static::createFromString($origin);
    $actual = $instance->width($ignoreAnsiDecoration);

    $this->assertSame($expected, $actual);
}
|
||||||
|
|
||||||
|
/**
 * Data sets for testWidth().
 *
 * Each row is [expected width, input string, $ignoreAnsiDecoration]; rows that
 * omit the third item rely on the default value of the test method (true).
 */
public static function provideWidth(): array
{
    return [
        [0, ''],
        [1, 'c'],
        [3, 'foo'],
        [2, '⭐'],
        [8, 'f⭐o⭐⭐'],
        [19, 'コンニチハ, セカイ!'],
        [6, "foo\u{0000}bar"],
        [6, "foo\u{001b}[0mbar"],
        [6, "foo\u{0001}bar"],
        [6, "foo\u{0001}bar", false],
        [4, '--ֿ--'],
        [4, 'café'],
        [1, 'А҈'],
        [4, 'ᬓᬨᬮ᭄'],
        [1, "\u{00AD}"],
        // Same input for both rows below: only the $ignoreAnsiDecoration flag differs.
        [14, "\u{007f}\u{007f}f\u{001b}[0moo\u{0001}bar\u{007f}cccïf\u{008e}cy\u{0005}1"], // foobarcccïfcy1
        [17, "\u{007f}\u{007f}f\u{001b}[0moo\u{0001}bar\u{007f}cccïf\u{008e}cy\u{0005}1", false], // f[0moobarcccïfcy1
    ];
}
|
||||||
}
|
}
|
||||||
|
@ -43,4 +43,15 @@ class ByteStringTest extends AbstractAsciiTestCase
|
|||||||
]
|
]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extends the inherited data sets with inputs holding the raw bytes 0x80 and 0xFE.
 */
public static function provideWidth(): array
{
    $byteStringCases = [
        [10, "f\u{001b}[0moo\x80bar\xfe\xfe1"], // foo?bar??1
        [13, "f\u{001b}[0moo\x80bar\xfe\xfe1", false], // f[0moo?bar??1
    ];

    return array_merge(parent::provideWidth(), $byteStringCases);
}
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,10 @@
|
|||||||
"symfony/polyfill-mbstring": "~1.0",
|
"symfony/polyfill-mbstring": "~1.0",
|
||||||
"symfony/translation-contracts": "^1.1|^2"
|
"symfony/translation-contracts": "^1.1|^2"
|
||||||
},
|
},
|
||||||
|
"require-dev": {
|
||||||
|
"symfony/http-client": "^4.4|^5.0",
|
||||||
|
"symfony/var-exporter": "^4.4|^5.0"
|
||||||
|
},
|
||||||
"autoload": {
|
"autoload": {
|
||||||
"psr-4": { "Symfony\\Component\\String\\": "" },
|
"psr-4": { "Symfony\\Component\\String\\": "" },
|
||||||
"files": [ "Resources/functions.php" ],
|
"files": [ "Resources/functions.php" ],
|
||||||
|
Reference in New Issue
Block a user