From a8a40fcb9edcd514206526cecf951cae95453a97 Mon Sep 17 00:00:00 2001 From: Michael Tibben Date: Tue, 4 Feb 2014 19:26:21 +1100 Subject: [PATCH] [FrameworkBundle] PhpExtractor bugfix and improvements --- .../Resources/views/translation.html.php | 30 ++++ .../Tests/Translation/PhpExtractorTest.php | 25 ++- .../Translation/PhpExtractor.php | 87 +++++++++-- .../Translation/PhpStringTokenParser.php | 142 ++++++++++++++++++ 4 files changed, 265 insertions(+), 19 deletions(-) create mode 100644 src/Symfony/Bundle/FrameworkBundle/Translation/PhpStringTokenParser.php diff --git a/src/Symfony/Bundle/FrameworkBundle/Tests/Fixtures/Resources/views/translation.html.php b/src/Symfony/Bundle/FrameworkBundle/Tests/Fixtures/Resources/views/translation.html.php index 23631b9943..2167138a1e 100644 --- a/src/Symfony/Bundle/FrameworkBundle/Tests/Fixtures/Resources/views/translation.html.php +++ b/src/Symfony/Bundle/FrameworkBundle/Tests/Fixtures/Resources/views/translation.html.php @@ -1,3 +1,33 @@ This template is used for translation message extraction tests trans('single-quoted key') ?> trans("double-quoted key") ?> +trans(<< +trans(<<<'EOF' +nowdoc key +EOF +) ?> +trans( + "double-quoted key with whitespace and escaped \$\n\" sequences" +) ?> +trans( + 'single-quoted key with whitespace and nonescaped \$\n\' sequences' +) ?> +trans( << +trans( <<<'EOF' +nowdoc key with whitespace and nonescaped \$\n sequences +EOF +) ?> + +trans('single-quoted key with "quote mark at the end"') ?> + +transChoice( + '{0} There is no apples|{1} There is one apple|]1,Inf[ There are %count% apples', + 10, + array('%count%' => 10) +) ?> diff --git a/src/Symfony/Bundle/FrameworkBundle/Tests/Translation/PhpExtractorTest.php b/src/Symfony/Bundle/FrameworkBundle/Tests/Translation/PhpExtractorTest.php index d639f01806..52e20b874e 100644 --- a/src/Symfony/Bundle/FrameworkBundle/Tests/Translation/PhpExtractorTest.php +++ b/src/Symfony/Bundle/FrameworkBundle/Tests/Translation/PhpExtractorTest.php @@ -27,10 +27,27 @@ class PhpExtractorTest extends TestCase // Act $extractor->extract(__DIR__.'/../Fixtures/Resources/views/', $catalogue); + $expectedHeredoc = <<assertCount(2, $catalogue->all('messages'), '->extract() should find 1 translation'); - $this->assertTrue($catalogue->has('single-quoted key'), '->extract() should find the "single-quoted key" message'); - $this->assertTrue($catalogue->has('double-quoted key'), '->extract() should find the "double-quoted key" message'); - $this->assertEquals('prefixsingle-quoted key', $catalogue->get('single-quoted key'), '->extract() should apply "prefix" as prefix'); + $expectedCatalogue = array('messages' => array( + 'single-quoted key' => 'prefixsingle-quoted key', + 'double-quoted key' => 'prefixdouble-quoted key', + 'heredoc key' => 'prefixheredoc key', + 'nowdoc key' => 'prefixnowdoc key', + "double-quoted key with whitespace and escaped \$\n\" sequences" => "prefixdouble-quoted key with whitespace and escaped \$\n\" sequences", + 'single-quoted key with whitespace and nonescaped \$\n\' sequences' => 'prefixsingle-quoted key with whitespace and nonescaped \$\n\' sequences', + 'single-quoted key with "quote mark at the end"' => 'prefixsingle-quoted key with "quote mark at the end"', + $expectedHeredoc => "prefix".$expectedHeredoc, + $expectedNowdoc => "prefix".$expectedNowdoc, + '{0} There is no apples|{1} There is one apple|]1,Inf[ There are %count% apples' => 'prefix{0} There is no apples|{1} There is one apple|]1,Inf[ There are %count% apples', + )); + $actualCatalogue = $catalogue->all(); + + $this->assertEquals($expectedCatalogue, $actualCatalogue); } } diff --git a/src/Symfony/Bundle/FrameworkBundle/Translation/PhpExtractor.php b/src/Symfony/Bundle/FrameworkBundle/Translation/PhpExtractor.php index 1b12c8ca9e..99a62fdc6b 100644 --- a/src/Symfony/Bundle/FrameworkBundle/Translation/PhpExtractor.php +++ b/src/Symfony/Bundle/FrameworkBundle/Translation/PhpExtractor.php @@ -23,7 +23,6 @@ use Symfony\Component\Translation\Extractor\ExtractorInterface; class PhpExtractor implements ExtractorInterface { const MESSAGE_TOKEN = 300; - const IGNORE_TOKEN = 400; /** * Prefix for new found message. @@ -39,15 +38,16 @@ class PhpExtractor implements ExtractorInterface */ protected $sequences = array( array( - '$view', - '[', - '\'translator\'', - ']', '->', 'trans', '(', self::MESSAGE_TOKEN, - ')', + ), + array( + '->', + 'transChoice', + '(', + self::MESSAGE_TOKEN, ), ); @@ -75,7 +75,7 @@ class PhpExtractor implements ExtractorInterface /** * Normalizes a token. * - * @param mixed $token + * @param mixed $token * @return string */ protected function normalizeToken($token) @@ -87,6 +87,60 @@ class PhpExtractor implements ExtractorInterface return $token; } + /** + * Seeks to a non-whitespace token + * + * @param \ArrayIterator $tokenIterator + */ + protected function seekToNextRelaventToken($tokenIterator) + { + for ( ; $tokenIterator->valid(); $tokenIterator->next()) { + $t = $tokenIterator->current(); + if (!is_array($t) || ($t[0] !== T_WHITESPACE)) { + break; + } + } + } + + /** + * Extracts the message from the iterator while the tokens + * match allowed message tokens + * + * @param \ArrayIterator $tokenIterator + */ + protected function getMessage($tokenIterator) + { + $message = ''; + $docToken = ''; + + for ( ; $tokenIterator->valid(); $tokenIterator->next()) { + $t = $tokenIterator->current(); + if (!is_array($t)) { + break; + } + + switch ($t[0]) { + case T_START_HEREDOC: + $docToken = $t[1]; + break; + case T_ENCAPSED_AND_WHITESPACE: + case T_CONSTANT_ENCAPSED_STRING: + $message .= $t[1]; + break; + case T_END_HEREDOC: + return PhpStringTokenParser::parseDocString($docToken, $message); + default: + break 2; + } + } + + if ($message) { + $message = PhpStringTokenParser::parse($message); + } + + return $message; + } + /** * Extracts trans message from PHP tokens. * @@ -95,24 +149,27 @@ class PhpExtractor implements ExtractorInterface */ protected function parseTokens($tokens, MessageCatalogue $catalog) { - foreach ($tokens as $key => $token) { + $tokenIterator = new \ArrayIterator($tokens); + + for ($key = 0; $key < $tokenIterator->count(); $key++) { foreach ($this->sequences as $sequence) { $message = ''; + $tokenIterator->seek($key); - foreach ($sequence as $id => $item) { - if ($this->normalizeToken($tokens[$key + $id]) == $item) { + foreach ($sequence as $item) { + $this->seekToNextRelaventToken($tokenIterator); + + if ($this->normalizeToken($tokenIterator->current()) == $item) { + $tokenIterator->next(); continue; } elseif (self::MESSAGE_TOKEN == $item) { - $message = $this->normalizeToken($tokens[$key + $id]); - } elseif (self::IGNORE_TOKEN == $item) { - continue; + $message = $this->getMessage($tokenIterator); + break; } else { break; } } - $message = trim($message, '\'"'); - if ($message) { $catalog->set($message, $this->prefix.$message); break; diff --git a/src/Symfony/Bundle/FrameworkBundle/Translation/PhpStringTokenParser.php b/src/Symfony/Bundle/FrameworkBundle/Translation/PhpStringTokenParser.php new file mode 100644 index 0000000000..0b29792e52 --- /dev/null +++ b/src/Symfony/Bundle/FrameworkBundle/Translation/PhpStringTokenParser.php @@ -0,0 +1,142 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Bundle\FrameworkBundle\Translation; + +/* + * The following is derived from code at http://github.com/nikic/PHP-Parser + * + * Copyright (c) 2011 by Nikita Popov + * + * Some rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * * The names of the contributors may not be used to endorse or + * promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +class PhpStringTokenParser +{ + protected static $replacements = array( + '\\' => '\\', + '$' => '$', + 'n' => "\n", + 'r' => "\r", + 't' => "\t", + 'f' => "\f", + 'v' => "\v", + 'e' => "\x1B", + ); + + /** + * Parses a string token. + * + * @param string $str String token content + * + * @return string The parsed string + */ + public static function parse($str) + { + $bLength = 0; + if ('b' === $str[0]) { + $bLength = 1; + } + + if ('\'' === $str[$bLength]) { + return str_replace( + array('\\\\', '\\\''), + array( '\\', '\''), + substr($str, $bLength + 1, -1) + ); + } else { + return self::parseEscapeSequences(substr($str, $bLength + 1, -1), '"'); + } + } + + /** + * Parses escape sequences in strings (all string types apart from single quoted). + * + * @param string $str String without quotes + * @param null|string $quote Quote type + * + * @return string String with escape sequences parsed + */ + public static function parseEscapeSequences($str, $quote) + { + if (null !== $quote) { + $str = str_replace('\\' . $quote, $quote, $str); + } + + return preg_replace_callback( + '~\\\\([\\\\$nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3})~', + array(__CLASS__, 'parseCallback'), + $str + ); + } + + public static function parseCallback($matches) + { + $str = $matches[1]; + + if (isset(self::$replacements[$str])) { + return self::$replacements[$str]; + } elseif ('x' === $str[0] || 'X' === $str[0]) { + return chr(hexdec($str)); + } else { + return chr(octdec($str)); + } + } + + /** + * Parses a constant doc string. + * + * @param string $startToken Doc string start token content (<<