[FrameworkBundle] PhpExtractor bugfix and improvements

2014-02-04 19:26:21 +11:00 · 2014-02-04 19:26:21 +11:00 · a8a40fcb9e
parent 55d17fad19
commit a8a40fcb9e
4 changed files with 265 additions and 19 deletions
--- a/src/Symfony/Bundle/FrameworkBundle/Tests/Fixtures/Resources/views/translation.html.php
+++ b/src/Symfony/Bundle/FrameworkBundle/Tests/Fixtures/Resources/views/translation.html.php
@ -1,3 +1,33 @@
 This template is used for translation message extraction tests
 <?php echo $view['translator']->trans('single-quoted key') ?>
 <?php echo $view['translator']->trans("double-quoted key") ?>
+<?php echo $view['translator']->trans(<<<EOF
+heredoc key
+EOF
+) ?>
+<?php echo $view['translator']->trans(<<<'EOF'
+nowdoc key
+EOF
+) ?>
+<?php echo $view['translator']->trans(
+    "double-quoted key with whitespace and escaped \$\n\" sequences"
+) ?>
+<?php echo $view['translator']->trans(
+    'single-quoted key with whitespace and nonescaped \$\n\' sequences'
+) ?>
+<?php echo $view['translator']->trans( <<<EOF
+heredoc key with whitespace and escaped \$\n sequences
+EOF
+) ?>
+<?php echo $view['translator']->trans( <<<'EOF'
+nowdoc key with whitespace and nonescaped \$\n sequences
+EOF
+) ?>
+
+<?php echo $view['translator']->trans('single-quoted key with "quote mark at the end"') ?>
+
+<?php echo $view['translator']->transChoice(
+    '{0} There is no apples|{1} There is one apple|]1,Inf[ There are %count% apples',
+    10,
+    array('%count%' => 10)
+) ?>
--- a/src/Symfony/Bundle/FrameworkBundle/Tests/Translation/PhpExtractorTest.php
+++ b/src/Symfony/Bundle/FrameworkBundle/Tests/Translation/PhpExtractorTest.php
@ -27,10 +27,27 @@ class PhpExtractorTest extends TestCase
        // Act
        $extractor->extract(__DIR__.'/../Fixtures/Resources/views/', $catalogue);

+        $expectedHeredoc = <<<EOF
+heredoc key with whitespace and escaped \$\n sequences
+EOF;
+        $expectedNowdoc = <<<'EOF'
+nowdoc key with whitespace and nonescaped \$\n sequences
+EOF;
        // Assert
-        $this->assertCount(2, $catalogue->all('messages'), '->extract() should find 1 translation');
-        $this->assertTrue($catalogue->has('single-quoted key'), '->extract() should find the "single-quoted key" message');
-        $this->assertTrue($catalogue->has('double-quoted key'), '->extract() should find the "double-quoted key" message');
-        $this->assertEquals('prefixsingle-quoted key', $catalogue->get('single-quoted key'), '->extract() should apply "prefix" as prefix');
+        $expectedCatalogue = array('messages' => array(
+            'single-quoted key' => 'prefixsingle-quoted key',
+            'double-quoted key' => 'prefixdouble-quoted key',
+            'heredoc key' => 'prefixheredoc key',
+            'nowdoc key' => 'prefixnowdoc key',
+            "double-quoted key with whitespace and escaped \$\n\" sequences" => "prefixdouble-quoted key with whitespace and escaped \$\n\" sequences",
+            'single-quoted key with whitespace and nonescaped \$\n\' sequences' => 'prefixsingle-quoted key with whitespace and nonescaped \$\n\' sequences',
+            'single-quoted key with "quote mark at the end"' => 'prefixsingle-quoted key with "quote mark at the end"',
+            $expectedHeredoc => "prefix".$expectedHeredoc,
+            $expectedNowdoc => "prefix".$expectedNowdoc,
+            '{0} There is no apples|{1} There is one apple|]1,Inf[ There are %count% apples' => 'prefix{0} There is no apples|{1} There is one apple|]1,Inf[ There are %count% apples',
+        ));
+        $actualCatalogue = $catalogue->all();
+
+        $this->assertEquals($expectedCatalogue, $actualCatalogue);
    }
 }
--- a/src/Symfony/Bundle/FrameworkBundle/Translation/PhpExtractor.php
+++ b/src/Symfony/Bundle/FrameworkBundle/Translation/PhpExtractor.php
@ -23,7 +23,6 @@ use Symfony\Component\Translation\Extractor\ExtractorInterface;
 class PhpExtractor implements ExtractorInterface
 {
    const MESSAGE_TOKEN = 300;
-    const IGNORE_TOKEN = 400;

    /**
     * Prefix for new found message.
@ -39,15 +38,16 @@ class PhpExtractor implements ExtractorInterface
     */
    protected $sequences = array(
        array(
-            '$view',
-            '[',
-            '\'translator\'',
-            ']',
            '->',
            'trans',
            '(',
            self::MESSAGE_TOKEN,
-            ')',
+        ),
+        array(
+            '->',
+            'transChoice',
+            '(',
+            self::MESSAGE_TOKEN,
        ),
    );

@ -75,7 +75,7 @@ class PhpExtractor implements ExtractorInterface
    /**
     * Normalizes a token.
     *
-     * @param mixed $token
+     * @param  mixed  $token
     * @return string
     */
    protected function normalizeToken($token)
@ -87,6 +87,60 @@ class PhpExtractor implements ExtractorInterface
        return $token;
    }

+    /**
+     * Advances the iterator past any leading whitespace tokens.
+     *
+     * The iterator is left on the first token that is not a T_WHITESPACE
+     * token, or in an invalid state when nothing but whitespace remains.
+     * It does not move if the current token is already non-whitespace.
+     *
+     * @param \ArrayIterator $tokenIterator
+     */
+    protected function seekToNextRelaventToken($tokenIterator)
+    {
+        while ($tokenIterator->valid()) {
+            $token = $tokenIterator->current();
+            $isWhitespace = is_array($token) && T_WHITESPACE === $token[0];
+
+            if (!$isWhitespace) {
+                return;
+            }
+
+            $tokenIterator->next();
+        }
+    }
+
+    /**
+     * Extracts the message from the iterator while the tokens
+     * match allowed message tokens.
+     *
+     * Tokens are consumed from the iterator's current position; the iterator
+     * stops on the first token that cannot be part of a constant string
+     * (or on the T_END_HEREDOC token when a heredoc/nowdoc was read).
+     *
+     * @param \ArrayIterator $tokenIterator
+     *
+     * @return string The decoded message, or an empty string if no constant string was found
+     */
+    protected function getMessage($tokenIterator)
+    {
+        $message = '';
+        $docToken = '';
+
+        for ( ; $tokenIterator->valid(); $tokenIterator->next()) {
+            $t = $tokenIterator->current();
+            if (!is_array($t)) {
+                break;
+            }
+
+            switch ($t[0]) {
+                case T_START_HEREDOC:
+                    $docToken = $t[1];
+                    break;
+                case T_ENCAPSED_AND_WHITESPACE:
+                case T_CONSTANT_ENCAPSED_STRING:
+                    $message .= $t[1];
+                    break;
+                case T_END_HEREDOC:
+                    return PhpStringTokenParser::parseDocString($docToken, $message);
+                default:
+                    break 2;
+            }
+        }
+
+        // A heredoc that was opened but interrupted before T_END_HEREDOC
+        // (for instance by an interpolated variable) is not a constant string.
+        // Its partial text has no surrounding quotes, so it must not be handed
+        // to PhpStringTokenParser::parse(), which strips the first and last
+        // characters as if they were quotes.
+        if ('' !== $docToken) {
+            return '';
+        }
+
+        if ($message) {
+            $message = PhpStringTokenParser::parse($message);
+        }
+
+        return $message;
+    }
+
    /**
     * Extracts trans message from PHP tokens.
     *
@ -95,24 +149,27 @@ class PhpExtractor implements ExtractorInterface
     */
    protected function parseTokens($tokens, MessageCatalogue $catalog)
    {
-        foreach ($tokens as $key => $token) {
+        $tokenIterator = new \ArrayIterator($tokens);
+
+        for ($key = 0; $key < $tokenIterator->count(); $key++) {
            foreach ($this->sequences as $sequence) {
                $message = '';
+                $tokenIterator->seek($key);

-                foreach ($sequence as $id => $item) {
-                    if ($this->normalizeToken($tokens[$key + $id]) == $item) {
+                foreach ($sequence as $item) {
+                    $this->seekToNextRelaventToken($tokenIterator);
+
+                    if ($this->normalizeToken($tokenIterator->current()) == $item) {
+                        $tokenIterator->next();
                        continue;
                    } elseif (self::MESSAGE_TOKEN == $item) {
-                        $message = $this->normalizeToken($tokens[$key + $id]);
-                    } elseif (self::IGNORE_TOKEN == $item) {
-                        continue;
+                        $message = $this->getMessage($tokenIterator);
+                        break;
                    } else {
                        break;
                    }
                }

-                $message = trim($message, '\'"');
-
                if ($message) {
                    $catalog->set($message, $this->prefix.$message);
                    break;
--- a/src/Symfony/Bundle/FrameworkBundle/Translation/PhpStringTokenParser.php
+++ b/src/Symfony/Bundle/FrameworkBundle/Translation/PhpStringTokenParser.php
@ -0,0 +1,142 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Bundle\FrameworkBundle\Translation;
+
+/*
+ * The following is derived from code at http://github.com/nikic/PHP-Parser
+ *
+ * Copyright (c) 2011 by Nikita Popov
+ *
+ * Some rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *
+ *     * Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *
+ *     * The names of the contributors may not be used to endorse or
+ *       promote products derived from this software without specific
+ *       prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Decodes the raw source text of PHP string tokens (single-quoted,
+ * double-quoted, heredoc and nowdoc) into the string value they denote.
+ */
+class PhpStringTokenParser
+{
+    /**
+     * Single-character escape sequences mapped to the character they produce.
+     *
+     * @var array
+     */
+    protected static $replacements = array(
+        '\\' => '\\',
+        '$'  =>  '$',
+        'n'  => "\n",
+        'r'  => "\r",
+        't'  => "\t",
+        'f'  => "\f",
+        'v'  => "\v",
+        'e'  => "\x1B",
+    );
+
+    /**
+     * Parses a string token.
+     *
+     * The token must include its surrounding quotes and may carry a leading
+     * binary prefix ("b" or "B").
+     *
+     * @param string $str String token content
+     *
+     * @return string The parsed string
+     */
+    public static function parse($str)
+    {
+        // Skip the optional binary-string prefix, which may be written in either case.
+        $bLength = 0;
+        if ('b' === $str[0] || 'B' === $str[0]) {
+            $bLength = 1;
+        }
+
+        // Single-quoted strings only know the \\ and \' escapes.
+        if ('\'' === $str[$bLength]) {
+            return str_replace(
+                array('\\\\', '\\\''),
+                array(  '\\',   '\''),
+                substr($str, $bLength + 1, -1)
+            );
+        }
+
+        return self::parseEscapeSequences(substr($str, $bLength + 1, -1), '"');
+    }
+
+    /**
+     * Parses escape sequences in strings (all string types apart from single quoted).
+     *
+     * @param string      $str   String without quotes
+     * @param null|string $quote Quote type, or null when the string has no escapable quote (doc strings)
+     *
+     * @return string String with escape sequences parsed
+     */
+    public static function parseEscapeSequences($str, $quote)
+    {
+        if (null !== $quote) {
+            $str = str_replace('\\' . $quote, $quote, $str);
+        }
+
+        return preg_replace_callback(
+            '~\\\\([\\\\$nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3})~',
+            array(__CLASS__, 'parseCallback'),
+            $str
+        );
+    }
+
+    /**
+     * Converts one escape sequence matched by parseEscapeSequences().
+     *
+     * @param array $matches Regex matches; index 1 holds the sequence without its leading backslash
+     *
+     * @return string The character the escape sequence stands for
+     */
+    public static function parseCallback($matches)
+    {
+        $str = $matches[1];
+
+        if (isset(self::$replacements[$str])) {
+            return self::$replacements[$str];
+        }
+
+        if ('x' === $str[0] || 'X' === $str[0]) {
+            // Drop the x/X marker: it is not a hexadecimal digit, and passing
+            // it to hexdec() is deprecated in newer PHP versions.
+            return chr(hexdec(substr($str, 1)));
+        }
+
+        // Octal escapes of \400 and above do not fit in one byte; keep the low 8 bits.
+        return chr(octdec($str) & 0xFF);
+    }
+
+    /**
+     * Parses a constant doc string.
+     *
+     * @param string $startToken Doc string start token content (<<<SMTHG)
+     * @param string $str        String token content
+     *
+     * @return string Parsed string
+     */
+    public static function parseDocString($startToken, $str)
+    {
+        // strip last newline (thanks tokenizer for sticking it into the string!)
+        $str = preg_replace('~(\r\n|\n|\r)$~', '', $str);
+
+        // nowdoc string: the content is taken literally, no escape processing
+        if (false !== strpos($startToken, '\'')) {
+            return $str;
+        }
+
+        return self::parseEscapeSequences($str, null);
+    }
+}