lexer support for interpolated strings
author: Arnaud Le Blanc <arnaud.lb@gmail.com>
Sat, 12 Nov 2011 15:53:31 +0000 (16:53 +0100)
committer: Arnaud Le Blanc <arnaud.lb@gmail.com>
Sat, 12 Nov 2011 15:53:31 +0000 (16:53 +0100)
lib/Twig/Lexer.php
lib/Twig/Token.php

index b2e2a1c..ad1e87a 100644 (file)
@@ -31,14 +31,18 @@ class Twig_Lexer implements Twig_LexerInterface
     protected $filename;
     protected $options;
 
-    const STATE_DATA  = 0;
-    const STATE_BLOCK = 1;
-    const STATE_VAR   = 2;
-
-    const REGEX_NAME   = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
-    const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
-    const REGEX_STRING = '/"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
-    const PUNCTUATION  = '()[]{}?:.,|';
+    const STATE_DATA            = 0;
+    const STATE_BLOCK           = 1;
+    const STATE_VAR             = 2;
+    const STATE_STRING          = 3;
+    const STATE_INTERPOLATION   = 4;
+
+    const REGEX_NAME            = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
+    const REGEX_NUMBER          = '/[0-9]+(?:\.[0-9]+)?/A';
+    const REGEX_STRING          = '/\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
+    const REGEX_DQ_STRING_START = '/"/A';
+    const REGEX_DQ_STRING_PART  = '/(?:[^#"\\\\]|\\\\.|#(?!{))+/A';
+    const PUNCTUATION           = '()[]{}?:.,|';
 
     public function __construct(Twig_Environment $env, array $options = array())
     {
@@ -49,6 +53,7 @@ class Twig_Lexer implements Twig_LexerInterface
             'tag_block'       => array('{%', '%}'),
             'tag_variable'    => array('{{', '}}'),
             'whitespace_trim' => '-',
+            'interpolation'   => array('#{', '}'),
         ), $options);
 
         $this->options['lex_var_regex'] = '/\s*'.preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_variable'][1], '/').'/A';
@@ -59,6 +64,8 @@ class Twig_Lexer implements Twig_LexerInterface
         $this->options['lex_block_raw_regex'] = '/\s*raw\s*'.preg_quote($this->options['tag_block'][1], '/').'/As';
         $this->options['lex_block_line_regex'] = '/\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '/').'/As';
         $this->options['lex_tokens_start_regex'] = '/('.preg_quote($this->options['tag_variable'][0], '/').'|'.preg_quote($this->options['tag_block'][0], '/').'|'.preg_quote($this->options['tag_comment'][0], '/').')('.preg_quote($this->options['whitespace_trim'], '/').')?/s';
+        $this->options['interpolation_start_regex'] = '/'.preg_quote($this->options['interpolation'][0], '/').'\s*/A';
+        $this->options['interpolation_end_regex'] = '/\s*'.preg_quote($this->options['interpolation'][1], '/').'/A';
     }
 
     /**
@@ -106,6 +113,14 @@ class Twig_Lexer implements Twig_LexerInterface
                 case self::STATE_VAR:
                     $this->lexVar();
                     break;
+
+                case self::STATE_STRING:
+                    $this->lexString();
+                    break;
+
+                case self::STATE_INTERPOLATION:
+                    $this->lexInterpolation();
+                    break;
             }
         }
 
@@ -245,6 +260,12 @@ class Twig_Lexer implements Twig_LexerInterface
             $this->pushToken(Twig_Token::PUNCTUATION_TYPE, $this->code[$this->cursor]);
             ++$this->cursor;
         }
+        // opening double quoted string
+        elseif (preg_match(self::REGEX_DQ_STRING_START, $this->code, $match, null, $this->cursor)) {
+            $this->brackets[] = array('"', $this->lineno);
+            $this->pushState(self::STATE_STRING);
+            $this->moveCursor($match[0]);
+        }
         // strings
         elseif (preg_match(self::REGEX_STRING, $this->code, $match, null, $this->cursor)) {
             $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
@@ -275,6 +296,44 @@ class Twig_Lexer implements Twig_LexerInterface
         $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
     }
 
+    /**
+     * Lexes the content of a double-quoted string (STATE_STRING).
+     *
+     * Exactly one of the following is consumed at the current cursor position:
+     *  - an interpolation opener: the opener is recorded in the bracket stack,
+     *    an INTERPOLATION_START_TYPE token is pushed and the lexer switches to
+     *    STATE_INTERPOLATION;
+     *  - a run of literal characters: a STRING_TYPE token is pushed holding
+     *    the run after it has been passed through stripcslashes();
+     *  - the closing double quote: the innermost bracket is popped, popState()
+     *    is called and the cursor steps over the quote.
+     *
+     * @throws Twig_Error_Syntax When the closing quote does not pair with the
+     *                           innermost open bracket, or when input remains
+     *                           but none of the patterns above match it
+     */
+    protected function lexString()
+    {
+        if (preg_match($this->options['interpolation_start_regex'], $this->code, $match, 0, $this->cursor)) {
+            $this->brackets[] = array($this->options['interpolation'][0], $this->lineno);
+            $this->pushToken(Twig_Token::INTERPOLATION_START_TYPE);
+            $this->moveCursor($match[0]);
+            $this->pushState(self::STATE_INTERPOLATION);
+        } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor)) {
+            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes($match[0]));
+            $this->moveCursor($match[0]);
+        } elseif (preg_match(self::REGEX_DQ_STRING_START, $this->code, $match, 0, $this->cursor)) {
+            // The pattern above already guarantees a '"' under the cursor, so
+            // the only meaningful check is that the bracket being closed is
+            // the one that opened this string.
+            list($expect, $lineno) = array_pop($this->brackets);
+            if ('"' !== $expect) {
+                throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
+            }
+
+            $this->popState();
+            ++$this->cursor;
+        } elseif (isset($this->code[$this->cursor])) {
+            // Input remains but nothing matched, e.g. a backslash followed by
+            // a newline (REGEX_DQ_STRING_PART has no "s" modifier) or "#{"
+            // when a different interpolation opener is configured. Returning
+            // silently would leave the cursor where it is.
+            // NOTE(review): the dispatching loop is not visible here;
+            // presumably it would call this method again at the same
+            // position forever - confirm.
+            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s" in string', $this->code[$this->cursor]), $this->lineno, $this->filename);
+        }
+    }
+
+    /**
+     * Lexes the inside of a string interpolation (STATE_INTERPOLATION).
+     *
+     * When the innermost open bracket is the interpolation opener and the
+     * cursor sits on the configured closing delimiter (optionally preceded by
+     * whitespace), the interpolation is closed: the bracket is popped, an
+     * INTERPOLATION_END_TYPE token is pushed, the cursor moves past the match
+     * and popState() is called. Anything else is delegated to lexExpression().
+     */
+    protected function lexInterpolation()
+    {
+        $bracket = end($this->brackets);
+
+        // Match against the regex the constructor compiled from the
+        // 'interpolation' option, so that a custom closing delimiter is
+        // honoured instead of a hard-coded "}".
+        if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->options['interpolation_end_regex'], $this->code, $match, 0, $this->cursor)) {
+            array_pop($this->brackets);
+            $this->pushToken(Twig_Token::INTERPOLATION_END_TYPE);
+            $this->moveCursor($match[0]);
+            $this->popState();
+        } else {
+            $this->lexExpression();
+        }
+    }
+
     protected function pushToken($type, $value = '')
     {
         // do not push empty text tokens
index 79a1003..9bc7119 100644 (file)
@@ -22,17 +22,19 @@ class Twig_Token
     protected $type;
     protected $lineno;
 
-    const EOF_TYPE         = -1;
-    const TEXT_TYPE        = 0;
-    const BLOCK_START_TYPE = 1;
-    const VAR_START_TYPE   = 2;
-    const BLOCK_END_TYPE   = 3;
-    const VAR_END_TYPE     = 4;
-    const NAME_TYPE        = 5;
-    const NUMBER_TYPE      = 6;
-    const STRING_TYPE      = 7;
-    const OPERATOR_TYPE    = 8;
-    const PUNCTUATION_TYPE = 9;
+    const EOF_TYPE                  = -1;
+    const TEXT_TYPE                 = 0;
+    const BLOCK_START_TYPE          = 1;
+    const VAR_START_TYPE            = 2;
+    const BLOCK_END_TYPE            = 3;
+    const VAR_END_TYPE              = 4;
+    const NAME_TYPE                 = 5;
+    const NUMBER_TYPE               = 6;
+    const STRING_TYPE               = 7;
+    const OPERATOR_TYPE             = 8;
+    const PUNCTUATION_TYPE          = 9;
+    const INTERPOLATION_START_TYPE  = 10;
+    const INTERPOLATION_END_TYPE    = 11;
 
     /**
      * Constructor.
@@ -159,6 +161,12 @@ class Twig_Token
             case self::PUNCTUATION_TYPE:
                 $name = 'PUNCTUATION_TYPE';
                 break;
+            case self::INTERPOLATION_START_TYPE:
+                $name = 'INTERPOLATION_START_TYPE';
+                break;
+            case self::INTERPOLATION_END_TYPE:
+                $name = 'INTERPOLATION_END_TYPE';
+                break;
             default:
                 throw new Twig_Error_Syntax(sprintf('Token of type "%s" does not exist.', $type), $line);
         }
@@ -199,6 +207,10 @@ class Twig_Token
                 return 'operator';
             case self::PUNCTUATION_TYPE:
                 return 'punctuation';
+            case self::INTERPOLATION_START_TYPE:
+                return 'begin of string interpolation';
+            case self::INTERPOLATION_END_TYPE:
+                return 'end of string interpolation';
             default:
                 throw new Twig_Error_Syntax(sprintf('Token of type "%s" does not exist.', $type), $line);
         }