From a2036868a509b77211c288694edfe17b72833f25 Mon Sep 17 00:00:00 2001
From: Arnaud Le Blanc
Date: Sat, 12 Nov 2011 16:53:31 +0100
Subject: [PATCH] lexer support for interpolated strings

---
 lib/Twig/Lexer.php |   87 ++++++++++++++++++++++++++++++++++++++++++++++-----
 lib/Twig/Token.php |   34 ++++++++++++++------
 2 files changed, 102 insertions(+), 19 deletions(-)

diff --git a/lib/Twig/Lexer.php b/lib/Twig/Lexer.php
index b2e2a1c..ad1e87a 100644
--- a/lib/Twig/Lexer.php
+++ b/lib/Twig/Lexer.php
@@ -31,14 +31,18 @@ class Twig_Lexer implements Twig_LexerInterface
     protected $filename;
     protected $options;
 
-    const STATE_DATA  = 0;
-    const STATE_BLOCK = 1;
-    const STATE_VAR   = 2;
-
-    const REGEX_NAME   = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
-    const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
-    const REGEX_STRING = '/"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
-    const PUNCTUATION  = '()[]{}?:.,|';
+    const STATE_DATA            = 0;
+    const STATE_BLOCK           = 1;
+    const STATE_VAR             = 2;
+    const STATE_STRING          = 3;
+    const STATE_INTERPOLATION   = 4;
+
+    const REGEX_NAME            = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
+    const REGEX_NUMBER          = '/[0-9]+(?:\.[0-9]+)?/A';
+    const REGEX_STRING          = '/\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
+    const REGEX_DQ_STRING_START = '/"/A';
+    const REGEX_DQ_STRING_PART  = '/(?:[^#"\\\\]|\\\\.|#(?!{))+/As';
+    const PUNCTUATION           = '()[]{}?:.,|';
 
     public function __construct(Twig_Environment $env, array $options = array())
     {
@@ -49,6 +53,7 @@ class Twig_Lexer implements Twig_LexerInterface
             'tag_block'       => array('{%', '%}'),
             'tag_variable'    => array('{{', '}}'),
             'whitespace_trim' => '-',
+            'interpolation'   => array('#{', '}'),
         ), $options);
 
         $this->options['lex_var_regex'] = '/\s*'.preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_variable'][1], '/').'/A';
@@ -59,6 +64,8 @@ class Twig_Lexer implements Twig_LexerInterface
         $this->options['lex_block_raw_regex'] = '/\s*raw\s*'.preg_quote($this->options['tag_block'][1], '/').'/As';
         $this->options['lex_block_line_regex'] = '/\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '/').'/As';
         $this->options['lex_tokens_start_regex'] = '/('.preg_quote($this->options['tag_variable'][0], '/').'|'.preg_quote($this->options['tag_block'][0], '/').'|'.preg_quote($this->options['tag_comment'][0], '/').')('.preg_quote($this->options['whitespace_trim'], '/').')?/s';
+        $this->options['interpolation_start_regex'] = '/'.preg_quote($this->options['interpolation'][0], '/').'\s*/A';
+        $this->options['interpolation_end_regex'] = '/\s*'.preg_quote($this->options['interpolation'][1], '/').'/A';
     }
 
     /**
@@ -106,6 +113,14 @@ class Twig_Lexer implements Twig_LexerInterface
                 case self::STATE_VAR:
                     $this->lexVar();
                     break;
+
+                case self::STATE_STRING:
+                    $this->lexString();
+                    break;
+
+                case self::STATE_INTERPOLATION:
+                    $this->lexInterpolation();
+                    break;
             }
         }
 
@@ -245,6 +260,12 @@ class Twig_Lexer implements Twig_LexerInterface
             $this->pushToken(Twig_Token::PUNCTUATION_TYPE, $this->code[$this->cursor]);
             ++$this->cursor;
         }
+        // opening double quoted string
+        elseif (preg_match(self::REGEX_DQ_STRING_START, $this->code, $match, null, $this->cursor)) {
+            $this->brackets[] = array('"', $this->lineno);
+            $this->pushState(self::STATE_STRING);
+            $this->moveCursor($match[0]);
+        }
         // strings
         elseif (preg_match(self::REGEX_STRING, $this->code, $match, null, $this->cursor)) {
             $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
@@ -275,6 +296,56 @@ class Twig_Lexer implements Twig_LexerInterface
         $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
     }
 
+    /**
+     * Lexes the content of a double quoted string.
+     *
+     * Each call handles at most one of: the start of an interpolation, a run
+     * of literal characters, or the closing quote.
+     */
+    protected function lexString()
+    {
+        if (preg_match($this->options['interpolation_start_regex'], $this->code, $match, null, $this->cursor)) {
+            $this->brackets[] = array($this->options['interpolation'][0], $this->lineno);
+            $this->pushToken(Twig_Token::INTERPOLATION_START_TYPE);
+            $this->moveCursor($match[0]);
+            $this->pushState(self::STATE_INTERPOLATION);
+
+        } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, null, $this->cursor)) {
+            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes($match[0]));
+            $this->moveCursor($match[0]);
+
+        } elseif (preg_match(self::REGEX_DQ_STRING_START, $this->code, $match, null, $this->cursor)) {
+
+            list($expect, $lineno) = array_pop($this->brackets);
+            if ($this->code[$this->cursor] != '"') {
+                throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
+            }
+
+            $this->popState();
+            ++$this->cursor;
+        }
+    }
+
+    /**
+     * Lexes the expression embedded in an interpolation.
+     *
+     * The closing delimiter ends the interpolation only while the most
+     * recently recorded bracket is the interpolation opener itself; anything
+     * else is handed to lexExpression().
+     */
+    protected function lexInterpolation()
+    {
+        $bracket = end($this->brackets);
+        if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->options['interpolation_end_regex'], $this->code, $match, null, $this->cursor)) {
+            array_pop($this->brackets);
+            $this->pushToken(Twig_Token::INTERPOLATION_END_TYPE);
+            $this->moveCursor($match[0]);
+            $this->popState();
+        } else {
+            $this->lexExpression();
+        }
+    }
+
     protected function pushToken($type, $value = '')
     {
         // do not push empty text tokens
diff --git a/lib/Twig/Token.php b/lib/Twig/Token.php
index 79a1003..9bc7119 100644
--- a/lib/Twig/Token.php
+++ b/lib/Twig/Token.php
@@ -22,17 +22,19 @@ class Twig_Token
     protected $type;
     protected $lineno;
 
-    const EOF_TYPE         = -1;
-    const TEXT_TYPE        = 0;
-    const BLOCK_START_TYPE = 1;
-    const VAR_START_TYPE   = 2;
-    const BLOCK_END_TYPE   = 3;
-    const VAR_END_TYPE     = 4;
-    const NAME_TYPE        = 5;
-    const NUMBER_TYPE      = 6;
-    const STRING_TYPE      = 7;
-    const OPERATOR_TYPE    = 8;
-    const PUNCTUATION_TYPE = 9;
+    const EOF_TYPE                 = -1;
+    const TEXT_TYPE                = 0;
+    const BLOCK_START_TYPE         = 1;
+    const VAR_START_TYPE           = 2;
+    const BLOCK_END_TYPE           = 3;
+    const VAR_END_TYPE             = 4;
+    const NAME_TYPE                = 5;
+    const NUMBER_TYPE              = 6;
+    const STRING_TYPE              = 7;
+    const OPERATOR_TYPE            = 8;
+    const PUNCTUATION_TYPE         = 9;
+    const INTERPOLATION_START_TYPE = 10;
+    const INTERPOLATION_END_TYPE   = 11;
 
     /**
      * Constructor.
@@ -159,6 +161,12 @@ class Twig_Token
             case self::PUNCTUATION_TYPE:
                 $name = 'PUNCTUATION_TYPE';
                 break;
+            case self::INTERPOLATION_START_TYPE:
+                $name = 'INTERPOLATION_START_TYPE';
+                break;
+            case self::INTERPOLATION_END_TYPE:
+                $name = 'INTERPOLATION_END_TYPE';
+                break;
             default:
                 throw new Twig_Error_Syntax(sprintf('Token of type "%s" does not exist.', $type), $line);
         }
@@ -199,6 +207,10 @@ class Twig_Token
                 return 'operator';
             case self::PUNCTUATION_TYPE:
                 return 'punctuation';
+            case self::INTERPOLATION_START_TYPE:
+                return 'begin of string interpolation';
+            case self::INTERPOLATION_END_TYPE:
+                return 'end of string interpolation';
             default:
                 throw new Twig_Error_Syntax(sprintf('Token of type "%s" does not exist.', $type), $line);
         }
-- 
1.7.2.5