From f5adcb69707ad587f40db67fdfd586771feeff1a Mon Sep 17 00:00:00 2001 From: Fabien Potencier Date: Mon, 31 Oct 2011 09:05:09 +0100 Subject: [PATCH] optimized tokenizer by pre-computing all start tag positions (memory and CPU) --- lib/Twig/Lexer.php | 45 ++++++++++++++++++++------------------------- 1 files changed, 20 insertions(+), 25 deletions(-) diff --git a/lib/Twig/Lexer.php b/lib/Twig/Lexer.php index a365715..6c75d45 100644 --- a/lib/Twig/Lexer.php +++ b/lib/Twig/Lexer.php @@ -57,6 +57,7 @@ class Twig_Lexer implements Twig_LexerInterface $this->options['lex_comment_regex'] = '/(?:'.preg_quote($this->options['whitespace_trim'], '/').preg_quote($this->options['tag_comment'][1], '/').'\s*|'.preg_quote($this->options['tag_comment'][1], '/').')\n?/s'; $this->options['lex_block_raw_regex'] = '/\s*raw\s*'.preg_quote($this->options['tag_block'][1], '/').'/As'; $this->options['lex_block_line_regex'] = '/\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '/').'/As'; + $this->options['lex_tokens_start_regex'] = '/('.preg_quote($this->options['tag_variable'][0], '/').'|'.preg_quote($this->options['tag_block'][0], '/').'|'.preg_quote($this->options['tag_comment'][0], '/').')('.preg_quote($this->options['whitespace_trim'], '/').')?/s'; } /** @@ -82,6 +83,11 @@ class Twig_Lexer implements Twig_LexerInterface $this->tokens = array(); $this->state = self::STATE_DATA; $this->brackets = array(); + $this->position = -1; + + // find all token starts in one go + preg_match_all($this->options['lex_tokens_start_regex'], $this->code, $matches, PREG_OFFSET_CAPTURE); + $this->positions = $matches; while ($this->cursor < $this->end) { // dispatch to the lexing functions depending @@ -117,42 +123,31 @@ class Twig_Lexer implements Twig_LexerInterface protected function lexData() { - $pos = $this->end; - $trimBlock = false; - $append = ''; - - // Find the first token after the cursor - foreach (array('tag_comment', 'tag_variable', 'tag_block') as $type) { - $tmpPos = strpos($this->code, $this->options[$type][0], $this->cursor); - if (false !== $tmpPos && $tmpPos < $pos) { - $pos = $tmpPos; - $token = $this->options[$type][0]; - if ($this->options['whitespace_trim'] === substr($this->code, $pos + strlen($token), strlen($this->options['whitespace_trim']))) { - $trimBlock = true; - $append = $this->options['whitespace_trim']; - } else { - $trimBlock = false; - $append = ''; - } - } - } - // if no matches are left we return the rest of the template as simple text token - if ($pos === $this->end) { + if ($this->position == count($this->positions[0]) - 1) { $this->pushToken(Twig_Token::TEXT_TYPE, substr($this->code, $this->cursor)); $this->cursor = $this->end; return; } + // Find the first token after the current cursor + $position = $this->positions[0][++$this->position]; + while ($position[1] < $this->cursor) { + if ($this->position == count($this->positions[0]) - 1) { + return; + } + $position = $this->positions[0][++$this->position]; + } + // push the template text first - $text = $textContent = substr($this->code, $this->cursor, $pos - $this->cursor); - if (true === $trimBlock) { + $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor); + if (isset($this->positions[2][$this->position][0])) { $text = rtrim($text); } $this->pushToken(Twig_Token::TEXT_TYPE, $text); - $this->moveCursor($textContent.$token.$append); + $this->moveCursor($textContent.$position[0]); - switch ($token) { + switch ($this->positions[1][$this->position][0]) { case $this->options['tag_comment'][0]: $this->lexComment(); break; -- 1.7.2.5