From 372ad50667fa11c2c718701d20c492a31e62b3be Mon Sep 17 00:00:00 2001 From: sam-osborne <44622117+sam-osborne@users.noreply.github.com> Date: Fri, 28 Jun 2019 00:06:37 -0400 Subject: [PATCH 1/2] Performance improvements to Tokenizer::scan --- src/Handlebars/Tokenizer.php | 271 +++++++++++++++++++---------------- 1 file changed, 145 insertions(+), 126 deletions(-) diff --git a/src/Handlebars/Tokenizer.php b/src/Handlebars/Tokenizer.php index 89bdfc3..c1c6c59 100644 --- a/src/Handlebars/Tokenizer.php +++ b/src/Handlebars/Tokenizer.php @@ -82,126 +82,144 @@ class Tokenizer protected $otag; protected $ctag; - /** - * Scan and tokenize template source. - * - * @param string $text Mustache template source to tokenize - * @param string $delimiters Optional, pass opening and closing delimiters - * - * @return array Set of Mustache tokens - */ - public function scan($text, $delimiters = null) - { - if ($text instanceof HandlebarsString) { - $text = $text->getString(); - } - $this->reset(); + /** + * Scan and tokenize template source. 
+ * + * @param string $text Mustache template source to tokenize + * @param string $delimiters Optional, pass opening and closing delimiters + * + * @return array Set of Mustache tokens + */ + public function scan($text, $delimiters = null) + { + if ($text instanceof HandlebarsString) { + $text = $text->getString(); + } - if ($delimiters = trim($delimiters)) { - list($otag, $ctag) = explode(' ', $delimiters); - $this->otag = $otag; - $this->ctag = $ctag; - } + $this->reset(); - $len = strlen($text); - for ($i = 0; $i < $len; $i++) { - switch ($this->state) { - case self::IN_TEXT: - if ($this->tagChange($this->otag, $text, $i)) { - $i--; - $this->flushBuffer(); - $this->state = self::IN_TAG_TYPE; - } else { - if ($text[$i] == "\n") { - $this->filterLine(); - } else { - $this->buffer .= $text[$i]; - } - } - break; + if ($delimiters = trim($delimiters)) { + list($otag, $ctag) = explode(' ', $delimiters); + $this->otag = $otag; + $this->ctag = $ctag; + } - case self::IN_TAG_TYPE: + $openingTagLength = strlen($this->otag); + $closingTagLength = strlen($this->ctag); + $firstOpeningTagCharacter = $this->otag[0]; + $firstClosingTagCharacter = $this->ctag[0]; - $i += strlen($this->otag) - 1; - if (isset($this->tagTypes[$text[$i + 1]])) { - $tag = $text[$i + 1]; - $this->tagType = $tag; - } else { - $tag = null; - $this->tagType = self::T_ESCAPED; - } + $len = strlen($text); - if ($this->tagType === self::T_DELIM_CHANGE) { - $i = $this->changeDelimiters($text, $i); - $this->state = self::IN_TEXT; - } else { - if ($tag !== null) { - $i++; - } - $this->state = self::IN_TAG; - } - $this->seenTag = $i; - break; - - default: - if ($this->tagChange($this->ctag, $text, $i)) { - // Sections (Helpers) can accept parameters - // Same thing for Partials (little known fact) - if (in_array($this->tagType, [ - self::T_SECTION, - self::T_PARTIAL, - self::T_PARTIAL_2] - )) { - $newBuffer = explode(' ', trim($this->buffer), 2); - $args = ''; - if (count($newBuffer) == 2) { - $args = 
$newBuffer[1]; - } - $this->buffer = $newBuffer[0]; - } - $t = [ - self::TYPE => $this->tagType, - self::NAME => trim($this->buffer), - self::OTAG => $this->otag, - self::CTAG => $this->ctag, - self::INDEX => ($this->tagType == self::T_END_SECTION) ? - $this->seenTag - strlen($this->otag) : - $i + strlen($this->ctag), - ]; - if (isset($args)) { - $t[self::ARGS] = $args; - } - $this->tokens[] = $t; - unset($t); - unset($args); - $this->buffer = ''; - $i += strlen($this->ctag) - 1; - $this->state = self::IN_TEXT; - if ($this->tagType == self::T_UNESCAPED) { - if ($this->ctag == '}}') { - $i++; - } else { - // Clean up `{{{ tripleStache }}}` style tokens. - $lastIndex = count($this->tokens) - 1; - $lastName = $this->tokens[$lastIndex][self::NAME]; - if (substr($lastName, -1) === '}') { - $this->tokens[$lastIndex][self::NAME] = trim( - substr($lastName, 0, -1) - ); - } - } - } - } else { - $this->buffer .= $text[$i]; - } - break; - } - } + for ($i = 0; $i < $len; $i++) { - $this->filterLine(true); + $character = $text[$i]; - return $this->tokens; - } + switch ($this->state) { + + case self::IN_TEXT: + if ($character === $firstOpeningTagCharacter && $this->tagChange($this->otag, $text, $i, $openingTagLength) + ) { + $i--; + $this->flushBuffer(); + $this->state = self::IN_TAG_TYPE; + } else { + if ($character == "\n") { + $this->filterLine(); + } else { + $this->buffer .= $character; + } + } + break; + + case self::IN_TAG_TYPE: + + $i += $openingTagLength - 1; + if (isset($this->tagTypes[$text[$i + 1]])) { + $tag = $text[$i + 1]; + $this->tagType = $tag; + } else { + $tag = null; + $this->tagType = self::T_ESCAPED; + } + + if ($this->tagType === self::T_DELIM_CHANGE) { + $i = $this->changeDelimiters($text, $i); + $openingTagLength = strlen($this->otag); + $closingTagLength = strlen($this->ctag); + $firstOpeningTagCharacter = $this->otag[0]; + $firstClosingTagCharacter = $this->ctag[0]; + + $this->state = self::IN_TEXT; + } else { + if ($tag !== null) { + $i++; + } + 
$this->state = self::IN_TAG; + } + $this->seenTag = $i; + break; + + default: + if ($character === $firstClosingTagCharacter && $this->tagChange($this->ctag, $text, $i, $closingTagLength)) { + // Sections (Helpers) can accept parameters + // Same thing for Partials (little known fact) + if (in_array($this->tagType, [ + self::T_SECTION, + self::T_PARTIAL, + self::T_PARTIAL_2] + )) { + $newBuffer = explode(' ', trim($this->buffer), 2); + $args = ''; + if (count($newBuffer) == 2) { + $args = $newBuffer[1]; + } + $this->buffer = $newBuffer[0]; + } + $t = [ + self::TYPE => $this->tagType, + self::NAME => trim($this->buffer), + self::OTAG => $this->otag, + self::CTAG => $this->ctag, + self::INDEX => ($this->tagType == self::T_END_SECTION) ? + $this->seenTag - $openingTagLength : + $i + strlen($this->ctag), + ]; + if (isset($args)) { + $t[self::ARGS] = $args; + } + $this->tokens[] = $t; + unset($t); + unset($args); + $this->buffer = ''; + $i += strlen($this->ctag) - 1; + $this->state = self::IN_TEXT; + if ($this->tagType == self::T_UNESCAPED) { + if ($this->ctag == '}}') { + $i++; + } else { + // Clean up `{{{ tripleStache }}}` style tokens. + $lastIndex = count($this->tokens) - 1; + $lastName = $this->tokens[$lastIndex][self::NAME]; + if (substr($lastName, -1) === '}') { + $this->tokens[$lastIndex][self::NAME] = trim( + substr($lastName, 0, -1) + ); + } + } + } + } else { + $this->buffer .= $character; + } + break; + } + + } + + $this->filterLine(true); + + return $this->tokens; + } /** * Helper function to reset tokenizer internal state. @@ -317,18 +335,19 @@ protected function changeDelimiters($text, $index) return $closeIndex + strlen($close) - 1; } - /** - * Test whether it's time to change tags. 
- * - * @param string $tag Current tag name - * @param string $text Mustache template source - * @param int $index Current tokenizer index - * - * @return boolean True if this is a closing section tag - */ - protected function tagChange($tag, $text, $index) - { - return substr($text, $index, strlen($tag)) === $tag; - } + /** + * Test whether it's time to change tags. + * + * @param string $tag Current tag name + * @param string $text Mustache template source + * @param int $index Current tokenizer index + * @param int $tagLength Length of the opening/closing tag string + * + * @return boolean True if this is a closing section tag + */ + protected function tagChange($tag, $text, $index, $tagLength) + { + return substr($text, $index, $tagLength) === $tag; + } } From 517f67335dc0e64946d457dd800fb6651d3a6906 Mon Sep 17 00:00:00 2001 From: sam-osborne <44622117+sam-osborne@users.noreply.github.com> Date: Wed, 10 Jul 2019 20:36:55 -0400 Subject: [PATCH 2/2] clean up --- src/Handlebars/Tokenizer.php | 304 +++++++++++++++++------------------ 1 file changed, 152 insertions(+), 152 deletions(-) diff --git a/src/Handlebars/Tokenizer.php b/src/Handlebars/Tokenizer.php index c1c6c59..2a08ccb 100644 --- a/src/Handlebars/Tokenizer.php +++ b/src/Handlebars/Tokenizer.php @@ -82,144 +82,144 @@ class Tokenizer protected $otag; protected $ctag; - /** - * Scan and tokenize template source. 
- * - * @param string $text Mustache template source to tokenize - * @param string $delimiters Optional, pass opening and closing delimiters - * - * @return array Set of Mustache tokens - */ - public function scan($text, $delimiters = null) - { - if ($text instanceof HandlebarsString) { - $text = $text->getString(); - } - - $this->reset(); - - if ($delimiters = trim($delimiters)) { - list($otag, $ctag) = explode(' ', $delimiters); - $this->otag = $otag; - $this->ctag = $ctag; - } - - $openingTagLength = strlen($this->otag); - $closingTagLength = strlen($this->ctag); - $firstOpeningTagCharacter = $this->otag[0]; - $firstClosingTagCharacter = $this->ctag[0]; - - $len = strlen($text); - - for ($i = 0; $i < $len; $i++) { - - $character = $text[$i]; - - switch ($this->state) { - - case self::IN_TEXT: - if ($character === $firstOpeningTagCharacter && $this->tagChange($this->otag, $text, $i, $openingTagLength) - ) { - $i--; - $this->flushBuffer(); - $this->state = self::IN_TAG_TYPE; - } else { - if ($character == "\n") { - $this->filterLine(); - } else { - $this->buffer .= $character; - } - } - break; - - case self::IN_TAG_TYPE: - - $i += $openingTagLength - 1; - if (isset($this->tagTypes[$text[$i + 1]])) { - $tag = $text[$i + 1]; - $this->tagType = $tag; - } else { - $tag = null; - $this->tagType = self::T_ESCAPED; - } - - if ($this->tagType === self::T_DELIM_CHANGE) { - $i = $this->changeDelimiters($text, $i); - $openingTagLength = strlen($this->otag); - $closingTagLength = strlen($this->ctag); - $firstOpeningTagCharacter = $this->otag[0]; - $firstClosingTagCharacter = $this->ctag[0]; - - $this->state = self::IN_TEXT; - } else { - if ($tag !== null) { - $i++; - } - $this->state = self::IN_TAG; - } - $this->seenTag = $i; - break; - - default: - if ($character === $firstClosingTagCharacter && $this->tagChange($this->ctag, $text, $i, $closingTagLength)) { - // Sections (Helpers) can accept parameters - // Same thing for Partials (little known fact) - if 
(in_array($this->tagType, [ - self::T_SECTION, - self::T_PARTIAL, - self::T_PARTIAL_2] - )) { - $newBuffer = explode(' ', trim($this->buffer), 2); - $args = ''; - if (count($newBuffer) == 2) { - $args = $newBuffer[1]; - } - $this->buffer = $newBuffer[0]; - } - $t = [ - self::TYPE => $this->tagType, - self::NAME => trim($this->buffer), - self::OTAG => $this->otag, - self::CTAG => $this->ctag, - self::INDEX => ($this->tagType == self::T_END_SECTION) ? - $this->seenTag - $openingTagLength : - $i + strlen($this->ctag), - ]; - if (isset($args)) { - $t[self::ARGS] = $args; - } - $this->tokens[] = $t; - unset($t); - unset($args); - $this->buffer = ''; - $i += strlen($this->ctag) - 1; - $this->state = self::IN_TEXT; - if ($this->tagType == self::T_UNESCAPED) { - if ($this->ctag == '}}') { - $i++; - } else { - // Clean up `{{{ tripleStache }}}` style tokens. - $lastIndex = count($this->tokens) - 1; - $lastName = $this->tokens[$lastIndex][self::NAME]; - if (substr($lastName, -1) === '}') { - $this->tokens[$lastIndex][self::NAME] = trim( - substr($lastName, 0, -1) - ); - } - } - } - } else { - $this->buffer .= $character; - } - break; - } - - } - - $this->filterLine(true); - - return $this->tokens; - } + /** + * Scan and tokenize template source. 
+ * + * @param string $text Mustache template source to tokenize + * @param string $delimiters Optional, pass opening and closing delimiters + * + * @return array Set of Mustache tokens + */ + public function scan($text, $delimiters = null) + { + if ($text instanceof HandlebarsString) { + $text = $text->getString(); + } + + $this->reset(); + + if ($delimiters = trim($delimiters)) { + list($otag, $ctag) = explode(' ', $delimiters); + $this->otag = $otag; + $this->ctag = $ctag; + } + + $openingTagLength = strlen($this->otag); + $closingTagLength = strlen($this->ctag); + $firstOpeningTagCharacter = $this->otag[0]; + $firstClosingTagCharacter = $this->ctag[0]; + + $len = strlen($text); + + for ($i = 0; $i < $len; $i++) { + + $character = $text[$i]; + + switch ($this->state) { + + case self::IN_TEXT: + if ($character === $firstOpeningTagCharacter && $this->tagChange($this->otag, $text, $i, $openingTagLength) + ) { + $i--; + $this->flushBuffer(); + $this->state = self::IN_TAG_TYPE; + } else { + if ($character == "\n") { + $this->filterLine(); + } else { + $this->buffer .= $character; + } + } + break; + + case self::IN_TAG_TYPE: + + $i += $openingTagLength - 1; + if (isset($this->tagTypes[$text[$i + 1]])) { + $tag = $text[$i + 1]; + $this->tagType = $tag; + } else { + $tag = null; + $this->tagType = self::T_ESCAPED; + } + + if ($this->tagType === self::T_DELIM_CHANGE) { + $i = $this->changeDelimiters($text, $i); + $openingTagLength = strlen($this->otag); + $closingTagLength = strlen($this->ctag); + $firstOpeningTagCharacter = $this->otag[0]; + $firstClosingTagCharacter = $this->ctag[0]; + + $this->state = self::IN_TEXT; + } else { + if ($tag !== null) { + $i++; + } + $this->state = self::IN_TAG; + } + $this->seenTag = $i; + break; + + default: + if ($character === $firstClosingTagCharacter && $this->tagChange($this->ctag, $text, $i, $closingTagLength)) { + // Sections (Helpers) can accept parameters + // Same thing for Partials (little known fact) + if 
(in_array($this->tagType, [ + self::T_SECTION, + self::T_PARTIAL, + self::T_PARTIAL_2] + )) { + $newBuffer = explode(' ', trim($this->buffer), 2); + $args = ''; + if (count($newBuffer) == 2) { + $args = $newBuffer[1]; + } + $this->buffer = $newBuffer[0]; + } + $t = [ + self::TYPE => $this->tagType, + self::NAME => trim($this->buffer), + self::OTAG => $this->otag, + self::CTAG => $this->ctag, + self::INDEX => ($this->tagType == self::T_END_SECTION) ? + $this->seenTag - $openingTagLength : + $i + strlen($this->ctag), + ]; + if (isset($args)) { + $t[self::ARGS] = $args; + } + $this->tokens[] = $t; + unset($t); + unset($args); + $this->buffer = ''; + $i += strlen($this->ctag) - 1; + $this->state = self::IN_TEXT; + if ($this->tagType == self::T_UNESCAPED) { + if ($this->ctag == '}}') { + $i++; + } else { + // Clean up `{{{ tripleStache }}}` style tokens. + $lastIndex = count($this->tokens) - 1; + $lastName = $this->tokens[$lastIndex][self::NAME]; + if (substr($lastName, -1) === '}') { + $this->tokens[$lastIndex][self::NAME] = trim( + substr($lastName, 0, -1) + ); + } + } + } + } else { + $this->buffer .= $character; + } + break; + } + + } + + $this->filterLine(true); + + return $this->tokens; + } /** * Helper function to reset tokenizer internal state. @@ -335,19 +335,19 @@ protected function changeDelimiters($text, $index) return $closeIndex + strlen($close) - 1; } - /** - * Test whether it's time to change tags. - * - * @param string $tag Current tag name - * @param string $text Mustache template source - * @param int $index Current tokenizer index - * @param int $tagLength Length of the opening/closing tag string - * - * @return boolean True if this is a closing section tag - */ - protected function tagChange($tag, $text, $index, $tagLength) - { - return substr($text, $index, $tagLength) === $tag; - } + /** + * Test whether it's time to change tags. 
+ * + * @param string $tag Opening or closing delimiter to look for + * @param string $text Mustache template source + * @param int $index Current tokenizer index + * @param int $tagLength Length of $tag, precomputed by the caller + * + * @return boolean True if $text contains $tag starting at $index + */ + protected function tagChange($tag, $text, $index, $tagLength) + { + return substr($text, $index, $tagLength) === $tag; + } }