From 7657c01e65695a9a53cf63e0d8e4b872ce00451b Mon Sep 17 00:00:00 2001 From: Fabien Potencier Date: Wed, 27 Jun 2012 09:07:27 +0200 Subject: [PATCH] added support for more escaping strategies (url, css, and html_attr) --- CHANGELOG | 3 +- doc/api.rst | 8 +- doc/filters/date_modify.rst | 4 +- doc/filters/escape.rst | 27 ++- doc/templates.rst | 32 ++- ext/twig/php_twig.h | 2 +- lib/Twig/Environment.php | 2 +- lib/Twig/Extension/Core.php | 363 +++++++++++++++++++- test/Twig/Tests/EnvironmentTest.php | 2 +- test/Twig/Tests/Fixtures/filters/force_escape.test | 2 +- .../Tests/Fixtures/tags/autoescape/functions.test | 2 +- .../Tests/Fixtures/tags/autoescape/strategy.test | 4 +- test/Twig/Tests/Fixtures/tags/autoescape/type.test | 10 +- test/Twig/Tests/escapingTest.php | 324 +++++++++++++++++ 14 files changed, 752 insertions(+), 33 deletions(-) create mode 100644 test/Twig/Tests/escapingTest.php diff --git a/CHANGELOG b/CHANGELOG index 394d561..e9286a9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,6 @@ -* 1.8.4 (2012-XX-XX) +* 1.9.0 (2012-XX-XX) + * added escaping strategies for CSS, URL, and HTML attributes * fixed nested embed tag calls * added the date_modify filter diff --git a/doc/api.rst b/doc/api.rst index a462066..8786c35 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -96,10 +96,10 @@ The following options are available: * ``autoescape``: If set to ``true``, auto-escaping will be enabled by default for all templates (default to ``true``). As of Twig 1.8, you can set the - escaping strategy to use (``html``, ``js``, ``false`` to disable, or a PHP - callback that takes the template "filename" and must return the escaping - strategy to use -- the callback cannot be a function name to avoid collision - with built-in escaping strategies). 
+ escaping strategy to use (``html``, ``js``, ``css``, ``false`` to disable, + or a PHP callback that takes the template "filename" and must return the + escaping strategy to use -- the callback cannot be a function name to avoid + collision with built-in escaping strategies). * ``optimizations``: A flag that indicates which optimizations to apply (default to ``-1`` -- all optimizations are enabled; set it to ``0`` to diff --git a/doc/filters/date_modify.rst b/doc/filters/date_modify.rst index a05b591..a7ec621 100644 --- a/doc/filters/date_modify.rst +++ b/doc/filters/date_modify.rst @@ -1,8 +1,8 @@ ``date_modify`` =============== -.. versionadded:: 1.8.4 - The date_modify filter has been added in Twig 1.8.4. +.. versionadded:: 1.9.0 + The date_modify filter has been added in Twig 1.9.0. The ``date_modify`` filter modifies a date with a given modifier string: diff --git a/doc/filters/escape.rst b/doc/filters/escape.rst index 5142977..9685585 100644 --- a/doc/filters/escape.rst +++ b/doc/filters/escape.rst @@ -1,9 +1,15 @@ ``escape`` ========== -The ``escape`` filter converts the characters ``&``, ``<``, ``>``, ``'``, and -``"`` in strings to HTML-safe sequences. Use this if you need to display text -that might contain such characters in HTML: +.. versionadded:: 1.9.0 + The ``css``, ``url``, and ``html_attr`` strategies were added in Twig + 1.9.0. + +The ``escape`` filter escapes a string for safe insertion into the final +output. It supports different escaping strategies depending on the template +context. + +By default, it uses the HTML escaping strategy: .. code-block:: jinja @@ -31,6 +37,21 @@ And here is how to escape variables included in JavaScript code: {{ user.username|escape('js') }} {{ user.username|e('js') }} +The ``escape`` filter supports the following escaping strategies: + +* ``html``: escapes a string for the **HTML body** context. + +* ``js``: escapes a string for the **JavaScript context**. + +* ``css``: escapes a string for the **CSS context**. 
CSS escaping can be + applied to any string being inserted into CSS and escapes everything except + alphanumerics. + +* ``url``: escapes a string for the **URI or parameter contexts**. This should + not be used to escape an entire URI; only a subcomponent being inserted. + +* ``html_attr``: escapes a string for the **HTML attribute** context. + .. note:: Internally, ``escape`` uses the PHP native `htmlspecialchars`_ function diff --git a/doc/templates.rst b/doc/templates.rst index 278e56d..9fabd53 100644 --- a/doc/templates.rst +++ b/doc/templates.rst @@ -371,16 +371,24 @@ Twig supports both, automatic escaping is enabled by default. Working with Manual Escaping ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If manual escaping is enabled it's **your** responsibility to escape variables -if needed. What to escape? If you have a variable that *may* include any of -the following chars (``>``, ``<``, ``&``, or ``"``) you **have to** escape it -unless the variable contains well-formed and trusted HTML. Escaping works by -piping the variable through the :doc:`escape<filters/escape>` or ``e`` filter: +If manual escaping is enabled, it is **your** responsibility to escape +variables if needed. What to escape? Any variable you don't trust. + +Escaping works by piping the variable through the +:doc:`escape<filters/escape>` or ``e`` filter: .. code-block:: jinja {{ user.username|e }} + +By default, the ``escape`` filter uses the ``html`` strategy, but depending on +the escaping context, you might want to explicitly use any other available +strategies:: + {{ user.username|e('js') }} + {{ user.username|e('css') }} + {{ user.username|e('url') }} + {{ user.username|e('html_attr') }} Working with Automatic Escaping ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -391,8 +399,18 @@ tag: ..
code-block:: jinja - {% autoescape true %} - Everything will be automatically escaped in this block + {% autoescape %} + Everything will be automatically escaped in this block (using the HTML strategy) + {% endautoescape %} + +By default, auto-escaping uses the ``html`` escaping strategy. If you output +variables in other contexts, you need to explicitly escape them with the +appropriate escaping strategy: + +.. code-block:: jinja + + {% autoescape 'js' %} + Everything will be automatically escaped in this block (using the JS strategy) {% endautoescape %} Escaping diff --git a/ext/twig/php_twig.h b/ext/twig/php_twig.h index f0f25e4..ee6e9f2 100644 --- a/ext/twig/php_twig.h +++ b/ext/twig/php_twig.h @@ -15,7 +15,7 @@ #ifndef PHP_TWIG_H #define PHP_TWIG_H -#define PHP_TWIG_VERSION "1.8.4-DEV" +#define PHP_TWIG_VERSION "1.9.0-DEV" #include "php.h" diff --git a/lib/Twig/Environment.php b/lib/Twig/Environment.php index 8cf70fd..916979f 100644 --- a/lib/Twig/Environment.php +++ b/lib/Twig/Environment.php @@ -17,7 +17,7 @@ */ class Twig_Environment { - const VERSION = '1.8.4-DEV'; + const VERSION = '1.9.0-DEV'; protected $charset; protected $loader; diff --git a/lib/Twig/Extension/Core.php b/lib/Twig/Extension/Core.php index 9a2926a..bfdfac3 100644 --- a/lib/Twig/Extension/Core.php +++ b/lib/Twig/Extension/Core.php @@ -791,10 +791,46 @@ function twig_escape_filter(Twig_Environment $env, $string, $strategy = 'html', $string = twig_convert_encoding($string, 'UTF-8', $charset); } - if (null === $string = preg_replace_callback('#[^\p{L}\p{N} ]#u', '_twig_escape_js_callback', $string)) { + if (0 == strlen($string) ? false : (1 == preg_match('/^./su', $string) ? 
false : true)) { throw new Twig_Error_Runtime('The string to escape is not a valid UTF-8 string.'); } + $string = preg_replace_callback('#[^a-zA-Z0-9,\._]#Su', '_twig_escape_js_callback', $string); + + if ('UTF-8' != $charset) { + $string = twig_convert_encoding($string, $charset, 'UTF-8'); + } + + return $string; + + case 'css': + if ('UTF-8' != $charset) { + $string = twig_convert_encoding($string, 'UTF-8', $charset); + } + + if (0 == strlen($string) ? false : (1 == preg_match('/^./su', $string) ? false : true)) { + throw new Twig_Error_Runtime('The string to escape is not a valid UTF-8 string.'); + } + + $string = preg_replace_callback('#[^a-zA-Z0-9]#Su', '_twig_escape_css_callback', $string); + + if ('UTF-8' != $charset) { + $string = twig_convert_encoding($string, $charset, 'UTF-8'); + } + + return $string; + + case 'html_attr': + if ('UTF-8' != $charset) { + $string = twig_convert_encoding($string, 'UTF-8', $charset); + } + + if (0 == strlen($string) ? false : (1 == preg_match('/^./su', $string) ? 
false : true)) { + throw new Twig_Error_Runtime('The string to escape is not a valid UTF-8 string.'); + } + + $string = preg_replace_callback('#[^a-zA-Z0-9,\.\-_]#Su', '_twig_escape_html_attr_callback', $string); + if ('UTF-8' != $charset) { $string = twig_convert_encoding($string, $charset, 'UTF-8'); } @@ -833,8 +869,11 @@ function twig_escape_filter(Twig_Environment $env, $string, $strategy = 'html', return twig_convert_encoding($string, $charset, 'UTF-8'); + case 'url': + return rawurlencode($string); + default: - throw new Twig_Error_Runtime(sprintf('Invalid escaping strategy "%s" (valid ones: html, js).', $strategy)); + throw new Twig_Error_Runtime(sprintf('Invalid escaping strategy "%s" (valid ones: html, js, url, css, and html_attr).', $strategy)); } } @@ -875,13 +914,329 @@ function _twig_escape_js_callback($matches) // \xHH if (!isset($char[1])) { - return '\\x'.substr('00'.bin2hex($char), -2); + return '\\x'.strtoupper(substr('00'.bin2hex($char), -2)); } // \uHHHH $char = twig_convert_encoding($char, 'UTF-16BE', 'UTF-8'); - return '\\u'.substr('0000'.bin2hex($char), -4); + return '\\u'.strtoupper(substr('0000'.bin2hex($char), -4)); +} + +function _twig_escape_css_callback($matches) +{ + $char = $matches[0]; + + // \xHH + if (!isset($char[1])) { + $hex = ltrim(strtoupper(bin2hex($char)), '0'); + if (0 === strlen($hex)) { + $hex = '0'; + } + return '\\'.$hex.' '; + } + + // \uHHHH + $char = twig_convert_encoding($char, 'UTF-16BE', 'UTF-8'); + + return '\\'.ltrim(strtoupper(bin2hex($char)), '0').' '; +} + +/** + * This function is adapted from code coming from Zend Framework. + * + * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. 
(http://www.zend.com) + * @license http://framework.zend.com/license/new-bsd New BSD License + */ +function _twig_escape_html_attr_callback($matches) +{ + static $entityMap = array( + 34 => 'quot', /* quotation mark */ + 38 => 'amp', /* ampersand */ + 60 => 'lt', /* less-than sign */ + 62 => 'gt', /* greater-than sign */ + 160 => 'nbsp', /* no-break space */ + 161 => 'iexcl', /* inverted exclamation mark */ + 162 => 'cent', /* cent sign */ + 163 => 'pound', /* pound sign */ + 164 => 'curren', /* currency sign */ + 165 => 'yen', /* yen sign */ + 166 => 'brvbar', /* broken bar */ + 167 => 'sect', /* section sign */ + 168 => 'uml', /* diaeresis */ + 169 => 'copy', /* copyright sign */ + 170 => 'ordf', /* feminine ordinal indicator */ + 171 => 'laquo', /* left-pointing double angle quotation mark */ + 172 => 'not', /* not sign */ + 173 => 'shy', /* soft hyphen */ + 174 => 'reg', /* registered sign */ + 175 => 'macr', /* macron */ + 176 => 'deg', /* degree sign */ + 177 => 'plusmn', /* plus-minus sign */ + 178 => 'sup2', /* superscript two */ + 179 => 'sup3', /* superscript three */ + 180 => 'acute', /* acute accent */ + 181 => 'micro', /* micro sign */ + 182 => 'para', /* pilcrow sign */ + 183 => 'middot', /* middle dot */ + 184 => 'cedil', /* cedilla */ + 185 => 'sup1', /* superscript one */ + 186 => 'ordm', /* masculine ordinal indicator */ + 187 => 'raquo', /* right-pointing double angle quotation mark */ + 188 => 'frac14', /* vulgar fraction one quarter */ + 189 => 'frac12', /* vulgar fraction one half */ + 190 => 'frac34', /* vulgar fraction three quarters */ + 191 => 'iquest', /* inverted question mark */ + 192 => 'Agrave', /* Latin capital letter a with grave */ + 193 => 'Aacute', /* Latin capital letter a with acute */ + 194 => 'Acirc', /* Latin capital letter a with circumflex */ + 195 => 'Atilde', /* Latin capital letter a with tilde */ + 196 => 'Auml', /* Latin capital letter a with diaeresis */ + 197 => 'Aring', /* Latin capital letter a with ring above */ 
+ 198 => 'AElig', /* Latin capital letter ae */ + 199 => 'Ccedil', /* Latin capital letter c with cedilla */ + 200 => 'Egrave', /* Latin capital letter e with grave */ + 201 => 'Eacute', /* Latin capital letter e with acute */ + 202 => 'Ecirc', /* Latin capital letter e with circumflex */ + 203 => 'Euml', /* Latin capital letter e with diaeresis */ + 204 => 'Igrave', /* Latin capital letter i with grave */ + 205 => 'Iacute', /* Latin capital letter i with acute */ + 206 => 'Icirc', /* Latin capital letter i with circumflex */ + 207 => 'Iuml', /* Latin capital letter i with diaeresis */ + 208 => 'ETH', /* Latin capital letter eth */ + 209 => 'Ntilde', /* Latin capital letter n with tilde */ + 210 => 'Ograve', /* Latin capital letter o with grave */ + 211 => 'Oacute', /* Latin capital letter o with acute */ + 212 => 'Ocirc', /* Latin capital letter o with circumflex */ + 213 => 'Otilde', /* Latin capital letter o with tilde */ + 214 => 'Ouml', /* Latin capital letter o with diaeresis */ + 215 => 'times', /* multiplication sign */ + 216 => 'Oslash', /* Latin capital letter o with stroke */ + 217 => 'Ugrave', /* Latin capital letter u with grave */ + 218 => 'Uacute', /* Latin capital letter u with acute */ + 219 => 'Ucirc', /* Latin capital letter u with circumflex */ + 220 => 'Uuml', /* Latin capital letter u with diaeresis */ + 221 => 'Yacute', /* Latin capital letter y with acute */ + 222 => 'THORN', /* Latin capital letter thorn */ + 223 => 'szlig', /* Latin small letter sharp sXCOMMAX German Eszett */ + 224 => 'agrave', /* Latin small letter a with grave */ + 225 => 'aacute', /* Latin small letter a with acute */ + 226 => 'acirc', /* Latin small letter a with circumflex */ + 227 => 'atilde', /* Latin small letter a with tilde */ + 228 => 'auml', /* Latin small letter a with diaeresis */ + 229 => 'aring', /* Latin small letter a with ring above */ + 230 => 'aelig', /* Latin lowercase ligature ae */ + 231 => 'ccedil', /* Latin small letter c with cedilla */ + 232 => 
'egrave', /* Latin small letter e with grave */ + 233 => 'eacute', /* Latin small letter e with acute */ + 234 => 'ecirc', /* Latin small letter e with circumflex */ + 235 => 'euml', /* Latin small letter e with diaeresis */ + 236 => 'igrave', /* Latin small letter i with grave */ + 237 => 'iacute', /* Latin small letter i with acute */ + 238 => 'icirc', /* Latin small letter i with circumflex */ + 239 => 'iuml', /* Latin small letter i with diaeresis */ + 240 => 'eth', /* Latin small letter eth */ + 241 => 'ntilde', /* Latin small letter n with tilde */ + 242 => 'ograve', /* Latin small letter o with grave */ + 243 => 'oacute', /* Latin small letter o with acute */ + 244 => 'ocirc', /* Latin small letter o with circumflex */ + 245 => 'otilde', /* Latin small letter o with tilde */ + 246 => 'ouml', /* Latin small letter o with diaeresis */ + 247 => 'divide', /* division sign */ + 248 => 'oslash', /* Latin small letter o with stroke */ + 249 => 'ugrave', /* Latin small letter u with grave */ + 250 => 'uacute', /* Latin small letter u with acute */ + 251 => 'ucirc', /* Latin small letter u with circumflex */ + 252 => 'uuml', /* Latin small letter u with diaeresis */ + 253 => 'yacute', /* Latin small letter y with acute */ + 254 => 'thorn', /* Latin small letter thorn */ + 255 => 'yuml', /* Latin small letter y with diaeresis */ + 338 => 'OElig', /* Latin capital ligature oe */ + 339 => 'oelig', /* Latin small ligature oe */ + 352 => 'Scaron', /* Latin capital letter s with caron */ + 353 => 'scaron', /* Latin small letter s with caron */ + 376 => 'Yuml', /* Latin capital letter y with diaeresis */ + 402 => 'fnof', /* Latin small letter f with hook */ + 710 => 'circ', /* modifier letter circumflex accent */ + 732 => 'tilde', /* small tilde */ + 913 => 'Alpha', /* Greek capital letter alpha */ + 914 => 'Beta', /* Greek capital letter beta */ + 915 => 'Gamma', /* Greek capital letter gamma */ + 916 => 'Delta', /* Greek capital letter delta */ + 917 => 'Epsilon', /* 
Greek capital letter epsilon */ + 918 => 'Zeta', /* Greek capital letter zeta */ + 919 => 'Eta', /* Greek capital letter eta */ + 920 => 'Theta', /* Greek capital letter theta */ + 921 => 'Iota', /* Greek capital letter iota */ + 922 => 'Kappa', /* Greek capital letter kappa */ + 923 => 'Lambda', /* Greek capital letter lambda */ + 924 => 'Mu', /* Greek capital letter mu */ + 925 => 'Nu', /* Greek capital letter nu */ + 926 => 'Xi', /* Greek capital letter xi */ + 927 => 'Omicron', /* Greek capital letter omicron */ + 928 => 'Pi', /* Greek capital letter pi */ + 929 => 'Rho', /* Greek capital letter rho */ + 931 => 'Sigma', /* Greek capital letter sigma */ + 932 => 'Tau', /* Greek capital letter tau */ + 933 => 'Upsilon', /* Greek capital letter upsilon */ + 934 => 'Phi', /* Greek capital letter phi */ + 935 => 'Chi', /* Greek capital letter chi */ + 936 => 'Psi', /* Greek capital letter psi */ + 937 => 'Omega', /* Greek capital letter omega */ + 945 => 'alpha', /* Greek small letter alpha */ + 946 => 'beta', /* Greek small letter beta */ + 947 => 'gamma', /* Greek small letter gamma */ + 948 => 'delta', /* Greek small letter delta */ + 949 => 'epsilon', /* Greek small letter epsilon */ + 950 => 'zeta', /* Greek small letter zeta */ + 951 => 'eta', /* Greek small letter eta */ + 952 => 'theta', /* Greek small letter theta */ + 953 => 'iota', /* Greek small letter iota */ + 954 => 'kappa', /* Greek small letter kappa */ + 955 => 'lambda', /* Greek small letter lambda */ + 956 => 'mu', /* Greek small letter mu */ + 957 => 'nu', /* Greek small letter nu */ + 958 => 'xi', /* Greek small letter xi */ + 959 => 'omicron', /* Greek small letter omicron */ + 960 => 'pi', /* Greek small letter pi */ + 961 => 'rho', /* Greek small letter rho */ + 962 => 'sigmaf', /* Greek small letter final sigma */ + 963 => 'sigma', /* Greek small letter sigma */ + 964 => 'tau', /* Greek small letter tau */ + 965 => 'upsilon', /* Greek small letter upsilon */ + 966 => 'phi', /* Greek small 
letter phi */ + 967 => 'chi', /* Greek small letter chi */ + 968 => 'psi', /* Greek small letter psi */ + 969 => 'omega', /* Greek small letter omega */ + 977 => 'thetasym', /* Greek theta symbol */ + 978 => 'upsih', /* Greek upsilon with hook symbol */ + 982 => 'piv', /* Greek pi symbol */ + 8194 => 'ensp', /* en space */ + 8195 => 'emsp', /* em space */ + 8201 => 'thinsp', /* thin space */ + 8204 => 'zwnj', /* zero width non-joiner */ + 8205 => 'zwj', /* zero width joiner */ + 8206 => 'lrm', /* left-to-right mark */ + 8207 => 'rlm', /* right-to-left mark */ + 8211 => 'ndash', /* en dash */ + 8212 => 'mdash', /* em dash */ + 8216 => 'lsquo', /* left single quotation mark */ + 8217 => 'rsquo', /* right single quotation mark */ + 8218 => 'sbquo', /* single low-9 quotation mark */ + 8220 => 'ldquo', /* left double quotation mark */ + 8221 => 'rdquo', /* right double quotation mark */ + 8222 => 'bdquo', /* double low-9 quotation mark */ + 8224 => 'dagger', /* dagger */ + 8225 => 'Dagger', /* double dagger */ + 8226 => 'bull', /* bullet */ + 8230 => 'hellip', /* horizontal ellipsis */ + 8240 => 'permil', /* per mille sign */ + 8242 => 'prime', /* prime */ + 8243 => 'Prime', /* double prime */ + 8249 => 'lsaquo', /* single left-pointing angle quotation mark */ + 8250 => 'rsaquo', /* single right-pointing angle quotation mark */ + 8254 => 'oline', /* overline */ + 8260 => 'frasl', /* fraction slash */ + 8364 => 'euro', /* euro sign */ + 8465 => 'image', /* black-letter capital i */ + 8472 => 'weierp', /* script capital pXCOMMAX Weierstrass p */ + 8476 => 'real', /* black-letter capital r */ + 8482 => 'trade', /* trademark sign */ + 8501 => 'alefsym', /* alef symbol */ + 8592 => 'larr', /* leftwards arrow */ + 8593 => 'uarr', /* upwards arrow */ + 8594 => 'rarr', /* rightwards arrow */ + 8595 => 'darr', /* downwards arrow */ + 8596 => 'harr', /* left right arrow */ + 8629 => 'crarr', /* downwards arrow with corner leftwards */ + 8656 => 'lArr', /* leftwards double arrow 
*/ + 8657 => 'uArr', /* upwards double arrow */ + 8658 => 'rArr', /* rightwards double arrow */ + 8659 => 'dArr', /* downwards double arrow */ + 8660 => 'hArr', /* left right double arrow */ + 8704 => 'forall', /* for all */ + 8706 => 'part', /* partial differential */ + 8707 => 'exist', /* there exists */ + 8709 => 'empty', /* empty set */ + 8711 => 'nabla', /* nabla */ + 8712 => 'isin', /* element of */ + 8713 => 'notin', /* not an element of */ + 8715 => 'ni', /* contains as member */ + 8719 => 'prod', /* n-ary product */ + 8721 => 'sum', /* n-ary summation */ + 8722 => 'minus', /* minus sign */ + 8727 => 'lowast', /* asterisk operator */ + 8730 => 'radic', /* square root */ + 8733 => 'prop', /* proportional to */ + 8734 => 'infin', /* infinity */ + 8736 => 'ang', /* angle */ + 8743 => 'and', /* logical and */ + 8744 => 'or', /* logical or */ + 8745 => 'cap', /* intersection */ + 8746 => 'cup', /* union */ + 8747 => 'int', /* integral */ + 8756 => 'there4', /* therefore */ + 8764 => 'sim', /* tilde operator */ + 8773 => 'cong', /* congruent to */ + 8776 => 'asymp', /* almost equal to */ + 8800 => 'ne', /* not equal to */ + 8801 => 'equiv', /* identical toXCOMMAX equivalent to */ + 8804 => 'le', /* less-than or equal to */ + 8805 => 'ge', /* greater-than or equal to */ + 8834 => 'sub', /* subset of */ + 8835 => 'sup', /* superset of */ + 8836 => 'nsub', /* not a subset of */ + 8838 => 'sube', /* subset of or equal to */ + 8839 => 'supe', /* superset of or equal to */ + 8853 => 'oplus', /* circled plus */ + 8855 => 'otimes', /* circled times */ + 8869 => 'perp', /* up tack */ + 8901 => 'sdot', /* dot operator */ + 8968 => 'lceil', /* left ceiling */ + 8969 => 'rceil', /* right ceiling */ + 8970 => 'lfloor', /* left floor */ + 8971 => 'rfloor', /* right floor */ + 9001 => 'lang', /* left-pointing angle bracket */ + 9002 => 'rang', /* right-pointing angle bracket */ + 9674 => 'loz', /* lozenge */ + 9824 => 'spades', /* black spade suit */ + 9827 => 'clubs', /* black 
club suit */ + 9829 => 'hearts', /* black heart suit */ + 9830 => 'diams', /* black diamond suit */ + ); + + $chr = $matches[0]; + $ord = ord($chr); + + /** + * The following replaces characters undefined in HTML with the + * hex entity for the Unicode replacement character. + */ + if (($ord <= 0x1f && $chr != "\t" && $chr != "\n" && $chr != "\r") || ($ord >= 0x7f && $ord <= 0x9f)) { + return '�'; + } + + /** + * Check if the current character to escape has a name entity we should + * replace it with while grabbing the hex value of the character. + */ + if (strlen($chr) == 1) { + $hex = strtoupper(substr('00'.bin2hex($chr), -2)); + } else { + $chr = twig_convert_encoding($chr, 'UTF-16BE', 'UTF-8'); + $hex = strtoupper(substr('0000'.bin2hex($chr), -4)); + } + + $int = hexdec($hex); + if (array_key_exists($int, $entityMap)) { + return sprintf('&%s;', $entityMap[$int]); + } + + /** + * Per OWASP recommendations, we'll use hex entities for any other + * characters where a named entity does not exist. + */ + return sprintf('&#x%s;', $hex); } // add multibyte extensions if possible diff --git a/test/Twig/Tests/EnvironmentTest.php b/test/Twig/Tests/EnvironmentTest.php index bb31690..76731ea 100644 --- a/test/Twig/Tests/EnvironmentTest.php +++ b/test/Twig/Tests/EnvironmentTest.php @@ -25,7 +25,7 @@ class Twig_Tests_EnvironmentTest extends PHPUnit_Framework_TestCase )); $this->assertEquals('foo<br/ > foo<br/ >', $twig->render('html', array('foo' => 'foo
<br/ >'))); - $this->assertEquals('foo\x3cbr\x2f \x3e foo\x3cbr\x2f \x3e', $twig->render('js', array('bar' => 'foo<br/ >
'))); + $this->assertEquals('foo\x3Cbr\x2F\x20\x3E foo\x3Cbr\x2F\x20\x3E', $twig->render('js', array('bar' => 'foo<br/ >
'))); } public function escapingStrategyCallback($filename) diff --git a/test/Twig/Tests/Fixtures/filters/force_escape.test b/test/Twig/Tests/Fixtures/filters/force_escape.test index 3690e71..85a9b71 100644 --- a/test/Twig/Tests/Fixtures/filters/force_escape.test +++ b/test/Twig/Tests/Fixtures/filters/force_escape.test @@ -14,5 +14,5 @@ return array() --EXPECT-- foo<br /> - foo\x3cbr \x2f\x3e\x0a +\x20\x20\x20\x20foo\x3Cbr\x20\x2F\x3E\x0A foo<br />
diff --git a/test/Twig/Tests/Fixtures/tags/autoescape/functions.test b/test/Twig/Tests/Fixtures/tags/autoescape/functions.test index 864655c..ce7ea78 100644 --- a/test/Twig/Tests/Fixtures/tags/autoescape/functions.test +++ b/test/Twig/Tests/Fixtures/tags/autoescape/functions.test @@ -80,4 +80,4 @@ unsafe_br()|escape autoescape js safe_br -\x3cbr \x2f\x3e +\x3Cbr\x20\x2F\x3E diff --git a/test/Twig/Tests/Fixtures/tags/autoescape/strategy.test b/test/Twig/Tests/Fixtures/tags/autoescape/strategy.test index 101d5af..cf8ccee 100644 --- a/test/Twig/Tests/Fixtures/tags/autoescape/strategy.test +++ b/test/Twig/Tests/Fixtures/tags/autoescape/strategy.test @@ -11,7 +11,7 @@ --DATA-- return array('var' => '<br />
"') --EXPECT-- -\x3cbr \x2f\x3e\x22 +\x3Cbr\x20\x2F\x3E\x22 <br />" -\x3cbr \x2f\x3e\x22 +\x3Cbr\x20\x2F\x3E\x22 <br />" diff --git a/test/Twig/Tests/Fixtures/tags/autoescape/type.test b/test/Twig/Tests/Fixtures/tags/autoescape/type.test index 10fd63f..4f41520 100644 --- a/test/Twig/Tests/Fixtures/tags/autoescape/type.test +++ b/test/Twig/Tests/Fixtures/tags/autoescape/type.test @@ -44,15 +44,15 @@ return array('msg' => "<>\n'\"") 1. autoescape 'html' |escape('js') - + 2. autoescape 'html' |escape('js') - + 3. autoescape 'js' |escape('js') - + 4. no escape @@ -61,9 +61,9 @@ return array('msg' => "<>\n'\"") 5. |escape('js')|escape('html') - + 6. autoescape 'html' |escape('js')|escape('html') - + diff --git a/test/Twig/Tests/escapingTest.php b/test/Twig/Tests/escapingTest.php new file mode 100644 index 0000000..608ef22 --- /dev/null +++ b/test/Twig/Tests/escapingTest.php @@ -0,0 +1,324 @@ + ''', + '"' => '"', + '<' => '<', + '>' => '>', + '&' => '&' + ); + + protected $htmlAttrSpecialChars = array( + '\'' => ''', + '"' => '"', + '<' => '<', + '>' => '>', + '&' => '&', + /* Characters beyond ASCII value 255 to unicode escape */ + 'Ā' => 'Ā', + /* Immune chars excluded */ + ',' => ',', + '.' 
=> '.', + '-' => '-', + '_' => '_', + /* Basic alnums exluded */ + 'a' => 'a', + 'A' => 'A', + 'z' => 'z', + 'Z' => 'Z', + '0' => '0', + '9' => '9', + /* Basic control characters and null */ + "\r" => ' ', + "\n" => ' ', + "\t" => ' ', + "\0" => '�', // should use Unicode replacement char + /* Encode chars as named entities where possible */ + '˜' => '˜', + 'ÿ' => 'ÿ', + '¾' => '¾', + '♦' => '♦', + /* Encode spaces for quoteless attribute protection */ + ' ' => ' ', + ); + + protected $jsSpecialChars = array( + /* HTML special chars - escape without exception to hex */ + '<' => '\\x3C', + '>' => '\\x3E', + '\'' => '\\x27', + '"' => '\\x22', + '&' => '\\x26', + /* Characters beyond ASCII value 255 to unicode escape */ + 'Ā' => '\\u0100', + /* Immune chars excluded */ + ',' => ',', + '.' => '.', + '_' => '_', + /* Basic alnums exluded */ + 'a' => 'a', + 'A' => 'A', + 'z' => 'z', + 'Z' => 'Z', + '0' => '0', + '9' => '9', + /* Basic control characters and null */ + "\r" => '\\x0D', + "\n" => '\\x0A', + "\t" => '\\x09', + "\0" => '\\x00', + /* Encode spaces for quoteless attribute protection */ + ' ' => '\\x20', + ); + + protected $urlSpecialChars = array( + /* HTML special chars - escape without exception to percent encoding */ + '<' => '%3C', + '>' => '%3E', + '\'' => '%27', + '"' => '%22', + '&' => '%26', + /* Characters beyond ASCII value 255 to hex sequence */ + 'Ā' => '%C4%80', + /* Punctuation and unreserved check */ + ',' => '%2C', + '.' => '.', + '_' => '_', + '-' => '-', + ':' => '%3A', + ';' => '%3B', + '!' 
=> '%21', + /* Basic alnums excluded */ + 'a' => 'a', + 'A' => 'A', + 'z' => 'z', + 'Z' => 'Z', + '0' => '0', + '9' => '9', + /* Basic control characters and null */ + "\r" => '%0D', + "\n" => '%0A', + "\t" => '%09', + "\0" => '%00', + /* PHP quirks from the past */ + ' ' => '%20', + '~' => '~', + '+' => '%2B', + ); + + protected $cssSpecialChars = array( + /* HTML special chars - escape without exception to hex */ + '<' => '\\3C ', + '>' => '\\3E ', + '\'' => '\\27 ', + '"' => '\\22 ', + '&' => '\\26 ', + /* Characters beyond ASCII value 255 to unicode escape */ + 'Ā' => '\\100 ', + /* Immune chars excluded */ + ',' => '\\2C ', + '.' => '\\2E ', + '_' => '\\5F ', + /* Basic alnums exluded */ + 'a' => 'a', + 'A' => 'A', + 'z' => 'z', + 'Z' => 'Z', + '0' => '0', + '9' => '9', + /* Basic control characters and null */ + "\r" => '\\D ', + "\n" => '\\A ', + "\t" => '\\9 ', + "\0" => '\\0 ', + /* Encode spaces for quoteless attribute protection */ + ' ' => '\\20 ', + ); + + protected $env; + + public function setUp() + { + $this->env = new Twig_Environment(); + } + + public function testHtmlEscapingConvertsSpecialChars() + { + foreach ($this->htmlSpecialChars as $key => $value) { + $this->assertEquals($value, twig_escape_filter($this->env, $key, 'html'), 'Failed to escape: '.$key); + } + } + + public function testHtmlAttributeEscapingConvertsSpecialChars() + { + foreach ($this->htmlAttrSpecialChars as $key => $value) { + $this->assertEquals($value, twig_escape_filter($this->env, $key, 'html_attr'), 'Failed to escape: '.$key); + } + } + + public function testJavascriptEscapingConvertsSpecialChars() + { + foreach ($this->jsSpecialChars as $key => $value) { + $this->assertEquals($value, twig_escape_filter($this->env, $key, 'js'), 'Failed to escape: '.$key); + } + } + + public function testJavascriptEscapingReturnsStringIfZeroLength() + { + $this->assertEquals('', twig_escape_filter($this->env, '', 'js')); + } + + public function 
testJavascriptEscapingReturnsStringIfContainsOnlyDigits() + { + $this->assertEquals('123', twig_escape_filter($this->env, '123', 'js')); + } + + public function testCssEscapingConvertsSpecialChars() + { + foreach ($this->cssSpecialChars as $key => $value) { + $this->assertEquals($value, twig_escape_filter($this->env, $key, 'css'), 'Failed to escape: '.$key); + } + } + + public function testCssEscapingReturnsStringIfZeroLength() + { + $this->assertEquals('', twig_escape_filter($this->env, '', 'css')); + } + + public function testCssEscapingReturnsStringIfContainsOnlyDigits() + { + $this->assertEquals('123', twig_escape_filter($this->env, '123', 'css')); + } + + public function testUrlEscapingConvertsSpecialChars() + { + foreach ($this->urlSpecialChars as $key => $value) { + $this->assertEquals($value, twig_escape_filter($this->env, $key, 'url'), 'Failed to escape: '.$key); + } + } + + /** + * Range tests to confirm escaped range of characters is within OWASP recommendation + */ + + /** + * Only testing the first few 2 ranges on this prot. function as that's all these + * other range tests require + */ + public function testUnicodeCodepointConversionToUtf8() + { + $expected = " ~ޙ"; + $codepoints = array(0x20, 0x7e, 0x799); + $result = ''; + foreach ($codepoints as $value) { + $result .= $this->codepointToUtf8($value); + } + $this->assertEquals($expected, $result); + } + + /** + * Convert a Unicode Codepoint to a literal UTF-8 character. + * + * @param int Unicode codepoint in hex notation + * @return string UTF-8 literal string + */ + protected function codepointToUtf8($codepoint) + { + if ($codepoint < 0x80) { + return chr($codepoint); + } + if ($codepoint < 0x800) { + return chr($codepoint >> 6 & 0x3f | 0xc0) + . chr($codepoint & 0x3f | 0x80); + } + if ($codepoint < 0x10000) { + return chr($codepoint >> 12 & 0x0f | 0xe0) + . chr($codepoint >> 6 & 0x3f | 0x80) + . 
chr($codepoint & 0x3f | 0x80); + } + if ($codepoint < 0x110000) { + return chr($codepoint >> 18 & 0x07 | 0xf0) + . chr($codepoint >> 12 & 0x3f | 0x80) + . chr($codepoint >> 6 & 0x3f | 0x80) + . chr($codepoint & 0x3f | 0x80); + } + throw new \Exception('Codepoint requested outside of Unicode range'); + } + + public function testJavascriptEscapingEscapesOwaspRecommendedRanges() + { + $immune = array(',', '.', '_'); // Exceptions to escaping ranges + for ($chr=0; $chr < 0xFF; $chr++) { + if ($chr >= 0x30 && $chr <= 0x39 + || $chr >= 0x41 && $chr <= 0x5A + || $chr >= 0x61 && $chr <= 0x7A) { + $literal = $this->codepointToUtf8($chr); + $this->assertEquals($literal, twig_escape_filter($this->env, $literal, 'js')); + } else { + $literal = $this->codepointToUtf8($chr); + if (in_array($literal, $immune)) { + $this->assertEquals($literal, twig_escape_filter($this->env, $literal, 'js')); + } else { + $this->assertNotEquals( + $literal, + twig_escape_filter($this->env, $literal, 'js'), + "$literal should be escaped!"); + } + } + } + } + + public function testHtmlAttributeEscapingEscapesOwaspRecommendedRanges() + { + $immune = array(',', '.', '-', '_'); // Exceptions to escaping ranges + for ($chr=0; $chr < 0xFF; $chr++) { + if ($chr >= 0x30 && $chr <= 0x39 + || $chr >= 0x41 && $chr <= 0x5A + || $chr >= 0x61 && $chr <= 0x7A) { + $literal = $this->codepointToUtf8($chr); + $this->assertEquals($literal, twig_escape_filter($this->env, $literal, 'html_attr')); + } else { + $literal = $this->codepointToUtf8($chr); + if (in_array($literal, $immune)) { + $this->assertEquals($literal, twig_escape_filter($this->env, $literal, 'html_attr')); + } else { + $this->assertNotEquals( + $literal, + twig_escape_filter($this->env, $literal, 'html_attr'), + "$literal should be escaped!"); + } + } + } + } + + public function testCssEscapingEscapesOwaspRecommendedRanges() + { + $immune = array(); // CSS has no exceptions to escaping ranges + for ($chr=0; $chr < 0xFF; $chr++) { + if ($chr >= 0x30 && 
$chr <= 0x39 + || $chr >= 0x41 && $chr <= 0x5A + || $chr >= 0x61 && $chr <= 0x7A) { + $literal = $this->codepointToUtf8($chr); + $this->assertEquals($literal, twig_escape_filter($this->env, $literal, 'css')); + } else { + $literal = $this->codepointToUtf8($chr); + $this->assertNotEquals( + $literal, + twig_escape_filter($this->env, $literal, 'css'), + "$literal should be escaped!"); + } + } + } +} -- 1.7.2.5