From 731139b512db04dcb52db4ef3c4c1ad51007e2c7 Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Tue, 11 Dec 2012 00:25:55 +0100 Subject: [PATCH] Parse identifiers according to spec If a unicode escape sequence decodes to a char that is not allowed in the identifier we need to throw a parse error. So decode before checking whether the char is valid. Also fix the set of accepted unicode characters for the start and in the middle of an identifier to what the spec demands. Change-Id: I35075b0587ef4e4edb745750cec6d5c764631c0d Reviewed-by: Erik Verbruggen --- src/qml/qml/parser/qqmljslexer.cpp | 125 +++++++++++++++++++++++++++--------- 1 files changed, 94 insertions(+), 31 deletions(-) diff --git a/src/qml/qml/parser/qqmljslexer.cpp b/src/qml/qml/parser/qqmljslexer.cpp index 0142e94..1270fdb 100644 --- a/src/qml/qml/parser/qqmljslexer.cpp +++ b/src/qml/qml/parser/qqmljslexer.cpp @@ -275,6 +275,59 @@ QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok) return QChar(); } +static inline bool isIdentifierStart(QChar ch) +{ + // fast path for ascii + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || + (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || + ch == '$' || ch == '_') + return true; + + switch (ch.category()) { + case QChar::Number_Letter: + case QChar::Letter_Uppercase: + case QChar::Letter_Lowercase: + case QChar::Letter_Titlecase: + case QChar::Letter_Modifier: + case QChar::Letter_Other: + return true; + default: + break; + } + return false; +} + +static bool isIdentifierPart(QChar ch) +{ + // fast path for ascii + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || + (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || + (ch.unicode() >= '0' && ch.unicode() <= '9') || + ch == '$' || ch == '_' || + ch.unicode() == 0x200c /* ZWNJ */ || ch.unicode() == 0x200d /* ZWJ */) + return true; + + switch (ch.category()) { + case QChar::Mark_NonSpacing: + case QChar::Mark_SpacingCombining: + + case QChar::Number_DecimalDigit: + case QChar::Number_Letter: + + case QChar::Letter_Uppercase: + case QChar::Letter_Lowercase: + case QChar::Letter_Titlecase: + case QChar::Letter_Modifier: + case QChar::Letter_Other: + + case QChar::Punctuation_Connector: + return true; + default: + break; + } + return false; +} + int Lexer::scanToken() { if (_stackToken != -1) { @@ -697,28 +750,28 @@ again: case '9': return scanNumber(ch); - default: - if (ch.isLetter() || ch == QLatin1Char('$') || ch == QLatin1Char('_') || (ch == QLatin1Char('\\') && _char == QLatin1Char('u'))) { - bool identifierWithEscapeChars = false; - if (ch == QLatin1Char('\\')) { - identifierWithEscapeChars = true; + default: { + QChar c = ch; + bool identifierWithEscapeChars = false; + if (c == QLatin1Char('\\') && _char == QLatin1Char('u')) { + identifierWithEscapeChars = true; + bool ok = false; + c = decodeUnicodeEscapeCharacter(&ok); + if (! ok) { + _errorCode = IllegalUnicodeEscapeSequence; + _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); + return T_ERROR; + } + } + if (isIdentifierStart(c)) { + if (identifierWithEscapeChars) { _tokenText.resize(0); - bool ok = false; - _tokenText += decodeUnicodeEscapeCharacter(&ok); + _tokenText += c; _validTokenText = true; - if (! ok) { - _errorCode = IllegalUnicodeEscapeSequence; - _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); - return T_ERROR; - } } while (true) { - if (_char.isLetterOrNumber() || _char == QLatin1Char('$') || _char == QLatin1Char('_')) { - if (identifierWithEscapeChars) - _tokenText += _char; - - scanChar(); - } else if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) { + c = _char; + if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) { if (! identifierWithEscapeChars) { identifierWithEscapeChars = true; _tokenText.resize(0); @@ -728,31 +781,41 @@ again: scanChar(); // skip '\\' bool ok = false; - _tokenText += decodeUnicodeEscapeCharacter(&ok); + c = decodeUnicodeEscapeCharacter(&ok); if (! ok) { _errorCode = IllegalUnicodeEscapeSequence; _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); return T_ERROR; } - } else { - _tokenLength = _codePtr - _tokenStartPtr - 1; + if (isIdentifierPart(c)) + _tokenText += c; + continue; + } else if (isIdentifierPart(c)) { + if (identifierWithEscapeChars) + _tokenText += c; + + scanChar(); + continue; + } - int kind = T_IDENTIFIER; + _tokenLength = _codePtr - _tokenStartPtr - 1; - if (! identifierWithEscapeChars) - kind = classify(_tokenStartPtr, _tokenLength, _qmlMode); + int kind = T_IDENTIFIER; - if (_engine) { - if (kind == T_IDENTIFIER && identifierWithEscapeChars) - _tokenSpell = _engine->newStringRef(_tokenText); - else - _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength); - } + if (! identifierWithEscapeChars) + kind = classify(_tokenStartPtr, _tokenLength, _qmlMode); - return kind; + if (_engine) { + if (kind == T_IDENTIFIER && identifierWithEscapeChars) + _tokenSpell = _engine->newStringRef(_tokenText); + else + _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength); } + + return kind; } } + } break; } -- 1.7.2.5