/*
* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
- * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
* Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
* Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
* Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
#include "config.h"
#include "Lexer.h"
-#include "JSFunction.h"
+#include "JSFunctionInlines.h"
+#include "BuiltinNames.h"
#include "JSGlobalObjectFunctions.h"
#include "Identifier.h"
#include "NodeInfo.h"
#include "Nodes.h"
+#include "JSCInlines.h"
#include <wtf/dtoa.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <wtf/Assertions.h>
-using namespace WTF;
-using namespace Unicode;
-
#include "KeywordLookup.h"
#include "Lexer.lut.h"
#include "Parser.h"
namespace JSC {
-Keywords::Keywords(JSGlobalData* globalData)
- : m_globalData(globalData)
+Keywords::Keywords(VM& vm)
+ : m_vm(vm)
, m_keywordTable(JSC::mainTable)
{
}
-// Other types (only one so far)
+// Other types
CharacterWhiteSpace,
+ CharacterPrivateIdentifierStart
};
// 256 Latin-1 codes
/* 61 - = */ CharacterEqual,
/* 62 - > */ CharacterGreater,
/* 63 - ? */ CharacterQuestion,
-/* 64 - @ */ CharacterInvalid,
+/* 64 - @ */ CharacterPrivateIdentifierStart,
/* 65 - A */ CharacterIdentifierStart,
/* 66 - B */ CharacterIdentifierStart,
/* 67 - C */ CharacterIdentifierStart,
/* 255 - Ll category */ CharacterIdentifierStart
};
+// This table provides the character that results from \X, where X is the ASCII code (0-127, starting at Null)
+// used directly as the index into the table. A table value of 0 means that more processing needs to be done.
+static const LChar singleCharacterEscapeValuesForASCII[128] = {
+/* 0 - Null */ 0,
+/* 1 - Start of Heading */ 0,
+/* 2 - Start of Text */ 0,
+/* 3 - End of Text */ 0,
+/* 4 - End of Transm. */ 0,
+/* 5 - Enquiry */ 0,
+/* 6 - Acknowledgment */ 0,
+/* 7 - Bell */ 0,
+/* 8 - Back Space */ 0,
+/* 9 - Horizontal Tab */ 0,
+/* 10 - Line Feed */ 0,
+/* 11 - Vertical Tab */ 0,
+/* 12 - Form Feed */ 0,
+/* 13 - Carriage Return */ 0,
+/* 14 - Shift Out */ 0,
+/* 15 - Shift In */ 0,
+/* 16 - Data Line Escape */ 0,
+/* 17 - Device Control 1 */ 0,
+/* 18 - Device Control 2 */ 0,
+/* 19 - Device Control 3 */ 0,
+/* 20 - Device Control 4 */ 0,
+/* 21 - Negative Ack. */ 0,
+/* 22 - Synchronous Idle */ 0,
+/* 23 - End of Transmit */ 0,
+/* 24 - Cancel */ 0,
+/* 25 - End of Medium */ 0,
+/* 26 - Substitute */ 0,
+/* 27 - Escape */ 0,
+/* 28 - File Separator */ 0,
+/* 29 - Group Separator */ 0,
+/* 30 - Record Separator */ 0,
+/* 31 - Unit Separator */ 0,
+/* 32 - Space */ ' ',
+/* 33 - ! */ '!',
+/* 34 - " */ '"',
+/* 35 - # */ '#',
+/* 36 - $ */ '$',
+/* 37 - % */ '%',
+/* 38 - & */ '&',
+/* 39 - ' */ '\'',
+/* 40 - ( */ '(',
+/* 41 - ) */ ')',
+/* 42 - * */ '*',
+/* 43 - + */ '+',
+/* 44 - , */ ',',
+/* 45 - - */ '-',
+/* 46 - . */ '.',
+/* 47 - / */ '/',
+/* 48 - 0 */ 0,
+/* 49 - 1 */ 0,
+/* 50 - 2 */ 0,
+/* 51 - 3 */ 0,
+/* 52 - 4 */ 0,
+/* 53 - 5 */ 0,
+/* 54 - 6 */ 0,
+/* 55 - 7 */ 0,
+/* 56 - 8 */ 0,
+/* 57 - 9 */ 0,
+/* 58 - : */ ':',
+/* 59 - ; */ ';',
+/* 60 - < */ '<',
+/* 61 - = */ '=',
+/* 62 - > */ '>',
+/* 63 - ? */ '?',
+/* 64 - @ */ '@',
+/* 65 - A */ 'A',
+/* 66 - B */ 'B',
+/* 67 - C */ 'C',
+/* 68 - D */ 'D',
+/* 69 - E */ 'E',
+/* 70 - F */ 'F',
+/* 71 - G */ 'G',
+/* 72 - H */ 'H',
+/* 73 - I */ 'I',
+/* 74 - J */ 'J',
+/* 75 - K */ 'K',
+/* 76 - L */ 'L',
+/* 77 - M */ 'M',
+/* 78 - N */ 'N',
+/* 79 - O */ 'O',
+/* 80 - P */ 'P',
+/* 81 - Q */ 'Q',
+/* 82 - R */ 'R',
+/* 83 - S */ 'S',
+/* 84 - T */ 'T',
+/* 85 - U */ 'U',
+/* 86 - V */ 'V',
+/* 87 - W */ 'W',
+/* 88 - X */ 'X',
+/* 89 - Y */ 'Y',
+/* 90 - Z */ 'Z',
+/* 91 - [ */ '[',
+/* 92 - \ */ '\\',
+/* 93 - ] */ ']',
+/* 94 - ^ */ '^',
+/* 95 - _ */ '_',
+/* 96 - ` */ '`',
+/* 97 - a */ 'a',
+/* 98 - b */ 0x08,
+/* 99 - c */ 'c',
+/* 100 - d */ 'd',
+/* 101 - e */ 'e',
+/* 102 - f */ 0x0C,
+/* 103 - g */ 'g',
+/* 104 - h */ 'h',
+/* 105 - i */ 'i',
+/* 106 - j */ 'j',
+/* 107 - k */ 'k',
+/* 108 - l */ 'l',
+/* 109 - m */ 'm',
+/* 110 - n */ 0x0A,
+/* 111 - o */ 'o',
+/* 112 - p */ 'p',
+/* 113 - q */ 'q',
+/* 114 - r */ 0x0D,
+/* 115 - s */ 's',
+/* 116 - t */ 0x09,
+/* 117 - u */ 0,
+/* 118 - v */ 0x0B,
+/* 119 - w */ 'w',
+/* 120 - x */ 0,
+/* 121 - y */ 'y',
+/* 122 - z */ 'z',
+/* 123 - { */ '{',
+/* 124 - | */ '|',
+/* 125 - } */ '}',
+/* 126 - ~ */ '~',
+/* 127 - Delete */ 0
+};
+
template <typename T>
-Lexer<T>::Lexer(JSGlobalData* globalData)
+Lexer<T>::Lexer(VM* vm, JSParserStrictness strictness)
: m_isReparsing(false)
- , m_globalData(globalData)
+ , m_vm(vm)
+ , m_parsingBuiltinFunction(strictness == JSParseBuiltin)
{
}
}
template <typename T>
-UString Lexer<T>::invalidCharacterMessage() const
+String Lexer<T>::invalidCharacterMessage() const
{
switch (m_current) {
case 0:
}
template <typename T>
-ALWAYS_INLINE const T* Lexer<T>::currentCharacter() const
+ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
{
ASSERT(m_code <= m_codeEnd);
return m_code;
m_lineNumber = source.firstLine();
m_lastToken = -1;
- const StringImpl* sourceString = source.provider()->data();
+ const String& sourceString = source.provider()->source();
- if (sourceString)
- setCodeStart(sourceString);
+ if (!sourceString.isNull())
+ setCodeStart(sourceString.impl());
else
m_codeStart = 0;
m_source = &source;
- m_code = m_codeStart + source.startOffset();
+ m_sourceOffset = source.startOffset();
+ m_codeStartPlusOffset = m_codeStart + source.startOffset();
+ m_code = m_codeStartPlusOffset;
m_codeEnd = m_codeStart + source.endOffset();
m_error = false;
m_atLineStart = true;
- m_lexErrorMessage = UString();
+ m_lineStart = m_code;
+ m_lexErrorMessage = String();
m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
{
m_code += shiftAmount;
+ ASSERT(currentOffset() >= currentLineStartOffset());
m_current = *m_code;
}
}
template <typename T>
-int Lexer<T>::parseFourDigitUnicodeHex()
+typename Lexer<T>::UnicodeHexValue Lexer<T>::parseFourDigitUnicodeHex() // Decodes the four characters starting at m_current as hex digits; on success they are consumed via four shift() calls.
{
T char1 = peek(1);
T char2 = peek(2);
T char3 = peek(3);
if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
- return -1;
+ return UnicodeHexValue((m_code + 4) >= m_codeEnd ? UnicodeHexValue::IncompleteHex : UnicodeHexValue::InvalidHex); // Failure path, no shift() is performed: IncompleteHex when m_code + 4 reaches or passes m_codeEnd, InvalidHex otherwise.
int result = convertUnicode(m_current, char1, char2, char3);
shift();
shift();
shift();
shift();
- return result;
+ return UnicodeHexValue(result); // Success path: wraps the value produced by convertUnicode().
}
template <typename T>
{
ASSERT(isLineTerminator(m_current));
+ m_positionBeforeLastNewline = currentPosition();
T prev = m_current;
shift();
return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
}
-static NEVER_INLINE bool isNonLatin1IdentStart(int c)
+static NEVER_INLINE bool isNonLatin1IdentStart(UChar c) // Identifier-start test; presumably only reached for characters outside Latin-1 - confirm against the caller.
{
- return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
+ return U_GET_GC_MASK(c) & U_GC_L_MASK; // True when c's general-category mask intersects the letter mask (replaces the explicit Letter_* list above).
}
static ALWAYS_INLINE bool isLatin1(LChar)
static NEVER_INLINE bool isNonLatin1IdentPart(int c)
{
- return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
- | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)) || c == 0x200C || c == 0x200D;
+ return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D; // Identifier-part test: letter, non-spacing mark, spacing-combining mark, decimal digit or connector punctuation category, plus the two explicit code points U+200C and U+200D.
}
static ALWAYS_INLINE bool isIdentPart(LChar c)
return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
}
-static inline int singleEscape(int c)
+static inline LChar singleEscape(int c) // Returns the escape value for character code c via singleCharacterEscapeValuesForASCII; callers test the result for non-zero, so 0 means "not a simple escape".
{
- switch (c) {
- case 'b':
- return 0x08;
- case 't':
- return 0x09;
- case 'n':
- return 0x0A;
- case 'v':
- return 0x0B;
- case 'f':
- return 0x0C;
- case 'r':
- return 0x0D;
- case '\\':
- return '\\';
- case '\'':
- return '\'';
- case '"':
- return '"';
- default:
- return 0;
+ if (c < 128) { // NOTE(review): c is a signed int, so a negative value would pass this test and is only caught by the ASSERT below - confirm callers never pass one.
+ ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
+ return singleCharacterEscapeValuesForASCII[c];
}
+ return 0; // Codes of 128 and above have no table entry.
}
template <typename T>
ASSERT(c <= static_cast<int>(USHRT_MAX));
m_buffer16.append(static_cast<UChar>(c));
}
-
+
+#if !ASSERT_DISABLED
+bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
+{
+ if (!ident)
+ return true;
+ /* Just block any use of suspicious identifiers. This is intended to
+ * be used as a safety net while implementing builtins.
+ *
+ * Contract: a null identifier is treated as safe (handled above). The
+ * identifier is rejected (false) when it equals the value of
+ * builtinNames().callPublicName() or builtinNames().applyPublicName(),
+ * or the eval / Function entries of vm.propertyNames. Every other
+ * identifier is accepted (true).
+ */
+ if (*ident == vm.propertyNames->builtinNames().callPublicName())
+ return false;
+ if (*ident == vm.propertyNames->builtinNames().applyPublicName())
+ return false;
+ if (*ident == vm.propertyNames->eval)
+ return false;
+ if (*ident == vm.propertyNames->Function)
+ return false;
+ return true;
+}
+#endif
+
template <>
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
}
}
-
- const LChar* identifierStart = currentCharacter();
+
+ bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
+ if (isPrivateName)
+ shift();
+
+ const LChar* identifierStart = currentSourcePtr();
+ unsigned identifierLineStart = currentLineStartOffset();
while (isIdentPart(m_current))
shift();
if (UNLIKELY(m_current == '\\')) {
- setOffsetFromCharOffset(identifierStart);
+ setOffsetFromSourcePtr(identifierStart, identifierLineStart);
return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
}
const Identifier* ident = 0;
- if (shouldCreateIdentifier) {
- int identifierLength = currentCharacter() - identifierStart;
+ if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
+ int identifierLength = currentSourcePtr() - identifierStart;
ident = makeIdentifier(identifierStart, identifierLength);
-
+ if (m_parsingBuiltinFunction) {
+ if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
+ m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
+ return ERRORTOK;
+ }
+ if (isPrivateName)
+ ident = m_vm->propertyNames->getPrivateName(*ident);
+ else if (*ident == m_vm->propertyNames->undefinedKeyword)
+ tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
+ if (!ident)
+ return INVALID_PRIVATE_NAME_ERRORTOK;
+ }
tokenData->ident = ident;
} else
tokenData->ident = 0;
- if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
+ if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
ASSERT(shouldCreateIdentifier);
if (remaining < maxTokenLength) {
- const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
+ const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
ASSERT((remaining < maxTokenLength) || !entry);
if (!entry)
return IDENT;
return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
}
}
+
+ bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
+ if (isPrivateName)
+ shift();
- const UChar* identifierStart = currentCharacter();
+ const UChar* identifierStart = currentSourcePtr();
+ int identifierLineStart = currentLineStartOffset();
UChar orAllChars = 0;
}
if (UNLIKELY(m_current == '\\')) {
- setOffsetFromCharOffset(identifierStart);
+ ASSERT(!isPrivateName);
+ setOffsetFromSourcePtr(identifierStart, identifierLineStart);
return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
}
const Identifier* ident = 0;
- if (shouldCreateIdentifier) {
- int identifierLength = currentCharacter() - identifierStart;
+ if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
+ int identifierLength = currentSourcePtr() - identifierStart;
if (isAll8Bit)
ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
else
ident = makeIdentifier(identifierStart, identifierLength);
-
+ if (m_parsingBuiltinFunction) {
+ if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
+ m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
+ return ERRORTOK;
+ }
+ if (isPrivateName)
+ ident = m_vm->propertyNames->getPrivateName(*ident);
+ else if (*ident == m_vm->propertyNames->undefinedKeyword)
+ tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
+ if (!ident)
+ return INVALID_PRIVATE_NAME_ERRORTOK;
+ }
tokenData->ident = ident;
} else
tokenData->ident = 0;
- if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
+ if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
ASSERT(shouldCreateIdentifier);
if (remaining < maxTokenLength) {
- const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
+ const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
ASSERT((remaining < maxTokenLength) || !entry);
if (!entry)
return IDENT;
template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
const ptrdiff_t remaining = m_codeEnd - m_code;
- const T* identifierStart = currentCharacter();
+ const T* identifierStart = currentSourcePtr();
bool bufferRequired = false;
while (true) {
// \uXXXX unicode characters.
bufferRequired = true;
- if (identifierStart != currentCharacter())
- m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
+ if (identifierStart != currentSourcePtr())
+ m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
shift();
if (UNLIKELY(m_current != 'u'))
- return ERRORTOK;
+ return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
shift();
- int character = parseFourDigitUnicodeHex();
- if (UNLIKELY(character == -1))
- return ERRORTOK;
- UChar ucharacter = static_cast<UChar>(character);
+ UnicodeHexValue character = parseFourDigitUnicodeHex();
+ if (UNLIKELY(!character.isValid()))
+ return character.valueType() == UnicodeHexValue::IncompleteHex ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
+ UChar ucharacter = static_cast<UChar>(character.value());
if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
- return ERRORTOK;
+ return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
if (shouldCreateIdentifier)
record16(ucharacter);
- identifierStart = currentCharacter();
+ identifierStart = currentSourcePtr();
}
int identifierLength;
const Identifier* ident = 0;
if (shouldCreateIdentifier) {
if (!bufferRequired) {
- identifierLength = currentCharacter() - identifierStart;
+ identifierLength = currentSourcePtr() - identifierStart;
ident = makeIdentifier(identifierStart, identifierLength);
} else {
- if (identifierStart != currentCharacter())
- m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
+ if (identifierStart != currentSourcePtr())
+ m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
}
ASSERT(shouldCreateIdentifier);
// Keywords must not be recognized if there was an \uXXXX in the identifier.
if (remaining < maxTokenLength) {
- const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
+ const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
ASSERT((remaining < maxTokenLength) || !entry);
if (!entry)
return IDENT;
}
template <typename T>
-template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
+template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
{
int startingOffset = currentOffset();
+ int startingLineStartOffset = currentLineStartOffset();
int startingLineNumber = lineNumber();
T stringQuoteCharacter = m_current;
shift();
- const T* stringStart = currentCharacter();
+ const T* stringStart = currentSourcePtr();
while (m_current != stringQuoteCharacter) {
if (UNLIKELY(m_current == '\\')) {
- if (stringStart != currentCharacter() && shouldBuildStrings)
- append8(stringStart, currentCharacter() - stringStart);
+ if (stringStart != currentSourcePtr() && shouldBuildStrings)
+ append8(stringStart, currentSourcePtr() - stringStart);
shift();
- int escape = singleEscape(m_current);
+ LChar escape = singleEscape(m_current);
// Most common escape sequences first
if (escape) {
shift();
if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
- return false;
+ return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
}
T prev = m_current;
shift();
record8(convertHex(prev, m_current));
shift();
} else {
- setOffset(startingOffset);
+ setOffset(startingOffset, startingLineStartOffset);
setLineNumber(startingLineNumber);
m_buffer8.resize(0);
return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
}
- stringStart = currentCharacter();
+ stringStart = currentSourcePtr();
continue;
}
if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
- setOffset(startingOffset);
+ setOffset(startingOffset, startingLineStartOffset);
setLineNumber(startingLineNumber);
m_buffer8.resize(0);
return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
shift();
}
- if (currentCharacter() != stringStart && shouldBuildStrings)
- append8(stringStart, currentCharacter() - stringStart);
+ if (currentSourcePtr() != stringStart && shouldBuildStrings)
+ append8(stringStart, currentSourcePtr() - stringStart);
if (shouldBuildStrings) {
tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
m_buffer8.resize(0);
} else
tokenData->ident = 0;
- return true;
+ return StringParsedSuccessfully;
}
template <typename T>
-template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
+template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
{
T stringQuoteCharacter = m_current;
shift();
- const T* stringStart = currentCharacter();
+ const T* stringStart = currentSourcePtr();
while (m_current != stringQuoteCharacter) {
if (UNLIKELY(m_current == '\\')) {
- if (stringStart != currentCharacter() && shouldBuildStrings)
- append16(stringStart, currentCharacter() - stringStart);
+ if (stringStart != currentSourcePtr() && shouldBuildStrings)
+ append16(stringStart, currentSourcePtr() - stringStart);
shift();
- int escape = singleEscape(m_current);
+ LChar escape = singleEscape(m_current);
// Most common escape sequences first
if (escape) {
shift();
if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
- return false;
+ return StringCannotBeParsed;
}
T prev = m_current;
shift();
shift();
} else if (m_current == 'u') {
shift();
- int character = parseFourDigitUnicodeHex();
- if (character != -1) {
+ UnicodeHexValue character = parseFourDigitUnicodeHex();
+ if (character.isValid()) {
if (shouldBuildStrings)
- record16(character);
+ record16(character.value());
} else if (m_current == stringQuoteCharacter) {
if (shouldBuildStrings)
record16('u');
} else {
m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
- return false;
+ return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed;
}
} else if (strictMode && isASCIIDigit(m_current)) {
// The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
shift();
if (character1 != '0' || isASCIIDigit(m_current)) {
m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
- return false;
+ return StringCannotBeParsed;
}
if (shouldBuildStrings)
record16(0);
shift();
} else {
m_lexErrorMessage = "Unterminated string constant";
- return false;
+ return StringUnterminated;
}
- stringStart = currentCharacter();
+ stringStart = currentSourcePtr();
continue;
}
// Fast check for characters that require special handling.
// New-line or end of input is not allowed
if (atEnd() || isLineTerminator(m_current)) {
m_lexErrorMessage = "Unexpected EOF";
- return false;
+ return atEnd() ? StringUnterminated : StringCannotBeParsed;
}
// Anything else is just a normal character
}
shift();
}
- if (currentCharacter() != stringStart && shouldBuildStrings)
- append16(stringStart, currentCharacter() - stringStart);
+ if (currentSourcePtr() != stringStart && shouldBuildStrings)
+ append16(stringStart, currentSourcePtr() - stringStart);
if (shouldBuildStrings)
tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
else
tokenData->ident = 0;
m_buffer16.resize(0);
- return true;
+ return StringParsedSuccessfully;
}
template <typename T>
}
template <typename T>
-JSTokenType Lexer<T>::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexerFlags, bool strictMode)
+JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
{
+ JSTokenData* tokenData = &tokenRecord->m_data;
+ JSTokenLocation* tokenLocation = &tokenRecord->m_location;
ASSERT(!m_error);
ASSERT(m_buffer8.isEmpty());
ASSERT(m_buffer16.isEmpty());
if (atEnd())
return EOFTOK;
- tokenInfo->startOffset = currentOffset();
+ tokenLocation->startOffset = currentOffset();
+ ASSERT(currentOffset() >= currentLineStartOffset());
+ tokenRecord->m_startPosition = currentPosition();
CharacterType type;
if (LIKELY(isLatin1(m_current)))
if (parseMultilineComment())
goto start;
m_lexErrorMessage = "Multiline comment was not closed properly";
+ token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
goto returnError;
}
if (m_current == '=') {
token = SEMICOLON;
break;
case CharacterOpenBrace:
- tokenData->intValue = currentOffset();
+ tokenData->line = lineNumber();
+ tokenData->offset = currentOffset();
+ tokenData->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenData->offset >= tokenData->lineStartOffset);
shift();
token = OPENBRACE;
break;
case CharacterCloseBrace:
- tokenData->intValue = currentOffset();
+ tokenData->line = lineNumber();
+ tokenData->offset = currentOffset();
+ tokenData->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenData->offset >= tokenData->lineStartOffset);
shift();
token = CLOSEBRACE;
break;
case CharacterDot:
shift();
if (!isASCIIDigit(m_current)) {
+ if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
+ shift();
+ shift();
+ token = DOTDOTDOT;
+ break;
+ }
token = DOT;
break;
}
goto inNumberAfterDecimalPoint;
case CharacterZero:
shift();
- if ((m_current | 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
+ if ((m_current | 0x20) == 'x') {
+ if (!isASCIIHexDigit(peek(1))) {
+ m_lexErrorMessage = "No hexadecimal digits after '0x'";
+ token = INVALID_HEX_NUMBER_ERRORTOK;
+ goto returnError;
+ }
parseHex(tokenData->doubleValue);
+ if (isIdentStart(m_current)) {
+ m_lexErrorMessage = "No space between hexadecimal literal and identifier";
+ token = INVALID_HEX_NUMBER_ERRORTOK;
+ goto returnError;
+ }
token = NUMBER;
- } else {
- record8('0');
- if (isASCIIOctalDigit(m_current)) {
- if (parseOctal(tokenData->doubleValue)) {
- if (strictMode) {
- m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
- goto returnError;
- }
- token = NUMBER;
+ m_buffer8.resize(0);
+ break;
+ }
+
+ record8('0');
+ if (isASCIIOctalDigit(m_current)) {
+ if (parseOctal(tokenData->doubleValue)) {
+ if (strictMode) {
+ m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
+ token = INVALID_OCTAL_NUMBER_ERRORTOK;
+ goto returnError;
}
+ token = NUMBER;
}
}
- // Fall through into CharacterNumber
+ FALLTHROUGH;
case CharacterNumber:
if (LIKELY(token != NUMBER)) {
if (!parseDecimal(tokenData->doubleValue)) {
if ((m_current | 0x20) == 'e') {
if (!parseNumberAfterExponentIndicator()) {
m_lexErrorMessage = "Non-number found after exponent indicator";
+ token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
goto returnError;
}
}
// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
if (UNLIKELY(isIdentStart(m_current))) {
m_lexErrorMessage = "At least one digit must occur after a decimal point";
+ token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
goto returnError;
}
m_buffer8.resize(0);
break;
case CharacterQuote:
if (lexerFlags & LexerFlagsDontBuildStrings) {
- if (UNLIKELY(!parseString<false>(tokenData, strictMode)))
+ StringParseResult result = parseString<false>(tokenData, strictMode);
+ if (UNLIKELY(result != StringParsedSuccessfully)) {
+ token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
goto returnError;
+ }
} else {
- if (UNLIKELY(!parseString<true>(tokenData, strictMode)))
+ StringParseResult result = parseString<true>(tokenData, strictMode);
+ if (UNLIKELY(result != StringParsedSuccessfully)) {
+ token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
goto returnError;
+ }
}
shift();
token = STRING;
break;
case CharacterIdentifierStart:
ASSERT(isIdentStart(m_current));
- // Fall through into CharacterBackSlash.
+ FALLTHROUGH;
case CharacterBackSlash:
+ parseIdent:
if (lexerFlags & LexexFlagsDontBuildKeywords)
token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
else
shiftLineTerminator();
m_atLineStart = true;
m_terminator = true;
+ m_lineStart = m_code;
goto start;
+ case CharacterPrivateIdentifierStart:
+ if (m_parsingBuiltinFunction)
+ goto parseIdent;
+
+ FALLTHROUGH;
case CharacterInvalid:
m_lexErrorMessage = invalidCharacterMessage();
+ token = ERRORTOK;
goto returnError;
default:
- ASSERT_NOT_REACHED();
+ RELEASE_ASSERT_NOT_REACHED();
m_lexErrorMessage = "Internal Error";
+ token = ERRORTOK;
goto returnError;
}
shiftLineTerminator();
m_atLineStart = true;
m_terminator = true;
+ m_lineStart = m_code;
if (!lastTokenWasRestrKeyword())
goto start;
// Fall through into returnToken.
returnToken:
- tokenInfo->line = m_lineNumber;
- tokenInfo->endOffset = currentOffset();
+ tokenLocation->line = m_lineNumber;
+ tokenLocation->endOffset = currentOffset();
+ tokenLocation->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
+ tokenRecord->m_endPosition = currentPosition();
m_lastToken = token;
return token;
returnError:
m_error = true;
- tokenInfo->line = m_lineNumber;
- tokenInfo->endOffset = currentOffset();
- return ERRORTOK;
+ tokenLocation->line = m_lineNumber;
+ tokenLocation->endOffset = currentOffset();
+ tokenLocation->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
+ tokenRecord->m_endPosition = currentPosition();
+ RELEASE_ASSERT(token & ErrorTokenFlag);
+ return token;
+}
+
+template <typename T>
+static inline void orCharacter(UChar&, UChar); // Primary template: declared only; the LChar and UChar specializations below provide the behavior.
+
+template <>
+inline void orCharacter<LChar>(UChar&, UChar) { } // LChar specialization: deliberately a no-op, the accumulator is left untouched.
+
+template <>
+inline void orCharacter<UChar>(UChar& orAccumulator, UChar character) // UChar specialization: folds the character's bits into the running OR.
+{
+ orAccumulator |= character; // The accumulated value is later handed to makeRightSizedIdentifier(); presumably it lets that call detect whether every character fits in 8 bits - confirm.
}
template <typename T>
bool lastWasEscape = false;
bool inBrackets = false;
+ UChar charactersOredTogether = 0;
if (patternPrefix) {
ASSERT(!isLineTerminator(patternPrefix));
break;
record16(prev);
+ orCharacter<T>(charactersOredTogether, prev);
if (lastWasEscape) {
lastWasEscape = false;
}
}
- pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+ pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
+
m_buffer16.resize(0);
+ charactersOredTogether = 0;
while (isIdentPart(m_current)) {
record16(m_current);
+ orCharacter<T>(charactersOredTogether, m_current);
shift();
}
- flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+ flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
m_buffer16.resize(0);
return true;
m_isReparsing = false;
}
-template <typename T>
-SourceCode Lexer<T>::sourceCode(int openBrace, int closeBrace, int firstLine)
-{
- ASSERT((*m_source->provider()->data())[openBrace] == '{');
- ASSERT((*m_source->provider()->data())[closeBrace] == '}');
- return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
-}
-
// Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
template class Lexer<LChar>;
template class Lexer<UChar>;