/*
* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
- * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
* Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
+ * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
+ * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
#include "config.h"
#include "Lexer.h"
-#include "JSFunction.h"
+#include "JSFunctionInlines.h"
+
+#include "BuiltinNames.h"
#include "JSGlobalObjectFunctions.h"
-#include "NodeInfo.h"
+#include "Identifier.h"
#include "Nodes.h"
-#include "dtoa.h"
+#include "JSCInlines.h"
+#include <wtf/dtoa.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <wtf/Assertions.h>
-using namespace WTF;
-using namespace Unicode;
-
-// We can't specify the namespace in yacc's C output, so do it here instead.
-using namespace JSC;
-
-#include "Grammar.h"
-#include "Lookup.h"
+#include "KeywordLookup.h"
#include "Lexer.lut.h"
+#include "Parser.h"
namespace JSC {
-static const UChar byteOrderMark = 0xFEFF;
+Keywords::Keywords(VM& vm)
+ : m_vm(vm)
+ , m_keywordTable(JSC::mainTable)
+{
+}
-Lexer::Lexer(JSGlobalData* globalData)
+enum CharacterType {
+ // Types for the main switch
+
+ // The first three types are fixed, and also used for identifying
+ // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
+ CharacterIdentifierStart,
+ CharacterZero,
+ CharacterNumber,
+
+ CharacterInvalid,
+ CharacterLineTerminator,
+ CharacterExclamationMark,
+ CharacterOpenParen,
+ CharacterCloseParen,
+ CharacterOpenBracket,
+ CharacterCloseBracket,
+ CharacterComma,
+ CharacterColon,
+ CharacterQuestion,
+ CharacterTilde,
+ CharacterQuote,
+ CharacterBackQuote,
+ CharacterDot,
+ CharacterSlash,
+ CharacterBackSlash,
+ CharacterSemicolon,
+ CharacterOpenBrace,
+ CharacterCloseBrace,
+
+ CharacterAdd,
+ CharacterSub,
+ CharacterMultiply,
+ CharacterModulo,
+ CharacterAnd,
+ CharacterXor,
+ CharacterOr,
+ CharacterLess,
+ CharacterGreater,
+ CharacterEqual,
+
+ // Other types (only one so far)
+ CharacterWhiteSpace,
+ CharacterPrivateIdentifierStart
+};
+
+// 256 Latin-1 codes
+static const unsigned short typesOfLatin1Characters[256] = {
+/* 0 - Null */ CharacterInvalid,
+/* 1 - Start of Heading */ CharacterInvalid,
+/* 2 - Start of Text */ CharacterInvalid,
+/* 3 - End of Text */ CharacterInvalid,
+/* 4 - End of Transm. */ CharacterInvalid,
+/* 5 - Enquiry */ CharacterInvalid,
+/* 6 - Acknowledgment */ CharacterInvalid,
+/* 7 - Bell */ CharacterInvalid,
+/* 8 - Back Space */ CharacterInvalid,
+/* 9 - Horizontal Tab */ CharacterWhiteSpace,
+/* 10 - Line Feed */ CharacterLineTerminator,
+/* 11 - Vertical Tab */ CharacterWhiteSpace,
+/* 12 - Form Feed */ CharacterWhiteSpace,
+/* 13 - Carriage Return */ CharacterLineTerminator,
+/* 14 - Shift Out */ CharacterInvalid,
+/* 15 - Shift In */ CharacterInvalid,
+/* 16 - Data Line Escape */ CharacterInvalid,
+/* 17 - Device Control 1 */ CharacterInvalid,
+/* 18 - Device Control 2 */ CharacterInvalid,
+/* 19 - Device Control 3 */ CharacterInvalid,
+/* 20 - Device Control 4 */ CharacterInvalid,
+/* 21 - Negative Ack. */ CharacterInvalid,
+/* 22 - Synchronous Idle */ CharacterInvalid,
+/* 23 - End of Transmit */ CharacterInvalid,
+/* 24 - Cancel */ CharacterInvalid,
+/* 25 - End of Medium */ CharacterInvalid,
+/* 26 - Substitute */ CharacterInvalid,
+/* 27 - Escape */ CharacterInvalid,
+/* 28 - File Separator */ CharacterInvalid,
+/* 29 - Group Separator */ CharacterInvalid,
+/* 30 - Record Separator */ CharacterInvalid,
+/* 31 - Unit Separator */ CharacterInvalid,
+/* 32 - Space */ CharacterWhiteSpace,
+/* 33 - ! */ CharacterExclamationMark,
+/* 34 - " */ CharacterQuote,
+/* 35 - # */ CharacterInvalid,
+/* 36 - $ */ CharacterIdentifierStart,
+/* 37 - % */ CharacterModulo,
+/* 38 - & */ CharacterAnd,
+/* 39 - ' */ CharacterQuote,
+/* 40 - ( */ CharacterOpenParen,
+/* 41 - ) */ CharacterCloseParen,
+/* 42 - * */ CharacterMultiply,
+/* 43 - + */ CharacterAdd,
+/* 44 - , */ CharacterComma,
+/* 45 - - */ CharacterSub,
+/* 46 - . */ CharacterDot,
+/* 47 - / */ CharacterSlash,
+/* 48 - 0 */ CharacterZero,
+/* 49 - 1 */ CharacterNumber,
+/* 50 - 2 */ CharacterNumber,
+/* 51 - 3 */ CharacterNumber,
+/* 52 - 4 */ CharacterNumber,
+/* 53 - 5 */ CharacterNumber,
+/* 54 - 6 */ CharacterNumber,
+/* 55 - 7 */ CharacterNumber,
+/* 56 - 8 */ CharacterNumber,
+/* 57 - 9 */ CharacterNumber,
+/* 58 - : */ CharacterColon,
+/* 59 - ; */ CharacterSemicolon,
+/* 60 - < */ CharacterLess,
+/* 61 - = */ CharacterEqual,
+/* 62 - > */ CharacterGreater,
+/* 63 - ? */ CharacterQuestion,
+/* 64 - @ */ CharacterPrivateIdentifierStart,
+/* 65 - A */ CharacterIdentifierStart,
+/* 66 - B */ CharacterIdentifierStart,
+/* 67 - C */ CharacterIdentifierStart,
+/* 68 - D */ CharacterIdentifierStart,
+/* 69 - E */ CharacterIdentifierStart,
+/* 70 - F */ CharacterIdentifierStart,
+/* 71 - G */ CharacterIdentifierStart,
+/* 72 - H */ CharacterIdentifierStart,
+/* 73 - I */ CharacterIdentifierStart,
+/* 74 - J */ CharacterIdentifierStart,
+/* 75 - K */ CharacterIdentifierStart,
+/* 76 - L */ CharacterIdentifierStart,
+/* 77 - M */ CharacterIdentifierStart,
+/* 78 - N */ CharacterIdentifierStart,
+/* 79 - O */ CharacterIdentifierStart,
+/* 80 - P */ CharacterIdentifierStart,
+/* 81 - Q */ CharacterIdentifierStart,
+/* 82 - R */ CharacterIdentifierStart,
+/* 83 - S */ CharacterIdentifierStart,
+/* 84 - T */ CharacterIdentifierStart,
+/* 85 - U */ CharacterIdentifierStart,
+/* 86 - V */ CharacterIdentifierStart,
+/* 87 - W */ CharacterIdentifierStart,
+/* 88 - X */ CharacterIdentifierStart,
+/* 89 - Y */ CharacterIdentifierStart,
+/* 90 - Z */ CharacterIdentifierStart,
+/* 91 - [ */ CharacterOpenBracket,
+/* 92 - \ */ CharacterBackSlash,
+/* 93 - ] */ CharacterCloseBracket,
+/* 94 - ^ */ CharacterXor,
+/* 95 - _ */ CharacterIdentifierStart,
+#if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
+/* 96 - ` */ CharacterBackQuote,
+#else
+/* 96 - ` */ CharacterInvalid,
+#endif
+/* 97 - a */ CharacterIdentifierStart,
+/* 98 - b */ CharacterIdentifierStart,
+/* 99 - c */ CharacterIdentifierStart,
+/* 100 - d */ CharacterIdentifierStart,
+/* 101 - e */ CharacterIdentifierStart,
+/* 102 - f */ CharacterIdentifierStart,
+/* 103 - g */ CharacterIdentifierStart,
+/* 104 - h */ CharacterIdentifierStart,
+/* 105 - i */ CharacterIdentifierStart,
+/* 106 - j */ CharacterIdentifierStart,
+/* 107 - k */ CharacterIdentifierStart,
+/* 108 - l */ CharacterIdentifierStart,
+/* 109 - m */ CharacterIdentifierStart,
+/* 110 - n */ CharacterIdentifierStart,
+/* 111 - o */ CharacterIdentifierStart,
+/* 112 - p */ CharacterIdentifierStart,
+/* 113 - q */ CharacterIdentifierStart,
+/* 114 - r */ CharacterIdentifierStart,
+/* 115 - s */ CharacterIdentifierStart,
+/* 116 - t */ CharacterIdentifierStart,
+/* 117 - u */ CharacterIdentifierStart,
+/* 118 - v */ CharacterIdentifierStart,
+/* 119 - w */ CharacterIdentifierStart,
+/* 120 - x */ CharacterIdentifierStart,
+/* 121 - y */ CharacterIdentifierStart,
+/* 122 - z */ CharacterIdentifierStart,
+/* 123 - { */ CharacterOpenBrace,
+/* 124 - | */ CharacterOr,
+/* 125 - } */ CharacterCloseBrace,
+/* 126 - ~ */ CharacterTilde,
+/* 127 - Delete */ CharacterInvalid,
+/* 128 - Cc category */ CharacterInvalid,
+/* 129 - Cc category */ CharacterInvalid,
+/* 130 - Cc category */ CharacterInvalid,
+/* 131 - Cc category */ CharacterInvalid,
+/* 132 - Cc category */ CharacterInvalid,
+/* 133 - Cc category */ CharacterInvalid,
+/* 134 - Cc category */ CharacterInvalid,
+/* 135 - Cc category */ CharacterInvalid,
+/* 136 - Cc category */ CharacterInvalid,
+/* 137 - Cc category */ CharacterInvalid,
+/* 138 - Cc category */ CharacterInvalid,
+/* 139 - Cc category */ CharacterInvalid,
+/* 140 - Cc category */ CharacterInvalid,
+/* 141 - Cc category */ CharacterInvalid,
+/* 142 - Cc category */ CharacterInvalid,
+/* 143 - Cc category */ CharacterInvalid,
+/* 144 - Cc category */ CharacterInvalid,
+/* 145 - Cc category */ CharacterInvalid,
+/* 146 - Cc category */ CharacterInvalid,
+/* 147 - Cc category */ CharacterInvalid,
+/* 148 - Cc category */ CharacterInvalid,
+/* 149 - Cc category */ CharacterInvalid,
+/* 150 - Cc category */ CharacterInvalid,
+/* 151 - Cc category */ CharacterInvalid,
+/* 152 - Cc category */ CharacterInvalid,
+/* 153 - Cc category */ CharacterInvalid,
+/* 154 - Cc category */ CharacterInvalid,
+/* 155 - Cc category */ CharacterInvalid,
+/* 156 - Cc category */ CharacterInvalid,
+/* 157 - Cc category */ CharacterInvalid,
+/* 158 - Cc category */ CharacterInvalid,
+/* 159 - Cc category */ CharacterInvalid,
+/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
+/* 161 - Po category */ CharacterInvalid,
+/* 162 - Sc category */ CharacterInvalid,
+/* 163 - Sc category */ CharacterInvalid,
+/* 164 - Sc category */ CharacterInvalid,
+/* 165 - Sc category */ CharacterInvalid,
+/* 166 - So category */ CharacterInvalid,
+/* 167 - So category */ CharacterInvalid,
+/* 168 - Sk category */ CharacterInvalid,
+/* 169 - So category */ CharacterInvalid,
+/* 170 - Ll category */ CharacterIdentifierStart,
+/* 171 - Pi category */ CharacterInvalid,
+/* 172 - Sm category */ CharacterInvalid,
+/* 173 - Cf category */ CharacterInvalid,
+/* 174 - So category */ CharacterInvalid,
+/* 175 - Sk category */ CharacterInvalid,
+/* 176 - So category */ CharacterInvalid,
+/* 177 - Sm category */ CharacterInvalid,
+/* 178 - No category */ CharacterInvalid,
+/* 179 - No category */ CharacterInvalid,
+/* 180 - Sk category */ CharacterInvalid,
+/* 181 - Ll category */ CharacterIdentifierStart,
+/* 182 - So category */ CharacterInvalid,
+/* 183 - Po category */ CharacterInvalid,
+/* 184 - Sk category */ CharacterInvalid,
+/* 185 - No category */ CharacterInvalid,
+/* 186 - Ll category */ CharacterIdentifierStart,
+/* 187 - Pf category */ CharacterInvalid,
+/* 188 - No category */ CharacterInvalid,
+/* 189 - No category */ CharacterInvalid,
+/* 190 - No category */ CharacterInvalid,
+/* 191 - Po category */ CharacterInvalid,
+/* 192 - Lu category */ CharacterIdentifierStart,
+/* 193 - Lu category */ CharacterIdentifierStart,
+/* 194 - Lu category */ CharacterIdentifierStart,
+/* 195 - Lu category */ CharacterIdentifierStart,
+/* 196 - Lu category */ CharacterIdentifierStart,
+/* 197 - Lu category */ CharacterIdentifierStart,
+/* 198 - Lu category */ CharacterIdentifierStart,
+/* 199 - Lu category */ CharacterIdentifierStart,
+/* 200 - Lu category */ CharacterIdentifierStart,
+/* 201 - Lu category */ CharacterIdentifierStart,
+/* 202 - Lu category */ CharacterIdentifierStart,
+/* 203 - Lu category */ CharacterIdentifierStart,
+/* 204 - Lu category */ CharacterIdentifierStart,
+/* 205 - Lu category */ CharacterIdentifierStart,
+/* 206 - Lu category */ CharacterIdentifierStart,
+/* 207 - Lu category */ CharacterIdentifierStart,
+/* 208 - Lu category */ CharacterIdentifierStart,
+/* 209 - Lu category */ CharacterIdentifierStart,
+/* 210 - Lu category */ CharacterIdentifierStart,
+/* 211 - Lu category */ CharacterIdentifierStart,
+/* 212 - Lu category */ CharacterIdentifierStart,
+/* 213 - Lu category */ CharacterIdentifierStart,
+/* 214 - Lu category */ CharacterIdentifierStart,
+/* 215 - Sm category */ CharacterInvalid,
+/* 216 - Lu category */ CharacterIdentifierStart,
+/* 217 - Lu category */ CharacterIdentifierStart,
+/* 218 - Lu category */ CharacterIdentifierStart,
+/* 219 - Lu category */ CharacterIdentifierStart,
+/* 220 - Lu category */ CharacterIdentifierStart,
+/* 221 - Lu category */ CharacterIdentifierStart,
+/* 222 - Lu category */ CharacterIdentifierStart,
+/* 223 - Ll category */ CharacterIdentifierStart,
+/* 224 - Ll category */ CharacterIdentifierStart,
+/* 225 - Ll category */ CharacterIdentifierStart,
+/* 226 - Ll category */ CharacterIdentifierStart,
+/* 227 - Ll category */ CharacterIdentifierStart,
+/* 228 - Ll category */ CharacterIdentifierStart,
+/* 229 - Ll category */ CharacterIdentifierStart,
+/* 230 - Ll category */ CharacterIdentifierStart,
+/* 231 - Ll category */ CharacterIdentifierStart,
+/* 232 - Ll category */ CharacterIdentifierStart,
+/* 233 - Ll category */ CharacterIdentifierStart,
+/* 234 - Ll category */ CharacterIdentifierStart,
+/* 235 - Ll category */ CharacterIdentifierStart,
+/* 236 - Ll category */ CharacterIdentifierStart,
+/* 237 - Ll category */ CharacterIdentifierStart,
+/* 238 - Ll category */ CharacterIdentifierStart,
+/* 239 - Ll category */ CharacterIdentifierStart,
+/* 240 - Ll category */ CharacterIdentifierStart,
+/* 241 - Ll category */ CharacterIdentifierStart,
+/* 242 - Ll category */ CharacterIdentifierStart,
+/* 243 - Ll category */ CharacterIdentifierStart,
+/* 244 - Ll category */ CharacterIdentifierStart,
+/* 245 - Ll category */ CharacterIdentifierStart,
+/* 246 - Ll category */ CharacterIdentifierStart,
+/* 247 - Sm category */ CharacterInvalid,
+/* 248 - Ll category */ CharacterIdentifierStart,
+/* 249 - Ll category */ CharacterIdentifierStart,
+/* 250 - Ll category */ CharacterIdentifierStart,
+/* 251 - Ll category */ CharacterIdentifierStart,
+/* 252 - Ll category */ CharacterIdentifierStart,
+/* 253 - Ll category */ CharacterIdentifierStart,
+/* 254 - Ll category */ CharacterIdentifierStart,
+/* 255 - Ll category */ CharacterIdentifierStart
+};
+
+// This table provides the character that results from \X where X is the index in the table beginning
+// with SPACE. A table value of 0 means that more processing needs to be done.
+static const LChar singleCharacterEscapeValuesForASCII[128] = {
+/* 0 - Null */ 0,
+/* 1 - Start of Heading */ 0,
+/* 2 - Start of Text */ 0,
+/* 3 - End of Text */ 0,
+/* 4 - End of Transm. */ 0,
+/* 5 - Enquiry */ 0,
+/* 6 - Acknowledgment */ 0,
+/* 7 - Bell */ 0,
+/* 8 - Back Space */ 0,
+/* 9 - Horizontal Tab */ 0,
+/* 10 - Line Feed */ 0,
+/* 11 - Vertical Tab */ 0,
+/* 12 - Form Feed */ 0,
+/* 13 - Carriage Return */ 0,
+/* 14 - Shift Out */ 0,
+/* 15 - Shift In */ 0,
+/* 16 - Data Line Escape */ 0,
+/* 17 - Device Control 1 */ 0,
+/* 18 - Device Control 2 */ 0,
+/* 19 - Device Control 3 */ 0,
+/* 20 - Device Control 4 */ 0,
+/* 21 - Negative Ack. */ 0,
+/* 22 - Synchronous Idle */ 0,
+/* 23 - End of Transmit */ 0,
+/* 24 - Cancel */ 0,
+/* 25 - End of Medium */ 0,
+/* 26 - Substitute */ 0,
+/* 27 - Escape */ 0,
+/* 28 - File Separator */ 0,
+/* 29 - Group Separator */ 0,
+/* 30 - Record Separator */ 0,
+/* 31 - Unit Separator */ 0,
+/* 32 - Space */ ' ',
+/* 33 - ! */ '!',
+/* 34 - " */ '"',
+/* 35 - # */ '#',
+/* 36 - $ */ '$',
+/* 37 - % */ '%',
+/* 38 - & */ '&',
+/* 39 - ' */ '\'',
+/* 40 - ( */ '(',
+/* 41 - ) */ ')',
+/* 42 - * */ '*',
+/* 43 - + */ '+',
+/* 44 - , */ ',',
+/* 45 - - */ '-',
+/* 46 - . */ '.',
+/* 47 - / */ '/',
+/* 48 - 0 */ 0,
+/* 49 - 1 */ 0,
+/* 50 - 2 */ 0,
+/* 51 - 3 */ 0,
+/* 52 - 4 */ 0,
+/* 53 - 5 */ 0,
+/* 54 - 6 */ 0,
+/* 55 - 7 */ 0,
+/* 56 - 8 */ 0,
+/* 57 - 9 */ 0,
+/* 58 - : */ ':',
+/* 59 - ; */ ';',
+/* 60 - < */ '<',
+/* 61 - = */ '=',
+/* 62 - > */ '>',
+/* 63 - ? */ '?',
+/* 64 - @ */ '@',
+/* 65 - A */ 'A',
+/* 66 - B */ 'B',
+/* 67 - C */ 'C',
+/* 68 - D */ 'D',
+/* 69 - E */ 'E',
+/* 70 - F */ 'F',
+/* 71 - G */ 'G',
+/* 72 - H */ 'H',
+/* 73 - I */ 'I',
+/* 74 - J */ 'J',
+/* 75 - K */ 'K',
+/* 76 - L */ 'L',
+/* 77 - M */ 'M',
+/* 78 - N */ 'N',
+/* 79 - O */ 'O',
+/* 80 - P */ 'P',
+/* 81 - Q */ 'Q',
+/* 82 - R */ 'R',
+/* 83 - S */ 'S',
+/* 84 - T */ 'T',
+/* 85 - U */ 'U',
+/* 86 - V */ 'V',
+/* 87 - W */ 'W',
+/* 88 - X */ 'X',
+/* 89 - Y */ 'Y',
+/* 90 - Z */ 'Z',
+/* 91 - [ */ '[',
+/* 92 - \ */ '\\',
+/* 93 - ] */ ']',
+/* 94 - ^ */ '^',
+/* 95 - _ */ '_',
+/* 96 - ` */ '`',
+/* 97 - a */ 'a',
+/* 98 - b */ 0x08,
+/* 99 - c */ 'c',
+/* 100 - d */ 'd',
+/* 101 - e */ 'e',
+/* 102 - f */ 0x0C,
+/* 103 - g */ 'g',
+/* 104 - h */ 'h',
+/* 105 - i */ 'i',
+/* 106 - j */ 'j',
+/* 107 - k */ 'k',
+/* 108 - l */ 'l',
+/* 109 - m */ 'm',
+/* 110 - n */ 0x0A,
+/* 111 - o */ 'o',
+/* 112 - p */ 'p',
+/* 113 - q */ 'q',
+/* 114 - r */ 0x0D,
+/* 115 - s */ 's',
+/* 116 - t */ 0x09,
+/* 117 - u */ 0,
+/* 118 - v */ 0x0B,
+/* 119 - w */ 'w',
+/* 120 - x */ 0,
+/* 121 - y */ 'y',
+/* 122 - z */ 'z',
+/* 123 - { */ '{',
+/* 124 - | */ '|',
+/* 125 - } */ '}',
+/* 126 - ~ */ '~',
+/* 127 - Delete */ 0
+};
+
+template <typename T>
+Lexer<T>::Lexer(VM* vm, JSParserBuiltinMode builtinMode)
: m_isReparsing(false)
- , m_globalData(globalData)
- , m_keywordTable(JSC::mainTable)
+ , m_vm(vm)
+ , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
{
- m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
- m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
}
-Lexer::~Lexer()
+static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
+{
+ if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
+ return INTEGER;
+ return DOUBLE;
+}
+
+template <typename T>
+Lexer<T>::~Lexer()
{
- m_keywordTable.deleteTable();
}
-inline const UChar* Lexer::currentCharacter() const
+template <typename T>
+String Lexer<T>::invalidCharacterMessage() const
{
- return m_code - 4;
+ switch (m_current) {
+ case 0:
+ return ASCIILiteral("Invalid character: '\\0'");
+ case 10:
+ return ASCIILiteral("Invalid character: '\\n'");
+ case 11:
+ return ASCIILiteral("Invalid character: '\\v'");
+ case 13:
+ return ASCIILiteral("Invalid character: '\\r'");
+ case 35:
+ return ASCIILiteral("Invalid character: '#'");
+ case 64:
+ return ASCIILiteral("Invalid character: '@'");
+ case 96:
+ return ASCIILiteral("Invalid character: '`'");
+ default:
+ return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current));
+ }
}
-inline int Lexer::currentOffset() const
+template <typename T>
+ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
{
- return currentCharacter() - m_codeStart;
+ ASSERT(m_code <= m_codeEnd);
+ return m_code;
}
-ALWAYS_INLINE void Lexer::shift1()
+template <typename T>
+void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
{
- m_current = m_next1;
- m_next1 = m_next2;
- m_next2 = m_next3;
+ m_arena = &arena->identifierArena();
+
+ m_lineNumber = source.firstLine();
+ m_lastToken = -1;
+
+ const String& sourceString = source.provider()->source();
+
+ if (!sourceString.isNull())
+ setCodeStart(sourceString.impl());
+ else
+ m_codeStart = 0;
+
+ m_source = &source;
+ m_sourceOffset = source.startOffset();
+ m_codeStartPlusOffset = m_codeStart + source.startOffset();
+ m_code = m_codeStartPlusOffset;
+ m_codeEnd = m_codeStart + source.endOffset();
+ m_error = false;
+ m_atLineStart = true;
+ m_lineStart = m_code;
+ m_lexErrorMessage = String();
+
+ m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
+ m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
+ m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
+
if (LIKELY(m_code < m_codeEnd))
- m_next3 = m_code[0];
+ m_current = *m_code;
else
- m_next3 = -1;
+ m_current = 0;
+ ASSERT(currentOffset() == source.startOffset());
+}
+
+template <typename T>
+template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
+{
+ m_code += shiftAmount;
+ ASSERT(currentOffset() >= currentLineStartOffset());
+ m_current = *m_code;
+}
+template <typename T>
+ALWAYS_INLINE void Lexer<T>::shift()
+{
+ // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
+ m_current = 0;
++m_code;
+ if (LIKELY(m_code < m_codeEnd))
+ m_current = *m_code;
}
-ALWAYS_INLINE void Lexer::shift2()
+template <typename T>
+ALWAYS_INLINE bool Lexer<T>::atEnd() const
{
- m_current = m_next2;
- m_next1 = m_next3;
- if (LIKELY(m_code + 1 < m_codeEnd)) {
- m_next2 = m_code[0];
- m_next3 = m_code[1];
- } else {
- m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
- m_next3 = -1;
+ ASSERT(!m_current || m_code < m_codeEnd);
+ return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
+}
+
+template <typename T>
+ALWAYS_INLINE T Lexer<T>::peek(int offset) const
+{
+ ASSERT(offset > 0 && offset < 5);
+ const T* code = m_code + offset;
+ return (code < m_codeEnd) ? *code : 0;
+}
+
+struct ParsedUnicodeEscapeValue {
+ ParsedUnicodeEscapeValue(UChar32 value)
+ : m_value(value)
+ {
+ ASSERT(isValid());
+ }
+
+ enum SpecialValueType { Incomplete = -2, Invalid = -1 };
+ ParsedUnicodeEscapeValue(SpecialValueType type)
+ : m_value(type)
+ {
+ }
+
+ bool isValid() const { return m_value >= 0; }
+ bool isIncomplete() const { return m_value == Incomplete; }
+
+ UChar32 value() const
+ {
+ ASSERT(isValid());
+ return m_value;
+ }
+
+private:
+ UChar32 m_value;
+};
+
+template<typename CharacterType> ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
+{
+ if (m_current == '{') {
+ shift();
+ UChar32 codePoint = 0;
+ do {
+ if (!isASCIIHexDigit(m_current))
+ return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
+ codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
+ if (codePoint > UCHAR_MAX_VALUE)
+ return ParsedUnicodeEscapeValue::Invalid;
+ shift();
+ } while (m_current != '}');
+ shift();
+ return codePoint;
}
- m_code += 2;
+ auto character2 = peek(1);
+ auto character3 = peek(2);
+ auto character4 = peek(3);
+ if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4)))
+ return (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
+ auto result = convertUnicode(m_current, character2, character3, character4);
+ shift();
+ shift();
+ shift();
+ shift();
+ return result;
}
-ALWAYS_INLINE void Lexer::shift3()
+template <typename T>
+void Lexer<T>::shiftLineTerminator()
{
- m_current = m_next3;
- if (LIKELY(m_code + 2 < m_codeEnd)) {
- m_next1 = m_code[0];
- m_next2 = m_code[1];
- m_next3 = m_code[2];
- } else {
- m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
- m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
- m_next3 = -1;
+ ASSERT(isLineTerminator(m_current));
+
+ m_positionBeforeLastNewline = currentPosition();
+ T prev = m_current;
+ shift();
+
+ // Allow both CRLF and LFCR.
+ if (prev + m_current == '\n' + '\r')
+ shift();
+
+ ++m_lineNumber;
+}
+
+template <typename T>
+ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
+{
+ return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
+}
+
+static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
+{
+ return U_GET_GC_MASK(c) & U_GC_L_MASK;
+}
+
+static ALWAYS_INLINE bool isLatin1(LChar)
+{
+ return true;
+}
+
+static ALWAYS_INLINE bool isLatin1(UChar c)
+{
+ return c < 256;
+}
+
+static ALWAYS_INLINE bool isLatin1(UChar32 c)
+{
+ return !(c & ~0xFF);
+}
+
+static inline bool isIdentStart(LChar c)
+{
+ return typesOfLatin1Characters[c] == CharacterIdentifierStart;
+}
+
+static inline bool isIdentStart(UChar32 c)
+{
+ return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
+}
+
+static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
+{
+ // FIXME: ES6 says this should be based on the Unicode property ID_Continue now instead.
+ return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D;
+}
+
+static ALWAYS_INLINE bool isIdentPart(LChar c)
+{
+ // Character types are divided into two groups depending on whether they can be part of an
+ // identifier or not. Those whose type value is less or equal than CharacterNumber can be
+ // part of an identifier. (See the CharacterType definition for more details.)
+ return typesOfLatin1Characters[c] <= CharacterNumber;
+}
+
+static ALWAYS_INLINE bool isIdentPart(UChar32 c)
+{
+ return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
+}
+
+static ALWAYS_INLINE bool isIdentPart(UChar c)
+{
+ return isIdentPart(static_cast<UChar32>(c));
+}
+
+template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd)
+{
+ if (isIdentPart(code[0]))
+ return true;
+
+ // Shortest sequence handled below is \u{0}, which is 5 characters.
+ if (!(code[0] == '\\' && codeEnd - code >= 5 && code[1] == 'u'))
+ return false;
+
+ if (code[2] == '{') {
+ UChar32 codePoint = 0;
+ const CharacterType* pointer;
+ for (pointer = &code[3]; pointer < codeEnd; ++pointer) {
+ auto digit = *pointer;
+ if (!isASCIIHexDigit(digit))
+ break;
+ codePoint = (codePoint << 4) | toASCIIHexValue(digit);
+ if (codePoint > UCHAR_MAX_VALUE)
+ return false;
+ }
+ return isIdentPart(codePoint) && pointer < codeEnd && *pointer == '}';
}
- m_code += 3;
+ // Shortest sequence handled below is \uXXXX, which is 6 characters.
+ if (codeEnd - code < 6)
+ return false;
+
+ auto character1 = code[2];
+ auto character2 = code[3];
+ auto character3 = code[4];
+ auto character4 = code[5];
+ return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4)
+ && isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4));
}
-ALWAYS_INLINE void Lexer::shift4()
+static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
{
- if (LIKELY(m_code + 3 < m_codeEnd)) {
- m_current = m_code[0];
- m_next1 = m_code[1];
- m_next2 = m_code[2];
- m_next3 = m_code[3];
- } else {
- m_current = m_code < m_codeEnd ? m_code[0] : -1;
- m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
- m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
- m_next3 = -1;
+ return isIdentPartIncludingEscapeTemplate(code, codeEnd);
+}
+
+static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
+{
+ return isIdentPartIncludingEscapeTemplate(code, codeEnd);
+}
+
+static inline LChar singleEscape(int c)
+{
+ if (c < 128) {
+ ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
+ return singleCharacterEscapeValuesForASCII[c];
}
+ return 0;
+}
- m_code += 4;
+template <typename T>
+inline void Lexer<T>::record8(int c)
+{
+ ASSERT(c >= 0);
+ ASSERT(c <= 0xFF);
+ m_buffer8.append(static_cast<LChar>(c));
}
-void Lexer::setCode(const SourceCode& source, ParserArena& arena)
+template <typename T>
+inline void assertCharIsIn8BitRange(T c)
{
- m_arena = &arena.identifierArena();
+ UNUSED_PARAM(c);
+ ASSERT(c >= 0);
+ ASSERT(c <= 0xFF);
+}
- m_lineNumber = source.firstLine();
- m_delimited = false;
- m_lastToken = -1;
+template <>
+inline void assertCharIsIn8BitRange(UChar c)
+{
+ UNUSED_PARAM(c);
+ ASSERT(c <= 0xFF);
+}
- const UChar* data = source.provider()->data();
+template <>
+inline void assertCharIsIn8BitRange(LChar)
+{
+}
- m_source = &source;
- m_codeStart = data;
- m_code = data + source.startOffset();
- m_codeEnd = data + source.endOffset();
- m_error = false;
- m_atLineStart = true;
+template <typename T>
+inline void Lexer<T>::append8(const T* p, size_t length)
+{
+ size_t currentSize = m_buffer8.size();
+ m_buffer8.grow(currentSize + length);
+ LChar* rawBuffer = m_buffer8.data() + currentSize;
+
+ for (size_t i = 0; i < length; i++) {
+ T c = p[i];
+ assertCharIsIn8BitRange(c);
+ rawBuffer[i] = c;
+ }
+}
- // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
- // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
- if (source.provider()->hasBOMs()) {
- for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
- if (UNLIKELY(*p == byteOrderMark)) {
- copyCodeWithoutBOMs();
- break;
+template <typename T>
+inline void Lexer<T>::append16(const LChar* p, size_t length)
+{
+ size_t currentSize = m_buffer16.size();
+ m_buffer16.grow(currentSize + length);
+ UChar* rawBuffer = m_buffer16.data() + currentSize;
+
+ for (size_t i = 0; i < length; i++)
+ rawBuffer[i] = p[i];
+}
+
+template <typename T>
+inline void Lexer<T>::record16(T c)
+{
+ m_buffer16.append(c);
+}
+
+template <typename T>
+inline void Lexer<T>::record16(int c)
+{
+ ASSERT(c >= 0);
+ ASSERT(c <= static_cast<int>(USHRT_MAX));
+ m_buffer16.append(static_cast<UChar>(c));
+}
+
+template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
+{
+ ASSERT(codePoint >= 0);
+ ASSERT(codePoint <= UCHAR_MAX_VALUE);
+ if (U_IS_BMP(codePoint))
+ record16(codePoint);
+ else {
+ UChar codeUnits[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
+ append16(codeUnits, 2);
+ }
+}
+
+#if !ASSERT_DISABLED
+bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
+{
+ if (!ident)
+ return true;
+ /* Just block any use of suspicious identifiers. This is intended to
+ * be used as a safety net while implementing builtins.
+ */
+ // FIXME: How can a debug-only assertion be a safety net?
+ if (*ident == vm.propertyNames->builtinNames().callPublicName())
+ return false;
+ if (*ident == vm.propertyNames->builtinNames().applyPublicName())
+ return false;
+ if (*ident == vm.propertyNames->eval)
+ return false;
+ if (*ident == vm.propertyNames->Function)
+ return false;
+ return true;
+}
+#endif
+
+template <>
+template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
+{
+ const ptrdiff_t remaining = m_codeEnd - m_code;
+ if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
+ JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
+ if (keyword != IDENT) {
+ ASSERT((!shouldCreateIdentifier) || tokenData->ident);
+ return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
+ }
+ }
+
+ bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
+ if (isPrivateName)
+ shift();
+
+ const LChar* identifierStart = currentSourcePtr();
+ unsigned identifierLineStart = currentLineStartOffset();
+
+ while (isIdentPart(m_current))
+ shift();
+
+ if (UNLIKELY(m_current == '\\')) {
+ setOffsetFromSourcePtr(identifierStart, identifierLineStart);
+ return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
+ }
+
+ const Identifier* ident = 0;
+
+ if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
+ int identifierLength = currentSourcePtr() - identifierStart;
+ ident = makeIdentifier(identifierStart, identifierLength);
+ if (m_parsingBuiltinFunction) {
+ if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
+ m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
+ return ERRORTOK;
}
+ if (isPrivateName)
+ ident = m_vm->propertyNames->getPrivateName(*ident);
+ else if (*ident == m_vm->propertyNames->undefinedKeyword)
+ tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
+ if (!ident)
+ return INVALID_PRIVATE_NAME_ERRORTOK;
+ }
+ tokenData->ident = ident;
+ } else
+ tokenData->ident = 0;
+
+ if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
+ ASSERT(shouldCreateIdentifier);
+ if (remaining < maxTokenLength) {
+ const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
+ ASSERT((remaining < maxTokenLength) || !entry);
+ if (!entry)
+ return IDENT;
+ JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
+ return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
}
+ return IDENT;
}
- // Read the first characters into the 4-character buffer.
- shift4();
- ASSERT(currentOffset() == source.startOffset());
+ return IDENT;
}
-void Lexer::copyCodeWithoutBOMs()
+template <>
+template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
- // Note: In this case, the character offset data for debugging will be incorrect.
- // If it's important to correctly debug code with extraneous BOMs, then the caller
- // should strip the BOMs when creating the SourceProvider object and do its own
- // mapping of offsets within the stripped text to original text offset.
-
- m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
- for (const UChar* p = m_code; p < m_codeEnd; ++p) {
- UChar c = *p;
- if (c != byteOrderMark)
- m_codeWithoutBOMs.append(c);
+ const ptrdiff_t remaining = m_codeEnd - m_code;
+ if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
+ JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
+ if (keyword != IDENT) {
+ ASSERT((!shouldCreateIdentifier) || tokenData->ident);
+ return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
+ }
}
- ptrdiff_t startDelta = m_codeStart - m_code;
- m_code = m_codeWithoutBOMs.data();
- m_codeStart = m_code + startDelta;
- m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
+
+ bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
+ if (isPrivateName)
+ shift();
+
+ const UChar* identifierStart = currentSourcePtr();
+ int identifierLineStart = currentLineStartOffset();
+
+ UChar orAllChars = 0;
+
+ while (isIdentPart(m_current)) {
+ orAllChars |= m_current;
+ shift();
+ }
+
+ if (UNLIKELY(m_current == '\\')) {
+ ASSERT(!isPrivateName);
+ setOffsetFromSourcePtr(identifierStart, identifierLineStart);
+ return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
+ }
+
+ bool isAll8Bit = false;
+
+ if (!(orAllChars & ~0xff))
+ isAll8Bit = true;
+
+ const Identifier* ident = 0;
+
+ if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
+ int identifierLength = currentSourcePtr() - identifierStart;
+ if (isAll8Bit)
+ ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
+ else
+ ident = makeIdentifier(identifierStart, identifierLength);
+ if (m_parsingBuiltinFunction) {
+ if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
+ m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
+ return ERRORTOK;
+ }
+ if (isPrivateName)
+ ident = m_vm->propertyNames->getPrivateName(*ident);
+ else if (*ident == m_vm->propertyNames->undefinedKeyword)
+ tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
+ if (!ident)
+ return INVALID_PRIVATE_NAME_ERRORTOK;
+ }
+ tokenData->ident = ident;
+ } else
+ tokenData->ident = 0;
+
+ if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
+ ASSERT(shouldCreateIdentifier);
+ if (remaining < maxTokenLength) {
+ const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
+ ASSERT((remaining < maxTokenLength) || !entry);
+ if (!entry)
+ return IDENT;
+ JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
+ return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
+ }
+ return IDENT;
+ }
+
+ return IDENT;
}
-void Lexer::shiftLineTerminator()
+template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
- ASSERT(isLineTerminator(m_current));
+ auto identifierStart = currentSourcePtr();
+ bool bufferRequired = false;
- // Allow both CRLF and LFCR.
- if (m_current + m_next1 == '\n' + '\r')
- shift2();
+ while (true) {
+ if (LIKELY(isIdentPart(m_current))) {
+ shift();
+ continue;
+ }
+ if (LIKELY(m_current != '\\'))
+ break;
+
+ // \uXXXX unicode characters.
+ bufferRequired = true;
+ if (identifierStart != currentSourcePtr())
+ m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
+ shift();
+ if (UNLIKELY(m_current != 'u'))
+ return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
+ shift();
+ auto character = parseUnicodeEscape();
+ if (UNLIKELY(!character.isValid()))
+ return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
+ if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character.value()) : !isIdentStart(character.value())))
+ return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
+ if (shouldCreateIdentifier)
+ recordUnicodeCodePoint(character.value());
+ identifierStart = currentSourcePtr();
+ }
+
+ int identifierLength;
+ const Identifier* ident = nullptr;
+ if (shouldCreateIdentifier) {
+ if (!bufferRequired) {
+ identifierLength = currentSourcePtr() - identifierStart;
+ ident = makeIdentifier(identifierStart, identifierLength);
+ } else {
+ if (identifierStart != currentSourcePtr())
+ m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
+ ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+ }
+
+ tokenData->ident = ident;
+ } else
+ tokenData->ident = nullptr;
+
+ m_buffer16.shrink(0);
+
+ if (LIKELY(!(lexerFlags & LexerFlagsIgnoreReservedWords))) {
+ ASSERT(shouldCreateIdentifier);
+ const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
+ if (!entry)
+ return IDENT;
+ JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
+ return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
+ }
+
+ return IDENT;
+}
+
+static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
+{
+ return character < 0xE;
+}
+
+static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
+{
+ return character < 0xE || character > 0xFF;
+}
+
+template <typename T>
+template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
+{
+ int startingOffset = currentOffset();
+ int startingLineStartOffset = currentLineStartOffset();
+ int startingLineNumber = lineNumber();
+ T stringQuoteCharacter = m_current;
+ shift();
+
+ const T* stringStart = currentSourcePtr();
+
+ while (m_current != stringQuoteCharacter) {
+ if (UNLIKELY(m_current == '\\')) {
+ if (stringStart != currentSourcePtr() && shouldBuildStrings)
+ append8(stringStart, currentSourcePtr() - stringStart);
+ shift();
+
+ LChar escape = singleEscape(m_current);
+
+ // Most common escape sequences first.
+ if (escape) {
+ if (shouldBuildStrings)
+ record8(escape);
+ shift();
+ } else if (UNLIKELY(isLineTerminator(m_current)))
+ shiftLineTerminator();
+ else if (m_current == 'x') {
+ shift();
+ if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
+ m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
+ return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
+ }
+ T prev = m_current;
+ shift();
+ if (shouldBuildStrings)
+ record8(convertHex(prev, m_current));
+ shift();
+ } else {
+ setOffset(startingOffset, startingLineStartOffset);
+ setLineNumber(startingLineNumber);
+ m_buffer8.shrink(0);
+ return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
+ }
+ stringStart = currentSourcePtr();
+ continue;
+ }
+
+ if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
+ setOffset(startingOffset, startingLineStartOffset);
+ setLineNumber(startingLineNumber);
+ m_buffer8.shrink(0);
+ return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
+ }
+
+ shift();
+ }
+
+ if (currentSourcePtr() != stringStart && shouldBuildStrings)
+ append8(stringStart, currentSourcePtr() - stringStart);
+ if (shouldBuildStrings) {
+ tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
+ m_buffer8.shrink(0);
+ } else
+ tokenData->ident = 0;
+
+ return StringParsedSuccessfully;
+}
+
+template <typename T>
+template <bool shouldBuildStrings> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(EscapeParseMode escapeParseMode, bool strictMode, T stringQuoteCharacter) -> StringParseResult
+{
+ if (m_current == 'x') {
+ shift();
+ if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
+ m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
+ return StringCannotBeParsed;
+ }
+ T prev = m_current;
+ shift();
+ if (shouldBuildStrings)
+ record16(convertHex(prev, m_current));
+ shift();
+ return StringParsedSuccessfully;
+ }
+
+ if (m_current == 'u') {
+ shift();
+
+ if (escapeParseMode == EscapeParseMode::String && m_current == stringQuoteCharacter) {
+ if (shouldBuildStrings)
+ record16('u');
+ return StringParsedSuccessfully;
+ }
+
+ auto character = parseUnicodeEscape();
+ if (character.isValid()) {
+ if (shouldBuildStrings)
+ recordUnicodeCodePoint(character.value());
+ return StringParsedSuccessfully;
+ }
+
+ m_lexErrorMessage = ASCIILiteral("\\u can only be followed by a Unicode character sequence");
+ return character.isIncomplete() ? StringUnterminated : StringCannotBeParsed;
+ }
+
+ if (strictMode) {
+ if (isASCIIDigit(m_current)) {
+ // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
+ int character1 = m_current;
+ shift();
+ if (character1 != '0' || isASCIIDigit(m_current)) {
+ m_lexErrorMessage = ASCIILiteral("The only valid numeric escape in strict mode is '\\0'");
+ return StringCannotBeParsed;
+ }
+ if (shouldBuildStrings)
+ record16(0);
+ return StringParsedSuccessfully;
+ }
+ } else {
+ if (isASCIIOctalDigit(m_current)) {
+ // Octal character sequences
+ T character1 = m_current;
+ shift();
+ if (isASCIIOctalDigit(m_current)) {
+ // Two octal characters
+ T character2 = m_current;
+ shift();
+ if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
+ if (shouldBuildStrings)
+ record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
+ shift();
+ } else {
+ if (shouldBuildStrings)
+ record16((character1 - '0') * 8 + character2 - '0');
+ }
+ } else {
+ if (shouldBuildStrings)
+ record16(character1 - '0');
+ }
+ return StringParsedSuccessfully;
+ }
+ }
+
+ if (!atEnd()) {
+ if (shouldBuildStrings)
+ record16(m_current);
+ shift();
+ return StringParsedSuccessfully;
+ }
+
+ m_lexErrorMessage = ASCIILiteral("Unterminated string constant");
+ return StringUnterminated;
+}
+
+template <typename T>
+template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
+{
+ T stringQuoteCharacter = m_current;
+ shift();
+
+ const T* stringStart = currentSourcePtr();
+
+ while (m_current != stringQuoteCharacter) {
+ if (UNLIKELY(m_current == '\\')) {
+ if (stringStart != currentSourcePtr() && shouldBuildStrings)
+ append16(stringStart, currentSourcePtr() - stringStart);
+ shift();
+
+ LChar escape = singleEscape(m_current);
+
+ // Most common escape sequences first
+ if (escape) {
+ if (shouldBuildStrings)
+ record16(escape);
+ shift();
+ } else if (UNLIKELY(isLineTerminator(m_current)))
+ shiftLineTerminator();
+ else {
+ StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::String, strictMode, stringQuoteCharacter);
+ if (result != StringParsedSuccessfully)
+ return result;
+ }
+
+ stringStart = currentSourcePtr();
+ continue;
+ }
+ // Fast check for characters that require special handling.
+ // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
+ // as possible, and lets through all common ASCII characters.
+ if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
+ // New-line or end of input is not allowed
+ if (atEnd() || isLineTerminator(m_current)) {
+ m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
+ return atEnd() ? StringUnterminated : StringCannotBeParsed;
+ }
+ // Anything else is just a normal character
+ }
+ shift();
+ }
+
+ if (currentSourcePtr() != stringStart && shouldBuildStrings)
+ append16(stringStart, currentSourcePtr() - stringStart);
+ if (shouldBuildStrings)
+ tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
else
- shift1();
+ tokenData->ident = 0;
- ++m_lineNumber;
+ m_buffer16.shrink(0);
+ return StringParsedSuccessfully;
}
-ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
+#if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
+// While the lexer accepts <LF><CR> (not <CR><LF>) sequence
+// as one line terminator and increments one line number,
+// TemplateLiteral considers it as two line terminators <LF> and <CR>.
+//
+// TemplateLiteral normalizes line terminators as follows.
+//
+// <LF> => <LF>
+// <CR> => <LF>
+// <CR><LF> => <LF>
+// <\u2028> => <\u2028>
+// <\u2029> => <\u2029>
+//
+// So, <LF><CR> should be normalized to <LF><LF>.
+// However, the lexer should increment the line number only once for <LF><CR>.
+//
+// To achieve this, LineNumberAdder holds the current status of line terminator sequence.
+// When TemplateLiteral lexer encounters a line terminator, it notifies to LineNumberAdder.
+// LineNumberAdder maintains the status and increments the line number when it's necessary.
+// For example, LineNumberAdder increments the line number only once for <LF><CR> and <CR><LF>.
+template<typename CharacterType>
+class LineNumberAdder {
+public:
+ LineNumberAdder(int& lineNumber)
+ : m_lineNumber(lineNumber)
+ {
+ }
+
+ void clear()
+ {
+ m_previous = 0;
+ }
+
+ void add(CharacterType character)
+ {
+ ASSERT(Lexer<CharacterType>::isLineTerminator(character));
+ if ((character + m_previous) == ('\n' + '\r'))
+ m_previous = 0;
+ else {
+ ++m_lineNumber;
+ m_previous = character;
+ }
+ }
+
+private:
+ int& m_lineNumber;
+ CharacterType m_previous { 0 };
+};
+
+template <typename T>
+template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
{
- return &m_arena->makeIdentifier(m_globalData, characters, length);
+ const T* stringStart = currentSourcePtr();
+ const T* rawStringStart = currentSourcePtr();
+
+ LineNumberAdder<T> lineNumberAdder(m_lineNumber);
+
+ while (m_current != '`') {
+ if (UNLIKELY(m_current == '\\')) {
+ lineNumberAdder.clear();
+ if (stringStart != currentSourcePtr() && shouldBuildStrings)
+ append16(stringStart, currentSourcePtr() - stringStart);
+ shift();
+
+ LChar escape = singleEscape(m_current);
+
+ // Most common escape sequences first.
+ if (escape) {
+ if (shouldBuildStrings)
+ record16(escape);
+ shift();
+ } else if (UNLIKELY(isLineTerminator(m_current))) {
+ if (m_current == '\r') {
+ lineNumberAdder.add(m_current);
+ shift();
+ if (m_current == '\n') {
+ lineNumberAdder.add(m_current);
+ shift();
+ }
+ } else {
+ lineNumberAdder.add(m_current);
+ shift();
+ }
+ } else {
+ bool strictMode = true;
+ StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::Template, strictMode, '`');
+ if (result != StringParsedSuccessfully)
+ return result;
+ }
+
+ stringStart = currentSourcePtr();
+ continue;
+ }
+
+ if (m_current == '$' && peek(1) == '{')
+ break;
+
+ // Fast check for characters that require special handling.
+ // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
+ // as possible, and lets through all common ASCII characters.
+ if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
+ // End of input is not allowed.
+ // Unlike String, line terminator is allowed.
+ if (atEnd()) {
+ m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
+ return atEnd() ? StringUnterminated : StringCannotBeParsed;
+ }
+
+ if (isLineTerminator(m_current)) {
+ if (m_current == '\r') {
+ // Normalize <CR>, <CR><LF> to <LF>.
+ if (shouldBuildStrings) {
+ if (stringStart != currentSourcePtr())
+ append16(stringStart, currentSourcePtr() - stringStart);
+ if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
+ m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
+
+ record16('\n');
+ if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
+ m_bufferForRawTemplateString16.append('\n');
+ }
+ lineNumberAdder.add(m_current);
+ shift();
+ if (m_current == '\n') {
+ lineNumberAdder.add(m_current);
+ shift();
+ }
+ stringStart = currentSourcePtr();
+ rawStringStart = currentSourcePtr();
+ } else {
+ lineNumberAdder.add(m_current);
+ shift();
+ }
+ continue;
+ }
+ // Anything else is just a normal character
+ }
+
+ lineNumberAdder.clear();
+ shift();
+ }
+
+ bool isTail = m_current == '`';
+
+ if (shouldBuildStrings) {
+ if (currentSourcePtr() != stringStart)
+ append16(stringStart, currentSourcePtr() - stringStart);
+ if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
+ m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
+ }
+
+ if (shouldBuildStrings) {
+ tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+ // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
+ if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
+ tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
+ else
+ tokenData->raw = makeEmptyIdentifier();
+ } else {
+ tokenData->cooked = makeEmptyIdentifier();
+ tokenData->raw = makeEmptyIdentifier();
+ }
+ tokenData->isTail = isTail;
+
+ m_buffer16.shrink(0);
+ m_bufferForRawTemplateString16.shrink(0);
+
+ if (isTail) {
+ // Skip `
+ shift();
+ } else {
+ // Skip $ and {
+ shift();
+ shift();
+ }
+
+ return StringParsedSuccessfully;
}
+#endif
-inline bool Lexer::lastTokenWasRestrKeyword() const
+template <typename T>
+ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
{
- return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
+ // Optimization: most hexadecimal values fit into 4 bytes.
+ uint32_t hexValue = 0;
+ int maximumDigits = 7;
+
+ do {
+ hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
+ shift();
+ --maximumDigits;
+ } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
+
+ if (maximumDigits >= 0) {
+ returnValue = hexValue;
+ return;
+ }
+
+ // No more place in the hexValue buffer.
+ // The values are shifted out and placed into the m_buffer8 vector.
+ for (int i = 0; i < 8; ++i) {
+ int digit = hexValue >> 28;
+ if (digit < 10)
+ record8(digit + '0');
+ else
+ record8(digit - 10 + 'a');
+ hexValue <<= 4;
+ }
+
+ while (isASCIIHexDigit(m_current)) {
+ record8(m_current);
+ shift();
+ }
+
+ returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
}
-static NEVER_INLINE bool isNonASCIIIdentStart(int c)
+template <typename T>
+ALWAYS_INLINE bool Lexer<T>::parseBinary(double& returnValue)
{
- return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
+ // Optimization: most binary values fit into 4 bytes.
+ uint32_t binaryValue = 0;
+ const unsigned maximumDigits = 32;
+ int digit = maximumDigits - 1;
+ // Temporary buffer for the digits. Makes easier
+ // to reconstruct the input characters when needed.
+ LChar digits[maximumDigits];
+
+ do {
+ binaryValue = (binaryValue << 1) + (m_current - '0');
+ digits[digit] = m_current;
+ shift();
+ --digit;
+ } while (isASCIIBinaryDigit(m_current) && digit >= 0);
+
+ if (!isASCIIDigit(m_current) && digit >= 0) {
+ returnValue = binaryValue;
+ return true;
+ }
+
+ for (int i = maximumDigits - 1; i > digit; --i)
+ record8(digits[i]);
+
+ while (isASCIIBinaryDigit(m_current)) {
+ record8(m_current);
+ shift();
+ }
+
+ if (isASCIIDigit(m_current))
+ return false;
+
+ returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2);
+ return true;
}
-static inline bool isIdentStart(int c)
+template <typename T>
+ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
{
- return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
+ // Optimization: most octal values fit into 4 bytes.
+ uint32_t octalValue = 0;
+ const unsigned maximumDigits = 10;
+ int digit = maximumDigits - 1;
+ // Temporary buffer for the digits. Makes easier
+ // to reconstruct the input characters when needed.
+ LChar digits[maximumDigits];
+
+ do {
+ octalValue = octalValue * 8 + (m_current - '0');
+ digits[digit] = m_current;
+ shift();
+ --digit;
+ } while (isASCIIOctalDigit(m_current) && digit >= 0);
+
+ if (!isASCIIDigit(m_current) && digit >= 0) {
+ returnValue = octalValue;
+ return true;
+ }
+
+ for (int i = maximumDigits - 1; i > digit; --i)
+ record8(digits[i]);
+
+ while (isASCIIOctalDigit(m_current)) {
+ record8(m_current);
+ shift();
+ }
+
+ if (isASCIIDigit(m_current))
+ return false;
+
+ returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
+ return true;
}
-static NEVER_INLINE bool isNonASCIIIdentPart(int c)
+template <typename T>
+ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
{
- return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
- | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
+ // Optimization: most decimal values fit into 4 bytes.
+ uint32_t decimalValue = 0;
+
+ // Since parseOctal may be executed before parseDecimal,
+ // the m_buffer8 may hold ascii digits.
+ if (!m_buffer8.size()) {
+ const unsigned maximumDigits = 10;
+ int digit = maximumDigits - 1;
+ // Temporary buffer for the digits. Makes easier
+ // to reconstruct the input characters when needed.
+ LChar digits[maximumDigits];
+
+ do {
+ decimalValue = decimalValue * 10 + (m_current - '0');
+ digits[digit] = m_current;
+ shift();
+ --digit;
+ } while (isASCIIDigit(m_current) && digit >= 0);
+
+ if (digit >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
+ returnValue = decimalValue;
+ return true;
+ }
+
+ for (int i = maximumDigits - 1; i > digit; --i)
+ record8(digits[i]);
+ }
+
+ while (isASCIIDigit(m_current)) {
+ record8(m_current);
+ shift();
+ }
+
+ return false;
}
-static inline bool isIdentPart(int c)
+template <typename T>
+ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
{
- return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
+ record8('.');
+ while (isASCIIDigit(m_current)) {
+ record8(m_current);
+ shift();
+ }
}
-static inline int singleEscape(int c)
+template <typename T>
+ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
{
- switch (c) {
- case 'b':
- return 0x08;
- case 't':
- return 0x09;
- case 'n':
- return 0x0A;
- case 'v':
- return 0x0B;
- case 'f':
- return 0x0C;
- case 'r':
- return 0x0D;
- default:
- return c;
+ record8('e');
+ shift();
+ if (m_current == '+' || m_current == '-') {
+ record8(m_current);
+ shift();
}
+
+ if (!isASCIIDigit(m_current))
+ return false;
+
+ do {
+ record8(m_current);
+ shift();
+ } while (isASCIIDigit(m_current));
+ return true;
}
-inline void Lexer::record8(int c)
+template <typename T>
+ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
{
- ASSERT(c >= 0);
- ASSERT(c <= 0xFF);
- m_buffer8.append(static_cast<char>(c));
+ while (true) {
+ while (UNLIKELY(m_current == '*')) {
+ shift();
+ if (m_current == '/') {
+ shift();
+ return true;
+ }
+ }
+
+ if (atEnd())
+ return false;
+
+ if (isLineTerminator(m_current)) {
+ shiftLineTerminator();
+ m_terminator = true;
+ } else
+ shift();
+ }
}
-inline void Lexer::record16(UChar c)
+template <typename T>
+bool Lexer<T>::nextTokenIsColon()
{
- m_buffer16.append(c);
+ const T* code = m_code;
+ while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
+ code++;
+
+ return code < m_codeEnd && *code == ':';
}
-inline void Lexer::record16(int c)
+#if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
+template <typename T>
+void Lexer<T>::setTokenPosition(JSToken* tokenRecord)
{
- ASSERT(c >= 0);
- ASSERT(c <= USHRT_MAX);
- record16(UChar(static_cast<unsigned short>(c)));
+ JSTokenData* tokenData = &tokenRecord->m_data;
+ tokenData->line = lineNumber();
+ tokenData->offset = currentOffset();
+ tokenData->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenData->offset >= tokenData->lineStartOffset);
}
+#endif
-int Lexer::lex(void* p1, void* p2)
+template <typename T>
+JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
{
+ JSTokenData* tokenData = &tokenRecord->m_data;
+ JSTokenLocation* tokenLocation = &tokenRecord->m_location;
+ m_lastTockenLocation = JSTokenLocation(tokenRecord->m_location);
+
ASSERT(!m_error);
ASSERT(m_buffer8.isEmpty());
ASSERT(m_buffer16.isEmpty());
- YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
- YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
- int token = 0;
+ JSTokenType token = ERRORTOK;
m_terminator = false;
start:
while (isWhiteSpace(m_current))
- shift1();
-
- int startOffset = currentOffset();
-
- if (m_current == -1) {
- if (!m_terminator && !m_delimited && !m_isReparsing) {
- // automatic semicolon insertion if program incomplete
- token = ';';
- goto doneSemicolon;
- }
- return 0;
- }
-
- m_delimited = false;
- switch (m_current) {
- case '>':
- if (m_next1 == '>' && m_next2 == '>') {
- if (m_next3 == '=') {
- shift4();
+ shift();
+
+ if (atEnd())
+ return EOFTOK;
+
+ tokenLocation->startOffset = currentOffset();
+ ASSERT(currentOffset() >= currentLineStartOffset());
+ tokenRecord->m_startPosition = currentPosition();
+
+ CharacterType type;
+ if (LIKELY(isLatin1(m_current)))
+ type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
+ else if (isNonLatin1IdentStart(m_current))
+ type = CharacterIdentifierStart;
+ else if (isLineTerminator(m_current))
+ type = CharacterLineTerminator;
+ else
+ type = CharacterInvalid;
+
+ switch (type) {
+ case CharacterGreater:
+ shift();
+ if (m_current == '>') {
+ shift();
+ if (m_current == '>') {
+ shift();
+ if (m_current == '=') {
+ shift();
token = URSHIFTEQUAL;
break;
}
- shift3();
token = URSHIFT;
break;
}
- if (m_next1 == '>') {
- if (m_next2 == '=') {
- shift3();
- token = RSHIFTEQUAL;
- break;
- }
- shift2();
- token = RSHIFT;
- break;
- }
- if (m_next1 == '=') {
- shift2();
- token = GE;
+ if (m_current == '=') {
+ shift();
+ token = RSHIFTEQUAL;
break;
}
- shift1();
- token = '>';
+ token = RSHIFT;
break;
- case '=':
- if (m_next1 == '=') {
- if (m_next2 == '=') {
- shift3();
- token = STREQ;
- break;
- }
- shift2();
- token = EQEQ;
- break;
- }
- shift1();
- token = '=';
+ }
+ if (m_current == '=') {
+ shift();
+ token = GE;
break;
- case '!':
- if (m_next1 == '=') {
- if (m_next2 == '=') {
- shift3();
- token = STRNEQ;
- break;
- }
- shift2();
- token = NE;
- break;
- }
- shift1();
- token = '!';
+ }
+ token = GT;
+ break;
+ case CharacterEqual: {
+#if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
+ if (peek(1) == '>') {
+ token = ARROWFUNCTION;
+ tokenData->line = lineNumber();
+ tokenData->offset = currentOffset();
+ tokenData->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenData->offset >= tokenData->lineStartOffset);
+ shift();
+ shift();
break;
- case '<':
- if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
- // <!-- marks the beginning of a line comment (for www usage)
- shift4();
- goto inSingleLineComment;
- }
- if (m_next1 == '<') {
- if (m_next2 == '=') {
- shift3();
- token = LSHIFTEQUAL;
- break;
- }
- shift2();
- token = LSHIFT;
- break;
- }
- if (m_next1 == '=') {
- shift2();
- token = LE;
+ }
+#endif
+ shift();
+ if (m_current == '=') {
+ shift();
+ if (m_current == '=') {
+ shift();
+ token = STREQ;
break;
}
- shift1();
- token = '<';
+ token = EQEQ;
break;
- case '+':
- if (m_next1 == '+') {
- shift2();
- if (m_terminator) {
- token = AUTOPLUSPLUS;
- break;
- }
- token = PLUSPLUS;
- break;
- }
- if (m_next1 == '=') {
- shift2();
- token = PLUSEQUAL;
+ }
+ token = EQUAL;
+ break;
+ }
+ case CharacterLess:
+ shift();
+ if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
+ // <!-- marks the beginning of a line comment (for www usage)
+ goto inSingleLineComment;
+ }
+ if (m_current == '<') {
+ shift();
+ if (m_current == '=') {
+ shift();
+ token = LSHIFTEQUAL;
break;
}
- shift1();
- token = '+';
+ token = LSHIFT;
break;
- case '-':
- if (m_next1 == '-') {
- if (m_atLineStart && m_next2 == '>') {
- shift3();
- goto inSingleLineComment;
- }
- shift2();
- if (m_terminator) {
- token = AUTOMINUSMINUS;
- break;
- }
- token = MINUSMINUS;
- break;
- }
- if (m_next1 == '=') {
- shift2();
- token = MINUSEQUAL;
- break;
- }
- shift1();
- token = '-';
+ }
+ if (m_current == '=') {
+ shift();
+ token = LE;
break;
- case '*':
- if (m_next1 == '=') {
- shift2();
- token = MULTEQUAL;
+ }
+ token = LT;
+ break;
+ case CharacterExclamationMark:
+ shift();
+ if (m_current == '=') {
+ shift();
+ if (m_current == '=') {
+ shift();
+ token = STRNEQ;
break;
}
- shift1();
- token = '*';
+ token = NE;
+ break;
+ }
+ token = EXCLAMATION;
+ break;
+ case CharacterAdd:
+ shift();
+ if (m_current == '+') {
+ shift();
+ token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
break;
- case '/':
- if (m_next1 == '/') {
- shift2();
+ }
+ if (m_current == '=') {
+ shift();
+ token = PLUSEQUAL;
+ break;
+ }
+ token = PLUS;
+ break;
+ case CharacterSub:
+ shift();
+ if (m_current == '-') {
+ shift();
+ if (m_atLineStart && m_current == '>') {
+ shift();
goto inSingleLineComment;
}
- if (m_next1 == '*')
- goto inMultiLineComment;
- if (m_next1 == '=') {
- shift2();
- token = DIVEQUAL;
- break;
- }
- shift1();
- token = '/';
+ token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
break;
- case '&':
- if (m_next1 == '&') {
- shift2();
- token = AND;
- break;
- }
- if (m_next1 == '=') {
- shift2();
- token = ANDEQUAL;
- break;
- }
- shift1();
- token = '&';
+ }
+ if (m_current == '=') {
+ shift();
+ token = MINUSEQUAL;
break;
- case '^':
- if (m_next1 == '=') {
- shift2();
- token = XOREQUAL;
- break;
- }
- shift1();
- token = '^';
+ }
+ token = MINUS;
+ break;
+ case CharacterMultiply:
+ shift();
+ if (m_current == '=') {
+ shift();
+ token = MULTEQUAL;
break;
- case '%':
- if (m_next1 == '=') {
- shift2();
- token = MODEQUAL;
- break;
- }
- shift1();
- token = '%';
+ }
+ token = TIMES;
+ break;
+ case CharacterSlash:
+ shift();
+ if (m_current == '/') {
+ shift();
+ goto inSingleLineComment;
+ }
+ if (m_current == '*') {
+ shift();
+ if (parseMultilineComment())
+ goto start;
+ m_lexErrorMessage = ASCIILiteral("Multiline comment was not closed properly");
+ token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
+ goto returnError;
+ }
+ if (m_current == '=') {
+ shift();
+ token = DIVEQUAL;
break;
- case '|':
- if (m_next1 == '=') {
- shift2();
- token = OREQUAL;
- break;
- }
- if (m_next1 == '|') {
- shift2();
- token = OR;
- break;
- }
- shift1();
- token = '|';
+ }
+ token = DIVIDE;
+ break;
+ case CharacterAnd:
+ shift();
+ if (m_current == '&') {
+ shift();
+ token = AND;
break;
- case '.':
- if (isASCIIDigit(m_next1)) {
- record8('.');
- shift1();
- goto inNumberAfterDecimalPoint;
- }
- token = '.';
- shift1();
+ }
+ if (m_current == '=') {
+ shift();
+ token = ANDEQUAL;
break;
- case ',':
- case '~':
- case '?':
- case ':':
- case '(':
- case ')':
- case '[':
- case ']':
- token = m_current;
- shift1();
+ }
+ token = BITAND;
+ break;
+ case CharacterXor:
+ shift();
+ if (m_current == '=') {
+ shift();
+ token = XOREQUAL;
break;
- case ';':
- shift1();
- m_delimited = true;
- token = ';';
+ }
+ token = BITXOR;
+ break;
+ case CharacterModulo:
+ shift();
+ if (m_current == '=') {
+ shift();
+ token = MODEQUAL;
break;
- case '{':
- lvalp->intValue = currentOffset();
- shift1();
- token = OPENBRACE;
+ }
+ token = MOD;
+ break;
+ case CharacterOr:
+ shift();
+ if (m_current == '=') {
+ shift();
+ token = OREQUAL;
break;
- case '}':
- lvalp->intValue = currentOffset();
- shift1();
- m_delimited = true;
- token = CLOSEBRACE;
+ }
+ if (m_current == '|') {
+ shift();
+ token = OR;
break;
- case '\\':
- goto startIdentifierWithBackslash;
- case '0':
- goto startNumberWithZeroDigit;
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- goto startNumber;
- case '"':
- case '\'':
- goto startString;
- default:
- if (isIdentStart(m_current))
- goto startIdentifierOrKeyword;
- if (isLineTerminator(m_current)) {
- shiftLineTerminator();
- m_atLineStart = true;
- m_terminator = true;
- if (lastTokenWasRestrKeyword()) {
- token = ';';
- goto doneSemicolon;
- }
- goto start;
+ }
+ token = BITOR;
+ break;
+ case CharacterOpenParen:
+ token = OPENPAREN;
+ shift();
+ break;
+ case CharacterCloseParen:
+ token = CLOSEPAREN;
+ shift();
+ break;
+ case CharacterOpenBracket:
+ token = OPENBRACKET;
+ shift();
+ break;
+ case CharacterCloseBracket:
+ token = CLOSEBRACKET;
+ shift();
+ break;
+ case CharacterComma:
+ token = COMMA;
+ shift();
+ break;
+ case CharacterColon:
+ token = COLON;
+ shift();
+ break;
+ case CharacterQuestion:
+ token = QUESTION;
+ shift();
+ break;
+ case CharacterTilde:
+ token = TILDE;
+ shift();
+ break;
+ case CharacterSemicolon:
+ shift();
+ token = SEMICOLON;
+ break;
+ case CharacterOpenBrace:
+ tokenData->line = lineNumber();
+ tokenData->offset = currentOffset();
+ tokenData->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenData->offset >= tokenData->lineStartOffset);
+ shift();
+ token = OPENBRACE;
+ break;
+ case CharacterCloseBrace:
+ tokenData->line = lineNumber();
+ tokenData->offset = currentOffset();
+ tokenData->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenData->offset >= tokenData->lineStartOffset);
+ shift();
+ token = CLOSEBRACE;
+ break;
+ case CharacterDot:
+ shift();
+ if (!isASCIIDigit(m_current)) {
+ if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
+ shift();
+ shift();
+ token = DOTDOTDOT;
+ break;
+ }
+ token = DOT;
+ break;
+ }
+ goto inNumberAfterDecimalPoint;
+ case CharacterZero:
+ shift();
+ if ((m_current | 0x20) == 'x') {
+ if (!isASCIIHexDigit(peek(1))) {
+ m_lexErrorMessage = ASCIILiteral("No hexadecimal digits after '0x'");
+ token = INVALID_HEX_NUMBER_ERRORTOK;
+ goto returnError;
}
- goto returnError;
- }
- m_atLineStart = false;
- goto returnToken;
+ // Shift out the 'x' prefix.
+ shift();
-startString: {
- int stringQuoteCharacter = m_current;
- shift1();
+ parseHex(tokenData->doubleValue);
+ if (isIdentStart(m_current)) {
+ m_lexErrorMessage = ASCIILiteral("No space between hexadecimal literal and identifier");
+ token = INVALID_HEX_NUMBER_ERRORTOK;
+ goto returnError;
+ }
+ token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
+ m_buffer8.shrink(0);
+ break;
+ }
+ if ((m_current | 0x20) == 'b') {
+ if (!isASCIIBinaryDigit(peek(1))) {
+ m_lexErrorMessage = ASCIILiteral("No binary digits after '0b'");
+ token = INVALID_BINARY_NUMBER_ERRORTOK;
+ goto returnError;
+ }
- const UChar* stringStart = currentCharacter();
- while (m_current != stringQuoteCharacter) {
- // Fast check for characters that require special handling.
- // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
- // as possible, and lets through all common ASCII characters.
- if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
- m_buffer16.append(stringStart, currentCharacter() - stringStart);
- goto inString;
+ // Shift out the 'b' prefix.
+ shift();
+
+ parseBinary(tokenData->doubleValue);
+ if (isIdentStart(m_current)) {
+ m_lexErrorMessage = ASCIILiteral("No space between binary literal and identifier");
+ token = INVALID_BINARY_NUMBER_ERRORTOK;
+ goto returnError;
+ }
+ token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
+ m_buffer8.shrink(0);
+ break;
}
- shift1();
- }
- lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
- shift1();
- m_atLineStart = false;
- m_delimited = false;
- token = STRING;
- goto returnToken;
-inString:
- while (m_current != stringQuoteCharacter) {
- if (m_current == '\\')
- goto inStringEscapeSequence;
- if (UNLIKELY(isLineTerminator(m_current)))
- goto returnError;
- if (UNLIKELY(m_current == -1))
+ if ((m_current | 0x20) == 'o') {
+ if (!isASCIIOctalDigit(peek(1))) {
+ m_lexErrorMessage = ASCIILiteral("No octal digits after '0o'");
+ token = INVALID_OCTAL_NUMBER_ERRORTOK;
+ goto returnError;
+ }
+
+ // Shift out the 'o' prefix.
+ shift();
+
+ parseOctal(tokenData->doubleValue);
+ if (isIdentStart(m_current)) {
+ m_lexErrorMessage = ASCIILiteral("No space between octal literal and identifier");
+ token = INVALID_OCTAL_NUMBER_ERRORTOK;
+ goto returnError;
+ }
+ token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
+ m_buffer8.shrink(0);
+ break;
+ }
+
+ record8('0');
+ if (strictMode && isASCIIDigit(m_current)) {
+ m_lexErrorMessage = ASCIILiteral("Decimal integer literals with a leading zero are forbidden in strict mode");
+ token = INVALID_OCTAL_NUMBER_ERRORTOK;
goto returnError;
- record16(m_current);
- shift1();
- }
- goto doneString;
+ }
+ if (isASCIIOctalDigit(m_current)) {
+ if (parseOctal(tokenData->doubleValue)) {
+ token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
+ }
+ }
+ FALLTHROUGH;
+ case CharacterNumber:
+ if (LIKELY(token != INTEGER && token != DOUBLE)) {
+ if (!parseDecimal(tokenData->doubleValue)) {
+ token = INTEGER;
+ if (m_current == '.') {
+ shift();
+inNumberAfterDecimalPoint:
+ parseNumberAfterDecimalPoint();
+ token = DOUBLE;
+ }
+ if ((m_current | 0x20) == 'e') {
+ if (!parseNumberAfterExponentIndicator()) {
+ m_lexErrorMessage = ASCIILiteral("Non-number found after exponent indicator");
+ token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
+ goto returnError;
+ }
+ }
+ size_t parsedLength;
+ tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
+ if (token == INTEGER)
+ token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
+ } else
+ token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
+ }
-inStringEscapeSequence:
- shift1();
- if (m_current == 'x') {
- shift1();
- if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
- record16(convertHex(m_current, m_next1));
- shift2();
- goto inString;
+ // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
+ if (UNLIKELY(isIdentStart(m_current))) {
+ m_lexErrorMessage = ASCIILiteral("At least one digit must occur after a decimal point");
+ token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
+ goto returnError;
}
- record16('x');
- if (m_current == stringQuoteCharacter)
- goto doneString;
- goto inString;
- }
- if (m_current == 'u') {
- shift1();
- if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
- record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
- shift4();
- goto inString;
+ m_buffer8.shrink(0);
+ break;
+ case CharacterQuote: {
+ StringParseResult result = StringCannotBeParsed;
+ if (lexerFlags & LexerFlagsDontBuildStrings)
+ result = parseString<false>(tokenData, strictMode);
+ else
+ result = parseString<true>(tokenData, strictMode);
+
+ if (UNLIKELY(result != StringParsedSuccessfully)) {
+ token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
+ goto returnError;
}
- if (m_current == stringQuoteCharacter) {
- record16('u');
- goto doneString;
+ shift();
+ token = STRING;
+ break;
}
- goto returnError;
- }
- if (isASCIIOctalDigit(m_current)) {
- if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
- record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
- shift3();
- goto inString;
+#if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
+ case CharacterBackQuote: {
+ // Skip backquote.
+ shift();
+ StringParseResult result = StringCannotBeParsed;
+ if (lexerFlags & LexerFlagsDontBuildStrings)
+ result = parseTemplateLiteral<false>(tokenData, RawStringsBuildMode::BuildRawStrings);
+ else
+ result = parseTemplateLiteral<true>(tokenData, RawStringsBuildMode::BuildRawStrings);
+
+ if (UNLIKELY(result != StringParsedSuccessfully)) {
+ token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
+ goto returnError;
}
- if (isASCIIOctalDigit(m_next1)) {
- record16((m_current - '0') * 8 + m_next1 - '0');
- shift2();
- goto inString;
+ token = TEMPLATE;
+ break;
}
- record16(m_current - '0');
- shift1();
- goto inString;
- }
- if (isLineTerminator(m_current)) {
+#endif
+ case CharacterIdentifierStart:
+ ASSERT(isIdentStart(m_current));
+ FALLTHROUGH;
+ case CharacterBackSlash:
+ parseIdent:
+ if (lexerFlags & LexexFlagsDontBuildKeywords)
+ token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
+ else
+ token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
+ break;
+ case CharacterLineTerminator:
+ ASSERT(isLineTerminator(m_current));
shiftLineTerminator();
- goto inString;
- }
- if (m_current == -1)
+ m_atLineStart = true;
+ m_terminator = true;
+ m_lineStart = m_code;
+ goto start;
+ case CharacterPrivateIdentifierStart:
+ if (m_parsingBuiltinFunction)
+ goto parseIdent;
+
+ FALLTHROUGH;
+ case CharacterInvalid:
+ m_lexErrorMessage = invalidCharacterMessage();
+ token = ERRORTOK;
goto returnError;
- record16(singleEscape(m_current));
- shift1();
- goto inString;
-}
-
-startIdentifierWithBackslash:
- shift1();
- if (UNLIKELY(m_current != 'u'))
- goto returnError;
- shift1();
- if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
- goto returnError;
- token = convertUnicode(m_current, m_next1, m_next2, m_next3);
- if (UNLIKELY(!isIdentStart(token)))
+ default:
+ RELEASE_ASSERT_NOT_REACHED();
+ m_lexErrorMessage = ASCIILiteral("Internal Error");
+ token = ERRORTOK;
goto returnError;
- goto inIdentifierAfterCharacterCheck;
-
-startIdentifierOrKeyword: {
- const UChar* identifierStart = currentCharacter();
- shift1();
- while (isIdentPart(m_current))
- shift1();
- if (LIKELY(m_current != '\\')) {
- lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
- goto doneIdentifierOrKeyword;
}
- m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
-}
-
- do {
- shift1();
- if (UNLIKELY(m_current != 'u'))
- goto returnError;
- shift1();
- if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
- goto returnError;
- token = convertUnicode(m_current, m_next1, m_next2, m_next3);
- if (UNLIKELY(!isIdentPart(token)))
- goto returnError;
-inIdentifierAfterCharacterCheck:
- record16(token);
- shift4();
- while (isIdentPart(m_current)) {
- record16(m_current);
- shift1();
- }
- } while (UNLIKELY(m_current == '\\'));
- goto doneIdentifier;
+ m_atLineStart = false;
+ goto returnToken;
inSingleLineComment:
while (!isLineTerminator(m_current)) {
- if (UNLIKELY(m_current == -1))
- return 0;
- shift1();
+ if (atEnd())
+ return EOFTOK;
+ shift();
}
shiftLineTerminator();
m_atLineStart = true;
m_terminator = true;
- if (lastTokenWasRestrKeyword())
- goto doneSemicolon;
- goto start;
-
-inMultiLineComment:
- shift2();
- while (m_current != '*' || m_next1 != '/') {
- if (isLineTerminator(m_current))
- shiftLineTerminator();
- else {
- shift1();
- if (UNLIKELY(m_current == -1))
- goto returnError;
- }
- }
- shift2();
- m_atLineStart = false;
- goto start;
-
-startNumberWithZeroDigit:
- shift1();
- if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
- shift1();
- goto inHex;
- }
- if (m_current == '.') {
- record8('0');
- record8('.');
- shift1();
- goto inNumberAfterDecimalPoint;
- }
- if ((m_current | 0x20) == 'e') {
- record8('0');
- record8('e');
- shift1();
- goto inExponentIndicator;
- }
- if (isASCIIOctalDigit(m_current))
- goto inOctal;
- if (isASCIIDigit(m_current))
- goto startNumber;
- lvalp->doubleValue = 0;
- goto doneNumeric;
-
-inNumberAfterDecimalPoint:
- while (isASCIIDigit(m_current)) {
- record8(m_current);
- shift1();
- }
- if ((m_current | 0x20) == 'e') {
- record8('e');
- shift1();
- goto inExponentIndicator;
- }
- goto doneNumber;
-
-inExponentIndicator:
- if (m_current == '+' || m_current == '-') {
- record8(m_current);
- shift1();
- }
- if (!isASCIIDigit(m_current))
- goto returnError;
- do {
- record8(m_current);
- shift1();
- } while (isASCIIDigit(m_current));
- goto doneNumber;
-
-inOctal: {
- do {
- record8(m_current);
- shift1();
- } while (isASCIIOctalDigit(m_current));
- if (isASCIIDigit(m_current))
- goto startNumber;
-
- double dval = 0;
-
- const char* end = m_buffer8.end();
- for (const char* p = m_buffer8.data(); p < end; ++p) {
- dval *= 8;
- dval += *p - '0';
- }
- if (dval >= mantissaOverflowLowerBound)
- dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
-
- m_buffer8.resize(0);
-
- lvalp->doubleValue = dval;
- goto doneNumeric;
-}
-
-inHex: {
- do {
- record8(m_current);
- shift1();
- } while (isASCIIHexDigit(m_current));
-
- double dval = 0;
-
- const char* end = m_buffer8.end();
- for (const char* p = m_buffer8.data(); p < end; ++p) {
- dval *= 16;
- dval += toASCIIHexValue(*p);
- }
- if (dval >= mantissaOverflowLowerBound)
- dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
-
- m_buffer8.resize(0);
-
- lvalp->doubleValue = dval;
- goto doneNumeric;
-}
-
-startNumber:
- record8(m_current);
- shift1();
- while (isASCIIDigit(m_current)) {
- record8(m_current);
- shift1();
- }
- if (m_current == '.') {
- record8('.');
- shift1();
- goto inNumberAfterDecimalPoint;
- }
- if ((m_current | 0x20) == 'e') {
- record8('e');
- shift1();
- goto inExponentIndicator;
- }
-
- // Fall through into doneNumber.
-
-doneNumber:
- // Null-terminate string for strtod.
- m_buffer8.append('\0');
- lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
- m_buffer8.resize(0);
-
- // Fall through into doneNumeric.
-
-doneNumeric:
- // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
- if (UNLIKELY(isIdentStart(m_current)))
- goto returnError;
-
- m_atLineStart = false;
- m_delimited = false;
- token = NUMBER;
- goto returnToken;
-
-doneSemicolon:
- token = ';';
- m_delimited = true;
- goto returnToken;
-
-doneIdentifier:
- m_atLineStart = false;
- m_delimited = false;
- lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
- m_buffer16.resize(0);
- token = IDENT;
- goto returnToken;
-
-doneIdentifierOrKeyword: {
- m_atLineStart = false;
- m_delimited = false;
- m_buffer16.resize(0);
- const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
- token = entry ? entry->lexerValue() : IDENT;
- goto returnToken;
-}
-
-doneString:
- // Atomize constant strings in case they're later used in property lookup.
- shift1();
- m_atLineStart = false;
- m_delimited = false;
- lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
- m_buffer16.resize(0);
- token = STRING;
+ m_lineStart = m_code;
+ if (!lastTokenWasRestrKeyword())
+ goto start;
+ token = SEMICOLON;
// Fall through into returnToken.
-returnToken: {
- int lineNumber = m_lineNumber;
- llocp->first_line = lineNumber;
- llocp->last_line = lineNumber;
- llocp->first_column = startOffset;
- llocp->last_column = currentOffset();
-
+returnToken:
+ tokenLocation->line = m_lineNumber;
+ tokenLocation->endOffset = currentOffset();
+ tokenLocation->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
+ tokenRecord->m_endPosition = currentPosition();
m_lastToken = token;
return token;
-}
returnError:
m_error = true;
- return -1;
+ tokenLocation->line = m_lineNumber;
+ tokenLocation->endOffset = currentOffset();
+ tokenLocation->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
+ tokenRecord->m_endPosition = currentPosition();
+ RELEASE_ASSERT(token & ErrorTokenFlag);
+ return token;
+}
+
+template <typename T>
+static inline void orCharacter(UChar&, UChar);
+
+template <>
+inline void orCharacter<LChar>(UChar&, UChar) { }
+
+template <>
+inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
+{
+ orAccumulator |= character;
}
-bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
+template <typename T>
+bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
{
ASSERT(m_buffer16.isEmpty());
bool lastWasEscape = false;
bool inBrackets = false;
+ UChar charactersOredTogether = 0;
if (patternPrefix) {
ASSERT(!isLineTerminator(patternPrefix));
}
while (true) {
- int current = m_current;
-
- if (isLineTerminator(current) || current == -1) {
- m_buffer16.resize(0);
+ if (isLineTerminator(m_current) || atEnd()) {
+ m_buffer16.shrink(0);
return false;
}
- shift1();
+ T prev = m_current;
+
+ shift();
- if (current == '/' && !lastWasEscape && !inBrackets)
+ if (prev == '/' && !lastWasEscape && !inBrackets)
break;
- record16(current);
+ record16(prev);
+ orCharacter<T>(charactersOredTogether, prev);
if (lastWasEscape) {
lastWasEscape = false;
continue;
}
- switch (current) {
+ switch (prev) {
case '[':
inBrackets = true;
break;
}
}
- pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
- m_buffer16.resize(0);
+ pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
+
+ m_buffer16.shrink(0);
+ charactersOredTogether = 0;
while (isIdentPart(m_current)) {
record16(m_current);
- shift1();
+ orCharacter<T>(charactersOredTogether, m_current);
+ shift();
}
- flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
- m_buffer16.resize(0);
+ flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
+ m_buffer16.shrink(0);
return true;
}
-bool Lexer::skipRegExp()
+template <typename T>
+bool Lexer<T>::skipRegExp()
{
bool lastWasEscape = false;
bool inBrackets = false;
while (true) {
- int current = m_current;
-
- if (isLineTerminator(current) || current == -1)
+ if (isLineTerminator(m_current) || atEnd())
return false;
- shift1();
+ T prev = m_current;
+
+ shift();
- if (current == '/' && !lastWasEscape && !inBrackets)
+ if (prev == '/' && !lastWasEscape && !inBrackets)
break;
if (lastWasEscape) {
continue;
}
- switch (current) {
+ switch (prev) {
case '[':
inBrackets = true;
break;
}
while (isIdentPart(m_current))
- shift1();
+ shift();
return true;
}
-void Lexer::clear()
+#if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
+template <typename T>
+JSTokenType Lexer<T>::scanTrailingTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
{
- m_arena = 0;
- m_codeWithoutBOMs.clear();
+ JSTokenData* tokenData = &tokenRecord->m_data;
+ JSTokenLocation* tokenLocation = &tokenRecord->m_location;
+ ASSERT(!m_error);
+ ASSERT(m_buffer16.isEmpty());
- Vector<char> newBuffer8;
- newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
- m_buffer8.swap(newBuffer8);
+ // Leading closing brace } is already shifted in the previous token scan.
+ // So in this re-scan phase, shift() is not needed here.
+ StringParseResult result = parseTemplateLiteral<true>(tokenData, rawStringsBuildMode);
+ JSTokenType token = ERRORTOK;
+ if (UNLIKELY(result != StringParsedSuccessfully)) {
+ token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
+ m_error = true;
+ } else {
+ token = TEMPLATE;
+ m_lastToken = token;
+ }
- Vector<UChar> newBuffer16;
- newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
- m_buffer16.swap(newBuffer16);
+ // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
+ m_atLineStart = false;
- m_isReparsing = false;
+ // Adjust current tokenLocation data for TemplateString.
+ tokenLocation->line = m_lineNumber;
+ tokenLocation->endOffset = currentOffset();
+ tokenLocation->lineStartOffset = currentLineStartOffset();
+ ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
+ tokenRecord->m_endPosition = currentPosition();
+ return token;
}
+#endif
-SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
+template <typename T>
+void Lexer<T>::clear()
{
- if (m_codeWithoutBOMs.isEmpty())
- return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
-
- const UChar* data = m_source->provider()->data();
+ m_arena = 0;
- ASSERT(openBrace < closeBrace);
+ Vector<LChar> newBuffer8;
+ m_buffer8.swap(newBuffer8);
- int numBOMsBeforeOpenBrace = 0;
- int numBOMsBetweenBraces = 0;
+ Vector<UChar> newBuffer16;
+ m_buffer16.swap(newBuffer16);
- int i;
- for (i = m_source->startOffset(); i < openBrace; ++i)
- numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
- for (; i < closeBrace; ++i)
- numBOMsBetweenBraces += data[i] == byteOrderMark;
+ Vector<UChar> newBufferForRawTemplateString16;
+ m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
- return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
- closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
+ m_isReparsing = false;
}
+// Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
+template class Lexer<LChar>;
+template class Lexer<UChar>;
+
} // namespace JSC