]> git.saurik.com Git - apple/javascriptcore.git/blobdiff - parser/Lexer.cpp
JavaScriptCore-7600.1.4.15.12.tar.gz
[apple/javascriptcore.git] / parser / Lexer.cpp
index c2880dc681780bdc9123c7205e3d876fa5e07a1f..9ca761012e9c5b072fa6fba334cd598edbd239a0 100644 (file)
@@ -1,7 +1,9 @@
 /*
  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
- *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
+ *  Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
+ *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
+ *  Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
  *
  *  This library is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU Library General Public
 #include "config.h"
 #include "Lexer.h"
 
-#include "JSFunction.h"
+#include "JSFunctionInlines.h"
+
+#include "BuiltinNames.h"
 #include "JSGlobalObjectFunctions.h"
+#include "Identifier.h"
 #include "NodeInfo.h"
 #include "Nodes.h"
-#include "dtoa.h"
+#include "JSCInlines.h"
+#include <wtf/dtoa.h>
 #include <ctype.h>
 #include <limits.h>
 #include <string.h>
-#include <wtf/ASCIICType.h>
 #include <wtf/Assertions.h>
-#include <wtf/unicode/Unicode.h>
 
-using namespace WTF;
-using namespace Unicode;
+#include "KeywordLookup.h"
+#include "Lexer.lut.h"
+#include "Parser.h"
 
-// we can't specify the namespace in yacc's C output, so do it here
-using namespace JSC;
+namespace JSC {
 
-#ifndef KDE_USE_FINAL
-#include "Grammar.h"
-#endif
+Keywords::Keywords(VM& vm) // Stores vm in m_vm and initializes m_keywordTable from JSC::mainTable.
+    : m_vm(vm)
+    , m_keywordTable(JSC::mainTable)
+{
+}
 
-#include "Lookup.h"
-#include "Lexer.lut.h"
+enum CharacterType {
+    // Types for the main switch
+
+    // The first three types are fixed (values 0, 1 and 2), and are also used for identifying
+    // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
+    CharacterIdentifierStart,
+    CharacterZero,
+    CharacterNumber,
+
+    CharacterInvalid,
+    CharacterLineTerminator,
+    CharacterExclamationMark,
+    CharacterOpenParen,
+    CharacterCloseParen,
+    CharacterOpenBracket,
+    CharacterCloseBracket,
+    CharacterComma,
+    CharacterColon,
+    CharacterQuestion,
+    CharacterTilde,
+    CharacterQuote,
+    CharacterDot,
+    CharacterSlash,
+    CharacterBackSlash,
+    CharacterSemicolon,
+    CharacterOpenBrace,
+    CharacterCloseBrace,
+
+    CharacterAdd,
+    CharacterSub,
+    CharacterMultiply,
+    CharacterModulo,
+    CharacterAnd,
+    CharacterXor,
+    CharacterOr,
+    CharacterLess,
+    CharacterGreater,
+    CharacterEqual,
+
+    // Other types (two so far: white space, and '@' as the private identifier prefix)
+    CharacterWhiteSpace,
+    CharacterPrivateIdentifierStart
+};
+
+// Maps each of the 256 Latin-1 code points (used as the array index) to its CharacterType.
+static const unsigned short typesOfLatin1Characters[256] = {
+/*   0 - Null               */ CharacterInvalid,
+/*   1 - Start of Heading   */ CharacterInvalid,
+/*   2 - Start of Text      */ CharacterInvalid,
+/*   3 - End of Text        */ CharacterInvalid,
+/*   4 - End of Transm.     */ CharacterInvalid,
+/*   5 - Enquiry            */ CharacterInvalid,
+/*   6 - Acknowledgment     */ CharacterInvalid,
+/*   7 - Bell               */ CharacterInvalid,
+/*   8 - Back Space         */ CharacterInvalid,
+/*   9 - Horizontal Tab     */ CharacterWhiteSpace,
+/*  10 - Line Feed          */ CharacterLineTerminator,
+/*  11 - Vertical Tab       */ CharacterWhiteSpace,
+/*  12 - Form Feed          */ CharacterWhiteSpace,
+/*  13 - Carriage Return    */ CharacterLineTerminator,
+/*  14 - Shift Out          */ CharacterInvalid,
+/*  15 - Shift In           */ CharacterInvalid,
+/*  16 - Data Line Escape   */ CharacterInvalid,
+/*  17 - Device Control 1   */ CharacterInvalid,
+/*  18 - Device Control 2   */ CharacterInvalid,
+/*  19 - Device Control 3   */ CharacterInvalid,
+/*  20 - Device Control 4   */ CharacterInvalid,
+/*  21 - Negative Ack.      */ CharacterInvalid,
+/*  22 - Synchronous Idle   */ CharacterInvalid,
+/*  23 - End of Transmit    */ CharacterInvalid,
+/*  24 - Cancel             */ CharacterInvalid,
+/*  25 - End of Medium      */ CharacterInvalid,
+/*  26 - Substitute         */ CharacterInvalid,
+/*  27 - Escape             */ CharacterInvalid,
+/*  28 - File Separator     */ CharacterInvalid,
+/*  29 - Group Separator    */ CharacterInvalid,
+/*  30 - Record Separator   */ CharacterInvalid,
+/*  31 - Unit Separator     */ CharacterInvalid,
+/*  32 - Space              */ CharacterWhiteSpace,
+/*  33 - !                  */ CharacterExclamationMark,
+/*  34 - "                  */ CharacterQuote,
+/*  35 - #                  */ CharacterInvalid,
+/*  36 - $                  */ CharacterIdentifierStart,
+/*  37 - %                  */ CharacterModulo,
+/*  38 - &                  */ CharacterAnd,
+/*  39 - '                  */ CharacterQuote,
+/*  40 - (                  */ CharacterOpenParen,
+/*  41 - )                  */ CharacterCloseParen,
+/*  42 - *                  */ CharacterMultiply,
+/*  43 - +                  */ CharacterAdd,
+/*  44 - ,                  */ CharacterComma,
+/*  45 - -                  */ CharacterSub,
+/*  46 - .                  */ CharacterDot,
+/*  47 - /                  */ CharacterSlash,
+/*  48 - 0                  */ CharacterZero,
+/*  49 - 1                  */ CharacterNumber,
+/*  50 - 2                  */ CharacterNumber,
+/*  51 - 3                  */ CharacterNumber,
+/*  52 - 4                  */ CharacterNumber,
+/*  53 - 5                  */ CharacterNumber,
+/*  54 - 6                  */ CharacterNumber,
+/*  55 - 7                  */ CharacterNumber,
+/*  56 - 8                  */ CharacterNumber,
+/*  57 - 9                  */ CharacterNumber,
+/*  58 - :                  */ CharacterColon,
+/*  59 - ;                  */ CharacterSemicolon,
+/*  60 - <                  */ CharacterLess,
+/*  61 - =                  */ CharacterEqual,
+/*  62 - >                  */ CharacterGreater,
+/*  63 - ?                  */ CharacterQuestion,
+/*  64 - @                  */ CharacterPrivateIdentifierStart,
+/*  65 - A                  */ CharacterIdentifierStart,
+/*  66 - B                  */ CharacterIdentifierStart,
+/*  67 - C                  */ CharacterIdentifierStart,
+/*  68 - D                  */ CharacterIdentifierStart,
+/*  69 - E                  */ CharacterIdentifierStart,
+/*  70 - F                  */ CharacterIdentifierStart,
+/*  71 - G                  */ CharacterIdentifierStart,
+/*  72 - H                  */ CharacterIdentifierStart,
+/*  73 - I                  */ CharacterIdentifierStart,
+/*  74 - J                  */ CharacterIdentifierStart,
+/*  75 - K                  */ CharacterIdentifierStart,
+/*  76 - L                  */ CharacterIdentifierStart,
+/*  77 - M                  */ CharacterIdentifierStart,
+/*  78 - N                  */ CharacterIdentifierStart,
+/*  79 - O                  */ CharacterIdentifierStart,
+/*  80 - P                  */ CharacterIdentifierStart,
+/*  81 - Q                  */ CharacterIdentifierStart,
+/*  82 - R                  */ CharacterIdentifierStart,
+/*  83 - S                  */ CharacterIdentifierStart,
+/*  84 - T                  */ CharacterIdentifierStart,
+/*  85 - U                  */ CharacterIdentifierStart,
+/*  86 - V                  */ CharacterIdentifierStart,
+/*  87 - W                  */ CharacterIdentifierStart,
+/*  88 - X                  */ CharacterIdentifierStart,
+/*  89 - Y                  */ CharacterIdentifierStart,
+/*  90 - Z                  */ CharacterIdentifierStart,
+/*  91 - [                  */ CharacterOpenBracket,
+/*  92 - \                  */ CharacterBackSlash,
+/*  93 - ]                  */ CharacterCloseBracket,
+/*  94 - ^                  */ CharacterXor,
+/*  95 - _                  */ CharacterIdentifierStart,
+/*  96 - `                  */ CharacterInvalid,
+/*  97 - a                  */ CharacterIdentifierStart,
+/*  98 - b                  */ CharacterIdentifierStart,
+/*  99 - c                  */ CharacterIdentifierStart,
+/* 100 - d                  */ CharacterIdentifierStart,
+/* 101 - e                  */ CharacterIdentifierStart,
+/* 102 - f                  */ CharacterIdentifierStart,
+/* 103 - g                  */ CharacterIdentifierStart,
+/* 104 - h                  */ CharacterIdentifierStart,
+/* 105 - i                  */ CharacterIdentifierStart,
+/* 106 - j                  */ CharacterIdentifierStart,
+/* 107 - k                  */ CharacterIdentifierStart,
+/* 108 - l                  */ CharacterIdentifierStart,
+/* 109 - m                  */ CharacterIdentifierStart,
+/* 110 - n                  */ CharacterIdentifierStart,
+/* 111 - o                  */ CharacterIdentifierStart,
+/* 112 - p                  */ CharacterIdentifierStart,
+/* 113 - q                  */ CharacterIdentifierStart,
+/* 114 - r                  */ CharacterIdentifierStart,
+/* 115 - s                  */ CharacterIdentifierStart,
+/* 116 - t                  */ CharacterIdentifierStart,
+/* 117 - u                  */ CharacterIdentifierStart,
+/* 118 - v                  */ CharacterIdentifierStart,
+/* 119 - w                  */ CharacterIdentifierStart,
+/* 120 - x                  */ CharacterIdentifierStart,
+/* 121 - y                  */ CharacterIdentifierStart,
+/* 122 - z                  */ CharacterIdentifierStart,
+/* 123 - {                  */ CharacterOpenBrace,
+/* 124 - |                  */ CharacterOr,
+/* 125 - }                  */ CharacterCloseBrace,
+/* 126 - ~                  */ CharacterTilde,
+/* 127 - Delete             */ CharacterInvalid,
+/* 128 - Cc category        */ CharacterInvalid,
+/* 129 - Cc category        */ CharacterInvalid,
+/* 130 - Cc category        */ CharacterInvalid,
+/* 131 - Cc category        */ CharacterInvalid,
+/* 132 - Cc category        */ CharacterInvalid,
+/* 133 - Cc category        */ CharacterInvalid,
+/* 134 - Cc category        */ CharacterInvalid,
+/* 135 - Cc category        */ CharacterInvalid,
+/* 136 - Cc category        */ CharacterInvalid,
+/* 137 - Cc category        */ CharacterInvalid,
+/* 138 - Cc category        */ CharacterInvalid,
+/* 139 - Cc category        */ CharacterInvalid,
+/* 140 - Cc category        */ CharacterInvalid,
+/* 141 - Cc category        */ CharacterInvalid,
+/* 142 - Cc category        */ CharacterInvalid,
+/* 143 - Cc category        */ CharacterInvalid,
+/* 144 - Cc category        */ CharacterInvalid,
+/* 145 - Cc category        */ CharacterInvalid,
+/* 146 - Cc category        */ CharacterInvalid,
+/* 147 - Cc category        */ CharacterInvalid,
+/* 148 - Cc category        */ CharacterInvalid,
+/* 149 - Cc category        */ CharacterInvalid,
+/* 150 - Cc category        */ CharacterInvalid,
+/* 151 - Cc category        */ CharacterInvalid,
+/* 152 - Cc category        */ CharacterInvalid,
+/* 153 - Cc category        */ CharacterInvalid,
+/* 154 - Cc category        */ CharacterInvalid,
+/* 155 - Cc category        */ CharacterInvalid,
+/* 156 - Cc category        */ CharacterInvalid,
+/* 157 - Cc category        */ CharacterInvalid,
+/* 158 - Cc category        */ CharacterInvalid,
+/* 159 - Cc category        */ CharacterInvalid,
+/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
+/* 161 - Po category        */ CharacterInvalid,
+/* 162 - Sc category        */ CharacterInvalid,
+/* 163 - Sc category        */ CharacterInvalid,
+/* 164 - Sc category        */ CharacterInvalid,
+/* 165 - Sc category        */ CharacterInvalid,
+/* 166 - So category        */ CharacterInvalid,
+/* 167 - So category        */ CharacterInvalid,
+/* 168 - Sk category        */ CharacterInvalid,
+/* 169 - So category        */ CharacterInvalid,
+/* 170 - Ll category        */ CharacterIdentifierStart,
+/* 171 - Pi category        */ CharacterInvalid,
+/* 172 - Sm category        */ CharacterInvalid,
+/* 173 - Cf category        */ CharacterInvalid,
+/* 174 - So category        */ CharacterInvalid,
+/* 175 - Sk category        */ CharacterInvalid,
+/* 176 - So category        */ CharacterInvalid,
+/* 177 - Sm category        */ CharacterInvalid,
+/* 178 - No category        */ CharacterInvalid,
+/* 179 - No category        */ CharacterInvalid,
+/* 180 - Sk category        */ CharacterInvalid,
+/* 181 - Ll category        */ CharacterIdentifierStart,
+/* 182 - So category        */ CharacterInvalid,
+/* 183 - Po category        */ CharacterInvalid,
+/* 184 - Sk category        */ CharacterInvalid,
+/* 185 - No category        */ CharacterInvalid,
+/* 186 - Ll category        */ CharacterIdentifierStart,
+/* 187 - Pf category        */ CharacterInvalid,
+/* 188 - No category        */ CharacterInvalid,
+/* 189 - No category        */ CharacterInvalid,
+/* 190 - No category        */ CharacterInvalid,
+/* 191 - Po category        */ CharacterInvalid,
+/* 192 - Lu category        */ CharacterIdentifierStart,
+/* 193 - Lu category        */ CharacterIdentifierStart,
+/* 194 - Lu category        */ CharacterIdentifierStart,
+/* 195 - Lu category        */ CharacterIdentifierStart,
+/* 196 - Lu category        */ CharacterIdentifierStart,
+/* 197 - Lu category        */ CharacterIdentifierStart,
+/* 198 - Lu category        */ CharacterIdentifierStart,
+/* 199 - Lu category        */ CharacterIdentifierStart,
+/* 200 - Lu category        */ CharacterIdentifierStart,
+/* 201 - Lu category        */ CharacterIdentifierStart,
+/* 202 - Lu category        */ CharacterIdentifierStart,
+/* 203 - Lu category        */ CharacterIdentifierStart,
+/* 204 - Lu category        */ CharacterIdentifierStart,
+/* 205 - Lu category        */ CharacterIdentifierStart,
+/* 206 - Lu category        */ CharacterIdentifierStart,
+/* 207 - Lu category        */ CharacterIdentifierStart,
+/* 208 - Lu category        */ CharacterIdentifierStart,
+/* 209 - Lu category        */ CharacterIdentifierStart,
+/* 210 - Lu category        */ CharacterIdentifierStart,
+/* 211 - Lu category        */ CharacterIdentifierStart,
+/* 212 - Lu category        */ CharacterIdentifierStart,
+/* 213 - Lu category        */ CharacterIdentifierStart,
+/* 214 - Lu category        */ CharacterIdentifierStart,
+/* 215 - Sm category        */ CharacterInvalid,
+/* 216 - Lu category        */ CharacterIdentifierStart,
+/* 217 - Lu category        */ CharacterIdentifierStart,
+/* 218 - Lu category        */ CharacterIdentifierStart,
+/* 219 - Lu category        */ CharacterIdentifierStart,
+/* 220 - Lu category        */ CharacterIdentifierStart,
+/* 221 - Lu category        */ CharacterIdentifierStart,
+/* 222 - Lu category        */ CharacterIdentifierStart,
+/* 223 - Ll category        */ CharacterIdentifierStart,
+/* 224 - Ll category        */ CharacterIdentifierStart,
+/* 225 - Ll category        */ CharacterIdentifierStart,
+/* 226 - Ll category        */ CharacterIdentifierStart,
+/* 227 - Ll category        */ CharacterIdentifierStart,
+/* 228 - Ll category        */ CharacterIdentifierStart,
+/* 229 - Ll category        */ CharacterIdentifierStart,
+/* 230 - Ll category        */ CharacterIdentifierStart,
+/* 231 - Ll category        */ CharacterIdentifierStart,
+/* 232 - Ll category        */ CharacterIdentifierStart,
+/* 233 - Ll category        */ CharacterIdentifierStart,
+/* 234 - Ll category        */ CharacterIdentifierStart,
+/* 235 - Ll category        */ CharacterIdentifierStart,
+/* 236 - Ll category        */ CharacterIdentifierStart,
+/* 237 - Ll category        */ CharacterIdentifierStart,
+/* 238 - Ll category        */ CharacterIdentifierStart,
+/* 239 - Ll category        */ CharacterIdentifierStart,
+/* 240 - Ll category        */ CharacterIdentifierStart,
+/* 241 - Ll category        */ CharacterIdentifierStart,
+/* 242 - Ll category        */ CharacterIdentifierStart,
+/* 243 - Ll category        */ CharacterIdentifierStart,
+/* 244 - Ll category        */ CharacterIdentifierStart,
+/* 245 - Ll category        */ CharacterIdentifierStart,
+/* 246 - Ll category        */ CharacterIdentifierStart,
+/* 247 - Sm category        */ CharacterInvalid,
+/* 248 - Ll category        */ CharacterIdentifierStart,
+/* 249 - Ll category        */ CharacterIdentifierStart,
+/* 250 - Ll category        */ CharacterIdentifierStart,
+/* 251 - Ll category        */ CharacterIdentifierStart,
+/* 252 - Ll category        */ CharacterIdentifierStart,
+/* 253 - Ll category        */ CharacterIdentifierStart,
+/* 254 - Ll category        */ CharacterIdentifierStart,
+/* 255 - Ll category        */ CharacterIdentifierStart
+};
+
+// This table provides the character that results from \X, where X is the ASCII character (0-127) used
+// as the index into the table. A table value of 0 means that more processing needs to be done.
+static const LChar singleCharacterEscapeValuesForASCII[128] = {
+/*   0 - Null               */ 0,
+/*   1 - Start of Heading   */ 0,
+/*   2 - Start of Text      */ 0,
+/*   3 - End of Text        */ 0,
+/*   4 - End of Transm.     */ 0,
+/*   5 - Enquiry            */ 0,
+/*   6 - Acknowledgment     */ 0,
+/*   7 - Bell               */ 0,
+/*   8 - Back Space         */ 0,
+/*   9 - Horizontal Tab     */ 0,
+/*  10 - Line Feed          */ 0,
+/*  11 - Vertical Tab       */ 0,
+/*  12 - Form Feed          */ 0,
+/*  13 - Carriage Return    */ 0,
+/*  14 - Shift Out          */ 0,
+/*  15 - Shift In           */ 0,
+/*  16 - Data Line Escape   */ 0,
+/*  17 - Device Control 1   */ 0,
+/*  18 - Device Control 2   */ 0,
+/*  19 - Device Control 3   */ 0,
+/*  20 - Device Control 4   */ 0,
+/*  21 - Negative Ack.      */ 0,
+/*  22 - Synchronous Idle   */ 0,
+/*  23 - End of Transmit    */ 0,
+/*  24 - Cancel             */ 0,
+/*  25 - End of Medium      */ 0,
+/*  26 - Substitute         */ 0,
+/*  27 - Escape             */ 0,
+/*  28 - File Separator     */ 0,
+/*  29 - Group Separator    */ 0,
+/*  30 - Record Separator   */ 0,
+/*  31 - Unit Separator     */ 0,
+/*  32 - Space              */ ' ',
+/*  33 - !                  */ '!',
+/*  34 - "                  */ '"',
+/*  35 - #                  */ '#',
+/*  36 - $                  */ '$',
+/*  37 - %                  */ '%',
+/*  38 - &                  */ '&',
+/*  39 - '                  */ '\'',
+/*  40 - (                  */ '(',
+/*  41 - )                  */ ')',
+/*  42 - *                  */ '*',
+/*  43 - +                  */ '+',
+/*  44 - ,                  */ ',',
+/*  45 - -                  */ '-',
+/*  46 - .                  */ '.',
+/*  47 - /                  */ '/',
+/*  48 - 0                  */ 0,
+/*  49 - 1                  */ 0,
+/*  50 - 2                  */ 0,
+/*  51 - 3                  */ 0,
+/*  52 - 4                  */ 0,
+/*  53 - 5                  */ 0,
+/*  54 - 6                  */ 0,
+/*  55 - 7                  */ 0,
+/*  56 - 8                  */ 0,
+/*  57 - 9                  */ 0,
+/*  58 - :                  */ ':',
+/*  59 - ;                  */ ';',
+/*  60 - <                  */ '<',
+/*  61 - =                  */ '=',
+/*  62 - >                  */ '>',
+/*  63 - ?                  */ '?',
+/*  64 - @                  */ '@',
+/*  65 - A                  */ 'A',
+/*  66 - B                  */ 'B',
+/*  67 - C                  */ 'C',
+/*  68 - D                  */ 'D',
+/*  69 - E                  */ 'E',
+/*  70 - F                  */ 'F',
+/*  71 - G                  */ 'G',
+/*  72 - H                  */ 'H',
+/*  73 - I                  */ 'I',
+/*  74 - J                  */ 'J',
+/*  75 - K                  */ 'K',
+/*  76 - L                  */ 'L',
+/*  77 - M                  */ 'M',
+/*  78 - N                  */ 'N',
+/*  79 - O                  */ 'O',
+/*  80 - P                  */ 'P',
+/*  81 - Q                  */ 'Q',
+/*  82 - R                  */ 'R',
+/*  83 - S                  */ 'S',
+/*  84 - T                  */ 'T',
+/*  85 - U                  */ 'U',
+/*  86 - V                  */ 'V',
+/*  87 - W                  */ 'W',
+/*  88 - X                  */ 'X',
+/*  89 - Y                  */ 'Y',
+/*  90 - Z                  */ 'Z',
+/*  91 - [                  */ '[',
+/*  92 - \                  */ '\\',
+/*  93 - ]                  */ ']',
+/*  94 - ^                  */ '^',
+/*  95 - _                  */ '_',
+/*  96 - `                  */ '`',
+/*  97 - a                  */ 'a',
+/*  98 - b                  */ 0x08,
+/*  99 - c                  */ 'c',
+/* 100 - d                  */ 'd',
+/* 101 - e                  */ 'e',
+/* 102 - f                  */ 0x0C,
+/* 103 - g                  */ 'g',
+/* 104 - h                  */ 'h',
+/* 105 - i                  */ 'i',
+/* 106 - j                  */ 'j',
+/* 107 - k                  */ 'k',
+/* 108 - l                  */ 'l',
+/* 109 - m                  */ 'm',
+/* 110 - n                  */ 0x0A,
+/* 111 - o                  */ 'o',
+/* 112 - p                  */ 'p',
+/* 113 - q                  */ 'q',
+/* 114 - r                  */ 0x0D,
+/* 115 - s                  */ 's',
+/* 116 - t                  */ 0x09,
+/* 117 - u                  */ 0,
+/* 118 - v                  */ 0x0B,
+/* 119 - w                  */ 'w',
+/* 120 - x                  */ 0,
+/* 121 - y                  */ 'y',
+/* 122 - z                  */ 'z',
+/* 123 - {                  */ '{',
+/* 124 - |                  */ '|',
+/* 125 - }                  */ '}',
+/* 126 - ~                  */ '~',
+/* 127 - Delete             */ 0
+};
 
-// a bridge for yacc from the C world to C++
-int jscyylex(void* lvalp, void* llocp, void* globalData)
+template <typename T>
+Lexer<T>::Lexer(VM* vm, JSParserStrictness strictness) // Initializes only these three members; setCode() assigns the scanning state.
+    : m_isReparsing(false)
+    , m_vm(vm)
+    , m_parsingBuiltinFunction(strictness == JSParseBuiltin) // True only when strictness is JSParseBuiltin.
 {
-    return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
 }
 
-namespace JSC {
+template <typename T>
+Lexer<T>::~Lexer() // Empty body: no explicit cleanup is performed here.
+{
+}
 
-static bool isDecimalDigit(int);
-
-Lexer::Lexer(JSGlobalData* globalData)
-    : yylineno(1)
-    , m_restrKeyword(false)
-    , m_eatNextIdentifier(false)
-    , m_stackToken(-1)
-    , m_lastToken(-1)
-    , m_position(0)
-    , m_code(0)
-    , m_length(0)
-    , m_isReparsing(false)
-    , m_atLineStart(true)
-    , m_current(0)
-    , m_next1(0)
-    , m_next2(0)
-    , m_next3(0)
-    , m_currentOffset(0)
-    , m_nextOffset1(0)
-    , m_nextOffset2(0)
-    , m_nextOffset3(0)
-    , m_globalData(globalData)
-    , m_mainTable(JSC::mainTable)
+template <typename T>
+String Lexer<T>::invalidCharacterMessage() const // Builds the error text describing the invalid character held in m_current.
 {
-    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
-    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
+    // Characters with a conventional spelling get a fixed, readable message.
+    switch (m_current) {
+    case 0:
+        return "Invalid character: '\\0'";
+    case 10:
+        return "Invalid character: '\\n'";
+    case 11:
+        return "Invalid character: '\\v'";
+    case 13:
+        return "Invalid character: '\\r'";
+    case 35:
+        return "Invalid character: '#'";
+    case 64:
+        return "Invalid character: '@'";
+    case 96:
+        return "Invalid character: '`'";
+    default:
+        // Every other character is reported as a \u escape. A \u escape is written with four
+        // hexadecimal digits, so the code unit must be formatted in hex, not decimal
+        // (U+00A7 must read \u00a7, not \u0167).
+        return String::format("Invalid character '\\u%04x'", static_cast<unsigned>(m_current)).impl();
+    }
 }
 
-Lexer::~Lexer()
+template <typename T>
+ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const // Returns the current read position, m_code.
 {
-    m_mainTable.deleteTable();
+    ASSERT(m_code <= m_codeEnd); // The read position may equal, but never pass, the end pointer.
+    return m_code;
 }
 
-void Lexer::setCode(const SourceCode& source)
+template <typename T>
+void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena) // Resets the lexer state so scanning starts at source.startOffset().
 {
-    yylineno = source.firstLine();
-    m_restrKeyword = false;
-    m_delimited = false;
-    m_eatNextIdentifier = false;
-    m_stackToken = -1;
+    m_arena = &arena->identifierArena(); // arena is dereferenced unconditionally, so it must be non-null.
+    
+    m_lineNumber = source.firstLine();
     m_lastToken = -1;
+    
+    const String& sourceString = source.provider()->source();
+
+    if (!sourceString.isNull())
+        setCodeStart(sourceString.impl());
+    else
+        m_codeStart = 0; // Null source string: the start pointer is left null.
 
-    m_position = source.startOffset();
     m_source = &source;
-    m_code = source.provider()->data();
-    m_length = source.endOffset();
-    m_skipLF = false;
-    m_skipCR = false;
+    m_sourceOffset = source.startOffset();
+    m_codeStartPlusOffset = m_codeStart + source.startOffset();
+    m_code = m_codeStartPlusOffset; // The read position begins at the source's start offset...
+    m_codeEnd = m_codeStart + source.endOffset(); // ...and the end pointer sits at its end offset.
     m_error = false;
     m_atLineStart = true;
+    m_lineStart = m_code;
+    m_lexErrorMessage = String();
+    
+    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
+    m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2); // Sized to half the number of code units in the range.
+    
+    if (LIKELY(m_code < m_codeEnd))
+        m_current = *m_code; // Load the first code unit of the range.
+    else
+        m_current = 0; // Empty range: m_current is 0, the value shift() also leaves once the end is reached.
+    ASSERT(currentOffset() == source.startOffset());
+}
 
-    // read first characters
-    shift(4);
+template <typename T>
+template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift() // Advances m_code by shiftAmount and reloads m_current.
+{
+    m_code += shiftAmount;
+    ASSERT(currentOffset() >= currentLineStartOffset());
+    m_current = *m_code; // Unlike shift(), this read is not guarded by an m_codeEnd check.
 }
 
-void Lexer::shift(unsigned p)
+template <typename T>
+ALWAYS_INLINE void Lexer<T>::shift() // Advances one code unit; m_current becomes 0 once m_code reaches m_codeEnd.
 {
-    // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
-    // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
+    // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
+    m_current = 0;
+    ++m_code;
+    if (LIKELY(m_code < m_codeEnd))
+        m_current = *m_code; // Only read the next code unit while still inside the buffer.
+}
 
-    while (p--) {
-        m_current = m_next1;
-        m_next1 = m_next2;
-        m_next2 = m_next3;
-        m_currentOffset = m_nextOffset1;
-        m_nextOffset1 = m_nextOffset2;
-        m_nextOffset2 = m_nextOffset3;
-        do {
-            if (m_position >= m_length) {
-                m_nextOffset3 = m_position;
-                m_position++;
-                m_next3 = -1;
-                break;
-            }
-            m_nextOffset3 = m_position;
-            m_next3 = m_code[m_position++];
-        } while (m_next3 == 0xFEFF);
-    }
+template <typename T>
+ALWAYS_INLINE bool Lexer<T>::atEnd() const // True only when m_current is 0 and m_code equals m_codeEnd.
+{
+    ASSERT(!m_current || m_code < m_codeEnd); // A non-zero m_current implies m_code is still before the end.
+    return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd); // A 0 read before m_codeEnd does not count as the end.
 }
 
-// called on each new line
-void Lexer::nextLine()
+template <typename T>
+ALWAYS_INLINE T Lexer<T>::peek(int offset) const // Returns the code unit `offset` positions ahead without advancing, or 0 at or past m_codeEnd.
 {
-    yylineno++;
-    m_atLineStart = true;
+    ASSERT(offset > 0 && offset < 5); // Only a lookahead of 1 to 4 code units is expected.
+    const T* code = m_code + offset;
+    return (code < m_codeEnd) ? *code : 0;
 }
 
-void Lexer::setDone(State s)
+template <typename T>
+typename Lexer<T>::UnicodeHexValue Lexer<T>::parseFourDigitUnicodeHex() // Reads four hex digits starting at m_current and consumes them on success.
 {
-    m_state = s;
-    m_done = true;
+    T char1 = peek(1);
+    T char2 = peek(2);
+    T char3 = peek(3);
+
+    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3))) // On failure nothing is consumed.
+        return UnicodeHexValue((m_code + 4) >= m_codeEnd ? UnicodeHexValue::IncompleteHex : UnicodeHexValue::InvalidHex); // IncompleteHex when at most four code units remain before m_codeEnd, InvalidHex otherwise.
+
+    int result = convertUnicode(m_current, char1, char2, char3);
+    shift(); // Four shifts step past the four digits just converted.
+    shift();
+    shift();
+    shift();
+    return UnicodeHexValue(result);
 }
 
-int Lexer::lex(void* p1, void* p2)
+template <typename T>
+void Lexer<T>::shiftLineTerminator()
 {
-    YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
-    YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
-    int token = 0;
-    m_state = Start;
-    unsigned short stringType = 0; // either single or double quotes
-    m_buffer8.clear();
-    m_buffer16.clear();
-    m_done = false;
-    m_terminator = false;
-    m_skipLF = false;
-    m_skipCR = false;
-
-    // did we push a token on the stack previously ?
-    // (after an automatic semicolon insertion)
-    if (m_stackToken >= 0) {
-        setDone(Other);
-        token = m_stackToken;
-        m_stackToken = 0;
-    }
-    int startOffset = m_currentOffset;
-    while (!m_done) {
-        if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
-            m_skipLF = false;
-        if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
-            m_skipCR = false;
-        if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one
-            m_skipLF = false;
-            m_skipCR = false;
-            shift(1);
-        }
-        switch (m_state) {
-            case Start:
-                startOffset = m_currentOffset;
-                if (isWhiteSpace()) {
-                    // do nothing
-                } else if (m_current == '/' && m_next1 == '/') {
-                    shift(1);
-                    m_state = InSingleLineComment;
-                } else if (m_current == '/' && m_next1 == '*') {
-                    shift(1);
-                    m_state = InMultiLineComment;
-                } else if (m_current == -1) {
-                    if (!m_terminator && !m_delimited && !m_isReparsing) {
-                        // automatic semicolon insertion if program incomplete
-                        token = ';';
-                        m_stackToken = 0;
-                        setDone(Other);
-                    } else
-                        setDone(Eof);
-                } else if (isLineTerminator()) {
-                    nextLine();
-                    m_terminator = true;
-                    if (m_restrKeyword) {
-                        token = ';';
-                        setDone(Other);
-                    }
-                } else if (m_current == '"' || m_current == '\'') {
-                    m_state = InString;
-                    stringType = static_cast<unsigned short>(m_current);
-                } else if (isIdentStart(m_current)) {
-                    record16(m_current);
-                    m_state = InIdentifierOrKeyword;
-                } else if (m_current == '\\')
-                    m_state = InIdentifierStartUnicodeEscapeStart;
-                else if (m_current == '0') {
-                    record8(m_current);
-                    m_state = InNum0;
-                } else if (isDecimalDigit(m_current)) {
-                    record8(m_current);
-                    m_state = InNum;
-                } else if (m_current == '.' && isDecimalDigit(m_next1)) {
-                    record8(m_current);
-                    m_state = InDecimal;
-                    // <!-- marks the beginning of a line comment (for www usage)
-                } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
-                    shift(3);
-                    m_state = InSingleLineComment;
-                    // same for -->
-                } else if (m_atLineStart && m_current == '-' && m_next1 == '-' &&  m_next2 == '>') {
-                    shift(2);
-                    m_state = InSingleLineComment;
-                } else {
-                    token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
-                    if (token != -1)
-                        setDone(Other);
-                    else
-                        setDone(Bad);
-                }
-                break;
-            case InString:
-                if (m_current == stringType) {
-                    shift(1);
-                    setDone(String);
-                } else if (isLineTerminator() || m_current == -1)
-                    setDone(Bad);
-                else if (m_current == '\\')
-                    m_state = InEscapeSequence;
-                else
-                    record16(m_current);
-                break;
-            // Escape Sequences inside of strings
-            case InEscapeSequence:
-                if (isOctalDigit(m_current)) {
-                    if (m_current >= '0' && m_current <= '3' &&
-                        isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
-                        record16(convertOctal(m_current, m_next1, m_next2));
-                        shift(2);
-                        m_state = InString;
-                    } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
-                        record16(convertOctal('0', m_current, m_next1));
-                        shift(1);
-                        m_state = InString;
-                    } else if (isOctalDigit(m_current)) {
-                        record16(convertOctal('0', '0', m_current));
-                        m_state = InString;
-                    } else
-                        setDone(Bad);
-                } else if (m_current == 'x')
-                    m_state = InHexEscape;
-                else if (m_current == 'u')
-                    m_state = InUnicodeEscape;
-                else if (isLineTerminator()) {
-                    nextLine();
-                    m_state = InString;
-                } else {
-                    record16(singleEscape(static_cast<unsigned short>(m_current)));
-                    m_state = InString;
-                }
-                break;
-            case InHexEscape:
-                if (isHexDigit(m_current) && isHexDigit(m_next1)) {
-                    m_state = InString;
-                    record16(convertHex(m_current, m_next1));
-                    shift(1);
-                } else if (m_current == stringType) {
-                    record16('x');
-                    shift(1);
-                    setDone(String);
-                } else {
-                    record16('x');
-                    record16(m_current);
-                    m_state = InString;
-                }
-                break;
-            case InUnicodeEscape:
-                if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
-                    record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
-                    shift(3);
-                    m_state = InString;
-                } else if (m_current == stringType) {
-                    record16('u');
-                    shift(1);
-                    setDone(String);
-                } else
-                    setDone(Bad);
-                break;
-            case InSingleLineComment:
-                if (isLineTerminator()) {
-                    nextLine();
-                    m_terminator = true;
-                    if (m_restrKeyword) {
-                        token = ';';
-                        setDone(Other);
-                    } else
-                        m_state = Start;
-                } else if (m_current == -1)
-                    setDone(Eof);
-                break;
-            case InMultiLineComment:
-                if (m_current == -1)
-                    setDone(Bad);
-                else if (isLineTerminator())
-                    nextLine();
-                else if (m_current == '*' && m_next1 == '/') {
-                    m_state = Start;
-                    shift(1);
-                }
-                break;
-            case InIdentifierOrKeyword:
-            case InIdentifier:
-                if (isIdentPart(m_current))
-                    record16(m_current);
-                else if (m_current == '\\')
-                    m_state = InIdentifierPartUnicodeEscapeStart;
-                else
-                    setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
-                break;
-            case InNum0:
-                if (m_current == 'x' || m_current == 'X') {
-                    record8(m_current);
-                    m_state = InHex;
-                } else if (m_current == '.') {
-                    record8(m_current);
-                    m_state = InDecimal;
-                } else if (m_current == 'e' || m_current == 'E') {
-                    record8(m_current);
-                    m_state = InExponentIndicator;
-                } else if (isOctalDigit(m_current)) {
-                    record8(m_current);
-                    m_state = InOctal;
-                } else if (isDecimalDigit(m_current)) {
-                    record8(m_current);
-                    m_state = InDecimal;
-                } else
-                    setDone(Number);
-                break;
-            case InHex:
-                if (isHexDigit(m_current))
-                    record8(m_current);
-                else
-                    setDone(Hex);
-                break;
-            case InOctal:
-                if (isOctalDigit(m_current))
-                    record8(m_current);
-                else if (isDecimalDigit(m_current)) {
-                    record8(m_current);
-                    m_state = InDecimal;
-                } else
-                    setDone(Octal);
-                break;
-            case InNum:
-                if (isDecimalDigit(m_current))
-                    record8(m_current);
-                else if (m_current == '.') {
-                    record8(m_current);
-                    m_state = InDecimal;
-                } else if (m_current == 'e' || m_current == 'E') {
-                    record8(m_current);
-                    m_state = InExponentIndicator;
-                } else
-                    setDone(Number);
-                break;
-            case InDecimal:
-                if (isDecimalDigit(m_current))
-                    record8(m_current);
-                else if (m_current == 'e' || m_current == 'E') {
-                    record8(m_current);
-                    m_state = InExponentIndicator;
-                } else
-                    setDone(Number);
-                break;
-            case InExponentIndicator:
-                if (m_current == '+' || m_current == '-')
-                    record8(m_current);
-                else if (isDecimalDigit(m_current)) {
-                    record8(m_current);
-                    m_state = InExponent;
-                } else
-                    setDone(Bad);
-                break;
-            case InExponent:
-                if (isDecimalDigit(m_current))
-                    record8(m_current);
-                else
-                    setDone(Number);
-                break;
-            case InIdentifierStartUnicodeEscapeStart:
-                if (m_current == 'u')
-                    m_state = InIdentifierStartUnicodeEscape;
-                else
-                    setDone(Bad);
-                break;
-            case InIdentifierPartUnicodeEscapeStart:
-                if (m_current == 'u')
-                    m_state = InIdentifierPartUnicodeEscape;
-                else
-                    setDone(Bad);
-                break;
-            case InIdentifierStartUnicodeEscape:
-                if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
-                    setDone(Bad);
-                    break;
-                }
-                token = convertUnicode(m_current, m_next1, m_next2, m_next3);
-                shift(3);
-                if (!isIdentStart(token)) {
-                    setDone(Bad);
-                    break;
-                }
-                record16(token);
-                m_state = InIdentifier;
-                break;
-            case InIdentifierPartUnicodeEscape:
-                if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
-                    setDone(Bad);
-                    break;
-                }
-                token = convertUnicode(m_current, m_next1, m_next2, m_next3);
-                shift(3);
-                if (!isIdentPart(token)) {
-                    setDone(Bad);
-                    break;
-                }
-                record16(token);
-                m_state = InIdentifier;
-                break;
-            default:
-                ASSERT(!"Unhandled state in switch statement");
-        }
+    ASSERT(isLineTerminator(m_current));
 
-        // move on to the next character
-        if (!m_done)
-            shift(1);
-        if (m_state != Start && m_state != InSingleLineComment)
-            m_atLineStart = false;
-    }
+    m_positionBeforeLastNewline = currentPosition();
+    T prev = m_current;
+    shift();
 
-    // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
-    if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current))
-        m_state = Bad;
+    // Allow both CRLF and LFCR.
+    if (prev + m_current == '\n' + '\r')
+        shift();
 
-    // terminate string
-    m_buffer8.append('\0');
+    ++m_lineNumber;
+}
 
-#ifdef JSC_DEBUG_LEX
-    fprintf(stderr, "line: %d ", lineNo());
-    fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
-    fprintf(stderr, "%s ", m_buffer8.data());
-#endif
+// Reports whether the most recently produced token (m_lastToken) was one of the
+// keywords continue, break, return or throw. Presumably these are the
+// "restricted" keywords after which a line break is significant -- confirm
+// against the callers.
+template <typename T>
+ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
+{
+    switch (m_lastToken) {
+    case CONTINUE:
+    case BREAK:
+    case RETURN:
+    case THROW:
+        return true;
+    default:
+        return false;
+    }
+}
 
-    double dval = 0;
-    if (m_state == Number)
-        dval = WTF::strtod(m_buffer8.data(), 0L);
-    else if (m_state == Hex) { // scan hex numbers
-        const char* p = m_buffer8.data() + 2;
-        while (char c = *p++) {
-            dval *= 16;
-            dval += convertHex(c);
-        }
+// Out-of-line helper used by isIdentStart(UChar) for characters that are not
+// Latin-1: the character may start an identifier when its ICU general category
+// is one of the letter categories selected by U_GC_L_MASK.
+static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
+{
+    const bool isLetter = (U_GET_GC_MASK(c) & U_GC_L_MASK) != 0;
+    return isLetter;
+}
 
-        if (dval >= mantissaOverflowLowerBound)
-            dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
+// 8-bit overload: the answer is unconditionally true, so the argument is left
+// unnamed. Exists so that code written against both character widths can call
+// isLatin1() uniformly (see the UChar overload).
+static ALWAYS_INLINE bool isLatin1(LChar)
+{
+    return true;
+}
 
-        m_state = Number;
-    } else if (m_state == Octal) {   // scan octal number
-        const char* p = m_buffer8.data() + 1;
-        while (char c = *p++) {
-            dval *= 8;
-            dval += c - '0';
-        }
+// 16-bit overload: a UChar counts as Latin-1 when its value fits in one byte.
+static ALWAYS_INLINE bool isLatin1(UChar c)
+{
+    return c <= 0xFF;
+}
 
-        if (dval >= mantissaOverflowLowerBound)
-            dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
+// An 8-bit character may start an identifier when the typesOfLatin1Characters
+// table classifies it as CharacterIdentifierStart.
+static inline bool isIdentStart(LChar c)
+{
+    const auto characterType = typesOfLatin1Characters[c];
+    return characterType == CharacterIdentifierStart;
+}
 
-        m_state = Number;
-    }
+// 16-bit overload: Latin-1 values are answered by the table-driven LChar
+// overload, everything else by the ICU-based isNonLatin1IdentStart().
+static inline bool isIdentStart(UChar c)
+{
+    if (!isLatin1(c))
+        return isNonLatin1IdentStart(c);
+    return isIdentStart(static_cast<LChar>(c));
+}
 
-#ifdef JSC_DEBUG_LEX
-    switch (m_state) {
-        case Eof:
-            printf("(EOF)\n");
-            break;
-        case Other:
-            printf("(Other)\n");
-            break;
-        case Identifier:
-            printf("(Identifier)/(Keyword)\n");
-            break;
-        case String:
-            printf("(String)\n");
-            break;
-        case Number:
-            printf("(Number)\n");
-            break;
-        default:
-            printf("(unknown)");
-    }
-#endif
+// Out-of-line helper used by isIdentPart(UChar) for characters that are not
+// Latin-1. A character may continue an identifier when it is U+200C or U+200D
+// (ZERO WIDTH NON-JOINER / ZERO WIDTH JOINER), or when its ICU general category
+// is one of those selected by the L, Mn, Mc, Nd and Pc masks below.
+static NEVER_INLINE bool isNonLatin1IdentPart(int c)
+{
+    if (c == 0x200C || c == 0x200D)
+        return true;
+
+    return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) != 0;
+}
 
-    if (m_state != Identifier)
-        m_eatNextIdentifier = false;
-
-    m_restrKeyword = false;
-    m_delimited = false;
-    llocp->first_line = yylineno;
-    llocp->last_line = yylineno;
-    llocp->first_column = startOffset;
-    llocp->last_column = m_currentOffset;
-    switch (m_state) {
-        case Eof:
-            token = 0;
-            break;
-        case Other:
-            if (token == '}' || token == ';')
-                m_delimited = true;
-            break;
-        case Identifier:
-            // Apply anonymous-function hack below (eat the identifier).
-            if (m_eatNextIdentifier) {
-                m_eatNextIdentifier = false;
-                token = lex(lvalp, llocp);
-                break;
-            }
-            lvalp->ident = makeIdentifier(m_buffer16);
-            token = IDENT;
-            break;
-        case IdentifierOrKeyword: {
-            lvalp->ident = makeIdentifier(m_buffer16);
-            const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
-            if (!entry) {
-                // Lookup for keyword failed, means this is an identifier.
-                token = IDENT;
-                break;
-            }
-            token = entry->lexerValue();
-            // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
-            m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
-            if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
-                m_restrKeyword = true;
-            break;
-        }
-        case String:
-            // Atomize constant strings in case they're later used in property lookup.
-            lvalp->ident = makeIdentifier(m_buffer16);
-            token = STRING;
-            break;
-        case Number:
-            lvalp->doubleValue = dval;
-            token = NUMBER;
-            break;
-        case Bad:
-#ifdef JSC_DEBUG_LEX
-            fprintf(stderr, "yylex: ERROR.\n");
-#endif
-            m_error = true;
-            return -1;
-        default:
-            ASSERT(!"unhandled numeration value in switch");
-            m_error = true;
-            return -1;
+// An 8-bit character may continue an identifier when its entry in
+// typesOfLatin1Characters is no greater than CharacterNumber. The CharacterType
+// enum is laid out so that the identifier-capable types come first (see the
+// CharacterType definition), which lets a single comparison do the job.
+static ALWAYS_INLINE bool isIdentPart(LChar c)
+{
+    const auto characterType = typesOfLatin1Characters[c];
+    return characterType <= CharacterNumber;
+}
+
+// 16-bit overload: Latin-1 values are answered by the table-driven LChar
+// overload, everything else by the ICU-based isNonLatin1IdentPart().
+static ALWAYS_INLINE bool isIdentPart(UChar c)
+{
+    if (!isLatin1(c))
+        return isNonLatin1IdentPart(c);
+    return isIdentPart(static_cast<LChar>(c));
+}
+
+// Maps the character that follows a backslash to the value of the corresponding
+// single-character escape, using the singleCharacterEscapeValuesForASCII table.
+// Characters at or above 128 yield 0; parseString() and parseStringSlowCase()
+// treat a 0 result as "not a simple escape" and try the other escape forms.
+// NOTE(review): a negative c is only caught by the ASSERT below; the callers in
+// view pass m_current, which is presumably unsigned -- confirm.
+static inline LChar singleEscape(int c)
+{
+    if (c < 128) {
+        ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
+        return singleCharacterEscapeValuesForASCII[c];
     }
-    m_lastToken = token;
-    return token;
+    return 0;
 }
 
-bool Lexer::isWhiteSpace() const
+// Appends a single character to the 8-bit token buffer (m_buffer8). The value
+// must already fit in one byte; this is verified by assertions only.
+template <typename T>
+inline void Lexer<T>::record8(int c)
 {
-    return m_current == '\t' || m_current == 0x0b || m_current == 0x0c || isSeparatorSpace(m_current);
+    ASSERT(c >= 0);
+    ASSERT(c <= 0xFF);
+    m_buffer8.append(static_cast<LChar>(c));
 }
 
-bool Lexer::isLineTerminator()
+// Assertion-only sanity check that c lies in [0, 0xFF]. UNUSED_PARAM keeps c
+// referenced when the ASSERTs expand to nothing (presumably in builds with
+// assertions disabled).
+template <typename T>
+inline void assertCharIsIn8BitRange(T c)
 {
-    bool cr = (m_current == '\r');
-    bool lf = (m_current == '\n');
-    if (cr)
-        m_skipLF = true;
-    else if (lf)
-        m_skipCR = true;
-    return cr || lf || m_current == 0x2028 || m_current == 0x2029;
+    UNUSED_PARAM(c);
+    ASSERT(c >= 0);
+    ASSERT(c <= 0xFF);
 }
 
-bool Lexer::isIdentStart(int c)
+// Specialization for UChar: only the upper bound is asserted (presumably
+// because UChar is unsigned, which would make a c >= 0 check vacuous).
+template <>
+inline void assertCharIsIn8BitRange(UChar c)
 {
-    return isASCIIAlpha(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)));
+    UNUSED_PARAM(c);
+    ASSERT(c <= 0xFF);
 }
 
-bool Lexer::isIdentPart(int c)
+// Specialization for LChar: intentionally empty, nothing is checked for 8-bit
+// characters.
+template <>
+inline void assertCharIsIn8BitRange(LChar)
 {
-    return isASCIIAlphanumeric(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
-                            | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)));
 }
 
-static bool isDecimalDigit(int c)
+// Appends `length` source characters starting at p to the 8-bit token buffer
+// (m_buffer8), narrowing each one to LChar. Every character is expected to fit
+// in 8 bits; this is checked by assertCharIsIn8BitRange() only.
+template <typename T>
+inline void Lexer<T>::append8(const T* p, size_t length)
 {
-    return isASCIIDigit(c);
+    // Grow the buffer once, then write the new tail through a raw pointer.
+    size_t currentSize = m_buffer8.size();
+    m_buffer8.grow(currentSize + length);
+    LChar* rawBuffer = m_buffer8.data() + currentSize;
+
+    for (size_t i = 0; i < length; i++) {
+        T c = p[i];
+        assertCharIsIn8BitRange(c);
+        rawBuffer[i] = c;
+    }
 }
 
-bool Lexer::isHexDigit(int c)
+// Appends `length` 8-bit characters starting at p to the 16-bit token buffer
+// (m_buffer16), widening each LChar to a UChar.
+template <typename T>
+inline void Lexer<T>::append16(const LChar* p, size_t length)
 {
-    return isASCIIHexDigit(c); 
+    // Grow the buffer once, then write the new tail through a raw pointer.
+    size_t currentSize = m_buffer16.size();
+    m_buffer16.grow(currentSize + length);
+    UChar* rawBuffer = m_buffer16.data() + currentSize;
+
+    for (size_t i = 0; i < length; i++)
+        rawBuffer[i] = p[i];
 }
 
-bool Lexer::isOctalDigit(int c)
+// Appends a single character of the lexer's source character type to the
+// 16-bit token buffer (m_buffer16).
+template <typename T>
+inline void Lexer<T>::record16(T c)
 {
-    return isASCIIOctalDigit(c);
+    m_buffer16.append(c);
 }
 
-int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
+// Appends a single character given as an int to the 16-bit token buffer
+// (m_buffer16). The value must lie in [0, USHRT_MAX]; this is verified by
+// assertions only, after which it is narrowed to UChar.
+template <typename T>
+inline void Lexer<T>::record16(int c)
 {
-    if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
-        shift(4);
-        return URSHIFTEQUAL;
-    }
-    if (c1 == '=' && c2 == '=' && c3 == '=') {
-        shift(3);
-        return STREQ;
-    }
-    if (c1 == '!' && c2 == '=' && c3 == '=') {
-        shift(3);
-        return STRNEQ;
-    }
-    if (c1 == '>' && c2 == '>' && c3 == '>') {
-        shift(3);
-        return URSHIFT;
-    }
-    if (c1 == '<' && c2 == '<' && c3 == '=') {
-        shift(3);
-        return LSHIFTEQUAL;
-    }
-    if (c1 == '>' && c2 == '>' && c3 == '=') {
-        shift(3);
-        return RSHIFTEQUAL;
-    }
-    if (c1 == '<' && c2 == '=') {
-        shift(2);
-        return LE;
-    }
-    if (c1 == '>' && c2 == '=') {
-        shift(2);
-        return GE;
-    }
-    if (c1 == '!' && c2 == '=') {
-        shift(2);
-        return NE;
-    }
-    if (c1 == '+' && c2 == '+') {
-        shift(2);
-        if (m_terminator)
-            return AUTOPLUSPLUS;
-        return PLUSPLUS;
-    }
-    if (c1 == '-' && c2 == '-') {
-        shift(2);
-        if (m_terminator)
-            return AUTOMINUSMINUS;
-        return MINUSMINUS;
-    }
-    if (c1 == '=' && c2 == '=') {
-        shift(2);
-        return EQEQ;
+    ASSERT(c >= 0);
+    ASSERT(c <= static_cast<int>(USHRT_MAX));
+    m_buffer16.append(static_cast<UChar>(c));
+}
+    
+#if !ASSERT_DISABLED
+// Safety net used while lexing builtin functions: returns false for the
+// identifiers call, apply, eval and Function, and true for everything else
+// (including a null identifier). Only compiled when assertions are enabled.
+bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
+{
+    // No identifier at all is trivially safe.
+    if (!ident)
+        return true;
+
+    const auto& builtinNames = vm.propertyNames->builtinNames();
+    const bool isSuspicious = *ident == builtinNames.callPublicName()
+        || *ident == builtinNames.applyPublicName()
+        || *ident == vm.propertyNames->eval
+        || *ident == vm.propertyNames->Function;
+    return !isSuspicious;
+}
+#endif
+    
+// Fast-path identifier/keyword lexer for 8-bit sources.
+//
+// tokenData->ident receives the identifier (or 0 when none is created);
+// lexerFlags may contain LexerFlagsIgnoreReservedWords to suppress keyword
+// recognition; strictMode decides whether RESERVED_IF_STRICT words are reported
+// as keywords or as plain IDENT. Returns the token type, or an error token when
+// parsing a builtin function and the identifier is disallowed or is an unknown
+// private name. Identifiers containing a backslash are handed to
+// parseIdentifierSlowCase().
+template <>
+template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
+{
+    // When at least maxTokenLength characters remain, keywords are matched
+    // directly by parseKeyword(); otherwise the hash-table lookup at the end of
+    // this function is used instead.
+    const ptrdiff_t remaining = m_codeEnd - m_code;
+    if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
+        JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
+        if (keyword != IDENT) {
+            ASSERT((!shouldCreateIdentifier) || tokenData->ident);
+            return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
+        }
     }
-    if (c1 == '+' && c2 == '=') {
-        shift(2);
-        return PLUSEQUAL;
+    
+    // A leading '@' marks a private name, but only while lexing builtin code.
+    bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
+    if (isPrivateName)
+        shift();
+    
+    const LChar* identifierStart = currentSourcePtr();
+    unsigned identifierLineStart = currentLineStartOffset();
+    
+    while (isIdentPart(m_current))
+        shift();
+    
+    // A backslash may begin a \uXXXX escape: rewind to the start of the
+    // identifier and let the slow path re-lex it.
+    // NOTE(review): the UChar specialization asserts !isPrivateName here; this
+    // one does not.
+    if (UNLIKELY(m_current == '\\')) {
+        setOffsetFromSourcePtr(identifierStart, identifierLineStart);
+        return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
     }
-    if (c1 == '-' && c2 == '=') {
-        shift(2);
-        return MINUSEQUAL;
+
+    const Identifier* ident = 0;
+    
+    if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
+        int identifierLength = currentSourcePtr() - identifierStart;
+        ident = makeIdentifier(identifierStart, identifierLength);
+        if (m_parsingBuiltinFunction) {
+            if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
+                m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
+                return ERRORTOK;
+            }
+            if (isPrivateName)
+                ident = m_vm->propertyNames->getPrivateName(*ident);
+            else if (*ident == m_vm->propertyNames->undefinedKeyword)
+                // NOTE(review): this store is overwritten by the unconditional
+                // "tokenData->ident = ident" below, so it has no effect as
+                // written -- confirm whether "ident = ..." was intended.
+                tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
+            if (!ident)
+                return INVALID_PRIVATE_NAME_ERRORTOK;
+        }
+        tokenData->ident = ident;
+    } else
+        tokenData->ident = 0;
+
+    // Near the end of the source the parseKeyword() fast path above was
+    // skipped, so keywords are recognized through the keyword table here.
+    if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
+        ASSERT(shouldCreateIdentifier);
+        // NOTE(review): the condition below repeats the enclosing one, so it is
+        // always true and the trailing "return IDENT" of this block cannot run.
+        if (remaining < maxTokenLength) {
+            const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
+            ASSERT((remaining < maxTokenLength) || !entry);
+            if (!entry)
+                return IDENT;
+            JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
+            return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
+        }
+        return IDENT;
     }
-    if (c1 == '*' && c2 == '=') {
-        shift(2);
-        return MULTEQUAL;
+
+    return IDENT;
+}
+
+// Fast-path identifier/keyword lexer for 16-bit sources.
+//
+// Mirrors the LChar specialization, and additionally tracks whether every
+// character of the identifier fits in 8 bits so that the identifier can be
+// created through makeIdentifierLCharFromUChar(). tokenData->ident receives the
+// identifier (or 0 when none is created); lexerFlags may contain
+// LexerFlagsIgnoreReservedWords to suppress keyword recognition; strictMode
+// decides whether RESERVED_IF_STRICT words are reported as keywords or as IDENT.
+// Identifiers containing a backslash are handed to parseIdentifierSlowCase().
+template <>
+template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
+{
+    // When at least maxTokenLength characters remain, keywords are matched
+    // directly by parseKeyword(); otherwise the hash-table lookup at the end of
+    // this function is used instead.
+    const ptrdiff_t remaining = m_codeEnd - m_code;
+    if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
+        JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
+        if (keyword != IDENT) {
+            ASSERT((!shouldCreateIdentifier) || tokenData->ident);
+            return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
+        }
     }
-    if (c1 == '/' && c2 == '=') {
-        shift(2);
-        return DIVEQUAL;
+    
+    // A leading '@' marks a private name, but only while lexing builtin code.
+    bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
+    if (isPrivateName)
+        shift();
+
+    const UChar* identifierStart = currentSourcePtr();
+    int identifierLineStart = currentLineStartOffset();
+
+    // Bitwise OR of every character seen; any bit above 0xff means at least
+    // one character does not fit in 8 bits.
+    UChar orAllChars = 0;
+    
+    while (isIdentPart(m_current)) {
+        orAllChars |= m_current;
+        shift();
     }
-    if (c1 == '&' && c2 == '=') {
-        shift(2);
-        return ANDEQUAL;
+    
+    // A backslash may begin a \uXXXX escape: rewind to the start of the
+    // identifier and let the slow path re-lex it.
+    if (UNLIKELY(m_current == '\\')) {
+        ASSERT(!isPrivateName);
+        setOffsetFromSourcePtr(identifierStart, identifierLineStart);
+        return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
     }
-    if (c1 == '^' && c2 == '=') {
-        shift(2);
-        return XOREQUAL;
+
+    bool isAll8Bit = false;
+
+    if (!(orAllChars & ~0xff))
+        isAll8Bit = true;
+
+    const Identifier* ident = 0;
+    
+    if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
+        int identifierLength = currentSourcePtr() - identifierStart;
+        if (isAll8Bit)
+            ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
+        else
+            ident = makeIdentifier(identifierStart, identifierLength);
+        if (m_parsingBuiltinFunction) {
+            if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
+                m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
+                return ERRORTOK;
+            }
+            if (isPrivateName)
+                ident = m_vm->propertyNames->getPrivateName(*ident);
+            else if (*ident == m_vm->propertyNames->undefinedKeyword)
+                // NOTE(review): this store is overwritten by the unconditional
+                // "tokenData->ident = ident" below, so it has no effect as
+                // written -- confirm whether "ident = ..." was intended.
+                tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
+            if (!ident)
+                return INVALID_PRIVATE_NAME_ERRORTOK;
+        }
+        tokenData->ident = ident;
+    } else
+        tokenData->ident = 0;
+    
+    // Near the end of the source the parseKeyword() fast path above was
+    // skipped, so keywords are recognized through the keyword table here.
+    if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
+        ASSERT(shouldCreateIdentifier);
+        // NOTE(review): the condition below repeats the enclosing one, so it is
+        // always true and the trailing "return IDENT" of this block cannot run.
+        if (remaining < maxTokenLength) {
+            const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
+            ASSERT((remaining < maxTokenLength) || !entry);
+            if (!entry)
+                return IDENT;
+            JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
+            return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
+        }
+        return IDENT;
     }
-    if (c1 == '%' && c2 == '=') {
-        shift(2);
-        return MODEQUAL;
+
+    return IDENT;
+}
+
+// Slow-path identifier lexer, entered by parseIdentifier() when the identifier
+// contains a backslash. Handles \uXXXX escapes by accumulating the decoded
+// identifier in m_buffer16.
+//
+// Returns IDENT or a keyword token on success, or one of the
+// *_IDENTIFIER_*ESCAPE_ERRORTOK values when an escape is malformed, truncated,
+// or decodes to a character that is not allowed at that position.
+// tokenData->ident is set to the identifier, or to 0 when
+// shouldCreateIdentifier is false.
+template <typename T>
+template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
+{
+    const ptrdiff_t remaining = m_codeEnd - m_code;
+    // Start of the current run of unescaped characters not yet copied into
+    // m_buffer16.
+    const T* identifierStart = currentSourcePtr();
+    bool bufferRequired = false;
+
+    while (true) {
+        if (LIKELY(isIdentPart(m_current))) {
+            shift();
+            continue;
+        }
+        if (LIKELY(m_current != '\\'))
+            break;
+
+        // \uXXXX unicode characters.
+        bufferRequired = true;
+        // Flush the pending run of plain characters before decoding the escape.
+        if (identifierStart != currentSourcePtr())
+            m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
+        shift();
+        if (UNLIKELY(m_current != 'u'))
+            return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
+        shift();
+        UnicodeHexValue character = parseFourDigitUnicodeHex();
+        if (UNLIKELY(!character.isValid()))
+            return character.valueType() == UnicodeHexValue::IncompleteHex ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
+        UChar ucharacter = static_cast<UChar>(character.value());
+        // An empty buffer is taken to mean the escape is the first character of
+        // the identifier, which must satisfy the stricter isIdentStart().
+        // NOTE(review): escaped characters are only recorded when
+        // shouldCreateIdentifier is true, so with it false the buffer can stay
+        // empty after a leading escape -- confirm this check is still right then.
+        if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
+            return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
+        if (shouldCreateIdentifier)
+            record16(ucharacter);
+        identifierStart = currentSourcePtr();
     }
-    if (c1 == '|' && c2 == '=') {
-        shift(2);
-        return OREQUAL;
+
+    int identifierLength;
+    const Identifier* ident = 0;
+    if (shouldCreateIdentifier) {
+        if (!bufferRequired) {
+            // No escape was seen: build the identifier straight from the source.
+            identifierLength = currentSourcePtr() - identifierStart;
+            ident = makeIdentifier(identifierStart, identifierLength);
+        } else {
+            // Append the trailing run of plain characters, then build from the buffer.
+            if (identifierStart != currentSourcePtr())
+                m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
+            ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+        }
+
+        tokenData->ident = ident;
+    } else
+        tokenData->ident = 0;
+
+    if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
+        ASSERT(shouldCreateIdentifier);
+        // Keywords must not be recognized if there was an \uXXXX in the identifier.
+        if (remaining < maxTokenLength) {
+            const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
+            ASSERT((remaining < maxTokenLength) || !entry);
+            if (!entry)
+                return IDENT;
+            JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
+            return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
+        }
+        return IDENT;
     }
-    if (c1 == '<' && c2 == '<') {
-        shift(2);
-        return LSHIFT;
+
+    // Reset the scratch buffer for the next token.
+    // NOTE(review): the error returns inside the loop leave m_buffer16 as is;
+    // whether it is cleared elsewhere is not visible here.
+    m_buffer16.resize(0);
+    return IDENT;
+}
+
+// For 8-bit sources, parseString() falls back to parseStringSlowCase() when it
+// meets a character with a value of 0xD or below.
+static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
+{
+    return character <= 0xD;
+}
+
+// For 16-bit sources, parseString() falls back to parseStringSlowCase() when it
+// meets a character outside the range [0xE, 0xFF]: either a low value of 0xD or
+// below, or a character that does not fit in 8 bits.
+static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
+{
+    const bool staysOnFastPath = character >= 0xE && character <= 0xFF;
+    return !staysOnFastPath;
+}
+
+// Fast-path string-literal lexer. Builds the string in the 8-bit buffer
+// (m_buffer8) and handles only single-character escapes, escaped line
+// terminators and \xHH. Anything else -- another escape form, or a character
+// for which characterRequiresParseStringSlowCase() is true -- rewinds to the
+// opening quote and restarts in parseStringSlowCase().
+//
+// On success tokenData->ident holds the string contents (or 0 when
+// shouldBuildStrings is false) and StringParsedSuccessfully is returned.
+// strictMode is only forwarded to the slow path.
+template <typename T>
+template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
+{
+    // Remember where the literal started so the slow path can re-lex it.
+    int startingOffset = currentOffset();
+    int startingLineStartOffset = currentLineStartOffset();
+    int startingLineNumber = lineNumber();
+    T stringQuoteCharacter = m_current;
+    shift();
+
+    // Start of the current run of plain characters not yet copied to m_buffer8.
+    const T* stringStart = currentSourcePtr();
+
+    while (m_current != stringQuoteCharacter) {
+        if (UNLIKELY(m_current == '\\')) {
+            // Flush the pending run of plain characters before the escape.
+            if (stringStart != currentSourcePtr() && shouldBuildStrings)
+                append8(stringStart, currentSourcePtr() - stringStart);
+            shift();
+
+            LChar escape = singleEscape(m_current);
+
+            // Most common escape sequences first
+            if (escape) {
+                if (shouldBuildStrings)
+                    record8(escape);
+                shift();
+            } else if (UNLIKELY(isLineTerminator(m_current)))
+                shiftLineTerminator();
+            else if (m_current == 'x') {
+                shift();
+                if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
+                    m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
+                    // Unterminated when the source ends before two hex digits
+                    // could be read; otherwise the escape itself is malformed.
+                    return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
+                }
+                T prev = m_current;
+                shift();
+                if (shouldBuildStrings)
+                    record8(convertHex(prev, m_current));
+                shift();
+            } else {
+                // Escape form not handled here: rewind and use the slow path.
+                setOffset(startingOffset, startingLineStartOffset);
+                setLineNumber(startingLineNumber);
+                m_buffer8.resize(0);
+                return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
+            }
+            stringStart = currentSourcePtr();
+            continue;
+        }
+
+        if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
+            setOffset(startingOffset, startingLineStartOffset);
+            setLineNumber(startingLineNumber);
+            m_buffer8.resize(0);
+            return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
+        }
+
+        shift();
     }
-    if (c1 == '>' && c2 == '>') {
-        shift(2);
-        return RSHIFT;
+
+    // Copy the final run of plain characters, then create the identifier and
+    // reset the scratch buffer for the next token.
+    if (currentSourcePtr() != stringStart && shouldBuildStrings)
+        append8(stringStart, currentSourcePtr() - stringStart);
+    if (shouldBuildStrings) {
+        tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
+        m_buffer8.resize(0);
+    } else
+        tokenData->ident = 0;
+
+    return StringParsedSuccessfully;
+}
+
+template <typename T> // Slow-path string literal scanner: decodes every escape form and collects the text in m_buffer16.
+template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
+{
+    T stringQuoteCharacter = m_current; // The character under the cursor is taken as the opening quote and is also the terminator.
+    shift();
+
+    const T* stringStart = currentSourcePtr(); // Start of the pending run of characters that can be copied verbatim.
+
+    while (m_current != stringQuoteCharacter) {
+        if (UNLIKELY(m_current == '\\')) {
+            if (stringStart != currentSourcePtr() && shouldBuildStrings) // Flush the verbatim run that precedes the backslash.
+                append16(stringStart, currentSourcePtr() - stringStart);
+            shift();
+
+            LChar escape = singleEscape(m_current); // Presumably 0 when m_current is not a single-character escape (defined outside this chunk).
+
+            // Most common escape sequences first
+            if (escape) {
+                if (shouldBuildStrings)
+                    record16(escape);
+                shift();
+            } else if (UNLIKELY(isLineTerminator(m_current))) // Backslash followed by a line terminator: nothing is recorded.
+                shiftLineTerminator();
+            else if (m_current == 'x') {
+                shift();
+                if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) { // Exactly two hex digits are required after \x.
+                    m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
+                    return StringCannotBeParsed;
+                }
+                T prev = m_current;
+                shift();
+                if (shouldBuildStrings)
+                    record16(convertHex(prev, m_current));
+                shift();
+            } else if (m_current == 'u') {
+                shift();
+                UnicodeHexValue character = parseFourDigitUnicodeHex();
+                if (character.isValid()) {
+                    if (shouldBuildStrings)
+                        record16(character.value());
+                } else if (m_current == stringQuoteCharacter) { // "\u" directly before the closing quote is accepted as a literal 'u'.
+                    if (shouldBuildStrings)
+                        record16('u');
+                } else {
+                    m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
+                    return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed;
+                }
+            } else if (strictMode && isASCIIDigit(m_current)) {
+                // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
+                int character1 = m_current;
+                shift();
+                if (character1 != '0' || isASCIIDigit(m_current)) {
+                    m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
+                    return StringCannotBeParsed;
+                }
+                if (shouldBuildStrings)
+                    record16(0);
+            } else if (!strictMode && isASCIIOctalDigit(m_current)) {
+                // Octal character sequences (non-strict code only): one, two or three octal digits.
+                T character1 = m_current;
+                shift();
+                if (isASCIIOctalDigit(m_current)) {
+                    // Two octal characters
+                    T character2 = m_current;
+                    shift();
+                    if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) { // A third digit is taken only if the first is 0-3, so the value stays <= 255.
+                        if (shouldBuildStrings)
+                            record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
+                        shift();
+                    } else {
+                        if (shouldBuildStrings)
+                            record16((character1 - '0') * 8 + character2 - '0');
+                    }
+                } else {
+                    if (shouldBuildStrings)
+                        record16(character1 - '0');
+                }
+            } else if (!atEnd()) { // Any other escaped character stands for itself.
+                if (shouldBuildStrings)
+                    record16(m_current);
+                shift();
+            } else {
+                m_lexErrorMessage = "Unterminated string constant";
+                return StringUnterminated;
+            }
+
+            stringStart = currentSourcePtr(); // The next verbatim run begins after the escape sequence.
+            continue;
+        }
+        // Fast check for characters that require special handling.
+        // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
+        // as possible, and lets through all common ASCII characters.
+        if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
+            // New-line or end of input is not allowed
+            if (atEnd() || isLineTerminator(m_current)) {
+                m_lexErrorMessage = "Unexpected EOF"; // NOTE(review): this text is also reported for a raw line terminator, not only at end of input.
+                return atEnd() ? StringUnterminated : StringCannotBeParsed;
+            }
+            // Anything else is just a normal character
+        }
+        shift();
     }
-    if (c1 == '&' && c2 == '&') {
-        shift(2);
-        return AND;
+
+    if (currentSourcePtr() != stringStart && shouldBuildStrings) // Flush the final verbatim run.
+        append16(stringStart, currentSourcePtr() - stringStart);
+    if (shouldBuildStrings)
+        tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+    else
+        tokenData->ident = 0; // No identifier is produced when strings are not being built.
+
+    m_buffer16.resize(0);
+    return StringParsedSuccessfully; // The closing quote is still the current character here; it is not consumed by this function.
+}
+
+template <typename T> // Parses the digits of a hexadecimal literal into returnValue; entered with the cursor on the 'x' prefix.
+ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
+{
+    // Optimization: most hexadecimal values fit into 4 bytes.
+    uint32_t hexValue = 0;
+    int maximumDigits = 7; // Counts down from 7, so the loop below consumes at most 8 digits (32 bits).
+
+    // Shift out the 'x' prefix.
+    shift();
+
+    do { // The first digit is consumed unconditionally; lex() checks it with peek(1) before calling.
+        hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
+        shift();
+        --maximumDigits;
+    } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
+
+    if (maximumDigits >= 0) { // Fewer than 8 digits were read: the value is already complete in hexValue.
+        returnValue = hexValue;
+        return;
     }
-    if (c1 == '|' && c2 == '|') {
-        shift(2);
-        return OR;
+
+    // No room is left in hexValue.
+    // Its 8 digits are shifted out, most significant first, and placed into the m_buffer8 vector as lowercase hex characters.
+    for (int i = 0; i < 8; ++i) {
+         int digit = hexValue >> 28;
+         if (digit < 10)
+             record8(digit + '0');
+         else
+             record8(digit - 10 + 'a');
+         hexValue <<= 4;
     }
 
-    switch (c1) {
-        case '=':
-        case '>':
-        case '<':
-        case ',':
-        case '!':
-        case '~':
-        case '?':
-        case ':':
-        case '.':
-        case '+':
-        case '-':
-        case '*':
-        case '/':
-        case '&':
-        case '|':
-        case '^':
-        case '%':
-        case '(':
-        case ')':
-        case '[':
-        case ']':
-        case ';':
-            shift(1);
-            return static_cast<int>(c1);
-        case '{':
-            charPos = m_currentOffset;
-            shift(1);
-            return OPENBRACE;
-        case '}':
-            charPos = m_currentOffset;
-            shift(1);
-            return CLOSEBRACE;
-        default:
-            return -1;
+    while (isASCIIHexDigit(m_current)) { // Append any remaining digits straight from the source.
+        record8(m_current);
+        shift();
     }
+
+    returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16); // m_buffer8 is not cleared here; lex() resizes it afterwards.
 }
 
-unsigned short Lexer::singleEscape(unsigned short c)
-{
-    switch (c) {
-        case 'b':
-            return 0x08;
-        case 't':
-            return 0x09;
-        case 'n':
-            return 0x0A;
-        case 'v':
-            return 0x0B;
-        case 'f':
-            return 0x0C;
-        case 'r':
-            return 0x0D;
-        case '"':
-            return 0x22;
-        case '\'':
-            return 0x27;
-        case '\\':
-            return 0x5C;
-        default:
-            return c;
+template <typename T> // Parses a run of octal digits into returnValue; returns false when a decimal digit (8 or 9) follows the run.
+ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
+{
+    // Optimization: most octal values fit into 4 bytes.
+    uint32_t octalValue = 0;
+    int maximumDigits = 9; // Counts down from 9, so the loop below consumes at most 10 digits.
+    // Temporary buffer for the digits. Makes it easier
+    // to reconstruct the input characters when needed. Filled from index 9 downwards.
+    LChar digits[10];
+
+    do {
+        octalValue = octalValue * 8 + (m_current - '0');
+        digits[maximumDigits] = m_current;
+        shift();
+        --maximumDigits;
+    } while (isASCIIOctalDigit(m_current) && maximumDigits >= 0);
+
+    if (!isASCIIDigit(m_current) && maximumDigits >= 0) { // Short literal with no digit after it: octalValue is the result.
+        returnValue = octalValue;
+        return true;
     }
+
+    for (int i = 9; i > maximumDigits; --i) // Copy the consumed digits to m_buffer8 in their source order.
+         record8(digits[i]);
+
+    while (isASCIIOctalDigit(m_current)) {
+        record8(m_current);
+        shift();
+    }
+
+    if (isASCIIDigit(m_current)) // A non-octal digit follows; the digits gathered so far stay in m_buffer8.
+        return false;
+
+    returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
+    return true;
 }
 
-unsigned short Lexer::convertOctal(int c1, int c2, int c3)
+template <typename T> // Parses the integer digits of a decimal literal; returns true only when returnValue was set on the fast path.
+ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
 {
-    return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
+    // Optimization: most decimal values fit into 4 bytes.
+    uint32_t decimalValue = 0;
+
+    // Since parseOctal may be executed before parseDecimal,
+    // the m_buffer8 may hold ascii digits. The fast path is only tried when it is empty.
+    if (!m_buffer8.size()) {
+        int maximumDigits = 9; // Counts down from 9, so the loop below consumes at most 10 digits.
+        // Temporary buffer for the digits. Makes it easier
+        // to reconstruct the input characters when needed. Filled from index 9 downwards.
+        LChar digits[10];
+
+        do {
+            decimalValue = decimalValue * 10 + (m_current - '0');
+            digits[maximumDigits] = m_current;
+            shift();
+            --maximumDigits;
+        } while (isASCIIDigit(m_current) && maximumDigits >= 0);
+
+        if (maximumDigits >= 0 && m_current != '.' && (m_current | 0x20) != 'e') { // At most 9 digits and no fraction or exponent follows; | 0x20 folds 'E' onto 'e'.
+            returnValue = decimalValue;
+            return true;
+        }
+
+        for (int i = 9; i > maximumDigits; --i) // Copy the consumed digits to m_buffer8 in their source order.
+            record8(digits[i]);
+    }
+
+    while (isASCIIDigit(m_current)) { // Append the remaining integer digits.
+        record8(m_current);
+        shift();
+    }
+
+    return false; // returnValue is untouched; the digits are left in m_buffer8 for the caller.
 }
 
-unsigned char Lexer::convertHex(int c)
+template <typename T> // Appends '.' and the following run of decimal digits to m_buffer8.
+ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
 {
-    if (c >= '0' && c <= '9')
-        return static_cast<unsigned char>(c - '0');
-    if (c >= 'a' && c <= 'f')
-        return static_cast<unsigned char>(c - 'a' + 10);
-    return static_cast<unsigned char>(c - 'A' + 10);
+    record8('.'); // No shift here: lex() has already moved past the '.' in the source.
+    while (isASCIIDigit(m_current)) {
+        record8(m_current);
+        shift();
+    }
 }
 
-unsigned char Lexer::convertHex(int c1, int c2)
+template <typename T> // Appends the exponent part of a number to m_buffer8; returns false when no digit follows the optional sign.
+ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
 {
-    return ((convertHex(c1) << 4) + convertHex(c2));
+    record8('e'); // Recorded as lowercase 'e'; lex() calls this for both 'e' and 'E'.
+    shift();
+    if (m_current == '+' || m_current == '-') { // Optional exponent sign.
+        record8(m_current);
+        shift();
+    }
+
+    if (!isASCIIDigit(m_current)) // At least one exponent digit is required.
+        return false;
+
+    do {
+        record8(m_current);
+        shift();
+    } while (isASCIIDigit(m_current));
+    return true;
 }
 
-UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
+template <typename T> // Skips the body of a multi-line comment; returns true once the closing star-slash is consumed, false if input ends first.
+ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
 {
-    unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
-    unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
-    return (highByte << 8 | lowByte);
+    while (true) {
+        while (UNLIKELY(m_current == '*')) { // Inner loop so that a run of several '*' before the '/' is handled.
+            shift();
+            if (m_current == '/') {
+                shift();
+                return true;
+            }
+        }
+
+        if (atEnd())
+            return false;
+
+        if (isLineTerminator(m_current)) {
+            shiftLineTerminator();
+            m_terminator = true; // Remember that a line terminator was seen inside the comment.
+        } else
+            shift();
+    }
 }
 
-void Lexer::record8(int c)
+template <typename T> // Reports whether the next character after white space and line terminators is ':'; lexer state is not advanced.
+bool Lexer<T>::nextTokenIsColon()
 {
-    ASSERT(c >= 0);
-    ASSERT(c <= 0xff);
-    m_buffer8.append(static_cast<char>(c));
+    const T* code = m_code; // Local cursor: m_code itself is left untouched.
+    while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code))) // Only white space and line terminators are skipped, not comments.
+        code++;
+    
+    return code < m_codeEnd && *code == ':';
 }
 
-void Lexer::record16(int c)
+template <typename T> // Scans the next token into tokenRecord and returns its type; error paths set m_error and return a token carrying ErrorTokenFlag.
+JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
 {
-    ASSERT(c >= 0);
-    ASSERT(c <= USHRT_MAX);
-    record16(UChar(static_cast<unsigned short>(c)));
+    JSTokenData* tokenData = &tokenRecord->m_data;
+    JSTokenLocation* tokenLocation = &tokenRecord->m_location;
+    ASSERT(!m_error);
+    ASSERT(m_buffer8.isEmpty());
+    ASSERT(m_buffer16.isEmpty());
+
+    JSTokenType token = ERRORTOK;
+    m_terminator = false; // Set again below whenever a line terminator is skipped before the token.
+
+start: // Re-entered after a comment or a line terminator has been skipped.
+    while (isWhiteSpace(m_current))
+        shift();
+
+    if (atEnd())
+        return EOFTOK;
+    
+    tokenLocation->startOffset = currentOffset();
+    ASSERT(currentOffset() >= currentLineStartOffset());
+    tokenRecord->m_startPosition = currentPosition();
+
+    CharacterType type; // Classify the first character: table lookup for Latin-1, explicit checks otherwise.
+    if (LIKELY(isLatin1(m_current)))
+        type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
+    else if (isNonLatin1IdentStart(m_current))
+        type = CharacterIdentifierStart;
+    else if (isLineTerminator(m_current))
+        type = CharacterLineTerminator;
+    else
+        type = CharacterInvalid;
+
+    switch (type) {
+    case CharacterGreater: // One of: > >= >> >>= >>> >>>=
+        shift();
+        if (m_current == '>') {
+            shift();
+            if (m_current == '>') {
+                shift();
+                if (m_current == '=') {
+                    shift();
+                    token = URSHIFTEQUAL;
+                    break;
+                }
+                token = URSHIFT;
+                break;
+            }
+            if (m_current == '=') {
+                shift();
+                token = RSHIFTEQUAL;
+                break;
+            }
+            token = RSHIFT;
+            break;
+        }
+        if (m_current == '=') {
+            shift();
+            token = GE;
+            break;
+        }
+        token = GT;
+        break;
+    case CharacterEqual: // One of: = == ===
+        shift();
+        if (m_current == '=') {
+            shift();
+            if (m_current == '=') {
+                shift();
+                token = STREQ;
+                break;
+            }
+            token = EQEQ;
+            break;
+        }
+        token = EQUAL;
+        break;
+    case CharacterLess: // One of: < <= << <<= or the start of an HTML-style comment
+        shift();
+        if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
+            // <!-- marks the beginning of a line comment (for www usage)
+            goto inSingleLineComment;
+        }
+        if (m_current == '<') {
+            shift();
+            if (m_current == '=') {
+                shift();
+                token = LSHIFTEQUAL;
+                break;
+            }
+            token = LSHIFT;
+            break;
+        }
+        if (m_current == '=') {
+            shift();
+            token = LE;
+            break;
+        }
+        token = LT;
+        break;
+    case CharacterExclamationMark: // One of: ! != !==
+        shift();
+        if (m_current == '=') {
+            shift();
+            if (m_current == '=') {
+                shift();
+                token = STRNEQ;
+                break;
+            }
+            token = NE;
+            break;
+        }
+        token = EXCLAMATION;
+        break;
+    case CharacterAdd: // One of: + ++ +=
+        shift();
+        if (m_current == '+') {
+            shift();
+            token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS; // AUTOPLUSPLUS when a line terminator was skipped before this operator.
+            break;
+        }
+        if (m_current == '=') {
+            shift();
+            token = PLUSEQUAL;
+            break;
+        }
+        token = PLUS;
+        break;
+    case CharacterSub: // One of: - -- -= or a "-->" comment at the start of a line
+        shift();
+        if (m_current == '-') {
+            shift();
+            if (m_atLineStart && m_current == '>') { // "-->" is a single-line comment only when no token precedes it on the line.
+                shift();
+                goto inSingleLineComment;
+            }
+            token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS; // AUTOMINUSMINUS when a line terminator was skipped before this operator.
+            break;
+        }
+        if (m_current == '=') {
+            shift();
+            token = MINUSEQUAL;
+            break;
+        }
+        token = MINUS;
+        break;
+    case CharacterMultiply: // One of: * *=
+        shift();
+        if (m_current == '=') {
+            shift();
+            token = MULTEQUAL;
+            break;
+        }
+        token = TIMES;
+        break;
+    case CharacterSlash: // A comment, /= or /
+        shift();
+        if (m_current == '/') {
+            shift();
+            goto inSingleLineComment;
+        }
+        if (m_current == '*') {
+            shift();
+            if (parseMultilineComment())
+                goto start;
+            m_lexErrorMessage = "Multiline comment was not closed properly";
+            token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
+            goto returnError;
+        }
+        if (m_current == '=') {
+            shift();
+            token = DIVEQUAL;
+            break;
+        }
+        token = DIVIDE;
+        break;
+    case CharacterAnd: // One of: & && &=
+        shift();
+        if (m_current == '&') {
+            shift();
+            token = AND;
+            break;
+        }
+        if (m_current == '=') {
+            shift();
+            token = ANDEQUAL;
+            break;
+        }
+        token = BITAND;
+        break;
+    case CharacterXor: // One of: ^ ^=
+        shift();
+        if (m_current == '=') {
+            shift();
+            token = XOREQUAL;
+            break;
+        }
+        token = BITXOR;
+        break;
+    case CharacterModulo: // One of: % %=
+        shift();
+        if (m_current == '=') {
+            shift();
+            token = MODEQUAL;
+            break;
+        }
+        token = MOD;
+        break;
+    case CharacterOr: // One of: | |= ||
+        shift();
+        if (m_current == '=') {
+            shift();
+            token = OREQUAL;
+            break;
+        }
+        if (m_current == '|') {
+            shift();
+            token = OR;
+            break;
+        }
+        token = BITOR;
+        break;
+    case CharacterOpenParen:
+        token = OPENPAREN;
+        shift();
+        break;
+    case CharacterCloseParen:
+        token = CLOSEPAREN;
+        shift();
+        break;
+    case CharacterOpenBracket:
+        token = OPENBRACKET;
+        shift();
+        break;
+    case CharacterCloseBracket:
+        token = CLOSEBRACKET;
+        shift();
+        break;
+    case CharacterComma:
+        token = COMMA;
+        shift();
+        break;
+    case CharacterColon:
+        token = COLON;
+        shift();
+        break;
+    case CharacterQuestion:
+        token = QUESTION;
+        shift();
+        break;
+    case CharacterTilde:
+        token = TILDE;
+        shift();
+        break;
+    case CharacterSemicolon:
+        shift();
+        token = SEMICOLON;
+        break;
+    case CharacterOpenBrace: // Braces also store their line, offset and line start in tokenData.
+        tokenData->line = lineNumber();
+        tokenData->offset = currentOffset();
+        tokenData->lineStartOffset = currentLineStartOffset();
+        ASSERT(tokenData->offset >= tokenData->lineStartOffset);
+        shift();
+        token = OPENBRACE;
+        break;
+    case CharacterCloseBrace: // Same bookkeeping as for the opening brace.
+        tokenData->line = lineNumber();
+        tokenData->offset = currentOffset();
+        tokenData->lineStartOffset = currentLineStartOffset();
+        ASSERT(tokenData->offset >= tokenData->lineStartOffset);
+        shift();
+        token = CLOSEBRACE;
+        break;
+    case CharacterDot: // Either . or ... or a number that starts with a decimal point
+        shift();
+        if (!isASCIIDigit(m_current)) {
+            if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
+                shift();
+                shift();
+                token = DOTDOTDOT;
+                break;
+            }
+            token = DOT;
+            break;
+        }
+        goto inNumberAfterDecimalPoint; // A digit follows the '.', so continue inside the number case below.
+    case CharacterZero:
+        shift();
+        if ((m_current | 0x20) == 'x') { // | 0x20 folds 'X' onto 'x'.
+            if (!isASCIIHexDigit(peek(1))) { // m_current is the 'x'; peek(1) is the character after it.
+                m_lexErrorMessage = "No hexadecimal digits after '0x'";
+                token = INVALID_HEX_NUMBER_ERRORTOK;
+                goto returnError;
+            }
+            parseHex(tokenData->doubleValue);
+            if (isIdentStart(m_current)) {
+                m_lexErrorMessage = "No space between hexadecimal literal and identifier";
+                token = INVALID_HEX_NUMBER_ERRORTOK;
+                goto returnError;
+            }
+            token = NUMBER;
+            m_buffer8.resize(0);
+            break;
+        }
+
+        record8('0'); // Keep the leading zero in m_buffer8 for the octal and decimal paths.
+        if (isASCIIOctalDigit(m_current)) {
+            if (parseOctal(tokenData->doubleValue)) {
+                if (strictMode) {
+                    m_lexErrorMessage = "Octal escapes are forbidden in strict mode"; // NOTE(review): text says "escapes" but this path rejects an octal numeric literal.
+                    token = INVALID_OCTAL_NUMBER_ERRORTOK;
+                    goto returnError;
+                }
+                token = NUMBER;
+            }
+        }
+        FALLTHROUGH;
+    case CharacterNumber:
+        if (LIKELY(token != NUMBER)) { // token is already NUMBER only when parseOctal succeeded above.
+            if (!parseDecimal(tokenData->doubleValue)) {
+                if (m_current == '.') {
+                    shift();
+inNumberAfterDecimalPoint: // Also the entry point for literals that begin with '.'.
+                    parseNumberAfterDecimalPoint();
+                }
+                if ((m_current | 0x20) == 'e') {
+                    if (!parseNumberAfterExponentIndicator()) {
+                        m_lexErrorMessage = "Non-number found after exponent indicator";
+                        token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
+                        goto returnError;
+                    }
+                }
+                size_t parsedLength;
+                tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength); // Convert the characters gathered in m_buffer8.
+            }
+            token = NUMBER;
+        }
+
+        // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
+        if (UNLIKELY(isIdentStart(m_current))) {
+            m_lexErrorMessage = "At least one digit must occur after a decimal point"; // NOTE(review): this text does not describe the condition tested here.
+            token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
+            goto returnError;
+        }
+        m_buffer8.resize(0);
+        break;
+    case CharacterQuote:
+        if (lexerFlags & LexerFlagsDontBuildStrings) { // The flag selects the parseString instantiation at run time.
+            StringParseResult result = parseString<false>(tokenData, strictMode);
+            if (UNLIKELY(result != StringParsedSuccessfully)) {
+                token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
+                goto returnError;
+            }
+        } else {
+            StringParseResult result = parseString<true>(tokenData, strictMode);
+            if (UNLIKELY(result != StringParsedSuccessfully)) {
+                token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
+                goto returnError;
+            }
+        }
+        shift(); // Consume the closing quote, which parseString leaves as the current character.
+        token = STRING;
+        break;
+    case CharacterIdentifierStart:
+        ASSERT(isIdentStart(m_current));
+        FALLTHROUGH;
+    case CharacterBackSlash: // A backslash is handed to parseIdentifier as well.
+        parseIdent:
+        if (lexerFlags & LexexFlagsDontBuildKeywords) // NOTE(review): "Lexex" looks like a misspelling, but the constant is declared outside this chunk.
+            token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
+        else
+            token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
+        break;
+    case CharacterLineTerminator:
+        ASSERT(isLineTerminator(m_current));
+        shiftLineTerminator();
+        m_atLineStart = true;
+        m_terminator = true;
+        m_lineStart = m_code;
+        goto start;
+    case CharacterPrivateIdentifierStart:
+        if (m_parsingBuiltinFunction) // Accepted as an identifier start only while parsing builtin functions.
+            goto parseIdent;
+
+        FALLTHROUGH;
+    case CharacterInvalid:
+        m_lexErrorMessage = invalidCharacterMessage();
+        token = ERRORTOK;
+        goto returnError;
+    default:
+        RELEASE_ASSERT_NOT_REACHED();
+        m_lexErrorMessage = "Internal Error";
+        token = ERRORTOK;
+        goto returnError;
+    }
+
+    m_atLineStart = false; // A real token was produced, so the cursor is no longer at the start of a line.
+    goto returnToken;
+
+inSingleLineComment: // Skip to the end of the line.
+    while (!isLineTerminator(m_current)) {
+        if (atEnd())
+            return EOFTOK; // NOTE(review): returns without filling the end fields of tokenLocation.
+        shift();
+    }
+    shiftLineTerminator();
+    m_atLineStart = true;
+    m_terminator = true;
+    m_lineStart = m_code;
+    if (!lastTokenWasRestrKeyword()) // Defined outside this chunk; presumably true after keywords of restricted productions.
+        goto start;
+
+    token = SEMICOLON; // Otherwise the comment's line break is reported as a semicolon token.
+    // Fall through into returnToken.
+
+returnToken: // Record where the token ends and remember it as the last token.
+    tokenLocation->line = m_lineNumber;
+    tokenLocation->endOffset = currentOffset();
+    tokenLocation->lineStartOffset = currentLineStartOffset();
+    ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
+    tokenRecord->m_endPosition = currentPosition();
+    m_lastToken = token;
+    return token;
+
+returnError: // Same location bookkeeping as returnToken, but sets m_error and leaves m_lastToken unchanged.
+    m_error = true;
+    tokenLocation->line = m_lineNumber;
+    tokenLocation->endOffset = currentOffset();
+    tokenLocation->lineStartOffset = currentLineStartOffset();
+    ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
+    tokenRecord->m_endPosition = currentPosition();
+    RELEASE_ASSERT(token & ErrorTokenFlag);
+    return token;
 }
 
-void Lexer::record16(UChar c)
+template <typename T> // Helper for scanRegExp: folds a character into an OR accumulator, specialised per lexer character type.
+static inline void orCharacter(UChar&, UChar);
+
+template <>
+inline void orCharacter<LChar>(UChar&, UChar) { } // Deliberately a no-op for the LChar lexer.
+
+template <>
+inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
 {
-    m_buffer16.append(c);
+    orAccumulator |= character; // The accumulator ends up holding every bit set in any character seen.
 }
 
-bool Lexer::scanRegExp()
+template <typename T> // Scans a regular expression body and its flags into pattern and flags; returns false on a line terminator or end of input.
+bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
 {
-    m_buffer16.clear();
+    ASSERT(m_buffer16.isEmpty());
+
     bool lastWasEscape = false;
     bool inBrackets = false;
+    UChar charactersOredTogether = 0; // OR of the scanned characters, handed to makeRightSizedIdentifier below.
 
-    while (1) {
-        if (isLineTerminator() || m_current == -1)
+    if (patternPrefix) { // A non-zero prefix becomes the first character of the pattern.
+        ASSERT(!isLineTerminator(patternPrefix));
+        ASSERT(patternPrefix != '/');
+        ASSERT(patternPrefix != '[');
+        record16(patternPrefix); // NOTE(review): the prefix is not folded into charactersOredTogether; confirm that callers only pass Latin-1 prefixes.
+    }
+
+    while (true) {
+        if (isLineTerminator(m_current) || atEnd()) {
+            m_buffer16.resize(0); // Discard the partial pattern before failing.
             return false;
-        else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {
-            // keep track of '[' and ']'
-            if (!lastWasEscape) {
-                if ( m_current == '[' && !inBrackets )
-                    inBrackets = true;
-                if ( m_current == ']' && inBrackets )
-                    inBrackets = false;
-            }
-            record16(m_current);
-            lastWasEscape =
-            !lastWasEscape && (m_current == '\\');
-        } else { // end of regexp
-            m_pattern = UString(m_buffer16);
-            m_buffer16.clear();
-            shift(1);
+        }
+
+        T prev = m_current;
+        
+        shift();
+
+        if (prev == '/' && !lastWasEscape && !inBrackets) // An unescaped '/' outside a character class ends the pattern.
+            break;
+
+        record16(prev);
+        orCharacter<T>(charactersOredTogether, prev);
+
+        if (lastWasEscape) { // The character after a backslash gets no special treatment.
+            lastWasEscape = false;
+            continue;
+        }
+
+        switch (prev) {
+        case '[':
+            inBrackets = true;
+            break;
+        case ']':
+            inBrackets = false;
+            break;
+        case '\\':
+            lastWasEscape = true;
             break;
         }
-        shift(1);
     }
 
+    pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
+
+    m_buffer16.resize(0); // Reuse the buffer and the accumulator for the flags.
+    charactersOredTogether = 0;
+
     while (isIdentPart(m_current)) {
         record16(m_current);
-        shift(1);
+        orCharacter<T>(charactersOredTogether, m_current);
+        shift();
     }
-    m_flags = UString(m_buffer16);
+
+    flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
+    m_buffer16.resize(0);
 
     return true;
 }
 
-void Lexer::clear()
+template <typename T> // Skips a regular expression body and its flags without recording them; returns false on a line terminator or end of input.
+bool Lexer<T>::skipRegExp()
 {
-    m_identifiers.clear();
+    bool lastWasEscape = false;
+    bool inBrackets = false;
+
+    while (true) {
+        if (isLineTerminator(m_current) || atEnd())
+            return false;
+
+        T prev = m_current;
+        
+        shift();
 
-    Vector<char> newBuffer8;
-    newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
+        if (prev == '/' && !lastWasEscape && !inBrackets) // An unescaped '/' outside a character class ends the pattern.
+            break;
+
+        if (lastWasEscape) { // The character after a backslash gets no special treatment.
+            lastWasEscape = false;
+            continue;
+        }
+
+        switch (prev) {
+        case '[':
+            inBrackets = true;
+            break;
+        case ']':
+            inBrackets = false;
+            break;
+        case '\\':
+            lastWasEscape = true;
+            break;
+        }
+    }
+
+    while (isIdentPart(m_current)) // Skip the flag characters that follow the closing '/'.
+        shift();
+
+    return true;
+}
+
+template <typename T> // Resets the lexer: drops the arena pointer, replaces both character buffers with empty vectors and clears m_isReparsing.
+void Lexer<T>::clear()
+{
+    m_arena = 0;
+
+    Vector<LChar> newBuffer8; // Swapping with an empty local vector hands the old storage to the local, which is destroyed on return.
     m_buffer8.swap(newBuffer8);
 
     Vector<UChar> newBuffer16;
-    newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
     m_buffer16.swap(newBuffer16);
 
     m_isReparsing = false;
-
-    m_pattern = 0;
-    m_flags = 0;
 }
 
+// Instantiate the two flavors of Lexer we need here, instead of putting most of this file in Lexer.h.
+template class Lexer<LChar>; // Lexer over LChar source characters.
+template class Lexer<UChar>; // Lexer over UChar source characters.
+
 } // namespace JSC