parser/Lexer.cpp

   1 /*
   2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
   3  *  Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
   4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
   5  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
   6  *  Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
   7  *
   8  *  This library is free software; you can redistribute it and/or
   9  *  modify it under the terms of the GNU Library General Public
  10  *  License as published by the Free Software Foundation; either
  11  *  version 2 of the License, or (at your option) any later version.
  12  *
  13  *  This library is distributed in the hope that it will be useful,
  14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  *  Library General Public License for more details.
  17  *
  18  *  You should have received a copy of the GNU Library General Public License
  19  *  along with this library; see the file COPYING.LIB.  If not, write to
  20  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  21  *  Boston, MA 02110-1301, USA.
  22  *
  23  */
  24
  25 #include "config.h"
  26 #include "Lexer.h"
  27
  28 #include "JSFunctionInlines.h"
  29
  30 #include "BuiltinNames.h"
  31 #include "JSGlobalObjectFunctions.h"
  32 #include "Identifier.h"
  33 #include "Nodes.h"
  34 #include "JSCInlines.h"
  35 #include <wtf/dtoa.h>
  36 #include <ctype.h>
  37 #include <limits.h>
  38 #include <string.h>
  39 #include <wtf/Assertions.h>
  40
  41 #include "KeywordLookup.h"
  42 #include "Lexer.lut.h"
  43 #include "Parser.h"
  44
  45 namespace JSC {
  46
  47 Keywords::Keywords(VM& vm)
  48     : m_vm(vm)
  49     , m_keywordTable(JSC::mainTable)
  50 {
  51 }
  52
  53 enum CharacterType {
  54     // Types for the main switch
  55
  56     // The first three types are fixed, and also used for identifying
  57     // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
  58     CharacterIdentifierStart,
  59     CharacterZero,
  60     CharacterNumber,
  61
  62     CharacterInvalid,
  63     CharacterLineTerminator,
  64     CharacterExclamationMark,
  65     CharacterOpenParen,
  66     CharacterCloseParen,
  67     CharacterOpenBracket,
  68     CharacterCloseBracket,
  69     CharacterComma,
  70     CharacterColon,
  71     CharacterQuestion,
  72     CharacterTilde,
  73     CharacterQuote,
  74     CharacterBackQuote,
  75     CharacterDot,
  76     CharacterSlash,
  77     CharacterBackSlash,
  78     CharacterSemicolon,
  79     CharacterOpenBrace,
  80     CharacterCloseBrace,
  81
  82     CharacterAdd,
  83     CharacterSub,
  84     CharacterMultiply,
  85     CharacterModulo,
  86     CharacterAnd,
  87     CharacterXor,
  88     CharacterOr,
  89     CharacterLess,
  90     CharacterGreater,
  91     CharacterEqual,
  92
  93     // Other types (only one so far)
  94     CharacterWhiteSpace,
  95     CharacterPrivateIdentifierStart
  96 };
  97
  98 // 256 Latin-1 codes
  99 static const unsigned short typesOfLatin1Characters[256] = {
 100 /*   0 - Null               */ CharacterInvalid,
 101 /*   1 - Start of Heading   */ CharacterInvalid,
 102 /*   2 - Start of Text      */ CharacterInvalid,
 103 /*   3 - End of Text        */ CharacterInvalid,
 104 /*   4 - End of Transm.     */ CharacterInvalid,
 105 /*   5 - Enquiry            */ CharacterInvalid,
 106 /*   6 - Acknowledgment     */ CharacterInvalid,
 107 /*   7 - Bell               */ CharacterInvalid,
 108 /*   8 - Back Space         */ CharacterInvalid,
 109 /*   9 - Horizontal Tab     */ CharacterWhiteSpace,
 110 /*  10 - Line Feed          */ CharacterLineTerminator,
 111 /*  11 - Vertical Tab       */ CharacterWhiteSpace,
 112 /*  12 - Form Feed          */ CharacterWhiteSpace,
 113 /*  13 - Carriage Return    */ CharacterLineTerminator,
 114 /*  14 - Shift Out          */ CharacterInvalid,
 115 /*  15 - Shift In           */ CharacterInvalid,
 116 /*  16 - Data Line Escape   */ CharacterInvalid,
 117 /*  17 - Device Control 1   */ CharacterInvalid,
 118 /*  18 - Device Control 2   */ CharacterInvalid,
 119 /*  19 - Device Control 3   */ CharacterInvalid,
 120 /*  20 - Device Control 4   */ CharacterInvalid,
 121 /*  21 - Negative Ack.      */ CharacterInvalid,
 122 /*  22 - Synchronous Idle   */ CharacterInvalid,
 123 /*  23 - End of Transmit    */ CharacterInvalid,
 124 /*  24 - Cancel             */ CharacterInvalid,
 125 /*  25 - End of Medium      */ CharacterInvalid,
 126 /*  26 - Substitute         */ CharacterInvalid,
 127 /*  27 - Escape             */ CharacterInvalid,
 128 /*  28 - File Separator     */ CharacterInvalid,
 129 /*  29 - Group Separator    */ CharacterInvalid,
 130 /*  30 - Record Separator   */ CharacterInvalid,
 131 /*  31 - Unit Separator     */ CharacterInvalid,
 132 /*  32 - Space              */ CharacterWhiteSpace,
 133 /*  33 - !                  */ CharacterExclamationMark,
 134 /*  34 - "                  */ CharacterQuote,
 135 /*  35 - #                  */ CharacterInvalid,
 136 /*  36 - $                  */ CharacterIdentifierStart,
 137 /*  37 - %                  */ CharacterModulo,
 138 /*  38 - &                  */ CharacterAnd,
 139 /*  39 - '                  */ CharacterQuote,
 140 /*  40 - (                  */ CharacterOpenParen,
 141 /*  41 - )                  */ CharacterCloseParen,
 142 /*  42 - *                  */ CharacterMultiply,
 143 /*  43 - +                  */ CharacterAdd,
 144 /*  44 - ,                  */ CharacterComma,
 145 /*  45 - -                  */ CharacterSub,
 146 /*  46 - .                  */ CharacterDot,
 147 /*  47 - /                  */ CharacterSlash,
 148 /*  48 - 0                  */ CharacterZero,
 149 /*  49 - 1                  */ CharacterNumber,
 150 /*  50 - 2                  */ CharacterNumber,
 151 /*  51 - 3                  */ CharacterNumber,
 152 /*  52 - 4                  */ CharacterNumber,
 153 /*  53 - 5                  */ CharacterNumber,
 154 /*  54 - 6                  */ CharacterNumber,
 155 /*  55 - 7                  */ CharacterNumber,
 156 /*  56 - 8                  */ CharacterNumber,
 157 /*  57 - 9                  */ CharacterNumber,
 158 /*  58 - :                  */ CharacterColon,
 159 /*  59 - ;                  */ CharacterSemicolon,
 160 /*  60 - <                  */ CharacterLess,
 161 /*  61 - =                  */ CharacterEqual,
 162 /*  62 - >                  */ CharacterGreater,
 163 /*  63 - ?                  */ CharacterQuestion,
 164 /*  64 - @                  */ CharacterPrivateIdentifierStart,
 165 /*  65 - A                  */ CharacterIdentifierStart,
 166 /*  66 - B                  */ CharacterIdentifierStart,
 167 /*  67 - C                  */ CharacterIdentifierStart,
 168 /*  68 - D                  */ CharacterIdentifierStart,
 169 /*  69 - E                  */ CharacterIdentifierStart,
 170 /*  70 - F                  */ CharacterIdentifierStart,
 171 /*  71 - G                  */ CharacterIdentifierStart,
 172 /*  72 - H                  */ CharacterIdentifierStart,
 173 /*  73 - I                  */ CharacterIdentifierStart,
 174 /*  74 - J                  */ CharacterIdentifierStart,
 175 /*  75 - K                  */ CharacterIdentifierStart,
 176 /*  76 - L                  */ CharacterIdentifierStart,
 177 /*  77 - M                  */ CharacterIdentifierStart,
 178 /*  78 - N                  */ CharacterIdentifierStart,
 179 /*  79 - O                  */ CharacterIdentifierStart,
 180 /*  80 - P                  */ CharacterIdentifierStart,
 181 /*  81 - Q                  */ CharacterIdentifierStart,
 182 /*  82 - R                  */ CharacterIdentifierStart,
 183 /*  83 - S                  */ CharacterIdentifierStart,
 184 /*  84 - T                  */ CharacterIdentifierStart,
 185 /*  85 - U                  */ CharacterIdentifierStart,
 186 /*  86 - V                  */ CharacterIdentifierStart,
 187 /*  87 - W                  */ CharacterIdentifierStart,
 188 /*  88 - X                  */ CharacterIdentifierStart,
 189 /*  89 - Y                  */ CharacterIdentifierStart,
 190 /*  90 - Z                  */ CharacterIdentifierStart,
 191 /*  91 - [                  */ CharacterOpenBracket,
 192 /*  92 - \                  */ CharacterBackSlash,
 193 /*  93 - ]                  */ CharacterCloseBracket,
 194 /*  94 - ^                  */ CharacterXor,
 195 /*  95 - _                  */ CharacterIdentifierStart,
 196 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
 197 /*  96 - `                  */ CharacterBackQuote,
 198 #else
 199 /*  96 - `                  */ CharacterInvalid,
 200 #endif
 201 /*  97 - a                  */ CharacterIdentifierStart,
 202 /*  98 - b                  */ CharacterIdentifierStart,
 203 /*  99 - c                  */ CharacterIdentifierStart,
 204 /* 100 - d                  */ CharacterIdentifierStart,
 205 /* 101 - e                  */ CharacterIdentifierStart,
 206 /* 102 - f                  */ CharacterIdentifierStart,
 207 /* 103 - g                  */ CharacterIdentifierStart,
 208 /* 104 - h                  */ CharacterIdentifierStart,
 209 /* 105 - i                  */ CharacterIdentifierStart,
 210 /* 106 - j                  */ CharacterIdentifierStart,
 211 /* 107 - k                  */ CharacterIdentifierStart,
 212 /* 108 - l                  */ CharacterIdentifierStart,
 213 /* 109 - m                  */ CharacterIdentifierStart,
 214 /* 110 - n                  */ CharacterIdentifierStart,
 215 /* 111 - o                  */ CharacterIdentifierStart,
 216 /* 112 - p                  */ CharacterIdentifierStart,
 217 /* 113 - q                  */ CharacterIdentifierStart,
 218 /* 114 - r                  */ CharacterIdentifierStart,
 219 /* 115 - s                  */ CharacterIdentifierStart,
 220 /* 116 - t                  */ CharacterIdentifierStart,
 221 /* 117 - u                  */ CharacterIdentifierStart,
 222 /* 118 - v                  */ CharacterIdentifierStart,
 223 /* 119 - w                  */ CharacterIdentifierStart,
 224 /* 120 - x                  */ CharacterIdentifierStart,
 225 /* 121 - y                  */ CharacterIdentifierStart,
 226 /* 122 - z                  */ CharacterIdentifierStart,
 227 /* 123 - {                  */ CharacterOpenBrace,
 228 /* 124 - |                  */ CharacterOr,
 229 /* 125 - }                  */ CharacterCloseBrace,
 230 /* 126 - ~                  */ CharacterTilde,
 231 /* 127 - Delete             */ CharacterInvalid,
 232 /* 128 - Cc category        */ CharacterInvalid,
 233 /* 129 - Cc category        */ CharacterInvalid,
 234 /* 130 - Cc category        */ CharacterInvalid,
 235 /* 131 - Cc category        */ CharacterInvalid,
 236 /* 132 - Cc category        */ CharacterInvalid,
 237 /* 133 - Cc category        */ CharacterInvalid,
 238 /* 134 - Cc category        */ CharacterInvalid,
 239 /* 135 - Cc category        */ CharacterInvalid,
 240 /* 136 - Cc category        */ CharacterInvalid,
 241 /* 137 - Cc category        */ CharacterInvalid,
 242 /* 138 - Cc category        */ CharacterInvalid,
 243 /* 139 - Cc category        */ CharacterInvalid,
 244 /* 140 - Cc category        */ CharacterInvalid,
 245 /* 141 - Cc category        */ CharacterInvalid,
 246 /* 142 - Cc category        */ CharacterInvalid,
 247 /* 143 - Cc category        */ CharacterInvalid,
 248 /* 144 - Cc category        */ CharacterInvalid,
 249 /* 145 - Cc category        */ CharacterInvalid,
 250 /* 146 - Cc category        */ CharacterInvalid,
 251 /* 147 - Cc category        */ CharacterInvalid,
 252 /* 148 - Cc category        */ CharacterInvalid,
 253 /* 149 - Cc category        */ CharacterInvalid,
 254 /* 150 - Cc category        */ CharacterInvalid,
 255 /* 151 - Cc category        */ CharacterInvalid,
 256 /* 152 - Cc category        */ CharacterInvalid,
 257 /* 153 - Cc category        */ CharacterInvalid,
 258 /* 154 - Cc category        */ CharacterInvalid,
 259 /* 155 - Cc category        */ CharacterInvalid,
 260 /* 156 - Cc category        */ CharacterInvalid,
 261 /* 157 - Cc category        */ CharacterInvalid,
 262 /* 158 - Cc category        */ CharacterInvalid,
 263 /* 159 - Cc category        */ CharacterInvalid,
 264 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
 265 /* 161 - Po category        */ CharacterInvalid,
 266 /* 162 - Sc category        */ CharacterInvalid,
 267 /* 163 - Sc category        */ CharacterInvalid,
 268 /* 164 - Sc category        */ CharacterInvalid,
 269 /* 165 - Sc category        */ CharacterInvalid,
 270 /* 166 - So category        */ CharacterInvalid,
 271 /* 167 - So category        */ CharacterInvalid,
 272 /* 168 - Sk category        */ CharacterInvalid,
 273 /* 169 - So category        */ CharacterInvalid,
 274 /* 170 - Ll category        */ CharacterIdentifierStart,
 275 /* 171 - Pi category        */ CharacterInvalid,
 276 /* 172 - Sm category        */ CharacterInvalid,
 277 /* 173 - Cf category        */ CharacterInvalid,
 278 /* 174 - So category        */ CharacterInvalid,
 279 /* 175 - Sk category        */ CharacterInvalid,
 280 /* 176 - So category        */ CharacterInvalid,
 281 /* 177 - Sm category        */ CharacterInvalid,
 282 /* 178 - No category        */ CharacterInvalid,
 283 /* 179 - No category        */ CharacterInvalid,
 284 /* 180 - Sk category        */ CharacterInvalid,
 285 /* 181 - Ll category        */ CharacterIdentifierStart,
 286 /* 182 - So category        */ CharacterInvalid,
 287 /* 183 - Po category        */ CharacterInvalid,
 288 /* 184 - Sk category        */ CharacterInvalid,
 289 /* 185 - No category        */ CharacterInvalid,
 290 /* 186 - Ll category        */ CharacterIdentifierStart,
 291 /* 187 - Pf category        */ CharacterInvalid,
 292 /* 188 - No category        */ CharacterInvalid,
 293 /* 189 - No category        */ CharacterInvalid,
 294 /* 190 - No category        */ CharacterInvalid,
 295 /* 191 - Po category        */ CharacterInvalid,
 296 /* 192 - Lu category        */ CharacterIdentifierStart,
 297 /* 193 - Lu category        */ CharacterIdentifierStart,
 298 /* 194 - Lu category        */ CharacterIdentifierStart,
 299 /* 195 - Lu category        */ CharacterIdentifierStart,
 300 /* 196 - Lu category        */ CharacterIdentifierStart,
 301 /* 197 - Lu category        */ CharacterIdentifierStart,
 302 /* 198 - Lu category        */ CharacterIdentifierStart,
 303 /* 199 - Lu category        */ CharacterIdentifierStart,
 304 /* 200 - Lu category        */ CharacterIdentifierStart,
 305 /* 201 - Lu category        */ CharacterIdentifierStart,
 306 /* 202 - Lu category        */ CharacterIdentifierStart,
 307 /* 203 - Lu category        */ CharacterIdentifierStart,
 308 /* 204 - Lu category        */ CharacterIdentifierStart,
 309 /* 205 - Lu category        */ CharacterIdentifierStart,
 310 /* 206 - Lu category        */ CharacterIdentifierStart,
 311 /* 207 - Lu category        */ CharacterIdentifierStart,
 312 /* 208 - Lu category        */ CharacterIdentifierStart,
 313 /* 209 - Lu category        */ CharacterIdentifierStart,
 314 /* 210 - Lu category        */ CharacterIdentifierStart,
 315 /* 211 - Lu category        */ CharacterIdentifierStart,
 316 /* 212 - Lu category        */ CharacterIdentifierStart,
 317 /* 213 - Lu category        */ CharacterIdentifierStart,
 318 /* 214 - Lu category        */ CharacterIdentifierStart,
 319 /* 215 - Sm category        */ CharacterInvalid,
 320 /* 216 - Lu category        */ CharacterIdentifierStart,
 321 /* 217 - Lu category        */ CharacterIdentifierStart,
 322 /* 218 - Lu category        */ CharacterIdentifierStart,
 323 /* 219 - Lu category        */ CharacterIdentifierStart,
 324 /* 220 - Lu category        */ CharacterIdentifierStart,
 325 /* 221 - Lu category        */ CharacterIdentifierStart,
 326 /* 222 - Lu category        */ CharacterIdentifierStart,
 327 /* 223 - Ll category        */ CharacterIdentifierStart,
 328 /* 224 - Ll category        */ CharacterIdentifierStart,
 329 /* 225 - Ll category        */ CharacterIdentifierStart,
 330 /* 226 - Ll category        */ CharacterIdentifierStart,
 331 /* 227 - Ll category        */ CharacterIdentifierStart,
 332 /* 228 - Ll category        */ CharacterIdentifierStart,
 333 /* 229 - Ll category        */ CharacterIdentifierStart,
 334 /* 230 - Ll category        */ CharacterIdentifierStart,
 335 /* 231 - Ll category        */ CharacterIdentifierStart,
 336 /* 232 - Ll category        */ CharacterIdentifierStart,
 337 /* 233 - Ll category        */ CharacterIdentifierStart,
 338 /* 234 - Ll category        */ CharacterIdentifierStart,
 339 /* 235 - Ll category        */ CharacterIdentifierStart,
 340 /* 236 - Ll category        */ CharacterIdentifierStart,
 341 /* 237 - Ll category        */ CharacterIdentifierStart,
 342 /* 238 - Ll category        */ CharacterIdentifierStart,
 343 /* 239 - Ll category        */ CharacterIdentifierStart,
 344 /* 240 - Ll category        */ CharacterIdentifierStart,
 345 /* 241 - Ll category        */ CharacterIdentifierStart,
 346 /* 242 - Ll category        */ CharacterIdentifierStart,
 347 /* 243 - Ll category        */ CharacterIdentifierStart,
 348 /* 244 - Ll category        */ CharacterIdentifierStart,
 349 /* 245 - Ll category        */ CharacterIdentifierStart,
 350 /* 246 - Ll category        */ CharacterIdentifierStart,
 351 /* 247 - Sm category        */ CharacterInvalid,
 352 /* 248 - Ll category        */ CharacterIdentifierStart,
 353 /* 249 - Ll category        */ CharacterIdentifierStart,
 354 /* 250 - Ll category        */ CharacterIdentifierStart,
 355 /* 251 - Ll category        */ CharacterIdentifierStart,
 356 /* 252 - Ll category        */ CharacterIdentifierStart,
 357 /* 253 - Ll category        */ CharacterIdentifierStart,
 358 /* 254 - Ll category        */ CharacterIdentifierStart,
 359 /* 255 - Ll category        */ CharacterIdentifierStart
 360 };
 361
 362 // This table provides the character that results from \X where X is the index in the table beginning
 363 // with SPACE. A table value of 0 means that more processing needs to be done.
 364 static const LChar singleCharacterEscapeValuesForASCII[128] = {
 365 /*   0 - Null               */ 0,
 366 /*   1 - Start of Heading   */ 0,
 367 /*   2 - Start of Text      */ 0,
 368 /*   3 - End of Text        */ 0,
 369 /*   4 - End of Transm.     */ 0,
 370 /*   5 - Enquiry            */ 0,
 371 /*   6 - Acknowledgment     */ 0,
 372 /*   7 - Bell               */ 0,
 373 /*   8 - Back Space         */ 0,
 374 /*   9 - Horizontal Tab     */ 0,
 375 /*  10 - Line Feed          */ 0,
 376 /*  11 - Vertical Tab       */ 0,
 377 /*  12 - Form Feed          */ 0,
 378 /*  13 - Carriage Return    */ 0,
 379 /*  14 - Shift Out          */ 0,
 380 /*  15 - Shift In           */ 0,
 381 /*  16 - Data Line Escape   */ 0,
 382 /*  17 - Device Control 1   */ 0,
 383 /*  18 - Device Control 2   */ 0,
 384 /*  19 - Device Control 3   */ 0,
 385 /*  20 - Device Control 4   */ 0,
 386 /*  21 - Negative Ack.      */ 0,
 387 /*  22 - Synchronous Idle   */ 0,
 388 /*  23 - End of Transmit    */ 0,
 389 /*  24 - Cancel             */ 0,
 390 /*  25 - End of Medium      */ 0,
 391 /*  26 - Substitute         */ 0,
 392 /*  27 - Escape             */ 0,
 393 /*  28 - File Separator     */ 0,
 394 /*  29 - Group Separator    */ 0,
 395 /*  30 - Record Separator   */ 0,
 396 /*  31 - Unit Separator     */ 0,
 397 /*  32 - Space              */ ' ',
 398 /*  33 - !                  */ '!',
 399 /*  34 - "                  */ '"',
 400 /*  35 - #                  */ '#',
 401 /*  36 - $                  */ '$',
 402 /*  37 - %                  */ '%',
 403 /*  38 - &                  */ '&',
 404 /*  39 - '                  */ '\'',
 405 /*  40 - (                  */ '(',
 406 /*  41 - )                  */ ')',
 407 /*  42 - *                  */ '*',
 408 /*  43 - +                  */ '+',
 409 /*  44 - ,                  */ ',',
 410 /*  45 - -                  */ '-',
 411 /*  46 - .                  */ '.',
 412 /*  47 - /                  */ '/',
 413 /*  48 - 0                  */ 0,
 414 /*  49 - 1                  */ 0,
 415 /*  50 - 2                  */ 0,
 416 /*  51 - 3                  */ 0,
 417 /*  52 - 4                  */ 0,
 418 /*  53 - 5                  */ 0,
 419 /*  54 - 6                  */ 0,
 420 /*  55 - 7                  */ 0,
 421 /*  56 - 8                  */ 0,
 422 /*  57 - 9                  */ 0,
 423 /*  58 - :                  */ ':',
 424 /*  59 - ;                  */ ';',
 425 /*  60 - <                  */ '<',
 426 /*  61 - =                  */ '=',
 427 /*  62 - >                  */ '>',
 428 /*  63 - ?                  */ '?',
 429 /*  64 - @                  */ '@',
 430 /*  65 - A                  */ 'A',
 431 /*  66 - B                  */ 'B',
 432 /*  67 - C                  */ 'C',
 433 /*  68 - D                  */ 'D',
 434 /*  69 - E                  */ 'E',
 435 /*  70 - F                  */ 'F',
 436 /*  71 - G                  */ 'G',
 437 /*  72 - H                  */ 'H',
 438 /*  73 - I                  */ 'I',
 439 /*  74 - J                  */ 'J',
 440 /*  75 - K                  */ 'K',
 441 /*  76 - L                  */ 'L',
 442 /*  77 - M                  */ 'M',
 443 /*  78 - N                  */ 'N',
 444 /*  79 - O                  */ 'O',
 445 /*  80 - P                  */ 'P',
 446 /*  81 - Q                  */ 'Q',
 447 /*  82 - R                  */ 'R',
 448 /*  83 - S                  */ 'S',
 449 /*  84 - T                  */ 'T',
 450 /*  85 - U                  */ 'U',
 451 /*  86 - V                  */ 'V',
 452 /*  87 - W                  */ 'W',
 453 /*  88 - X                  */ 'X',
 454 /*  89 - Y                  */ 'Y',
 455 /*  90 - Z                  */ 'Z',
 456 /*  91 - [                  */ '[',
 457 /*  92 - \                  */ '\\',
 458 /*  93 - ]                  */ ']',
 459 /*  94 - ^                  */ '^',
 460 /*  95 - _                  */ '_',
 461 /*  96 - `                  */ '`',
 462 /*  97 - a                  */ 'a',
 463 /*  98 - b                  */ 0x08,
 464 /*  99 - c                  */ 'c',
 465 /* 100 - d                  */ 'd',
 466 /* 101 - e                  */ 'e',
 467 /* 102 - f                  */ 0x0C,
 468 /* 103 - g                  */ 'g',
 469 /* 104 - h                  */ 'h',
 470 /* 105 - i                  */ 'i',
 471 /* 106 - j                  */ 'j',
 472 /* 107 - k                  */ 'k',
 473 /* 108 - l                  */ 'l',
 474 /* 109 - m                  */ 'm',
 475 /* 110 - n                  */ 0x0A,
 476 /* 111 - o                  */ 'o',
 477 /* 112 - p                  */ 'p',
 478 /* 113 - q                  */ 'q',
 479 /* 114 - r                  */ 0x0D,
 480 /* 115 - s                  */ 's',
 481 /* 116 - t                  */ 0x09,
 482 /* 117 - u                  */ 0,
 483 /* 118 - v                  */ 0x0B,
 484 /* 119 - w                  */ 'w',
 485 /* 120 - x                  */ 0,
 486 /* 121 - y                  */ 'y',
 487 /* 122 - z                  */ 'z',
 488 /* 123 - {                  */ '{',
 489 /* 124 - |                  */ '|',
 490 /* 125 - }                  */ '}',
 491 /* 126 - ~                  */ '~',
 492 /* 127 - Delete             */ 0
 493 };
 494
 495 template <typename T>
 496 Lexer<T>::Lexer(VM* vm, JSParserBuiltinMode builtinMode)
 497     : m_isReparsing(false)
 498     , m_vm(vm)
 499     , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
 500 {
 501 }
 502
 503 static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
 504 {
 505     if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
 506         return INTEGER;
 507     return DOUBLE;
 508 }
 509
 510 template <typename T>
 511 Lexer<T>::~Lexer()
 512 {
 513 }
 514
 515 template <typename T>
 516 String Lexer<T>::invalidCharacterMessage() const
 517 {
 518     switch (m_current) {
 519     case 0:
 520         return ASCIILiteral("Invalid character: '\\0'");
 521     case 10:
 522         return ASCIILiteral("Invalid character: '\\n'");
 523     case 11:
 524         return ASCIILiteral("Invalid character: '\\v'");
 525     case 13:
 526         return ASCIILiteral("Invalid character: '\\r'");
 527     case 35:
 528         return ASCIILiteral("Invalid character: '#'");
 529     case 64:
 530         return ASCIILiteral("Invalid character: '@'");
 531     case 96:
 532         return ASCIILiteral("Invalid character: '`'");
 533     default:
 534         return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current));
 535     }
 536 }
 537
 538 template <typename T>
 539 ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
 540 {
 541     ASSERT(m_code <= m_codeEnd);
 542     return m_code;
 543 }
 544
 545 template <typename T>
 546 void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
 547 {
 548     m_arena = &arena->identifierArena();
 549
 550     m_lineNumber = source.firstLine();
 551     m_lastToken = -1;
 552
 553     const String& sourceString = source.provider()->source();
 554
 555     if (!sourceString.isNull())
 556         setCodeStart(sourceString.impl());
 557     else
 558         m_codeStart = 0;
 559
 560     m_source = &source;
 561     m_sourceOffset = source.startOffset();
 562     m_codeStartPlusOffset = m_codeStart + source.startOffset();
 563     m_code = m_codeStartPlusOffset;
 564     m_codeEnd = m_codeStart + source.endOffset();
 565     m_error = false;
 566     m_atLineStart = true;
 567     m_lineStart = m_code;
 568     m_lexErrorMessage = String();
 569
 570     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
 571     m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
 572     m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
 573
 574     if (LIKELY(m_code < m_codeEnd))
 575         m_current = *m_code;
 576     else
 577         m_current = 0;
 578     ASSERT(currentOffset() == source.startOffset());
 579 }
 580
 581 template <typename T>
 582 template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
 583 {
 584     m_code += shiftAmount;
 585     ASSERT(currentOffset() >= currentLineStartOffset());
 586     m_current = *m_code;
 587 }
 588
 589 template <typename T>
 590 ALWAYS_INLINE void Lexer<T>::shift()
 591 {
 592     // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
 593     m_current = 0;
 594     ++m_code;
 595     if (LIKELY(m_code < m_codeEnd))
 596         m_current = *m_code;
 597 }
 598
 599 template <typename T>
 600 ALWAYS_INLINE bool Lexer<T>::atEnd() const
 601 {
 602     ASSERT(!m_current || m_code < m_codeEnd);
 603     return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
 604 }
 605
 606 template <typename T>
 607 ALWAYS_INLINE T Lexer<T>::peek(int offset) const
 608 {
 609     ASSERT(offset > 0 && offset < 5);
 610     const T* code = m_code + offset;
 611     return (code < m_codeEnd) ? *code : 0;
 612 }
 613
 614 struct ParsedUnicodeEscapeValue {
 615     ParsedUnicodeEscapeValue(UChar32 value)
 616         : m_value(value)
 617     {
 618         ASSERT(isValid());
 619     }
 620
 621     enum SpecialValueType { Incomplete = -2, Invalid = -1 };
 622     ParsedUnicodeEscapeValue(SpecialValueType type)
 623         : m_value(type)
 624     {
 625     }
 626
 627     bool isValid() const { return m_value >= 0; }
 628     bool isIncomplete() const { return m_value == Incomplete; }
 629
 630     UChar32 value() const
 631     {
 632         ASSERT(isValid());
 633         return m_value;
 634     }
 635
 636 private:
 637     UChar32 m_value;
 638 };
 639
 640 template<typename CharacterType> ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
 641 {
 642     if (m_current == '{') {
 643         shift();
 644         UChar32 codePoint = 0;
 645         do {
 646             if (!isASCIIHexDigit(m_current))
 647                 return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
 648             codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
 649             if (codePoint > UCHAR_MAX_VALUE)
 650                 return ParsedUnicodeEscapeValue::Invalid;
 651             shift();
 652         } while (m_current != '}');
 653         shift();
 654         return codePoint;
 655     }
 656
 657     auto character2 = peek(1);
 658     auto character3 = peek(2);
 659     auto character4 = peek(3);
 660     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4)))
 661         return (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
 662     auto result = convertUnicode(m_current, character2, character3, character4);
 663     shift();
 664     shift();
 665     shift();
 666     shift();
 667     return result;
 668 }
 669
 670 template <typename T>
 671 void Lexer<T>::shiftLineTerminator()
 672 {
 673     ASSERT(isLineTerminator(m_current));
 674
 675     m_positionBeforeLastNewline = currentPosition();
 676     T prev = m_current;
 677     shift();
 678
 679     // Allow both CRLF and LFCR.
 680     if (prev + m_current == '\n' + '\r')
 681         shift();
 682
 683     ++m_lineNumber;
 684 }
 685
 686 template <typename T>
 687 ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
 688 {
 689     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
 690 }
 691
 692 static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
 693 {
 694     return U_GET_GC_MASK(c) & U_GC_L_MASK;
 695 }
 696
 697 static ALWAYS_INLINE bool isLatin1(LChar)
 698 {
 699     return true;
 700 }
 701
 702 static ALWAYS_INLINE bool isLatin1(UChar c)
 703 {
 704     return c < 256;
 705 }
 706
 707 static ALWAYS_INLINE bool isLatin1(UChar32 c)
 708 {
 709     return !(c & ~0xFF);
 710 }
 711
 712 static inline bool isIdentStart(LChar c)
 713 {
 714     return typesOfLatin1Characters[c] == CharacterIdentifierStart;
 715 }
 716
 717 static inline bool isIdentStart(UChar32 c)
 718 {
 719     return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
 720 }
 721
 722 static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
 723 {
 724     // FIXME: ES6 says this should be based on the Unicode property ID_Continue now instead.
 725     return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D;
 726 }
 727
 728 static ALWAYS_INLINE bool isIdentPart(LChar c)
 729 {
 730     // Character types are divided into two groups depending on whether they can be part of an
 731     // identifier or not. Those whose type value is less or equal than CharacterNumber can be
 732     // part of an identifier. (See the CharacterType definition for more details.)
 733     return typesOfLatin1Characters[c] <= CharacterNumber;
 734 }
 735
 736 static ALWAYS_INLINE bool isIdentPart(UChar32 c)
 737 {
 738     return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
 739 }
 740
 741 static ALWAYS_INLINE bool isIdentPart(UChar c)
 742 {
 743     return isIdentPart(static_cast<UChar32>(c));
 744 }
 745
 746 template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd)
 747 {
 748     if (isIdentPart(code[0]))
 749         return true;
 750
 751     // Shortest sequence handled below is \u{0}, which is 5 characters.
 752     if (!(code[0] == '\\' && codeEnd - code >= 5 && code[1] == 'u'))
 753         return false;
 754
 755     if (code[2] == '{') {
 756         UChar32 codePoint = 0;
 757         const CharacterType* pointer;
 758         for (pointer = &code[3]; pointer < codeEnd; ++pointer) {
 759             auto digit = *pointer;
 760             if (!isASCIIHexDigit(digit))
 761                 break;
 762             codePoint = (codePoint << 4) | toASCIIHexValue(digit);
 763             if (codePoint > UCHAR_MAX_VALUE)
 764                 return false;
 765         }
 766         return isIdentPart(codePoint) && pointer < codeEnd && *pointer == '}';
 767     }
 768
 769     // Shortest sequence handled below is \uXXXX, which is 6 characters.
 770     if (codeEnd - code < 6)
 771         return false;
 772
 773     auto character1 = code[2];
 774     auto character2 = code[3];
 775     auto character3 = code[4];
 776     auto character4 = code[5];
 777     return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4)
 778         && isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4));
 779 }
 780
 781 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
 782 {
 783     return isIdentPartIncludingEscapeTemplate(code, codeEnd);
 784 }
 785
 786 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
 787 {
 788     return isIdentPartIncludingEscapeTemplate(code, codeEnd);
 789 }
 790
 791 static inline LChar singleEscape(int c)
 792 {
 793     if (c < 128) {
 794         ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
 795         return singleCharacterEscapeValuesForASCII[c];
 796     }
 797     return 0;
 798 }
 799
 800 template <typename T>
 801 inline void Lexer<T>::record8(int c)
 802 {
 803     ASSERT(c >= 0);
 804     ASSERT(c <= 0xFF);
 805     m_buffer8.append(static_cast<LChar>(c));
 806 }
 807
 808 template <typename T>
 809 inline void assertCharIsIn8BitRange(T c)
 810 {
 811     UNUSED_PARAM(c);
 812     ASSERT(c >= 0);
 813     ASSERT(c <= 0xFF);
 814 }
 815
 816 template <>
 817 inline void assertCharIsIn8BitRange(UChar c)
 818 {
 819     UNUSED_PARAM(c);
 820     ASSERT(c <= 0xFF);
 821 }
 822
 823 template <>
 824 inline void assertCharIsIn8BitRange(LChar)
 825 {
 826 }
 827
 828 template <typename T>
 829 inline void Lexer<T>::append8(const T* p, size_t length)
 830 {
 831     size_t currentSize = m_buffer8.size();
 832     m_buffer8.grow(currentSize + length);
 833     LChar* rawBuffer = m_buffer8.data() + currentSize;
 834
 835     for (size_t i = 0; i < length; i++) {
 836         T c = p[i];
 837         assertCharIsIn8BitRange(c);
 838         rawBuffer[i] = c;
 839     }
 840 }
 841
 842 template <typename T>
 843 inline void Lexer<T>::append16(const LChar* p, size_t length)
 844 {
 845     size_t currentSize = m_buffer16.size();
 846     m_buffer16.grow(currentSize + length);
 847     UChar* rawBuffer = m_buffer16.data() + currentSize;
 848
 849     for (size_t i = 0; i < length; i++)
 850         rawBuffer[i] = p[i];
 851 }
 852
 853 template <typename T>
 854 inline void Lexer<T>::record16(T c)
 855 {
 856     m_buffer16.append(c);
 857 }
 858
 859 template <typename T>
 860 inline void Lexer<T>::record16(int c)
 861 {
 862     ASSERT(c >= 0);
 863     ASSERT(c <= static_cast<int>(USHRT_MAX));
 864     m_buffer16.append(static_cast<UChar>(c));
 865 }
 866
 867 template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
 868 {
 869     ASSERT(codePoint >= 0);
 870     ASSERT(codePoint <= UCHAR_MAX_VALUE);
 871     if (U_IS_BMP(codePoint))
 872         record16(codePoint);
 873     else {
 874         UChar codeUnits[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
 875         append16(codeUnits, 2);
 876     }
 877 }
 878
 879 #if !ASSERT_DISABLED
 880 bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
 881 {
 882     if (!ident)
 883         return true;
 884     /* Just block any use of suspicious identifiers.  This is intended to
 885      * be used as a safety net while implementing builtins.
 886      */
 887     // FIXME: How can a debug-only assertion be a safety net?
 888     if (*ident == vm.propertyNames->builtinNames().callPublicName())
 889         return false;
 890     if (*ident == vm.propertyNames->builtinNames().applyPublicName())
 891         return false;
 892     if (*ident == vm.propertyNames->eval)
 893         return false;
 894     if (*ident == vm.propertyNames->Function)
 895         return false;
 896     return true;
 897 }
 898 #endif
 899
 900 template <>
 901 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
 902 {
 903     const ptrdiff_t remaining = m_codeEnd - m_code;
 904     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
 905         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
 906         if (keyword != IDENT) {
 907             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
 908             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
 909         }
 910     }
 911
 912     bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
 913     if (isPrivateName)
 914         shift();
 915
 916     const LChar* identifierStart = currentSourcePtr();
 917     unsigned identifierLineStart = currentLineStartOffset();
 918
 919     while (isIdentPart(m_current))
 920         shift();
 921
 922     if (UNLIKELY(m_current == '\\')) {
 923         setOffsetFromSourcePtr(identifierStart, identifierLineStart);
 924         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
 925     }
 926
 927     const Identifier* ident = 0;
 928
 929     if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
 930         int identifierLength = currentSourcePtr() - identifierStart;
 931         ident = makeIdentifier(identifierStart, identifierLength);
 932         if (m_parsingBuiltinFunction) {
 933             if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
 934                 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
 935                 return ERRORTOK;
 936             }
 937             if (isPrivateName)
 938                 ident = m_vm->propertyNames->getPrivateName(*ident);
 939             else if (*ident == m_vm->propertyNames->undefinedKeyword)
 940                 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
 941             if (!ident)
 942                 return INVALID_PRIVATE_NAME_ERRORTOK;
 943         }
 944         tokenData->ident = ident;
 945     } else
 946         tokenData->ident = 0;
 947
 948     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
 949         ASSERT(shouldCreateIdentifier);
 950         if (remaining < maxTokenLength) {
 951             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
 952             ASSERT((remaining < maxTokenLength) || !entry);
 953             if (!entry)
 954                 return IDENT;
 955             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
 956             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
 957         }
 958         return IDENT;
 959     }
 960
 961     return IDENT;
 962 }
 963
 964 template <>
 965 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
 966 {
 967     const ptrdiff_t remaining = m_codeEnd - m_code;
 968     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
 969         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
 970         if (keyword != IDENT) {
 971             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
 972             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
 973         }
 974     }
 975
 976     bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
 977     if (isPrivateName)
 978         shift();
 979
 980     const UChar* identifierStart = currentSourcePtr();
 981     int identifierLineStart = currentLineStartOffset();
 982
 983     UChar orAllChars = 0;
 984
 985     while (isIdentPart(m_current)) {
 986         orAllChars |= m_current;
 987         shift();
 988     }
 989
 990     if (UNLIKELY(m_current == '\\')) {
 991         ASSERT(!isPrivateName);
 992         setOffsetFromSourcePtr(identifierStart, identifierLineStart);
 993         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
 994     }
 995
 996     bool isAll8Bit = false;
 997
 998     if (!(orAllChars & ~0xff))
 999         isAll8Bit = true;
1000
1001     const Identifier* ident = 0;
1002
1003     if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
1004         int identifierLength = currentSourcePtr() - identifierStart;
1005         if (isAll8Bit)
1006             ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
1007         else
1008             ident = makeIdentifier(identifierStart, identifierLength);
1009         if (m_parsingBuiltinFunction) {
1010             if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
1011                 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
1012                 return ERRORTOK;
1013             }
1014             if (isPrivateName)
1015                 ident = m_vm->propertyNames->getPrivateName(*ident);
1016             else if (*ident == m_vm->propertyNames->undefinedKeyword)
1017                 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
1018             if (!ident)
1019                 return INVALID_PRIVATE_NAME_ERRORTOK;
1020         }
1021         tokenData->ident = ident;
1022     } else
1023         tokenData->ident = 0;
1024
1025     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
1026         ASSERT(shouldCreateIdentifier);
1027         if (remaining < maxTokenLength) {
1028             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
1029             ASSERT((remaining < maxTokenLength) || !entry);
1030             if (!entry)
1031                 return IDENT;
1032             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1033             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1034         }
1035         return IDENT;
1036     }
1037
1038     return IDENT;
1039 }
1040
1041 template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
1042 {
1043     auto identifierStart = currentSourcePtr();
1044     bool bufferRequired = false;
1045
1046     while (true) {
1047         if (LIKELY(isIdentPart(m_current))) {
1048             shift();
1049             continue;
1050         }
1051         if (LIKELY(m_current != '\\'))
1052             break;
1053
1054         // \uXXXX unicode characters.
1055         bufferRequired = true;
1056         if (identifierStart != currentSourcePtr())
1057             m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1058         shift();
1059         if (UNLIKELY(m_current != 'u'))
1060             return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
1061         shift();
1062         auto character = parseUnicodeEscape();
1063         if (UNLIKELY(!character.isValid()))
1064             return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1065         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character.value()) : !isIdentStart(character.value())))
1066             return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1067         if (shouldCreateIdentifier)
1068             recordUnicodeCodePoint(character.value());
1069         identifierStart = currentSourcePtr();
1070     }
1071
1072     int identifierLength;
1073     const Identifier* ident = nullptr;
1074     if (shouldCreateIdentifier) {
1075         if (!bufferRequired) {
1076             identifierLength = currentSourcePtr() - identifierStart;
1077             ident = makeIdentifier(identifierStart, identifierLength);
1078         } else {
1079             if (identifierStart != currentSourcePtr())
1080                 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1081             ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1082         }
1083
1084         tokenData->ident = ident;
1085     } else
1086         tokenData->ident = nullptr;
1087
1088     m_buffer16.shrink(0);
1089
1090     if (LIKELY(!(lexerFlags & LexerFlagsIgnoreReservedWords))) {
1091         ASSERT(shouldCreateIdentifier);
1092         const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
1093         if (!entry)
1094             return IDENT;
1095         JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1096         return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1097     }
1098
1099     return IDENT;
1100 }
1101
1102 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
1103 {
1104     return character < 0xE;
1105 }
1106
1107 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
1108 {
1109     return character < 0xE || character > 0xFF;
1110 }
1111
1112 template <typename T>
1113 template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1114 {
1115     int startingOffset = currentOffset();
1116     int startingLineStartOffset = currentLineStartOffset();
1117     int startingLineNumber = lineNumber();
1118     T stringQuoteCharacter = m_current;
1119     shift();
1120
1121     const T* stringStart = currentSourcePtr();
1122
1123     while (m_current != stringQuoteCharacter) {
1124         if (UNLIKELY(m_current == '\\')) {
1125             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1126                 append8(stringStart, currentSourcePtr() - stringStart);
1127             shift();
1128
1129             LChar escape = singleEscape(m_current);
1130
1131             // Most common escape sequences first.
1132             if (escape) {
1133                 if (shouldBuildStrings)
1134                     record8(escape);
1135                 shift();
1136             } else if (UNLIKELY(isLineTerminator(m_current)))
1137                 shiftLineTerminator();
1138             else if (m_current == 'x') {
1139                 shift();
1140                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1141                     m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
1142                     return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1143                 }
1144                 T prev = m_current;
1145                 shift();
1146                 if (shouldBuildStrings)
1147                     record8(convertHex(prev, m_current));
1148                 shift();
1149             } else {
1150                 setOffset(startingOffset, startingLineStartOffset);
1151                 setLineNumber(startingLineNumber);
1152                 m_buffer8.shrink(0);
1153                 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1154             }
1155             stringStart = currentSourcePtr();
1156             continue;
1157         }
1158
1159         if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1160             setOffset(startingOffset, startingLineStartOffset);
1161             setLineNumber(startingLineNumber);
1162             m_buffer8.shrink(0);
1163             return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1164         }
1165
1166         shift();
1167     }
1168
1169     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1170         append8(stringStart, currentSourcePtr() - stringStart);
1171     if (shouldBuildStrings) {
1172         tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1173         m_buffer8.shrink(0);
1174     } else
1175         tokenData->ident = 0;
1176
1177     return StringParsedSuccessfully;
1178 }
1179
1180 template <typename T>
1181 template <bool shouldBuildStrings> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(EscapeParseMode escapeParseMode, bool strictMode, T stringQuoteCharacter) -> StringParseResult
1182 {
1183     if (m_current == 'x') {
1184         shift();
1185         if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1186             m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
1187             return StringCannotBeParsed;
1188         }
1189         T prev = m_current;
1190         shift();
1191         if (shouldBuildStrings)
1192             record16(convertHex(prev, m_current));
1193         shift();
1194         return StringParsedSuccessfully;
1195     }
1196
1197     if (m_current == 'u') {
1198         shift();
1199
1200         if (escapeParseMode == EscapeParseMode::String && m_current == stringQuoteCharacter) {
1201             if (shouldBuildStrings)
1202                 record16('u');
1203             return StringParsedSuccessfully;
1204         }
1205
1206         auto character = parseUnicodeEscape();
1207         if (character.isValid()) {
1208             if (shouldBuildStrings)
1209                 recordUnicodeCodePoint(character.value());
1210             return StringParsedSuccessfully;
1211         }
1212
1213         m_lexErrorMessage = ASCIILiteral("\\u can only be followed by a Unicode character sequence");
1214         return character.isIncomplete() ? StringUnterminated : StringCannotBeParsed;
1215     }
1216
1217     if (strictMode) {
1218         if (isASCIIDigit(m_current)) {
1219             // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1220             int character1 = m_current;
1221             shift();
1222             if (character1 != '0' || isASCIIDigit(m_current)) {
1223                 m_lexErrorMessage = ASCIILiteral("The only valid numeric escape in strict mode is '\\0'");
1224                 return StringCannotBeParsed;
1225             }
1226             if (shouldBuildStrings)
1227                 record16(0);
1228             return StringParsedSuccessfully;
1229         }
1230     } else {
1231         if (isASCIIOctalDigit(m_current)) {
1232             // Octal character sequences
1233             T character1 = m_current;
1234             shift();
1235             if (isASCIIOctalDigit(m_current)) {
1236                 // Two octal characters
1237                 T character2 = m_current;
1238                 shift();
1239                 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1240                     if (shouldBuildStrings)
1241                         record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1242                     shift();
1243                 } else {
1244                     if (shouldBuildStrings)
1245                         record16((character1 - '0') * 8 + character2 - '0');
1246                 }
1247             } else {
1248                 if (shouldBuildStrings)
1249                     record16(character1 - '0');
1250             }
1251             return StringParsedSuccessfully;
1252         }
1253     }
1254
1255     if (!atEnd()) {
1256         if (shouldBuildStrings)
1257             record16(m_current);
1258         shift();
1259         return StringParsedSuccessfully;
1260     }
1261
1262     m_lexErrorMessage = ASCIILiteral("Unterminated string constant");
1263     return StringUnterminated;
1264 }
1265
1266 template <typename T>
1267 template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
1268 {
1269     T stringQuoteCharacter = m_current;
1270     shift();
1271
1272     const T* stringStart = currentSourcePtr();
1273
1274     while (m_current != stringQuoteCharacter) {
1275         if (UNLIKELY(m_current == '\\')) {
1276             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1277                 append16(stringStart, currentSourcePtr() - stringStart);
1278             shift();
1279
1280             LChar escape = singleEscape(m_current);
1281
1282             // Most common escape sequences first
1283             if (escape) {
1284                 if (shouldBuildStrings)
1285                     record16(escape);
1286                 shift();
1287             } else if (UNLIKELY(isLineTerminator(m_current)))
1288                 shiftLineTerminator();
1289             else {
1290                 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::String, strictMode, stringQuoteCharacter);
1291                 if (result != StringParsedSuccessfully)
1292                     return result;
1293             }
1294
1295             stringStart = currentSourcePtr();
1296             continue;
1297         }
1298         // Fast check for characters that require special handling.
1299         // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1300         // as possible, and lets through all common ASCII characters.
1301         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1302             // New-line or end of input is not allowed
1303             if (atEnd() || isLineTerminator(m_current)) {
1304                 m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
1305                 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1306             }
1307             // Anything else is just a normal character
1308         }
1309         shift();
1310     }
1311
1312     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1313         append16(stringStart, currentSourcePtr() - stringStart);
1314     if (shouldBuildStrings)
1315         tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1316     else
1317         tokenData->ident = 0;
1318
1319     m_buffer16.shrink(0);
1320     return StringParsedSuccessfully;
1321 }
1322
1323 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
1324 // While the lexer accepts <LF><CR> (not <CR><LF>) sequence
1325 // as one line terminator and increments one line number,
1326 // TemplateLiteral considers it as two line terminators <LF> and <CR>.
1327 //
1328 // TemplateLiteral normalizes line terminators as follows.
1329 //
1330 // <LF> => <LF>
1331 // <CR> => <LF>
1332 // <CR><LF> => <LF>
1333 // <\u2028> => <\u2028>
1334 // <\u2029> => <\u2029>
1335 //
1336 // So, <LF><CR> should be normalized to <LF><LF>.
1337 // However, the lexer should increment the line number only once for <LF><CR>.
1338 //
1339 // To achieve this, LineNumberAdder holds the current status of line terminator sequence.
1340 // When TemplateLiteral lexer encounters a line terminator, it notifies to LineNumberAdder.
1341 // LineNumberAdder maintains the status and increments the line number when it's necessary.
1342 // For example, LineNumberAdder increments the line number only once for <LF><CR> and <CR><LF>.
1343 template<typename CharacterType>
1344 class LineNumberAdder {
1345 public:
1346     LineNumberAdder(int& lineNumber)
1347         : m_lineNumber(lineNumber)
1348     {
1349     }
1350
1351     void clear()
1352     {
1353         m_previous = 0;
1354     }
1355
1356     void add(CharacterType character)
1357     {
1358         ASSERT(Lexer<CharacterType>::isLineTerminator(character));
1359         if ((character + m_previous) == ('\n' + '\r'))
1360             m_previous = 0;
1361         else {
1362             ++m_lineNumber;
1363             m_previous = character;
1364         }
1365     }
1366
1367 private:
1368     int& m_lineNumber;
1369     CharacterType m_previous { 0 };
1370 };
1371
1372 template <typename T>
1373 template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
1374 {
1375     const T* stringStart = currentSourcePtr();
1376     const T* rawStringStart = currentSourcePtr();
1377
1378     LineNumberAdder<T> lineNumberAdder(m_lineNumber);
1379
1380     while (m_current != '`') {
1381         if (UNLIKELY(m_current == '\\')) {
1382             lineNumberAdder.clear();
1383             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1384                 append16(stringStart, currentSourcePtr() - stringStart);
1385             shift();
1386
1387             LChar escape = singleEscape(m_current);
1388
1389             // Most common escape sequences first.
1390             if (escape) {
1391                 if (shouldBuildStrings)
1392                     record16(escape);
1393                 shift();
1394             } else if (UNLIKELY(isLineTerminator(m_current))) {
1395                 if (m_current == '\r') {
1396                     lineNumberAdder.add(m_current);
1397                     shift();
1398                     if (m_current == '\n') {
1399                         lineNumberAdder.add(m_current);
1400                         shift();
1401                     }
1402                 } else {
1403                     lineNumberAdder.add(m_current);
1404                     shift();
1405                 }
1406             } else {
1407                 bool strictMode = true;
1408                 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::Template, strictMode, '`');
1409                 if (result != StringParsedSuccessfully)
1410                     return result;
1411             }
1412
1413             stringStart = currentSourcePtr();
1414             continue;
1415         }
1416
1417         if (m_current == '$' && peek(1) == '{')
1418             break;
1419
1420         // Fast check for characters that require special handling.
1421         // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1422         // as possible, and lets through all common ASCII characters.
1423         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1424             // End of input is not allowed.
1425             // Unlike String, line terminator is allowed.
1426             if (atEnd()) {
1427                 m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
1428                 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1429             }
1430
1431             if (isLineTerminator(m_current)) {
1432                 if (m_current == '\r') {
1433                     // Normalize <CR>, <CR><LF> to <LF>.
1434                     if (shouldBuildStrings) {
1435                         if (stringStart != currentSourcePtr())
1436                             append16(stringStart, currentSourcePtr() - stringStart);
1437                         if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1438                             m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1439
1440                         record16('\n');
1441                         if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1442                             m_bufferForRawTemplateString16.append('\n');
1443                     }
1444                     lineNumberAdder.add(m_current);
1445                     shift();
1446                     if (m_current == '\n') {
1447                         lineNumberAdder.add(m_current);
1448                         shift();
1449                     }
1450                     stringStart = currentSourcePtr();
1451                     rawStringStart = currentSourcePtr();
1452                 } else {
1453                     lineNumberAdder.add(m_current);
1454                     shift();
1455                 }
1456                 continue;
1457             }
1458             // Anything else is just a normal character
1459         }
1460
1461         lineNumberAdder.clear();
1462         shift();
1463     }
1464
1465     bool isTail = m_current == '`';
1466
1467     if (shouldBuildStrings) {
1468         if (currentSourcePtr() != stringStart)
1469             append16(stringStart, currentSourcePtr() - stringStart);
1470         if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1471             m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1472     }
1473
1474     if (shouldBuildStrings) {
1475         tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1476         // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
1477         if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1478             tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
1479         else
1480             tokenData->raw = makeEmptyIdentifier();
1481     } else {
1482         tokenData->cooked = makeEmptyIdentifier();
1483         tokenData->raw = makeEmptyIdentifier();
1484     }
1485     tokenData->isTail = isTail;
1486
1487     m_buffer16.shrink(0);
1488     m_bufferForRawTemplateString16.shrink(0);
1489
1490     if (isTail) {
1491         // Skip `
1492         shift();
1493     } else {
1494         // Skip $ and {
1495         shift();
1496         shift();
1497     }
1498
1499     return StringParsedSuccessfully;
1500 }
1501 #endif
1502
1503 template <typename T>
1504 ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
1505 {
1506     // Optimization: most hexadecimal values fit into 4 bytes.
1507     uint32_t hexValue = 0;
1508     int maximumDigits = 7;
1509
1510     do {
1511         hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1512         shift();
1513         --maximumDigits;
1514     } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1515
1516     if (maximumDigits >= 0) {
1517         returnValue = hexValue;
1518         return;
1519     }
1520
1521     // No more place in the hexValue buffer.
1522     // The values are shifted out and placed into the m_buffer8 vector.
1523     for (int i = 0; i < 8; ++i) {
1524          int digit = hexValue >> 28;
1525          if (digit < 10)
1526              record8(digit + '0');
1527          else
1528              record8(digit - 10 + 'a');
1529          hexValue <<= 4;
1530     }
1531
1532     while (isASCIIHexDigit(m_current)) {
1533         record8(m_current);
1534         shift();
1535     }
1536
1537     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1538 }
1539
1540 template <typename T>
1541 ALWAYS_INLINE bool Lexer<T>::parseBinary(double& returnValue)
1542 {
1543     // Optimization: most binary values fit into 4 bytes.
1544     uint32_t binaryValue = 0;
1545     const unsigned maximumDigits = 32;
1546     int digit = maximumDigits - 1;
1547     // Temporary buffer for the digits. Makes easier
1548     // to reconstruct the input characters when needed.
1549     LChar digits[maximumDigits];
1550
1551     do {
1552         binaryValue = (binaryValue << 1) + (m_current - '0');
1553         digits[digit] = m_current;
1554         shift();
1555         --digit;
1556     } while (isASCIIBinaryDigit(m_current) && digit >= 0);
1557
1558     if (!isASCIIDigit(m_current) && digit >= 0) {
1559         returnValue = binaryValue;
1560         return true;
1561     }
1562
1563     for (int i = maximumDigits - 1; i > digit; --i)
1564         record8(digits[i]);
1565
1566     while (isASCIIBinaryDigit(m_current)) {
1567         record8(m_current);
1568         shift();
1569     }
1570
1571     if (isASCIIDigit(m_current))
1572         return false;
1573
1574     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2);
1575     return true;
1576 }
1577
1578 template <typename T>
1579 ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
1580 {
1581     // Optimization: most octal values fit into 4 bytes.
1582     uint32_t octalValue = 0;
1583     const unsigned maximumDigits = 10;
1584     int digit = maximumDigits - 1;
1585     // Temporary buffer for the digits. Makes easier
1586     // to reconstruct the input characters when needed.
1587     LChar digits[maximumDigits];
1588
1589     do {
1590         octalValue = octalValue * 8 + (m_current - '0');
1591         digits[digit] = m_current;
1592         shift();
1593         --digit;
1594     } while (isASCIIOctalDigit(m_current) && digit >= 0);
1595
1596     if (!isASCIIDigit(m_current) && digit >= 0) {
1597         returnValue = octalValue;
1598         return true;
1599     }
1600
1601     for (int i = maximumDigits - 1; i > digit; --i)
1602          record8(digits[i]);
1603
1604     while (isASCIIOctalDigit(m_current)) {
1605         record8(m_current);
1606         shift();
1607     }
1608
1609     if (isASCIIDigit(m_current))
1610         return false;
1611
1612     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
1613     return true;
1614 }
1615
1616 template <typename T>
1617 ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
1618 {
1619     // Optimization: most decimal values fit into 4 bytes.
1620     uint32_t decimalValue = 0;
1621
1622     // Since parseOctal may be executed before parseDecimal,
1623     // the m_buffer8 may hold ascii digits.
1624     if (!m_buffer8.size()) {
1625         const unsigned maximumDigits = 10;
1626         int digit = maximumDigits - 1;
1627         // Temporary buffer for the digits. Makes easier
1628         // to reconstruct the input characters when needed.
1629         LChar digits[maximumDigits];
1630
1631         do {
1632             decimalValue = decimalValue * 10 + (m_current - '0');
1633             digits[digit] = m_current;
1634             shift();
1635             --digit;
1636         } while (isASCIIDigit(m_current) && digit >= 0);
1637
1638         if (digit >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
1639             returnValue = decimalValue;
1640             return true;
1641         }
1642
1643         for (int i = maximumDigits - 1; i > digit; --i)
1644             record8(digits[i]);
1645     }
1646
1647     while (isASCIIDigit(m_current)) {
1648         record8(m_current);
1649         shift();
1650     }
1651
1652     return false;
1653 }
1654
1655 template <typename T>
1656 ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
1657 {
1658     record8('.');
1659     while (isASCIIDigit(m_current)) {
1660         record8(m_current);
1661         shift();
1662     }
1663 }
1664
1665 template <typename T>
1666 ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1667 {
1668     record8('e');
1669     shift();
1670     if (m_current == '+' || m_current == '-') {
1671         record8(m_current);
1672         shift();
1673     }
1674
1675     if (!isASCIIDigit(m_current))
1676         return false;
1677
1678     do {
1679         record8(m_current);
1680         shift();
1681     } while (isASCIIDigit(m_current));
1682     return true;
1683 }
1684
1685 template <typename T>
1686 ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1687 {
1688     while (true) {
1689         while (UNLIKELY(m_current == '*')) {
1690             shift();
1691             if (m_current == '/') {
1692                 shift();
1693                 return true;
1694             }
1695         }
1696
1697         if (atEnd())
1698             return false;
1699
1700         if (isLineTerminator(m_current)) {
1701             shiftLineTerminator();
1702             m_terminator = true;
1703         } else
1704             shift();
1705     }
1706 }
1707
1708 template <typename T>
1709 bool Lexer<T>::nextTokenIsColon()
1710 {
1711     const T* code = m_code;
1712     while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1713         code++;
1714
1715     return code < m_codeEnd && *code == ':';
1716 }
1717
1718 #if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
1719 template <typename T>
1720 void Lexer<T>::setTokenPosition(JSToken* tokenRecord)
1721 {
1722     JSTokenData* tokenData = &tokenRecord->m_data;
1723     tokenData->line = lineNumber();
1724     tokenData->offset = currentOffset();
1725     tokenData->lineStartOffset = currentLineStartOffset();
1726     ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1727 }
1728 #endif
1729
1730 template <typename T>
1731 JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
1732 {
1733     JSTokenData* tokenData = &tokenRecord->m_data;
1734     JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1735     m_lastTockenLocation = JSTokenLocation(tokenRecord->m_location);
1736
1737     ASSERT(!m_error);
1738     ASSERT(m_buffer8.isEmpty());
1739     ASSERT(m_buffer16.isEmpty());
1740
1741     JSTokenType token = ERRORTOK;
1742     m_terminator = false;
1743
1744 start:
1745     while (isWhiteSpace(m_current))
1746         shift();
1747
1748     if (atEnd())
1749         return EOFTOK;
1750
1751     tokenLocation->startOffset = currentOffset();
1752     ASSERT(currentOffset() >= currentLineStartOffset());
1753     tokenRecord->m_startPosition = currentPosition();
1754
1755     CharacterType type;
1756     if (LIKELY(isLatin1(m_current)))
1757         type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1758     else if (isNonLatin1IdentStart(m_current))
1759         type = CharacterIdentifierStart;
1760     else if (isLineTerminator(m_current))
1761         type = CharacterLineTerminator;
1762     else
1763         type = CharacterInvalid;
1764
1765     switch (type) {
1766     case CharacterGreater:
1767         shift();
1768         if (m_current == '>') {
1769             shift();
1770             if (m_current == '>') {
1771                 shift();
1772                 if (m_current == '=') {
1773                     shift();
1774                     token = URSHIFTEQUAL;
1775                     break;
1776                 }
1777                 token = URSHIFT;
1778                 break;
1779             }
1780             if (m_current == '=') {
1781                 shift();
1782                 token = RSHIFTEQUAL;
1783                 break;
1784             }
1785             token = RSHIFT;
1786             break;
1787         }
1788         if (m_current == '=') {
1789             shift();
1790             token = GE;
1791             break;
1792         }
1793         token = GT;
1794         break;
1795     case CharacterEqual: {
1796 #if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
1797         if (peek(1) == '>') {
1798             token = ARROWFUNCTION;
1799             tokenData->line = lineNumber();
1800             tokenData->offset = currentOffset();
1801             tokenData->lineStartOffset = currentLineStartOffset();
1802             ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1803             shift();
1804             shift();
1805             break;
1806         }
1807 #endif
1808         shift();
1809         if (m_current == '=') {
1810             shift();
1811             if (m_current == '=') {
1812                 shift();
1813                 token = STREQ;
1814                 break;
1815             }
1816             token = EQEQ;
1817             break;
1818         }
1819         token = EQUAL;
1820         break;
1821     }
1822     case CharacterLess:
1823         shift();
1824         if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1825             // <!-- marks the beginning of a line comment (for www usage)
1826             goto inSingleLineComment;
1827         }
1828         if (m_current == '<') {
1829             shift();
1830             if (m_current == '=') {
1831                 shift();
1832                 token = LSHIFTEQUAL;
1833                 break;
1834             }
1835             token = LSHIFT;
1836             break;
1837         }
1838         if (m_current == '=') {
1839             shift();
1840             token = LE;
1841             break;
1842         }
1843         token = LT;
1844         break;
1845     case CharacterExclamationMark:
1846         shift();
1847         if (m_current == '=') {
1848             shift();
1849             if (m_current == '=') {
1850                 shift();
1851                 token = STRNEQ;
1852                 break;
1853             }
1854             token = NE;
1855             break;
1856         }
1857         token = EXCLAMATION;
1858         break;
1859     case CharacterAdd:
1860         shift();
1861         if (m_current == '+') {
1862             shift();
1863             token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1864             break;
1865         }
1866         if (m_current == '=') {
1867             shift();
1868             token = PLUSEQUAL;
1869             break;
1870         }
1871         token = PLUS;
1872         break;
1873     case CharacterSub:
1874         shift();
1875         if (m_current == '-') {
1876             shift();
1877             if (m_atLineStart && m_current == '>') {
1878                 shift();
1879                 goto inSingleLineComment;
1880             }
1881             token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
1882             break;
1883         }
1884         if (m_current == '=') {
1885             shift();
1886             token = MINUSEQUAL;
1887             break;
1888         }
1889         token = MINUS;
1890         break;
1891     case CharacterMultiply:
1892         shift();
1893         if (m_current == '=') {
1894             shift();
1895             token = MULTEQUAL;
1896             break;
1897         }
1898         token = TIMES;
1899         break;
1900     case CharacterSlash:
1901         shift();
1902         if (m_current == '/') {
1903             shift();
1904             goto inSingleLineComment;
1905         }
1906         if (m_current == '*') {
1907             shift();
1908             if (parseMultilineComment())
1909                 goto start;
1910             m_lexErrorMessage = ASCIILiteral("Multiline comment was not closed properly");
1911             token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
1912             goto returnError;
1913         }
1914         if (m_current == '=') {
1915             shift();
1916             token = DIVEQUAL;
1917             break;
1918         }
1919         token = DIVIDE;
1920         break;
1921     case CharacterAnd:
1922         shift();
1923         if (m_current == '&') {
1924             shift();
1925             token = AND;
1926             break;
1927         }
1928         if (m_current == '=') {
1929             shift();
1930             token = ANDEQUAL;
1931             break;
1932         }
1933         token = BITAND;
1934         break;
1935     case CharacterXor:
1936         shift();
1937         if (m_current == '=') {
1938             shift();
1939             token = XOREQUAL;
1940             break;
1941         }
1942         token = BITXOR;
1943         break;
1944     case CharacterModulo:
1945         shift();
1946         if (m_current == '=') {
1947             shift();
1948             token = MODEQUAL;
1949             break;
1950         }
1951         token = MOD;
1952         break;
1953     case CharacterOr:
1954         shift();
1955         if (m_current == '=') {
1956             shift();
1957             token = OREQUAL;
1958             break;
1959         }
1960         if (m_current == '|') {
1961             shift();
1962             token = OR;
1963             break;
1964         }
1965         token = BITOR;
1966         break;
1967     case CharacterOpenParen:
1968         token = OPENPAREN;
1969         shift();
1970         break;
1971     case CharacterCloseParen:
1972         token = CLOSEPAREN;
1973         shift();
1974         break;
1975     case CharacterOpenBracket:
1976         token = OPENBRACKET;
1977         shift();
1978         break;
1979     case CharacterCloseBracket:
1980         token = CLOSEBRACKET;
1981         shift();
1982         break;
1983     case CharacterComma:
1984         token = COMMA;
1985         shift();
1986         break;
1987     case CharacterColon:
1988         token = COLON;
1989         shift();
1990         break;
1991     case CharacterQuestion:
1992         token = QUESTION;
1993         shift();
1994         break;
1995     case CharacterTilde:
1996         token = TILDE;
1997         shift();
1998         break;
1999     case CharacterSemicolon:
2000         shift();
2001         token = SEMICOLON;
2002         break;
2003     case CharacterOpenBrace:
2004         tokenData->line = lineNumber();
2005         tokenData->offset = currentOffset();
2006         tokenData->lineStartOffset = currentLineStartOffset();
2007         ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2008         shift();
2009         token = OPENBRACE;
2010         break;
2011     case CharacterCloseBrace:
2012         tokenData->line = lineNumber();
2013         tokenData->offset = currentOffset();
2014         tokenData->lineStartOffset = currentLineStartOffset();
2015         ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2016         shift();
2017         token = CLOSEBRACE;
2018         break;
2019     case CharacterDot:
2020         shift();
2021         if (!isASCIIDigit(m_current)) {
2022             if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
2023                 shift();
2024                 shift();
2025                 token = DOTDOTDOT;
2026                 break;
2027             }
2028             token = DOT;
2029             break;
2030         }
2031         goto inNumberAfterDecimalPoint;
2032     case CharacterZero:
2033         shift();
2034         if ((m_current | 0x20) == 'x') {
2035             if (!isASCIIHexDigit(peek(1))) {
2036                 m_lexErrorMessage = ASCIILiteral("No hexadecimal digits after '0x'");
2037                 token = INVALID_HEX_NUMBER_ERRORTOK;
2038                 goto returnError;
2039             }
2040
2041             // Shift out the 'x' prefix.
2042             shift();
2043
2044             parseHex(tokenData->doubleValue);
2045             if (isIdentStart(m_current)) {
2046                 m_lexErrorMessage = ASCIILiteral("No space between hexadecimal literal and identifier");
2047                 token = INVALID_HEX_NUMBER_ERRORTOK;
2048                 goto returnError;
2049             }
2050             token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2051             m_buffer8.shrink(0);
2052             break;
2053         }
2054         if ((m_current | 0x20) == 'b') {
2055             if (!isASCIIBinaryDigit(peek(1))) {
2056                 m_lexErrorMessage = ASCIILiteral("No binary digits after '0b'");
2057                 token = INVALID_BINARY_NUMBER_ERRORTOK;
2058                 goto returnError;
2059             }
2060
2061             // Shift out the 'b' prefix.
2062             shift();
2063
2064             parseBinary(tokenData->doubleValue);
2065             if (isIdentStart(m_current)) {
2066                 m_lexErrorMessage = ASCIILiteral("No space between binary literal and identifier");
2067                 token = INVALID_BINARY_NUMBER_ERRORTOK;
2068                 goto returnError;
2069             }
2070             token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2071             m_buffer8.shrink(0);
2072             break;
2073         }
2074
2075         if ((m_current | 0x20) == 'o') {
2076             if (!isASCIIOctalDigit(peek(1))) {
2077                 m_lexErrorMessage = ASCIILiteral("No octal digits after '0o'");
2078                 token = INVALID_OCTAL_NUMBER_ERRORTOK;
2079                 goto returnError;
2080             }
2081
2082             // Shift out the 'o' prefix.
2083             shift();
2084
2085             parseOctal(tokenData->doubleValue);
2086             if (isIdentStart(m_current)) {
2087                 m_lexErrorMessage = ASCIILiteral("No space between octal literal and identifier");
2088                 token = INVALID_OCTAL_NUMBER_ERRORTOK;
2089                 goto returnError;
2090             }
2091             token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2092             m_buffer8.shrink(0);
2093             break;
2094         }
2095
2096         record8('0');
2097         if (strictMode && isASCIIDigit(m_current)) {
2098             m_lexErrorMessage = ASCIILiteral("Decimal integer literals with a leading zero are forbidden in strict mode");
2099             token = INVALID_OCTAL_NUMBER_ERRORTOK;
2100             goto returnError;
2101         }
2102         if (isASCIIOctalDigit(m_current)) {
2103             if (parseOctal(tokenData->doubleValue)) {
2104                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2105             }
2106         }
2107         FALLTHROUGH;
2108     case CharacterNumber:
2109         if (LIKELY(token != INTEGER && token != DOUBLE)) {
2110             if (!parseDecimal(tokenData->doubleValue)) {
2111                 token = INTEGER;
2112                 if (m_current == '.') {
2113                     shift();
2114 inNumberAfterDecimalPoint:
2115                     parseNumberAfterDecimalPoint();
2116                     token = DOUBLE;
2117                 }
2118                 if ((m_current | 0x20) == 'e') {
2119                     if (!parseNumberAfterExponentIndicator()) {
2120                         m_lexErrorMessage = ASCIILiteral("Non-number found after exponent indicator");
2121                         token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2122                         goto returnError;
2123                     }
2124                 }
2125                 size_t parsedLength;
2126                 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2127                 if (token == INTEGER)
2128                     token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2129             } else
2130                 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2131         }
2132
2133         // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
2134         if (UNLIKELY(isIdentStart(m_current))) {
2135             m_lexErrorMessage = ASCIILiteral("At least one digit must occur after a decimal point");
2136             token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2137             goto returnError;
2138         }
2139         m_buffer8.shrink(0);
2140         break;
2141     case CharacterQuote: {
2142         StringParseResult result = StringCannotBeParsed;
2143         if (lexerFlags & LexerFlagsDontBuildStrings)
2144             result = parseString<false>(tokenData, strictMode);
2145         else
2146             result = parseString<true>(tokenData, strictMode);
2147
2148         if (UNLIKELY(result != StringParsedSuccessfully)) {
2149             token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
2150             goto returnError;
2151         }
2152         shift();
2153         token = STRING;
2154         break;
2155         }
2156 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
2157     case CharacterBackQuote: {
2158         // Skip backquote.
2159         shift();
2160         StringParseResult result = StringCannotBeParsed;
2161         if (lexerFlags & LexerFlagsDontBuildStrings)
2162             result = parseTemplateLiteral<false>(tokenData, RawStringsBuildMode::BuildRawStrings);
2163         else
2164             result = parseTemplateLiteral<true>(tokenData, RawStringsBuildMode::BuildRawStrings);
2165
2166         if (UNLIKELY(result != StringParsedSuccessfully)) {
2167             token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2168             goto returnError;
2169         }
2170         token = TEMPLATE;
2171         break;
2172         }
2173 #endif
2174     case CharacterIdentifierStart:
2175         ASSERT(isIdentStart(m_current));
2176         FALLTHROUGH;
2177     case CharacterBackSlash:
2178         parseIdent:
2179         if (lexerFlags & LexexFlagsDontBuildKeywords)
2180             token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
2181         else
2182             token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
2183         break;
2184     case CharacterLineTerminator:
2185         ASSERT(isLineTerminator(m_current));
2186         shiftLineTerminator();
2187         m_atLineStart = true;
2188         m_terminator = true;
2189         m_lineStart = m_code;
2190         goto start;
2191     case CharacterPrivateIdentifierStart:
2192         if (m_parsingBuiltinFunction)
2193             goto parseIdent;
2194
2195         FALLTHROUGH;
2196     case CharacterInvalid:
2197         m_lexErrorMessage = invalidCharacterMessage();
2198         token = ERRORTOK;
2199         goto returnError;
2200     default:
2201         RELEASE_ASSERT_NOT_REACHED();
2202         m_lexErrorMessage = ASCIILiteral("Internal Error");
2203         token = ERRORTOK;
2204         goto returnError;
2205     }
2206
2207     m_atLineStart = false;
2208     goto returnToken;
2209
2210 inSingleLineComment:
2211     while (!isLineTerminator(m_current)) {
2212         if (atEnd())
2213             return EOFTOK;
2214         shift();
2215     }
2216     shiftLineTerminator();
2217     m_atLineStart = true;
2218     m_terminator = true;
2219     m_lineStart = m_code;
2220     if (!lastTokenWasRestrKeyword())
2221         goto start;
2222
2223     token = SEMICOLON;
2224     // Fall through into returnToken.
2225
2226 returnToken:
2227     tokenLocation->line = m_lineNumber;
2228     tokenLocation->endOffset = currentOffset();
2229     tokenLocation->lineStartOffset = currentLineStartOffset();
2230     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
2231     tokenRecord->m_endPosition = currentPosition();
2232     m_lastToken = token;
2233     return token;
2234
2235 returnError:
2236     m_error = true;
2237     tokenLocation->line = m_lineNumber;
2238     tokenLocation->endOffset = currentOffset();
2239     tokenLocation->lineStartOffset = currentLineStartOffset();
2240     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
2241     tokenRecord->m_endPosition = currentPosition();
2242     RELEASE_ASSERT(token & ErrorTokenFlag);
2243     return token;
2244 }
2245
2246 template <typename T>
2247 static inline void orCharacter(UChar&, UChar);
2248
2249 template <>
2250 inline void orCharacter<LChar>(UChar&, UChar) { }
2251
2252 template <>
2253 inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
2254 {
2255     orAccumulator |= character;
2256 }
2257
2258 template <typename T>
2259 bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
2260 {
2261     ASSERT(m_buffer16.isEmpty());
2262
2263     bool lastWasEscape = false;
2264     bool inBrackets = false;
2265     UChar charactersOredTogether = 0;
2266
2267     if (patternPrefix) {
2268         ASSERT(!isLineTerminator(patternPrefix));
2269         ASSERT(patternPrefix != '/');
2270         ASSERT(patternPrefix != '[');
2271         record16(patternPrefix);
2272     }
2273
2274     while (true) {
2275         if (isLineTerminator(m_current) || atEnd()) {
2276             m_buffer16.shrink(0);
2277             return false;
2278         }
2279
2280         T prev = m_current;
2281
2282         shift();
2283
2284         if (prev == '/' && !lastWasEscape && !inBrackets)
2285             break;
2286
2287         record16(prev);
2288         orCharacter<T>(charactersOredTogether, prev);
2289
2290         if (lastWasEscape) {
2291             lastWasEscape = false;
2292             continue;
2293         }
2294
2295         switch (prev) {
2296         case '[':
2297             inBrackets = true;
2298             break;
2299         case ']':
2300             inBrackets = false;
2301             break;
2302         case '\\':
2303             lastWasEscape = true;
2304             break;
2305         }
2306     }
2307
2308     pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2309
2310     m_buffer16.shrink(0);
2311     charactersOredTogether = 0;
2312
2313     while (isIdentPart(m_current)) {
2314         record16(m_current);
2315         orCharacter<T>(charactersOredTogether, m_current);
2316         shift();
2317     }
2318
2319     flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2320     m_buffer16.shrink(0);
2321
2322     return true;
2323 }
2324
2325 template <typename T>
2326 bool Lexer<T>::skipRegExp()
2327 {
2328     bool lastWasEscape = false;
2329     bool inBrackets = false;
2330
2331     while (true) {
2332         if (isLineTerminator(m_current) || atEnd())
2333             return false;
2334
2335         T prev = m_current;
2336
2337         shift();
2338
2339         if (prev == '/' && !lastWasEscape && !inBrackets)
2340             break;
2341
2342         if (lastWasEscape) {
2343             lastWasEscape = false;
2344             continue;
2345         }
2346
2347         switch (prev) {
2348         case '[':
2349             inBrackets = true;
2350             break;
2351         case ']':
2352             inBrackets = false;
2353             break;
2354         case '\\':
2355             lastWasEscape = true;
2356             break;
2357         }
2358     }
2359
2360     while (isIdentPart(m_current))
2361         shift();
2362
2363     return true;
2364 }
2365
2366 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
2367 template <typename T>
2368 JSTokenType Lexer<T>::scanTrailingTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
2369 {
2370     JSTokenData* tokenData = &tokenRecord->m_data;
2371     JSTokenLocation* tokenLocation = &tokenRecord->m_location;
2372     ASSERT(!m_error);
2373     ASSERT(m_buffer16.isEmpty());
2374
2375     // Leading closing brace } is already shifted in the previous token scan.
2376     // So in this re-scan phase, shift() is not needed here.
2377     StringParseResult result = parseTemplateLiteral<true>(tokenData, rawStringsBuildMode);
2378     JSTokenType token = ERRORTOK;
2379     if (UNLIKELY(result != StringParsedSuccessfully)) {
2380         token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2381         m_error = true;
2382     } else {
2383         token = TEMPLATE;
2384         m_lastToken = token;
2385     }
2386
2387     // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2388     m_atLineStart = false;
2389
2390     // Adjust current tokenLocation data for TemplateString.
2391     tokenLocation->line = m_lineNumber;
2392     tokenLocation->endOffset = currentOffset();
2393     tokenLocation->lineStartOffset = currentLineStartOffset();
2394     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
2395     tokenRecord->m_endPosition = currentPosition();
2396     return token;
2397 }
2398 #endif
2399
2400 template <typename T>
2401 void Lexer<T>::clear()
2402 {
2403     m_arena = 0;
2404
2405     Vector<LChar> newBuffer8;
2406     m_buffer8.swap(newBuffer8);
2407
2408     Vector<UChar> newBuffer16;
2409     m_buffer16.swap(newBuffer16);
2410
2411     Vector<UChar> newBufferForRawTemplateString16;
2412     m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
2413
2414     m_isReparsing = false;
2415 }
2416
2417 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
2418 template class Lexer<LChar>;
2419 template class Lexer<UChar>;
2420
2421 } // namespace JSC