parser/Lexer.cpp

   1 /*
   2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
   3  *  Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
   4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
   5  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
   6  *  Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
   7  *
   8  *  This library is free software; you can redistribute it and/or
   9  *  modify it under the terms of the GNU Library General Public
  10  *  License as published by the Free Software Foundation; either
  11  *  version 2 of the License, or (at your option) any later version.
  12  *
  13  *  This library is distributed in the hope that it will be useful,
  14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  *  Library General Public License for more details.
  17  *
  18  *  You should have received a copy of the GNU Library General Public License
  19  *  along with this library; see the file COPYING.LIB.  If not, write to
  20  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  21  *  Boston, MA 02110-1301, USA.
  22  *
  23  */
  24
  25 #include "config.h"
  26 #include "Lexer.h"
  27
  28 #include "JSFunctionInlines.h"
  29
  30 #include "BuiltinNames.h"
  31 #include "JSGlobalObjectFunctions.h"
  32 #include "Identifier.h"
  33 #include "NodeInfo.h"
  34 #include "Nodes.h"
  35 #include "JSCInlines.h"
  36 #include <wtf/dtoa.h>
  37 #include <ctype.h>
  38 #include <limits.h>
  39 #include <string.h>
  40 #include <wtf/Assertions.h>
  41
  42 #include "KeywordLookup.h"
  43 #include "Lexer.lut.h"
  44 #include "Parser.h"
  45
  46 namespace JSC {
  47
  48 Keywords::Keywords(VM& vm)
  49     : m_vm(vm)
  50     , m_keywordTable(JSC::mainTable)
  51 {
  52 }
  53
  54 enum CharacterType {
  55     // Types for the main switch
  56
  57     // The first three types are fixed: they must keep the three smallest values, because
  58     // isIdentPart tests "type <= CharacterNumber" and isIdentStart tests for CharacterIdentifierStart.
         // Do not insert or reorder anything before CharacterNumber.
  59     CharacterIdentifierStart,
  60     CharacterZero,
  61     CharacterNumber,
  62
         // Everything from here on is not an identifier part.
  63     CharacterInvalid,
  64     CharacterLineTerminator,
  65     CharacterExclamationMark,
  66     CharacterOpenParen,
  67     CharacterCloseParen,
  68     CharacterOpenBracket,
  69     CharacterCloseBracket,
  70     CharacterComma,
  71     CharacterColon,
  72     CharacterQuestion,
  73     CharacterTilde,
  74     CharacterQuote,
  75     CharacterDot,
  76     CharacterSlash,
  77     CharacterBackSlash,
  78     CharacterSemicolon,
  79     CharacterOpenBrace,
  80     CharacterCloseBrace,
  81
  82     CharacterAdd,
  83     CharacterSub,
  84     CharacterMultiply,
  85     CharacterModulo,
  86     CharacterAnd,
  87     CharacterXor,
  88     CharacterOr,
  89     CharacterLess,
  90     CharacterGreater,
  91     CharacterEqual,
  92
  93     // Other types
  94     CharacterWhiteSpace,
         // Class assigned to '@' in typesOfLatin1Characters.
  95     CharacterPrivateIdentifierStart
  96 };
  97
  98 // 256 Latin-1 codes: maps each Latin-1 code unit, used directly as the index, to its CharacterType.
     // isIdentStart(LChar) and isIdentPart(LChar) read this table, so the entries for letters, '$', '_'
     // and the digits decide which Latin-1 characters may appear in identifiers.
     // '#' and '`' are classified as invalid; '@' has its own class, CharacterPrivateIdentifierStart.
  99 static const unsigned short typesOfLatin1Characters[256] = {
 100 /*   0 - Null               */ CharacterInvalid,
 101 /*   1 - Start of Heading   */ CharacterInvalid,
 102 /*   2 - Start of Text      */ CharacterInvalid,
 103 /*   3 - End of Text        */ CharacterInvalid,
 104 /*   4 - End of Transm.     */ CharacterInvalid,
 105 /*   5 - Enquiry            */ CharacterInvalid,
 106 /*   6 - Acknowledgment     */ CharacterInvalid,
 107 /*   7 - Bell               */ CharacterInvalid,
 108 /*   8 - Back Space         */ CharacterInvalid,
 109 /*   9 - Horizontal Tab     */ CharacterWhiteSpace,
 110 /*  10 - Line Feed          */ CharacterLineTerminator,
 111 /*  11 - Vertical Tab       */ CharacterWhiteSpace,
 112 /*  12 - Form Feed          */ CharacterWhiteSpace,
 113 /*  13 - Carriage Return    */ CharacterLineTerminator,
 114 /*  14 - Shift Out          */ CharacterInvalid,
 115 /*  15 - Shift In           */ CharacterInvalid,
 116 /*  16 - Data Link Escape   */ CharacterInvalid,
 117 /*  17 - Device Control 1   */ CharacterInvalid,
 118 /*  18 - Device Control 2   */ CharacterInvalid,
 119 /*  19 - Device Control 3   */ CharacterInvalid,
 120 /*  20 - Device Control 4   */ CharacterInvalid,
 121 /*  21 - Negative Ack.      */ CharacterInvalid,
 122 /*  22 - Synchronous Idle   */ CharacterInvalid,
 123 /*  23 - End of Tr. Block   */ CharacterInvalid,
 124 /*  24 - Cancel             */ CharacterInvalid,
 125 /*  25 - End of Medium      */ CharacterInvalid,
 126 /*  26 - Substitute         */ CharacterInvalid,
 127 /*  27 - Escape             */ CharacterInvalid,
 128 /*  28 - File Separator     */ CharacterInvalid,
 129 /*  29 - Group Separator    */ CharacterInvalid,
 130 /*  30 - Record Separator   */ CharacterInvalid,
 131 /*  31 - Unit Separator     */ CharacterInvalid,
 132 /*  32 - Space              */ CharacterWhiteSpace,
 133 /*  33 - !                  */ CharacterExclamationMark,
 134 /*  34 - "                  */ CharacterQuote,
 135 /*  35 - #                  */ CharacterInvalid,
 136 /*  36 - $                  */ CharacterIdentifierStart,
 137 /*  37 - %                  */ CharacterModulo,
 138 /*  38 - &                  */ CharacterAnd,
 139 /*  39 - '                  */ CharacterQuote,
 140 /*  40 - (                  */ CharacterOpenParen,
 141 /*  41 - )                  */ CharacterCloseParen,
 142 /*  42 - *                  */ CharacterMultiply,
 143 /*  43 - +                  */ CharacterAdd,
 144 /*  44 - ,                  */ CharacterComma,
 145 /*  45 - -                  */ CharacterSub,
 146 /*  46 - .                  */ CharacterDot,
 147 /*  47 - /                  */ CharacterSlash,
 148 /*  48 - 0                  */ CharacterZero,
 149 /*  49 - 1                  */ CharacterNumber,
 150 /*  50 - 2                  */ CharacterNumber,
 151 /*  51 - 3                  */ CharacterNumber,
 152 /*  52 - 4                  */ CharacterNumber,
 153 /*  53 - 5                  */ CharacterNumber,
 154 /*  54 - 6                  */ CharacterNumber,
 155 /*  55 - 7                  */ CharacterNumber,
 156 /*  56 - 8                  */ CharacterNumber,
 157 /*  57 - 9                  */ CharacterNumber,
 158 /*  58 - :                  */ CharacterColon,
 159 /*  59 - ;                  */ CharacterSemicolon,
 160 /*  60 - <                  */ CharacterLess,
 161 /*  61 - =                  */ CharacterEqual,
 162 /*  62 - >                  */ CharacterGreater,
 163 /*  63 - ?                  */ CharacterQuestion,
 164 /*  64 - @                  */ CharacterPrivateIdentifierStart,
 165 /*  65 - A                  */ CharacterIdentifierStart,
 166 /*  66 - B                  */ CharacterIdentifierStart,
 167 /*  67 - C                  */ CharacterIdentifierStart,
 168 /*  68 - D                  */ CharacterIdentifierStart,
 169 /*  69 - E                  */ CharacterIdentifierStart,
 170 /*  70 - F                  */ CharacterIdentifierStart,
 171 /*  71 - G                  */ CharacterIdentifierStart,
 172 /*  72 - H                  */ CharacterIdentifierStart,
 173 /*  73 - I                  */ CharacterIdentifierStart,
 174 /*  74 - J                  */ CharacterIdentifierStart,
 175 /*  75 - K                  */ CharacterIdentifierStart,
 176 /*  76 - L                  */ CharacterIdentifierStart,
 177 /*  77 - M                  */ CharacterIdentifierStart,
 178 /*  78 - N                  */ CharacterIdentifierStart,
 179 /*  79 - O                  */ CharacterIdentifierStart,
 180 /*  80 - P                  */ CharacterIdentifierStart,
 181 /*  81 - Q                  */ CharacterIdentifierStart,
 182 /*  82 - R                  */ CharacterIdentifierStart,
 183 /*  83 - S                  */ CharacterIdentifierStart,
 184 /*  84 - T                  */ CharacterIdentifierStart,
 185 /*  85 - U                  */ CharacterIdentifierStart,
 186 /*  86 - V                  */ CharacterIdentifierStart,
 187 /*  87 - W                  */ CharacterIdentifierStart,
 188 /*  88 - X                  */ CharacterIdentifierStart,
 189 /*  89 - Y                  */ CharacterIdentifierStart,
 190 /*  90 - Z                  */ CharacterIdentifierStart,
 191 /*  91 - [                  */ CharacterOpenBracket,
 192 /*  92 - \                  */ CharacterBackSlash,
 193 /*  93 - ]                  */ CharacterCloseBracket,
 194 /*  94 - ^                  */ CharacterXor,
 195 /*  95 - _                  */ CharacterIdentifierStart,
 196 /*  96 - `                  */ CharacterInvalid,
 197 /*  97 - a                  */ CharacterIdentifierStart,
 198 /*  98 - b                  */ CharacterIdentifierStart,
 199 /*  99 - c                  */ CharacterIdentifierStart,
 200 /* 100 - d                  */ CharacterIdentifierStart,
 201 /* 101 - e                  */ CharacterIdentifierStart,
 202 /* 102 - f                  */ CharacterIdentifierStart,
 203 /* 103 - g                  */ CharacterIdentifierStart,
 204 /* 104 - h                  */ CharacterIdentifierStart,
 205 /* 105 - i                  */ CharacterIdentifierStart,
 206 /* 106 - j                  */ CharacterIdentifierStart,
 207 /* 107 - k                  */ CharacterIdentifierStart,
 208 /* 108 - l                  */ CharacterIdentifierStart,
 209 /* 109 - m                  */ CharacterIdentifierStart,
 210 /* 110 - n                  */ CharacterIdentifierStart,
 211 /* 111 - o                  */ CharacterIdentifierStart,
 212 /* 112 - p                  */ CharacterIdentifierStart,
 213 /* 113 - q                  */ CharacterIdentifierStart,
 214 /* 114 - r                  */ CharacterIdentifierStart,
 215 /* 115 - s                  */ CharacterIdentifierStart,
 216 /* 116 - t                  */ CharacterIdentifierStart,
 217 /* 117 - u                  */ CharacterIdentifierStart,
 218 /* 118 - v                  */ CharacterIdentifierStart,
 219 /* 119 - w                  */ CharacterIdentifierStart,
 220 /* 120 - x                  */ CharacterIdentifierStart,
 221 /* 121 - y                  */ CharacterIdentifierStart,
 222 /* 122 - z                  */ CharacterIdentifierStart,
 223 /* 123 - {                  */ CharacterOpenBrace,
 224 /* 124 - |                  */ CharacterOr,
 225 /* 125 - }                  */ CharacterCloseBrace,
 226 /* 126 - ~                  */ CharacterTilde,
 227 /* 127 - Delete             */ CharacterInvalid,
 228 /* 128 - Cc category        */ CharacterInvalid,
 229 /* 129 - Cc category        */ CharacterInvalid,
 230 /* 130 - Cc category        */ CharacterInvalid,
 231 /* 131 - Cc category        */ CharacterInvalid,
 232 /* 132 - Cc category        */ CharacterInvalid,
 233 /* 133 - Cc category        */ CharacterInvalid,
 234 /* 134 - Cc category        */ CharacterInvalid,
 235 /* 135 - Cc category        */ CharacterInvalid,
 236 /* 136 - Cc category        */ CharacterInvalid,
 237 /* 137 - Cc category        */ CharacterInvalid,
 238 /* 138 - Cc category        */ CharacterInvalid,
 239 /* 139 - Cc category        */ CharacterInvalid,
 240 /* 140 - Cc category        */ CharacterInvalid,
 241 /* 141 - Cc category        */ CharacterInvalid,
 242 /* 142 - Cc category        */ CharacterInvalid,
 243 /* 143 - Cc category        */ CharacterInvalid,
 244 /* 144 - Cc category        */ CharacterInvalid,
 245 /* 145 - Cc category        */ CharacterInvalid,
 246 /* 146 - Cc category        */ CharacterInvalid,
 247 /* 147 - Cc category        */ CharacterInvalid,
 248 /* 148 - Cc category        */ CharacterInvalid,
 249 /* 149 - Cc category        */ CharacterInvalid,
 250 /* 150 - Cc category        */ CharacterInvalid,
 251 /* 151 - Cc category        */ CharacterInvalid,
 252 /* 152 - Cc category        */ CharacterInvalid,
 253 /* 153 - Cc category        */ CharacterInvalid,
 254 /* 154 - Cc category        */ CharacterInvalid,
 255 /* 155 - Cc category        */ CharacterInvalid,
 256 /* 156 - Cc category        */ CharacterInvalid,
 257 /* 157 - Cc category        */ CharacterInvalid,
 258 /* 158 - Cc category        */ CharacterInvalid,
 259 /* 159 - Cc category        */ CharacterInvalid,
 260 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
 261 /* 161 - Po category        */ CharacterInvalid,
 262 /* 162 - Sc category        */ CharacterInvalid,
 263 /* 163 - Sc category        */ CharacterInvalid,
 264 /* 164 - Sc category        */ CharacterInvalid,
 265 /* 165 - Sc category        */ CharacterInvalid,
 266 /* 166 - So category        */ CharacterInvalid,
 267 /* 167 - So category        */ CharacterInvalid,
 268 /* 168 - Sk category        */ CharacterInvalid,
 269 /* 169 - So category        */ CharacterInvalid,
 270 /* 170 - Ll category        */ CharacterIdentifierStart,
 271 /* 171 - Pi category        */ CharacterInvalid,
 272 /* 172 - Sm category        */ CharacterInvalid,
 273 /* 173 - Cf category        */ CharacterInvalid,
 274 /* 174 - So category        */ CharacterInvalid,
 275 /* 175 - Sk category        */ CharacterInvalid,
 276 /* 176 - So category        */ CharacterInvalid,
 277 /* 177 - Sm category        */ CharacterInvalid,
 278 /* 178 - No category        */ CharacterInvalid,
 279 /* 179 - No category        */ CharacterInvalid,
 280 /* 180 - Sk category        */ CharacterInvalid,
 281 /* 181 - Ll category        */ CharacterIdentifierStart,
 282 /* 182 - So category        */ CharacterInvalid,
 283 /* 183 - Po category        */ CharacterInvalid,
 284 /* 184 - Sk category        */ CharacterInvalid,
 285 /* 185 - No category        */ CharacterInvalid,
 286 /* 186 - Ll category        */ CharacterIdentifierStart,
 287 /* 187 - Pf category        */ CharacterInvalid,
 288 /* 188 - No category        */ CharacterInvalid,
 289 /* 189 - No category        */ CharacterInvalid,
 290 /* 190 - No category        */ CharacterInvalid,
 291 /* 191 - Po category        */ CharacterInvalid,
 292 /* 192 - Lu category        */ CharacterIdentifierStart,
 293 /* 193 - Lu category        */ CharacterIdentifierStart,
 294 /* 194 - Lu category        */ CharacterIdentifierStart,
 295 /* 195 - Lu category        */ CharacterIdentifierStart,
 296 /* 196 - Lu category        */ CharacterIdentifierStart,
 297 /* 197 - Lu category        */ CharacterIdentifierStart,
 298 /* 198 - Lu category        */ CharacterIdentifierStart,
 299 /* 199 - Lu category        */ CharacterIdentifierStart,
 300 /* 200 - Lu category        */ CharacterIdentifierStart,
 301 /* 201 - Lu category        */ CharacterIdentifierStart,
 302 /* 202 - Lu category        */ CharacterIdentifierStart,
 303 /* 203 - Lu category        */ CharacterIdentifierStart,
 304 /* 204 - Lu category        */ CharacterIdentifierStart,
 305 /* 205 - Lu category        */ CharacterIdentifierStart,
 306 /* 206 - Lu category        */ CharacterIdentifierStart,
 307 /* 207 - Lu category        */ CharacterIdentifierStart,
 308 /* 208 - Lu category        */ CharacterIdentifierStart,
 309 /* 209 - Lu category        */ CharacterIdentifierStart,
 310 /* 210 - Lu category        */ CharacterIdentifierStart,
 311 /* 211 - Lu category        */ CharacterIdentifierStart,
 312 /* 212 - Lu category        */ CharacterIdentifierStart,
 313 /* 213 - Lu category        */ CharacterIdentifierStart,
 314 /* 214 - Lu category        */ CharacterIdentifierStart,
 315 /* 215 - Sm category        */ CharacterInvalid,
 316 /* 216 - Lu category        */ CharacterIdentifierStart,
 317 /* 217 - Lu category        */ CharacterIdentifierStart,
 318 /* 218 - Lu category        */ CharacterIdentifierStart,
 319 /* 219 - Lu category        */ CharacterIdentifierStart,
 320 /* 220 - Lu category        */ CharacterIdentifierStart,
 321 /* 221 - Lu category        */ CharacterIdentifierStart,
 322 /* 222 - Lu category        */ CharacterIdentifierStart,
 323 /* 223 - Ll category        */ CharacterIdentifierStart,
 324 /* 224 - Ll category        */ CharacterIdentifierStart,
 325 /* 225 - Ll category        */ CharacterIdentifierStart,
 326 /* 226 - Ll category        */ CharacterIdentifierStart,
 327 /* 227 - Ll category        */ CharacterIdentifierStart,
 328 /* 228 - Ll category        */ CharacterIdentifierStart,
 329 /* 229 - Ll category        */ CharacterIdentifierStart,
 330 /* 230 - Ll category        */ CharacterIdentifierStart,
 331 /* 231 - Ll category        */ CharacterIdentifierStart,
 332 /* 232 - Ll category        */ CharacterIdentifierStart,
 333 /* 233 - Ll category        */ CharacterIdentifierStart,
 334 /* 234 - Ll category        */ CharacterIdentifierStart,
 335 /* 235 - Ll category        */ CharacterIdentifierStart,
 336 /* 236 - Ll category        */ CharacterIdentifierStart,
 337 /* 237 - Ll category        */ CharacterIdentifierStart,
 338 /* 238 - Ll category        */ CharacterIdentifierStart,
 339 /* 239 - Ll category        */ CharacterIdentifierStart,
 340 /* 240 - Ll category        */ CharacterIdentifierStart,
 341 /* 241 - Ll category        */ CharacterIdentifierStart,
 342 /* 242 - Ll category        */ CharacterIdentifierStart,
 343 /* 243 - Ll category        */ CharacterIdentifierStart,
 344 /* 244 - Ll category        */ CharacterIdentifierStart,
 345 /* 245 - Ll category        */ CharacterIdentifierStart,
 346 /* 246 - Ll category        */ CharacterIdentifierStart,
 347 /* 247 - Sm category        */ CharacterInvalid,
 348 /* 248 - Ll category        */ CharacterIdentifierStart,
 349 /* 249 - Ll category        */ CharacterIdentifierStart,
 350 /* 250 - Ll category        */ CharacterIdentifierStart,
 351 /* 251 - Ll category        */ CharacterIdentifierStart,
 352 /* 252 - Ll category        */ CharacterIdentifierStart,
 353 /* 253 - Ll category        */ CharacterIdentifierStart,
 354 /* 254 - Ll category        */ CharacterIdentifierStart,
 355 /* 255 - Ll category        */ CharacterIdentifierStart
 356 };
 357
 358 // This table provides the character that results from \X, where X is an ASCII code used directly as
 359 // the index into the table (see singleEscape). A table value of 0 means that more processing needs to be done.
     // The 0 entries are the control characters, the digits, 'u', 'x' and Delete.
 360 static const LChar singleCharacterEscapeValuesForASCII[128] = {
 361 /*   0 - Null               */ 0,
 362 /*   1 - Start of Heading   */ 0,
 363 /*   2 - Start of Text      */ 0,
 364 /*   3 - End of Text        */ 0,
 365 /*   4 - End of Transm.     */ 0,
 366 /*   5 - Enquiry            */ 0,
 367 /*   6 - Acknowledgment     */ 0,
 368 /*   7 - Bell               */ 0,
 369 /*   8 - Back Space         */ 0,
 370 /*   9 - Horizontal Tab     */ 0,
 371 /*  10 - Line Feed          */ 0,
 372 /*  11 - Vertical Tab       */ 0,
 373 /*  12 - Form Feed          */ 0,
 374 /*  13 - Carriage Return    */ 0,
 375 /*  14 - Shift Out          */ 0,
 376 /*  15 - Shift In           */ 0,
 377 /*  16 - Data Link Escape   */ 0,
 378 /*  17 - Device Control 1   */ 0,
 379 /*  18 - Device Control 2   */ 0,
 380 /*  19 - Device Control 3   */ 0,
 381 /*  20 - Device Control 4   */ 0,
 382 /*  21 - Negative Ack.      */ 0,
 383 /*  22 - Synchronous Idle   */ 0,
 384 /*  23 - End of Tr. Block   */ 0,
 385 /*  24 - Cancel             */ 0,
 386 /*  25 - End of Medium      */ 0,
 387 /*  26 - Substitute         */ 0,
 388 /*  27 - Escape             */ 0,
 389 /*  28 - File Separator     */ 0,
 390 /*  29 - Group Separator    */ 0,
 391 /*  30 - Record Separator   */ 0,
 392 /*  31 - Unit Separator     */ 0,
 393 /*  32 - Space              */ ' ',
 394 /*  33 - !                  */ '!',
 395 /*  34 - "                  */ '"',
 396 /*  35 - #                  */ '#',
 397 /*  36 - $                  */ '$',
 398 /*  37 - %                  */ '%',
 399 /*  38 - &                  */ '&',
 400 /*  39 - '                  */ '\'',
 401 /*  40 - (                  */ '(',
 402 /*  41 - )                  */ ')',
 403 /*  42 - *                  */ '*',
 404 /*  43 - +                  */ '+',
 405 /*  44 - ,                  */ ',',
 406 /*  45 - -                  */ '-',
 407 /*  46 - .                  */ '.',
 408 /*  47 - /                  */ '/',
 409 /*  48 - 0                  */ 0,
 410 /*  49 - 1                  */ 0,
 411 /*  50 - 2                  */ 0,
 412 /*  51 - 3                  */ 0,
 413 /*  52 - 4                  */ 0,
 414 /*  53 - 5                  */ 0,
 415 /*  54 - 6                  */ 0,
 416 /*  55 - 7                  */ 0,
 417 /*  56 - 8                  */ 0,
 418 /*  57 - 9                  */ 0,
 419 /*  58 - :                  */ ':',
 420 /*  59 - ;                  */ ';',
 421 /*  60 - <                  */ '<',
 422 /*  61 - =                  */ '=',
 423 /*  62 - >                  */ '>',
 424 /*  63 - ?                  */ '?',
 425 /*  64 - @                  */ '@',
 426 /*  65 - A                  */ 'A',
 427 /*  66 - B                  */ 'B',
 428 /*  67 - C                  */ 'C',
 429 /*  68 - D                  */ 'D',
 430 /*  69 - E                  */ 'E',
 431 /*  70 - F                  */ 'F',
 432 /*  71 - G                  */ 'G',
 433 /*  72 - H                  */ 'H',
 434 /*  73 - I                  */ 'I',
 435 /*  74 - J                  */ 'J',
 436 /*  75 - K                  */ 'K',
 437 /*  76 - L                  */ 'L',
 438 /*  77 - M                  */ 'M',
 439 /*  78 - N                  */ 'N',
 440 /*  79 - O                  */ 'O',
 441 /*  80 - P                  */ 'P',
 442 /*  81 - Q                  */ 'Q',
 443 /*  82 - R                  */ 'R',
 444 /*  83 - S                  */ 'S',
 445 /*  84 - T                  */ 'T',
 446 /*  85 - U                  */ 'U',
 447 /*  86 - V                  */ 'V',
 448 /*  87 - W                  */ 'W',
 449 /*  88 - X                  */ 'X',
 450 /*  89 - Y                  */ 'Y',
 451 /*  90 - Z                  */ 'Z',
 452 /*  91 - [                  */ '[',
 453 /*  92 - \                  */ '\\',
 454 /*  93 - ]                  */ ']',
 455 /*  94 - ^                  */ '^',
 456 /*  95 - _                  */ '_',
 457 /*  96 - `                  */ '`',
 458 /*  97 - a                  */ 'a',
 459 /*  98 - b                  */ 0x08,
 460 /*  99 - c                  */ 'c',
 461 /* 100 - d                  */ 'd',
 462 /* 101 - e                  */ 'e',
 463 /* 102 - f                  */ 0x0C,
 464 /* 103 - g                  */ 'g',
 465 /* 104 - h                  */ 'h',
 466 /* 105 - i                  */ 'i',
 467 /* 106 - j                  */ 'j',
 468 /* 107 - k                  */ 'k',
 469 /* 108 - l                  */ 'l',
 470 /* 109 - m                  */ 'm',
 471 /* 110 - n                  */ 0x0A,
 472 /* 111 - o                  */ 'o',
 473 /* 112 - p                  */ 'p',
 474 /* 113 - q                  */ 'q',
 475 /* 114 - r                  */ 0x0D,
 476 /* 115 - s                  */ 's',
 477 /* 116 - t                  */ 0x09,
 478 /* 117 - u                  */ 0,
 479 /* 118 - v                  */ 0x0B,
 480 /* 119 - w                  */ 'w',
 481 /* 120 - x                  */ 0,
 482 /* 121 - y                  */ 'y',
 483 /* 122 - z                  */ 'z',
 484 /* 123 - {                  */ '{',
 485 /* 124 - |                  */ '|',
 486 /* 125 - }                  */ '}',
 487 /* 126 - ~                  */ '~',
 488 /* 127 - Delete             */ 0
 489 };
 490
 491 template <typename T>
 492 Lexer<T>::Lexer(VM* vm, JSParserStrictness strictness)
 493     : m_isReparsing(false)
 494     , m_vm(vm)
 495     , m_parsingBuiltinFunction(strictness == JSParseBuiltin)
 496 {
 497 }
 498
 499 template <typename T>
 500 Lexer<T>::~Lexer()
 501 {
 502 }
 503
 504 template <typename T>
 505 String Lexer<T>::invalidCharacterMessage() const
 506 {
 507     switch (m_current) {
 508     case 0:
 509         return "Invalid character: '\\0'";
 510     case 10:
 511         return "Invalid character: '\\n'";
 512     case 11:
 513         return "Invalid character: '\\v'";
 514     case 13:
 515         return "Invalid character: '\\r'";
 516     case 35:
 517         return "Invalid character: '#'";
 518     case 64:
 519         return "Invalid character: '@'";
 520     case 96:
 521         return "Invalid character: '`'";
 522     default:
 523         return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current)).impl();
 524     }
 525 }
 526
 527 template <typename T>
 528 ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
 529 {
 530     ASSERT(m_code <= m_codeEnd);
 531     return m_code;
 532 }
 533
 534 template <typename T>
 535 void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
 536 {
 537     m_arena = &arena->identifierArena();
 538
 539     m_lineNumber = source.firstLine();
 540     m_lastToken = -1;
 541
 542     const String& sourceString = source.provider()->source();
 543
 544     if (!sourceString.isNull())
 545         setCodeStart(sourceString.impl());
 546     else
 547         m_codeStart = 0;
 548
 549     m_source = &source;
 550     m_sourceOffset = source.startOffset();
 551     m_codeStartPlusOffset = m_codeStart + source.startOffset();
 552     m_code = m_codeStartPlusOffset;
 553     m_codeEnd = m_codeStart + source.endOffset();
 554     m_error = false;
 555     m_atLineStart = true;
 556     m_lineStart = m_code;
 557     m_lexErrorMessage = String();
 558
 559     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
 560     m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
 561
 562     if (LIKELY(m_code < m_codeEnd))
 563         m_current = *m_code;
 564     else
 565         m_current = 0;
 566     ASSERT(currentOffset() == source.startOffset());
 567 }
 568
 569 template <typename T>
 570 template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
 571 {
 572     m_code += shiftAmount;
 573     ASSERT(currentOffset() >= currentLineStartOffset());
 574     m_current = *m_code;
 575 }
 576
 577 template <typename T>
 578 ALWAYS_INLINE void Lexer<T>::shift()
 579 {
 580     // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
 581     m_current = 0;
 582     ++m_code;
 583     if (LIKELY(m_code < m_codeEnd))
 584         m_current = *m_code;
 585 }
 586
 587 template <typename T>
 588 ALWAYS_INLINE bool Lexer<T>::atEnd() const
 589 {
 590     ASSERT(!m_current || m_code < m_codeEnd);
 591     return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
 592 }
 593
 594 template <typename T>
 595 ALWAYS_INLINE T Lexer<T>::peek(int offset) const
 596 {
 597     ASSERT(offset > 0 && offset < 5);
 598     const T* code = m_code + offset;
 599     return (code < m_codeEnd) ? *code : 0;
 600 }
 601
 602 template <typename T>
 603 typename Lexer<T>::UnicodeHexValue Lexer<T>::parseFourDigitUnicodeHex()
 604 {
 605     T char1 = peek(1);
 606     T char2 = peek(2);
 607     T char3 = peek(3);
 608
 609     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
 610         return UnicodeHexValue((m_code + 4) >= m_codeEnd ? UnicodeHexValue::IncompleteHex : UnicodeHexValue::InvalidHex);
 611
 612     int result = convertUnicode(m_current, char1, char2, char3);
 613     shift();
 614     shift();
 615     shift();
 616     shift();
 617     return UnicodeHexValue(result);
 618 }
 619
 620 template <typename T>
 621 void Lexer<T>::shiftLineTerminator()
 622 {
 623     ASSERT(isLineTerminator(m_current));
 624
 625     m_positionBeforeLastNewline = currentPosition();
 626     T prev = m_current;
 627     shift();
 628
 629     // Allow both CRLF and LFCR.
 630     if (prev + m_current == '\n' + '\r')
 631         shift();
 632
 633     ++m_lineNumber;
 634 }
 635
 636 template <typename T>
 637 ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
 638 {
 639     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
 640 }
 641
 642 static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
 643 {
 644     return U_GET_GC_MASK(c) & U_GC_L_MASK;
 645 }
 646
 647 static ALWAYS_INLINE bool isLatin1(LChar)
 648 {
 649     return true;
 650 }
 651
 652 static ALWAYS_INLINE bool isLatin1(UChar c)
 653 {
 654     return c < 256;
 655 }
 656
 657 static inline bool isIdentStart(LChar c)
 658 {
 659     return typesOfLatin1Characters[c] == CharacterIdentifierStart;
 660 }
 661
 662 static inline bool isIdentStart(UChar c)
 663 {
 664     return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
 665 }
 666
 667 static NEVER_INLINE bool isNonLatin1IdentPart(int c)
 668 {
 669     return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D;
 670 }
 671
 672 static ALWAYS_INLINE bool isIdentPart(LChar c)
 673 {
 674     // Character types are divided into two groups depending on whether they can be part of an
 675     // identifier or not. Those whose type value is less or equal than CharacterNumber can be
 676     // part of an identifier. (See the CharacterType definition for more details.)
 677     return typesOfLatin1Characters[c] <= CharacterNumber;
 678 }
 679
 680 static ALWAYS_INLINE bool isIdentPart(UChar c)
 681 {
 682     return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
 683 }
 684
 685 static inline LChar singleEscape(int c)
 686 {
 687     if (c < 128) {
 688         ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
 689         return singleCharacterEscapeValuesForASCII[c];
 690     }
 691     return 0;
 692 }
 693
 694 template <typename T>
 695 inline void Lexer<T>::record8(int c)
 696 {
 697     ASSERT(c >= 0);
 698     ASSERT(c <= 0xFF);
 699     m_buffer8.append(static_cast<LChar>(c));
 700 }
 701
 702 template <typename T>
 703 inline void assertCharIsIn8BitRange(T c)
 704 {
 705     UNUSED_PARAM(c);
 706     ASSERT(c >= 0);
 707     ASSERT(c <= 0xFF);
 708 }
 709
 710 template <>
 711 inline void assertCharIsIn8BitRange(UChar c)
 712 {
 713     UNUSED_PARAM(c);
 714     ASSERT(c <= 0xFF);
 715 }
 716
 717 template <>
 718 inline void assertCharIsIn8BitRange(LChar)
 719 {
 720 }
 721
 722 template <typename T>
 723 inline void Lexer<T>::append8(const T* p, size_t length)
 724 {
 725     size_t currentSize = m_buffer8.size();
 726     m_buffer8.grow(currentSize + length);
 727     LChar* rawBuffer = m_buffer8.data() + currentSize;
 728
 729     for (size_t i = 0; i < length; i++) {
 730         T c = p[i];
 731         assertCharIsIn8BitRange(c);
 732         rawBuffer[i] = c;
 733     }
 734 }
 735
 736 template <typename T>
 737 inline void Lexer<T>::append16(const LChar* p, size_t length)
 738 {
 739     size_t currentSize = m_buffer16.size();
 740     m_buffer16.grow(currentSize + length);
 741     UChar* rawBuffer = m_buffer16.data() + currentSize;
 742
 743     for (size_t i = 0; i < length; i++)
 744         rawBuffer[i] = p[i];
 745 }
 746
 747 template <typename T>
 748 inline void Lexer<T>::record16(T c)
 749 {
 750     m_buffer16.append(c);
 751 }
 752
 753 template <typename T>
 754 inline void Lexer<T>::record16(int c)
 755 {
 756     ASSERT(c >= 0);
 757     ASSERT(c <= static_cast<int>(USHRT_MAX));
 758     m_buffer16.append(static_cast<UChar>(c));
 759 }
 760
 761 #if !ASSERT_DISABLED
 762 bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
 763 {
 764     if (!ident)
 765         return true;
 766     /* Just block any use of suspicious identifiers.  This is intended to
 767      * be used as a safety net while implementing builtins.
 768      */
 769     if (*ident == vm.propertyNames->builtinNames().callPublicName())
 770         return false;
 771     if (*ident == vm.propertyNames->builtinNames().applyPublicName())
 772         return false;
 773     if (*ident == vm.propertyNames->eval)
 774         return false;
 775     if (*ident == vm.propertyNames->Function)
 776         return false;
 777     return true;
 778 }
 779 #endif
 780
 781 template <>
 782 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
 783 {
 784     const ptrdiff_t remaining = m_codeEnd - m_code;
 785     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
 786         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
 787         if (keyword != IDENT) {
 788             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
 789             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
 790         }
 791     }
 792
 793     bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
 794     if (isPrivateName)
 795         shift();
 796
 797     const LChar* identifierStart = currentSourcePtr();
 798     unsigned identifierLineStart = currentLineStartOffset();
 799
 800     while (isIdentPart(m_current))
 801         shift();
 802
 803     if (UNLIKELY(m_current == '\\')) {
 804         setOffsetFromSourcePtr(identifierStart, identifierLineStart);
 805         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
 806     }
 807
 808     const Identifier* ident = 0;
 809
 810     if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
 811         int identifierLength = currentSourcePtr() - identifierStart;
 812         ident = makeIdentifier(identifierStart, identifierLength);
 813         if (m_parsingBuiltinFunction) {
 814             if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
 815                 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
 816                 return ERRORTOK;
 817             }
 818             if (isPrivateName)
 819                 ident = m_vm->propertyNames->getPrivateName(*ident);
 820             else if (*ident == m_vm->propertyNames->undefinedKeyword)
 821                 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
 822             if (!ident)
 823                 return INVALID_PRIVATE_NAME_ERRORTOK;
 824         }
 825         tokenData->ident = ident;
 826     } else
 827         tokenData->ident = 0;
 828
 829     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
 830         ASSERT(shouldCreateIdentifier);
 831         if (remaining < maxTokenLength) {
 832             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
 833             ASSERT((remaining < maxTokenLength) || !entry);
 834             if (!entry)
 835                 return IDENT;
 836             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
 837             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
 838         }
 839         return IDENT;
 840     }
 841
 842     return IDENT;
 843 }
 844
 845 template <>
 846 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
 847 {
 848     const ptrdiff_t remaining = m_codeEnd - m_code;
 849     if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
 850         JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
 851         if (keyword != IDENT) {
 852             ASSERT((!shouldCreateIdentifier) || tokenData->ident);
 853             return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
 854         }
 855     }
 856
 857     bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
 858     if (isPrivateName)
 859         shift();
 860
 861     const UChar* identifierStart = currentSourcePtr();
 862     int identifierLineStart = currentLineStartOffset();
 863
 864     UChar orAllChars = 0;
 865
 866     while (isIdentPart(m_current)) {
 867         orAllChars |= m_current;
 868         shift();
 869     }
 870
 871     if (UNLIKELY(m_current == '\\')) {
 872         ASSERT(!isPrivateName);
 873         setOffsetFromSourcePtr(identifierStart, identifierLineStart);
 874         return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
 875     }
 876
 877     bool isAll8Bit = false;
 878
 879     if (!(orAllChars & ~0xff))
 880         isAll8Bit = true;
 881
 882     const Identifier* ident = 0;
 883
 884     if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
 885         int identifierLength = currentSourcePtr() - identifierStart;
 886         if (isAll8Bit)
 887             ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
 888         else
 889             ident = makeIdentifier(identifierStart, identifierLength);
 890         if (m_parsingBuiltinFunction) {
 891             if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
 892                 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
 893                 return ERRORTOK;
 894             }
 895             if (isPrivateName)
 896                 ident = m_vm->propertyNames->getPrivateName(*ident);
 897             else if (*ident == m_vm->propertyNames->undefinedKeyword)
 898                 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
 899             if (!ident)
 900                 return INVALID_PRIVATE_NAME_ERRORTOK;
 901         }
 902         tokenData->ident = ident;
 903     } else
 904         tokenData->ident = 0;
 905
 906     if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
 907         ASSERT(shouldCreateIdentifier);
 908         if (remaining < maxTokenLength) {
 909             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
 910             ASSERT((remaining < maxTokenLength) || !entry);
 911             if (!entry)
 912                 return IDENT;
 913             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
 914             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
 915         }
 916         return IDENT;
 917     }
 918
 919     return IDENT;
 920 }
 921
 922 template <typename T>
 923 template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
 924 {
 925     const ptrdiff_t remaining = m_codeEnd - m_code;
 926     const T* identifierStart = currentSourcePtr();
 927     bool bufferRequired = false;
 928
 929     while (true) {
 930         if (LIKELY(isIdentPart(m_current))) {
 931             shift();
 932             continue;
 933         }
 934         if (LIKELY(m_current != '\\'))
 935             break;
 936
 937         // \uXXXX unicode characters.
 938         bufferRequired = true;
 939         if (identifierStart != currentSourcePtr())
 940             m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
 941         shift();
 942         if (UNLIKELY(m_current != 'u'))
 943             return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
 944         shift();
 945         UnicodeHexValue character = parseFourDigitUnicodeHex();
 946         if (UNLIKELY(!character.isValid()))
 947             return character.valueType() == UnicodeHexValue::IncompleteHex ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
 948         UChar ucharacter = static_cast<UChar>(character.value());
 949         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
 950             return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
 951         if (shouldCreateIdentifier)
 952             record16(ucharacter);
 953         identifierStart = currentSourcePtr();
 954     }
 955
 956     int identifierLength;
 957     const Identifier* ident = 0;
 958     if (shouldCreateIdentifier) {
 959         if (!bufferRequired) {
 960             identifierLength = currentSourcePtr() - identifierStart;
 961             ident = makeIdentifier(identifierStart, identifierLength);
 962         } else {
 963             if (identifierStart != currentSourcePtr())
 964                 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
 965             ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
 966         }
 967
 968         tokenData->ident = ident;
 969     } else
 970         tokenData->ident = 0;
 971
 972     if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
 973         ASSERT(shouldCreateIdentifier);
 974         // Keywords must not be recognized if there was an \uXXXX in the identifier.
 975         if (remaining < maxTokenLength) {
 976             const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
 977             ASSERT((remaining < maxTokenLength) || !entry);
 978             if (!entry)
 979                 return IDENT;
 980             JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
 981             return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
 982         }
 983         return IDENT;
 984     }
 985
 986     m_buffer16.resize(0);
 987     return IDENT;
 988 }
 989
 990 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
 991 {
 992     return character < 0xE;
 993 }
 994
 995 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
 996 {
 997     return character < 0xE || character > 0xFF;
 998 }
 999
1000 template <typename T>
1001 template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1002 {
1003     int startingOffset = currentOffset();
1004     int startingLineStartOffset = currentLineStartOffset();
1005     int startingLineNumber = lineNumber();
1006     T stringQuoteCharacter = m_current;
1007     shift();
1008
1009     const T* stringStart = currentSourcePtr();
1010
1011     while (m_current != stringQuoteCharacter) {
1012         if (UNLIKELY(m_current == '\\')) {
1013             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1014                 append8(stringStart, currentSourcePtr() - stringStart);
1015             shift();
1016
1017             LChar escape = singleEscape(m_current);
1018
1019             // Most common escape sequences first
1020             if (escape) {
1021                 if (shouldBuildStrings)
1022                     record8(escape);
1023                 shift();
1024             } else if (UNLIKELY(isLineTerminator(m_current)))
1025                 shiftLineTerminator();
1026             else if (m_current == 'x') {
1027                 shift();
1028                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1029                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
1030                     return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1031                 }
1032                 T prev = m_current;
1033                 shift();
1034                 if (shouldBuildStrings)
1035                     record8(convertHex(prev, m_current));
1036                 shift();
1037             } else {
1038                 setOffset(startingOffset, startingLineStartOffset);
1039                 setLineNumber(startingLineNumber);
1040                 m_buffer8.resize(0);
1041                 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1042             }
1043             stringStart = currentSourcePtr();
1044             continue;
1045         }
1046
1047         if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1048             setOffset(startingOffset, startingLineStartOffset);
1049             setLineNumber(startingLineNumber);
1050             m_buffer8.resize(0);
1051             return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1052         }
1053
1054         shift();
1055     }
1056
1057     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1058         append8(stringStart, currentSourcePtr() - stringStart);
1059     if (shouldBuildStrings) {
1060         tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1061         m_buffer8.resize(0);
1062     } else
1063         tokenData->ident = 0;
1064
1065     return StringParsedSuccessfully;
1066 }
1067
1068 template <typename T>
1069 template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
1070 {
1071     T stringQuoteCharacter = m_current;
1072     shift();
1073
1074     const T* stringStart = currentSourcePtr();
1075
1076     while (m_current != stringQuoteCharacter) {
1077         if (UNLIKELY(m_current == '\\')) {
1078             if (stringStart != currentSourcePtr() && shouldBuildStrings)
1079                 append16(stringStart, currentSourcePtr() - stringStart);
1080             shift();
1081
1082             LChar escape = singleEscape(m_current);
1083
1084             // Most common escape sequences first
1085             if (escape) {
1086                 if (shouldBuildStrings)
1087                     record16(escape);
1088                 shift();
1089             } else if (UNLIKELY(isLineTerminator(m_current)))
1090                 shiftLineTerminator();
1091             else if (m_current == 'x') {
1092                 shift();
1093                 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1094                     m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
1095                     return StringCannotBeParsed;
1096                 }
1097                 T prev = m_current;
1098                 shift();
1099                 if (shouldBuildStrings)
1100                     record16(convertHex(prev, m_current));
1101                 shift();
1102             } else if (m_current == 'u') {
1103                 shift();
1104                 UnicodeHexValue character = parseFourDigitUnicodeHex();
1105                 if (character.isValid()) {
1106                     if (shouldBuildStrings)
1107                         record16(character.value());
1108                 } else if (m_current == stringQuoteCharacter) {
1109                     if (shouldBuildStrings)
1110                         record16('u');
1111                 } else {
1112                     m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
1113                     return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed;
1114                 }
1115             } else if (strictMode && isASCIIDigit(m_current)) {
1116                 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1117                 int character1 = m_current;
1118                 shift();
1119                 if (character1 != '0' || isASCIIDigit(m_current)) {
1120                     m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
1121                     return StringCannotBeParsed;
1122                 }
1123                 if (shouldBuildStrings)
1124                     record16(0);
1125             } else if (!strictMode && isASCIIOctalDigit(m_current)) {
1126                 // Octal character sequences
1127                 T character1 = m_current;
1128                 shift();
1129                 if (isASCIIOctalDigit(m_current)) {
1130                     // Two octal characters
1131                     T character2 = m_current;
1132                     shift();
1133                     if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1134                         if (shouldBuildStrings)
1135                             record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1136                         shift();
1137                     } else {
1138                         if (shouldBuildStrings)
1139                             record16((character1 - '0') * 8 + character2 - '0');
1140                     }
1141                 } else {
1142                     if (shouldBuildStrings)
1143                         record16(character1 - '0');
1144                 }
1145             } else if (!atEnd()) {
1146                 if (shouldBuildStrings)
1147                     record16(m_current);
1148                 shift();
1149             } else {
1150                 m_lexErrorMessage = "Unterminated string constant";
1151                 return StringUnterminated;
1152             }
1153
1154             stringStart = currentSourcePtr();
1155             continue;
1156         }
1157         // Fast check for characters that require special handling.
1158         // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1159         // as possible, and lets through all common ASCII characters.
1160         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1161             // New-line or end of input is not allowed
1162             if (atEnd() || isLineTerminator(m_current)) {
1163                 m_lexErrorMessage = "Unexpected EOF";
1164                 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1165             }
1166             // Anything else is just a normal character
1167         }
1168         shift();
1169     }
1170
1171     if (currentSourcePtr() != stringStart && shouldBuildStrings)
1172         append16(stringStart, currentSourcePtr() - stringStart);
1173     if (shouldBuildStrings)
1174         tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1175     else
1176         tokenData->ident = 0;
1177
1178     m_buffer16.resize(0);
1179     return StringParsedSuccessfully;
1180 }
1181
1182 template <typename T>
1183 ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
1184 {
1185     // Optimization: most hexadecimal values fit into 4 bytes.
1186     uint32_t hexValue = 0;
1187     int maximumDigits = 7;
1188
1189     // Shift out the 'x' prefix.
1190     shift();
1191
1192     do {
1193         hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1194         shift();
1195         --maximumDigits;
1196     } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1197
1198     if (maximumDigits >= 0) {
1199         returnValue = hexValue;
1200         return;
1201     }
1202
1203     // No more place in the hexValue buffer.
1204     // The values are shifted out and placed into the m_buffer8 vector.
1205     for (int i = 0; i < 8; ++i) {
1206          int digit = hexValue >> 28;
1207          if (digit < 10)
1208              record8(digit + '0');
1209          else
1210              record8(digit - 10 + 'a');
1211          hexValue <<= 4;
1212     }
1213
1214     while (isASCIIHexDigit(m_current)) {
1215         record8(m_current);
1216         shift();
1217     }
1218
1219     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1220 }
1221
1222 template <typename T>
1223 ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
1224 {
1225     // Optimization: most octal values fit into 4 bytes.
1226     uint32_t octalValue = 0;
1227     int maximumDigits = 9;
1228     // Temporary buffer for the digits. Makes easier
1229     // to reconstruct the input characters when needed.
1230     LChar digits[10];
1231
1232     do {
1233         octalValue = octalValue * 8 + (m_current - '0');
1234         digits[maximumDigits] = m_current;
1235         shift();
1236         --maximumDigits;
1237     } while (isASCIIOctalDigit(m_current) && maximumDigits >= 0);
1238
1239     if (!isASCIIDigit(m_current) && maximumDigits >= 0) {
1240         returnValue = octalValue;
1241         return true;
1242     }
1243
1244     for (int i = 9; i > maximumDigits; --i)
1245          record8(digits[i]);
1246
1247     while (isASCIIOctalDigit(m_current)) {
1248         record8(m_current);
1249         shift();
1250     }
1251
1252     if (isASCIIDigit(m_current))
1253         return false;
1254
1255     returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
1256     return true;
1257 }
1258
1259 template <typename T>
1260 ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
1261 {
1262     // Optimization: most decimal values fit into 4 bytes.
1263     uint32_t decimalValue = 0;
1264
1265     // Since parseOctal may be executed before parseDecimal,
1266     // the m_buffer8 may hold ascii digits.
1267     if (!m_buffer8.size()) {
1268         int maximumDigits = 9;
1269         // Temporary buffer for the digits. Makes easier
1270         // to reconstruct the input characters when needed.
1271         LChar digits[10];
1272
1273         do {
1274             decimalValue = decimalValue * 10 + (m_current - '0');
1275             digits[maximumDigits] = m_current;
1276             shift();
1277             --maximumDigits;
1278         } while (isASCIIDigit(m_current) && maximumDigits >= 0);
1279
1280         if (maximumDigits >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
1281             returnValue = decimalValue;
1282             return true;
1283         }
1284
1285         for (int i = 9; i > maximumDigits; --i)
1286             record8(digits[i]);
1287     }
1288
1289     while (isASCIIDigit(m_current)) {
1290         record8(m_current);
1291         shift();
1292     }
1293
1294     return false;
1295 }
1296
1297 template <typename T>
1298 ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
1299 {
1300     record8('.');
1301     while (isASCIIDigit(m_current)) {
1302         record8(m_current);
1303         shift();
1304     }
1305 }
1306
1307 template <typename T>
1308 ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1309 {
1310     record8('e');
1311     shift();
1312     if (m_current == '+' || m_current == '-') {
1313         record8(m_current);
1314         shift();
1315     }
1316
1317     if (!isASCIIDigit(m_current))
1318         return false;
1319
1320     do {
1321         record8(m_current);
1322         shift();
1323     } while (isASCIIDigit(m_current));
1324     return true;
1325 }
1326
1327 template <typename T>
1328 ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1329 {
1330     while (true) {
1331         while (UNLIKELY(m_current == '*')) {
1332             shift();
1333             if (m_current == '/') {
1334                 shift();
1335                 return true;
1336             }
1337         }
1338
1339         if (atEnd())
1340             return false;
1341
1342         if (isLineTerminator(m_current)) {
1343             shiftLineTerminator();
1344             m_terminator = true;
1345         } else
1346             shift();
1347     }
1348 }
1349
1350 template <typename T>
1351 bool Lexer<T>::nextTokenIsColon()
1352 {
1353     const T* code = m_code;
1354     while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1355         code++;
1356
1357     return code < m_codeEnd && *code == ':';
1358 }
1359
1360 template <typename T>
1361 JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
1362 {
1363     JSTokenData* tokenData = &tokenRecord->m_data;
1364     JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1365     ASSERT(!m_error);
1366     ASSERT(m_buffer8.isEmpty());
1367     ASSERT(m_buffer16.isEmpty());
1368
1369     JSTokenType token = ERRORTOK;
1370     m_terminator = false;
1371
1372 start:
1373     while (isWhiteSpace(m_current))
1374         shift();
1375
1376     if (atEnd())
1377         return EOFTOK;
1378
1379     tokenLocation->startOffset = currentOffset();
1380     ASSERT(currentOffset() >= currentLineStartOffset());
1381     tokenRecord->m_startPosition = currentPosition();
1382
1383     CharacterType type;
1384     if (LIKELY(isLatin1(m_current)))
1385         type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1386     else if (isNonLatin1IdentStart(m_current))
1387         type = CharacterIdentifierStart;
1388     else if (isLineTerminator(m_current))
1389         type = CharacterLineTerminator;
1390     else
1391         type = CharacterInvalid;
1392
1393     switch (type) {
1394     case CharacterGreater:
1395         shift();
1396         if (m_current == '>') {
1397             shift();
1398             if (m_current == '>') {
1399                 shift();
1400                 if (m_current == '=') {
1401                     shift();
1402                     token = URSHIFTEQUAL;
1403                     break;
1404                 }
1405                 token = URSHIFT;
1406                 break;
1407             }
1408             if (m_current == '=') {
1409                 shift();
1410                 token = RSHIFTEQUAL;
1411                 break;
1412             }
1413             token = RSHIFT;
1414             break;
1415         }
1416         if (m_current == '=') {
1417             shift();
1418             token = GE;
1419             break;
1420         }
1421         token = GT;
1422         break;
1423     case CharacterEqual:
1424         shift();
1425         if (m_current == '=') {
1426             shift();
1427             if (m_current == '=') {
1428                 shift();
1429                 token = STREQ;
1430                 break;
1431             }
1432             token = EQEQ;
1433             break;
1434         }
1435         token = EQUAL;
1436         break;
1437     case CharacterLess:
1438         shift();
1439         if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1440             // <!-- marks the beginning of a line comment (for www usage)
1441             goto inSingleLineComment;
1442         }
1443         if (m_current == '<') {
1444             shift();
1445             if (m_current == '=') {
1446                 shift();
1447                 token = LSHIFTEQUAL;
1448                 break;
1449             }
1450             token = LSHIFT;
1451             break;
1452         }
1453         if (m_current == '=') {
1454             shift();
1455             token = LE;
1456             break;
1457         }
1458         token = LT;
1459         break;
1460     case CharacterExclamationMark:
1461         shift();
1462         if (m_current == '=') {
1463             shift();
1464             if (m_current == '=') {
1465                 shift();
1466                 token = STRNEQ;
1467                 break;
1468             }
1469             token = NE;
1470             break;
1471         }
1472         token = EXCLAMATION;
1473         break;
1474     case CharacterAdd:
1475         shift();
1476         if (m_current == '+') {
1477             shift();
1478             token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1479             break;
1480         }
1481         if (m_current == '=') {
1482             shift();
1483             token = PLUSEQUAL;
1484             break;
1485         }
1486         token = PLUS;
1487         break;
1488     case CharacterSub:
1489         shift();
1490         if (m_current == '-') {
1491             shift();
1492             if (m_atLineStart && m_current == '>') {
1493                 shift();
1494                 goto inSingleLineComment;
1495             }
1496             token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
1497             break;
1498         }
1499         if (m_current == '=') {
1500             shift();
1501             token = MINUSEQUAL;
1502             break;
1503         }
1504         token = MINUS;
1505         break;
1506     case CharacterMultiply:
1507         shift();
1508         if (m_current == '=') {
1509             shift();
1510             token = MULTEQUAL;
1511             break;
1512         }
1513         token = TIMES;
1514         break;
1515     case CharacterSlash:
1516         shift();
1517         if (m_current == '/') {
1518             shift();
1519             goto inSingleLineComment;
1520         }
1521         if (m_current == '*') {
1522             shift();
1523             if (parseMultilineComment())
1524                 goto start;
1525             m_lexErrorMessage = "Multiline comment was not closed properly";
1526             token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
1527             goto returnError;
1528         }
1529         if (m_current == '=') {
1530             shift();
1531             token = DIVEQUAL;
1532             break;
1533         }
1534         token = DIVIDE;
1535         break;
1536     case CharacterAnd:
1537         shift();
1538         if (m_current == '&') {
1539             shift();
1540             token = AND;
1541             break;
1542         }
1543         if (m_current == '=') {
1544             shift();
1545             token = ANDEQUAL;
1546             break;
1547         }
1548         token = BITAND;
1549         break;
1550     case CharacterXor:
1551         shift();
1552         if (m_current == '=') {
1553             shift();
1554             token = XOREQUAL;
1555             break;
1556         }
1557         token = BITXOR;
1558         break;
1559     case CharacterModulo:
1560         shift();
1561         if (m_current == '=') {
1562             shift();
1563             token = MODEQUAL;
1564             break;
1565         }
1566         token = MOD;
1567         break;
1568     case CharacterOr:
1569         shift();
1570         if (m_current == '=') {
1571             shift();
1572             token = OREQUAL;
1573             break;
1574         }
1575         if (m_current == '|') {
1576             shift();
1577             token = OR;
1578             break;
1579         }
1580         token = BITOR;
1581         break;
1582     case CharacterOpenParen:
1583         token = OPENPAREN;
1584         shift();
1585         break;
1586     case CharacterCloseParen:
1587         token = CLOSEPAREN;
1588         shift();
1589         break;
1590     case CharacterOpenBracket:
1591         token = OPENBRACKET;
1592         shift();
1593         break;
1594     case CharacterCloseBracket:
1595         token = CLOSEBRACKET;
1596         shift();
1597         break;
1598     case CharacterComma:
1599         token = COMMA;
1600         shift();
1601         break;
1602     case CharacterColon:
1603         token = COLON;
1604         shift();
1605         break;
1606     case CharacterQuestion:
1607         token = QUESTION;
1608         shift();
1609         break;
1610     case CharacterTilde:
1611         token = TILDE;
1612         shift();
1613         break;
1614     case CharacterSemicolon:
1615         shift();
1616         token = SEMICOLON;
1617         break;
1618     case CharacterOpenBrace:
1619         tokenData->line = lineNumber();
1620         tokenData->offset = currentOffset();
1621         tokenData->lineStartOffset = currentLineStartOffset();
1622         ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1623         shift();
1624         token = OPENBRACE;
1625         break;
1626     case CharacterCloseBrace:
1627         tokenData->line = lineNumber();
1628         tokenData->offset = currentOffset();
1629         tokenData->lineStartOffset = currentLineStartOffset();
1630         ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1631         shift();
1632         token = CLOSEBRACE;
1633         break;
1634     case CharacterDot:
1635         shift();
1636         if (!isASCIIDigit(m_current)) {
1637             if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
1638                 shift();
1639                 shift();
1640                 token = DOTDOTDOT;
1641                 break;
1642             }
1643             token = DOT;
1644             break;
1645         }
1646         goto inNumberAfterDecimalPoint;
1647     case CharacterZero:
1648         shift();
1649         if ((m_current | 0x20) == 'x') {
1650             if (!isASCIIHexDigit(peek(1))) {
1651                 m_lexErrorMessage = "No hexadecimal digits after '0x'";
1652                 token = INVALID_HEX_NUMBER_ERRORTOK;
1653                 goto returnError;
1654             }
1655             parseHex(tokenData->doubleValue);
1656             if (isIdentStart(m_current)) {
1657                 m_lexErrorMessage = "No space between hexadecimal literal and identifier";
1658                 token = INVALID_HEX_NUMBER_ERRORTOK;
1659                 goto returnError;
1660             }
1661             token = NUMBER;
1662             m_buffer8.resize(0);
1663             break;
1664         }
1665
1666         record8('0');
1667         if (isASCIIOctalDigit(m_current)) {
1668             if (parseOctal(tokenData->doubleValue)) {
1669                 if (strictMode) {
1670                     m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
1671                     token = INVALID_OCTAL_NUMBER_ERRORTOK;
1672                     goto returnError;
1673                 }
1674                 token = NUMBER;
1675             }
1676         }
1677         FALLTHROUGH;
1678     case CharacterNumber:
1679         if (LIKELY(token != NUMBER)) {
1680             if (!parseDecimal(tokenData->doubleValue)) {
1681                 if (m_current == '.') {
1682                     shift();
1683 inNumberAfterDecimalPoint:
1684                     parseNumberAfterDecimalPoint();
1685                 }
1686                 if ((m_current | 0x20) == 'e') {
1687                     if (!parseNumberAfterExponentIndicator()) {
1688                         m_lexErrorMessage = "Non-number found after exponent indicator";
1689                         token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
1690                         goto returnError;
1691                     }
1692                 }
1693                 size_t parsedLength;
1694                 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
1695             }
1696             token = NUMBER;
1697         }
1698
1699         // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
1700         if (UNLIKELY(isIdentStart(m_current))) {
1701             m_lexErrorMessage = "At least one digit must occur after a decimal point";
1702             token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
1703             goto returnError;
1704         }
1705         m_buffer8.resize(0);
1706         break;
1707     case CharacterQuote:
1708         if (lexerFlags & LexerFlagsDontBuildStrings) {
1709             StringParseResult result = parseString<false>(tokenData, strictMode);
1710             if (UNLIKELY(result != StringParsedSuccessfully)) {
1711                 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
1712                 goto returnError;
1713             }
1714         } else {
1715             StringParseResult result = parseString<true>(tokenData, strictMode);
1716             if (UNLIKELY(result != StringParsedSuccessfully)) {
1717                 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
1718                 goto returnError;
1719             }
1720         }
1721         shift();
1722         token = STRING;
1723         break;
1724     case CharacterIdentifierStart:
1725         ASSERT(isIdentStart(m_current));
1726         FALLTHROUGH;
1727     case CharacterBackSlash:
1728         parseIdent:
1729         if (lexerFlags & LexexFlagsDontBuildKeywords)
1730             token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
1731         else
1732             token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
1733         break;
1734     case CharacterLineTerminator:
1735         ASSERT(isLineTerminator(m_current));
1736         shiftLineTerminator();
1737         m_atLineStart = true;
1738         m_terminator = true;
1739         m_lineStart = m_code;
1740         goto start;
1741     case CharacterPrivateIdentifierStart:
1742         if (m_parsingBuiltinFunction)
1743             goto parseIdent;
1744
1745         FALLTHROUGH;
1746     case CharacterInvalid:
1747         m_lexErrorMessage = invalidCharacterMessage();
1748         token = ERRORTOK;
1749         goto returnError;
1750     default:
1751         RELEASE_ASSERT_NOT_REACHED();
1752         m_lexErrorMessage = "Internal Error";
1753         token = ERRORTOK;
1754         goto returnError;
1755     }
1756
1757     m_atLineStart = false;
1758     goto returnToken;
1759
1760 inSingleLineComment:
1761     while (!isLineTerminator(m_current)) {
1762         if (atEnd())
1763             return EOFTOK;
1764         shift();
1765     }
1766     shiftLineTerminator();
1767     m_atLineStart = true;
1768     m_terminator = true;
1769     m_lineStart = m_code;
1770     if (!lastTokenWasRestrKeyword())
1771         goto start;
1772
1773     token = SEMICOLON;
1774     // Fall through into returnToken.
1775
1776 returnToken:
1777     tokenLocation->line = m_lineNumber;
1778     tokenLocation->endOffset = currentOffset();
1779     tokenLocation->lineStartOffset = currentLineStartOffset();
1780     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
1781     tokenRecord->m_endPosition = currentPosition();
1782     m_lastToken = token;
1783     return token;
1784
1785 returnError:
1786     m_error = true;
1787     tokenLocation->line = m_lineNumber;
1788     tokenLocation->endOffset = currentOffset();
1789     tokenLocation->lineStartOffset = currentLineStartOffset();
1790     ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
1791     tokenRecord->m_endPosition = currentPosition();
1792     RELEASE_ASSERT(token & ErrorTokenFlag);
1793     return token;
1794 }
1795
     // orCharacter<T> folds one scanned character into a running bitwise-OR
     // accumulator. Only the primary template is declared here; the two explicit
     // specializations below provide the behavior for each character type.
     // NOTE(review): scanRegExp() hands the accumulated value to
     // makeRightSizedIdentifier(); presumably it lets that helper tell whether any
     // character needs more than 8 bits -- confirm against makeRightSizedIdentifier().
1796 template <typename T>
1797 static inline void orCharacter(UChar&, UChar);
1798
     // LChar specialization: deliberately a no-op, so the accumulator is left untouched.
1799 template <>
1800 inline void orCharacter<LChar>(UChar&, UChar) { }
1801
     // UChar specialization: ORs `character` into `orAccumulator`.
1802 template <>
1803 inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
1804 {
1805     orAccumulator |= character;
1806 }
1807
     // Scans the rest of a regular expression literal starting at m_current: first
     // the pattern, up to the terminating '/', then the trailing flag characters.
     //
     // pattern       - out: set to an identifier built from the recorded pattern
     //                 text (the terminating '/' is not part of it).
     // flags         - out: set to an identifier built from the run of isIdentPart()
     //                 characters that follows the terminating '/'; may be empty.
     // patternPrefix - if non-zero, recorded as the first pattern character before
     //                 anything is read from the input.
     //
     // Returns true on success, leaving m_buffer16 empty. Returns false, with
     // m_buffer16 emptied and neither out-parameter assigned, if a line terminator
     // or the end of input is reached before the terminating '/'.
     //
     // NOTE(review): nothing here skips an opening '/', so the caller has presumably
     // consumed it already -- confirm at the call sites.
1808 template <typename T>
1809 bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
1810 {
1811     ASSERT(m_buffer16.isEmpty());
1812
         // lastWasEscape: the previous character was a '\\' that was not itself escaped.
         // inBrackets: an unescaped '[' has been seen without a matching unescaped ']'.
1813     bool lastWasEscape = false;
1814     bool inBrackets = false;
1815     UChar charactersOredTogether = 0;
1816
1817     if (patternPrefix) {
1818         ASSERT(!isLineTerminator(patternPrefix));
1819         ASSERT(patternPrefix != '/');
1820         ASSERT(patternPrefix != '[');
             // NOTE(review): the prefix is recorded but not folded into
             // charactersOredTogether; that is only harmless if callers never pass a
             // prefix above 0xFF -- confirm against the call sites.
1821         record16(patternPrefix);
1822     }
1823
1824     while (true) {
             // A raw line terminator or end of input before the terminating '/' is a
             // failure; drop whatever was recorded so far.
1825         if (isLineTerminator(m_current) || atEnd()) {
1826             m_buffer16.resize(0);
1827             return false;
1828         }
1829
             // Consume the character before classifying it, so that on the break below
             // m_current is already the character after the terminating '/'.
1830         T prev = m_current;
1831
1832         shift();
1833
             // A '/' ends the pattern only when it is neither escaped nor inside
             // brackets. It is not recorded.
1834         if (prev == '/' && !lastWasEscape && !inBrackets)
1835             break;
1836
1837         record16(prev);
1838         orCharacter<T>(charactersOredTogether, prev);
1839
             // The character after a backslash is recorded but never interpreted, so
             // an escaped '[', ']', '/' or '\\' does not change the scanner state.
1840         if (lastWasEscape) {
1841             lastWasEscape = false;
1842             continue;
1843         }
1844
1845         switch (prev) {
1846         case '[':
1847             inBrackets = true;
1848             break;
1849         case ']':
1850             inBrackets = false;
1851             break;
1852         case '\\':
1853             lastWasEscape = true;
1854             break;
1855         }
1856     }
1857
1858     pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
1859
         // Reuse the buffer and the accumulator for the flags.
1860     m_buffer16.resize(0);
1861     charactersOredTogether = 0;
1862
         // Every identifier-part character directly after the '/' is taken as a flag;
         // no validation of the flag characters happens in this function.
1863     while (isIdentPart(m_current)) {
1864         record16(m_current);
1865         orCharacter<T>(charactersOredTogether, m_current);
1866         shift();
1867     }
1868
1869     flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
1870     m_buffer16.resize(0);
1871
1872     return true;
1873 }
1874
     // Advances past the rest of a regular expression literal (pattern, terminating
     // '/', and trailing flag characters) without recording any of it. It uses the
     // same escape and bracket tracking as scanRegExp(), but takes no pattern prefix
     // and builds no identifiers.
     //
     // Returns true once the terminating '/' and any following isIdentPart()
     // characters have been consumed. Returns false if a line terminator or the end
     // of input is reached before the terminating '/'.
1875 template <typename T>
1876 bool Lexer<T>::skipRegExp()
1877 {
         // lastWasEscape: the previous character was a '\\' that was not itself escaped.
         // inBrackets: an unescaped '[' has been seen without a matching unescaped ']'.
1878     bool lastWasEscape = false;
1879     bool inBrackets = false;
1880
1881     while (true) {
1882         if (isLineTerminator(m_current) || atEnd())
1883             return false;
1884
             // Consume first, then classify: after the break below m_current is the
             // character following the terminating '/'.
1885         T prev = m_current;
1886
1887         shift();
1888
             // Only a '/' that is neither escaped nor inside brackets ends the pattern.
1889         if (prev == '/' && !lastWasEscape && !inBrackets)
1890             break;
1891
             // The character after a backslash never changes the scanner state.
1892         if (lastWasEscape) {
1893             lastWasEscape = false;
1894             continue;
1895         }
1896
1897         switch (prev) {
1898         case '[':
1899             inBrackets = true;
1900             break;
1901         case ']':
1902             inBrackets = false;
1903             break;
1904         case '\\':
1905             lastWasEscape = true;
1906             break;
1907         }
1908     }
1909
         // Skip the flags: every identifier-part character directly after the '/'.
1910     while (isIdentPart(m_current))
1911         shift();
1912
1913     return true;
1914 }
1915
     // Resets the state touched here: zeroes m_arena, replaces m_buffer8 and
     // m_buffer16 with fresh empty vectors, and clears m_isReparsing.
1916 template <typename T>
1917 void Lexer<T>::clear()
1918 {
1919     m_arena = 0;
1920
         // Each buffer is swapped with a new local Vector instead of being resized;
         // the old contents end up in the local, which is destroyed on return.
         // NOTE(review): presumably done so the capacity is released and not just the
         // size reset -- confirm against WTF::Vector.
1921     Vector<LChar> newBuffer8;
1922     m_buffer8.swap(newBuffer8);
1923
1924     Vector<UChar> newBuffer16;
1925     m_buffer16.swap(newBuffer16);
1926
1927     m_isReparsing = false;
1928 }
1929
1930 // Explicitly instantiate the two flavors of Lexer we need (Lexer<LChar> and Lexer<UChar>)
     // here, instead of putting most of this file in Lexer.h.
1931 template class Lexer<LChar>;
1932 template class Lexer<UChar>;
1933
1934 } // namespace JSC