parser/Lexer.cpp

   1 /*
   2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
   3  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
   4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
   5  *
   6  *  This library is free software; you can redistribute it and/or
   7  *  modify it under the terms of the GNU Library General Public
   8  *  License as published by the Free Software Foundation; either
   9  *  version 2 of the License, or (at your option) any later version.
  10  *
  11  *  This library is distributed in the hope that it will be useful,
  12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  *  Library General Public License for more details.
  15  *
  16  *  You should have received a copy of the GNU Library General Public License
  17  *  along with this library; see the file COPYING.LIB.  If not, write to
  18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19  *  Boston, MA 02110-1301, USA.
  20  *
  21  */
  22
  23 #include "config.h"
  24 #include "Lexer.h"
  25
  26 #include "JSFunction.h"
  27 #include "JSGlobalObjectFunctions.h"
  28 #include "NodeInfo.h"
  29 #include "Nodes.h"
  30 #include "dtoa.h"
  31 #include <ctype.h>
  32 #include <limits.h>
  33 #include <string.h>
  34 #include <wtf/Assertions.h>
  35
  36 using namespace WTF;
  37 using namespace Unicode;
  38
  39 // We can't specify the namespace in yacc's C output, so do it here instead.
  40 using namespace JSC;
  41
  42 #include "Grammar.h"
  43 #include "Lookup.h"
  44 #include "Lexer.lut.h"
  45
  46 namespace JSC {
  47
   48 static const UChar byteOrderMark = 0xFEFF; // U+FEFF; setCode() and copyCodeWithoutBOMs() strip this character from the text being lexed.
  49
  50 Lexer::Lexer(JSGlobalData* globalData)
  51     : m_isReparsing(false)
  52     , m_globalData(globalData)
  53     , m_keywordTable(JSC::mainTable)
  54 {
  55 }
  56
  57 Lexer::~Lexer()
  58 {
  59     m_keywordTable.deleteTable();
  60 }
  61
  62 inline const UChar* Lexer::currentCharacter() const
  63 {
  64     return m_code - 4;
  65 }
  66
  67 inline int Lexer::currentOffset() const
  68 {
  69     return currentCharacter() - m_codeStart;
  70 }
  71
  72 ALWAYS_INLINE void Lexer::shift1()
  73 {
  74     m_current = m_next1;
  75     m_next1 = m_next2;
  76     m_next2 = m_next3;
  77     if (LIKELY(m_code < m_codeEnd))
  78         m_next3 = m_code[0];
  79     else
  80         m_next3 = -1;
  81
  82     ++m_code;
  83 }
  84
  85 ALWAYS_INLINE void Lexer::shift2()
  86 {
  87     m_current = m_next2;
  88     m_next1 = m_next3;
  89     if (LIKELY(m_code + 1 < m_codeEnd)) {
  90         m_next2 = m_code[0];
  91         m_next3 = m_code[1];
  92     } else {
  93         m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
  94         m_next3 = -1;
  95     }
  96
  97     m_code += 2;
  98 }
  99
 100 ALWAYS_INLINE void Lexer::shift3()
 101 {
 102     m_current = m_next3;
 103     if (LIKELY(m_code + 2 < m_codeEnd)) {
 104         m_next1 = m_code[0];
 105         m_next2 = m_code[1];
 106         m_next3 = m_code[2];
 107     } else {
 108         m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
 109         m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
 110         m_next3 = -1;
 111     }
 112
 113     m_code += 3;
 114 }
 115
 116 ALWAYS_INLINE void Lexer::shift4()
 117 {
 118     if (LIKELY(m_code + 3 < m_codeEnd)) {
 119         m_current = m_code[0];
 120         m_next1 = m_code[1];
 121         m_next2 = m_code[2];
 122         m_next3 = m_code[3];
 123     } else {
 124         m_current = m_code < m_codeEnd ? m_code[0] : -1;
 125         m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
 126         m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
 127         m_next3 = -1;
 128     }
 129
 130     m_code += 4;
 131 }
 132
 133 void Lexer::setCode(const SourceCode& source, ParserArena& arena)
 134 {
 135     m_arena = &arena.identifierArena();
 136
 137     m_lineNumber = source.firstLine();
 138     m_delimited = false;
 139     m_lastToken = -1;
 140
 141     const UChar* data = source.provider()->data();
 142
 143     m_source = &source;
 144     m_codeStart = data;
 145     m_code = data + source.startOffset();
 146     m_codeEnd = data + source.endOffset();
 147     m_error = false;
 148     m_atLineStart = true;
 149
 150     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
 151     m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
 152
 153     // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
 154     // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
 155     if (source.provider()->hasBOMs()) {
 156         for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
 157             if (UNLIKELY(*p == byteOrderMark)) {
 158                 copyCodeWithoutBOMs();
 159                 break;
 160             }
 161         }
 162     }
 163
 164     // Read the first characters into the 4-character buffer.
 165     shift4();
 166     ASSERT(currentOffset() == source.startOffset());
 167 }
 168
 169 void Lexer::copyCodeWithoutBOMs()
 170 {
 171     // Note: In this case, the character offset data for debugging will be incorrect.
 172     // If it's important to correctly debug code with extraneous BOMs, then the caller
 173     // should strip the BOMs when creating the SourceProvider object and do its own
 174     // mapping of offsets within the stripped text to original text offset.
 175
 176     m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
 177     for (const UChar* p = m_code; p < m_codeEnd; ++p) {
 178         UChar c = *p;
 179         if (c != byteOrderMark)
 180             m_codeWithoutBOMs.append(c);
 181     }
 182     ptrdiff_t startDelta = m_codeStart - m_code;
 183     m_code = m_codeWithoutBOMs.data();
 184     m_codeStart = m_code + startDelta;
 185     m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
 186 }
 187
 188 void Lexer::shiftLineTerminator()
 189 {
 190     ASSERT(isLineTerminator(m_current));
 191
 192     // Allow both CRLF and LFCR.
 193     if (m_current + m_next1 == '\n' + '\r')
 194         shift2();
 195     else
 196         shift1();
 197
 198     ++m_lineNumber;
 199 }
 200
 201 ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
 202 {
 203     return &m_arena->makeIdentifier(m_globalData, characters, length);
 204 }
 205
 206 inline bool Lexer::lastTokenWasRestrKeyword() const
 207 {
 208     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
 209 }
 210
 211 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
 212 {
 213     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
 214 }
 215
 216 static inline bool isIdentStart(int c)
 217 {
 218     return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
 219 }
 220
 221 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
 222 {
 223     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
 224         | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
 225 }
 226
 227 static inline bool isIdentPart(int c)
 228 {
 229     return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
 230 }
 231
 232 static inline int singleEscape(int c)
 233 {
 234     switch (c) {
 235         case 'b':
 236             return 0x08;
 237         case 't':
 238             return 0x09;
 239         case 'n':
 240             return 0x0A;
 241         case 'v':
 242             return 0x0B;
 243         case 'f':
 244             return 0x0C;
 245         case 'r':
 246             return 0x0D;
 247         default:
 248             return c;
 249     }
 250 }
 251
 252 inline void Lexer::record8(int c)
 253 {
 254     ASSERT(c >= 0);
 255     ASSERT(c <= 0xFF);
 256     m_buffer8.append(static_cast<char>(c));
 257 }
 258
 259 inline void Lexer::record16(UChar c)
 260 {
 261     m_buffer16.append(c);
 262 }
 263
 264 inline void Lexer::record16(int c)
 265 {
 266     ASSERT(c >= 0);
 267     ASSERT(c <= USHRT_MAX);
 268     record16(UChar(static_cast<unsigned short>(c)));
 269 }
 270
  271 int Lexer::lex(void* p1, void* p2)
  272 {
          // Scans the next token starting at m_current and returns its token value.
          //
          // p1 is used as a YYSTYPE* and receives the token's semantic value where there
          // is one (ident for identifiers and strings, doubleValue for numbers, intValue
          // for the offsets of '{' and '}'). p2 is used as a YYLTYPE* and receives the
          // line number and the start/end character offsets of the token.
          //
          // Returns 0 at the end of the input, and -1 (after setting m_error) when the
          // input cannot be tokenized.
          //
          // The function is a goto-driven state machine: the switch below handles
          // punctuators inline and jumps to a labeled section for strings, identifiers,
          // numbers and comments. Several sections rely on falling through into the
          // section that follows them.
  273     ASSERT(!m_error);
  274     ASSERT(m_buffer8.isEmpty());
  275     ASSERT(m_buffer16.isEmpty());
  276
  277     YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
  278     YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
  279     int token = 0;
          // Set once a line terminator has been consumed during this call.
  280     m_terminator = false;
  281
  282 start:
          // Re-entered after comments and line terminators, which produce no token.
  283     while (isWhiteSpace(m_current))
  284         shift1();
  285
  286     int startOffset = currentOffset();
  287
  288     if (m_current == -1) {
  289         if (!m_terminator && !m_delimited && !m_isReparsing) {
  290             // automatic semicolon insertion if program incomplete
  291             token = ';';
  292             goto doneSemicolon;
  293         }
  294         return 0;
  295     }
  296
  297     m_delimited = false;
          // Punctuators are matched longest-first using the lookahead characters;
          // shiftN() consumes exactly the characters of the matched operator.
  298     switch (m_current) {
  299         case '>':
  300             if (m_next1 == '>' && m_next2 == '>') {
  301                 if (m_next3 == '=') {
  302                     shift4();
  303                     token = URSHIFTEQUAL;
  304                     break;
  305                 }
  306                 shift3();
  307                 token = URSHIFT;
  308                 break;
  309             }
  310             if (m_next1 == '>') {
  311                 if (m_next2 == '=') {
  312                     shift3();
  313                     token = RSHIFTEQUAL;
  314                     break;
  315                 }
  316                 shift2();
  317                 token = RSHIFT;
  318                 break;
  319             }
  320             if (m_next1 == '=') {
  321                 shift2();
  322                 token = GE;
  323                 break;
  324             }
  325             shift1();
  326             token = '>';
  327             break;
  328         case '=':
  329             if (m_next1 == '=') {
  330                 if (m_next2 == '=') {
  331                     shift3();
  332                     token = STREQ;
  333                     break;
  334                 }
  335                 shift2();
  336                 token = EQEQ;
  337                 break;
  338             }
  339             shift1();
  340             token = '=';
  341             break;
  342         case '!':
  343             if (m_next1 == '=') {
  344                 if (m_next2 == '=') {
  345                     shift3();
  346                     token = STRNEQ;
  347                     break;
  348                 }
  349                 shift2();
  350                 token = NE;
  351                 break;
  352             }
  353             shift1();
  354             token = '!';
  355             break;
  356         case '<':
  357             if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
  358                 // <!-- marks the beginning of a line comment (for www usage)
  359                 shift4();
  360                 goto inSingleLineComment;
  361             }
  362             if (m_next1 == '<') {
  363                 if (m_next2 == '=') {
  364                     shift3();
  365                     token = LSHIFTEQUAL;
  366                     break;
  367                 }
  368                 shift2();
  369                 token = LSHIFT;
  370                 break;
  371             }
  372             if (m_next1 == '=') {
  373                 shift2();
  374                 token = LE;
  375                 break;
  376             }
  377             shift1();
  378             token = '<';
  379             break;
  380         case '+':
  381             if (m_next1 == '+') {
  382                 shift2();
                      // A distinct token is reported when a line terminator was
                      // consumed earlier in this call, before the operator.
  383                 if (m_terminator) {
  384                     token = AUTOPLUSPLUS;
  385                     break;
  386                 }
  387                 token = PLUSPLUS;
  388                 break;
  389             }
  390             if (m_next1 == '=') {
  391                 shift2();
  392                 token = PLUSEQUAL;
  393                 break;
  394             }
  395             shift1();
  396             token = '+';
  397             break;
  398         case '-':
  399             if (m_next1 == '-') {
                      // --> begins a single-line comment, but only while m_atLineStart is set.
  400                 if (m_atLineStart && m_next2 == '>') {
  401                     shift3();
  402                     goto inSingleLineComment;
  403                 }
  404                 shift2();
                      // Same line-terminator distinction as for '++' above.
  405                 if (m_terminator) {
  406                     token = AUTOMINUSMINUS;
  407                     break;
  408                 }
  409                 token = MINUSMINUS;
  410                 break;
  411             }
  412             if (m_next1 == '=') {
  413                 shift2();
  414                 token = MINUSEQUAL;
  415                 break;
  416             }
  417             shift1();
  418             token = '-';
  419             break;
  420         case '*':
  421             if (m_next1 == '=') {
  422                 shift2();
  423                 token = MULTEQUAL;
  424                 break;
  425             }
  426             shift1();
  427             token = '*';
  428             break;
  429         case '/':
                  // Regular expression literals are not recognized here; see scanRegExp()
                  // and skipRegExp().
  430             if (m_next1 == '/') {
  431                 shift2();
  432                 goto inSingleLineComment;
  433             }
                  // The comment opener itself is consumed at inMultiLineComment.
  434             if (m_next1 == '*')
  435                 goto inMultiLineComment;
  436             if (m_next1 == '=') {
  437                 shift2();
  438                 token = DIVEQUAL;
  439                 break;
  440             }
  441             shift1();
  442             token = '/';
  443             break;
  444         case '&':
  445             if (m_next1 == '&') {
  446                 shift2();
  447                 token = AND;
  448                 break;
  449             }
  450             if (m_next1 == '=') {
  451                 shift2();
  452                 token = ANDEQUAL;
  453                 break;
  454             }
  455             shift1();
  456             token = '&';
  457             break;
  458         case '^':
  459             if (m_next1 == '=') {
  460                 shift2();
  461                 token = XOREQUAL;
  462                 break;
  463             }
  464             shift1();
  465             token = '^';
  466             break;
  467         case '%':
  468             if (m_next1 == '=') {
  469                 shift2();
  470                 token = MODEQUAL;
  471                 break;
  472             }
  473             shift1();
  474             token = '%';
  475             break;
  476         case '|':
  477             if (m_next1 == '=') {
  478                 shift2();
  479                 token = OREQUAL;
  480                 break;
  481             }
  482             if (m_next1 == '|') {
  483                 shift2();
  484                 token = OR;
  485                 break;
  486             }
  487             shift1();
  488             token = '|';
  489             break;
  490         case '.':
                  // A '.' followed by a digit starts a numeric literal such as ".5".
  491             if (isASCIIDigit(m_next1)) {
  492                 record8('.');
  493                 shift1();
  494                 goto inNumberAfterDecimalPoint;
  495             }
  496             token = '.';
  497             shift1();
  498             break;
  499         case ',':
  500         case '~':
  501         case '?':
  502         case ':':
  503         case '(':
  504         case ')':
  505         case '[':
  506         case ']':
                  // Single-character tokens whose token value is the character itself.
  507             token = m_current;
  508             shift1();
  509             break;
  510         case ';':
  511             shift1();
  512             m_delimited = true;
  513             token = ';';
  514             break;
  515         case '{':
                  // The brace's character offset is handed back as the semantic value.
  516             lvalp->intValue = currentOffset();
  517             shift1();
  518             token = OPENBRACE;
  519             break;
  520         case '}':
  521             lvalp->intValue = currentOffset();
  522             shift1();
  523             m_delimited = true;
  524             token = CLOSEBRACE;
  525             break;
  526         case '\\':
  527             goto startIdentifierWithBackslash;
  528         case '0':
  529             goto startNumberWithZeroDigit;
  530         case '1':
  531         case '2':
  532         case '3':
  533         case '4':
  534         case '5':
  535         case '6':
  536         case '7':
  537         case '8':
  538         case '9':
  539             goto startNumber;
  540         case '"':
  541         case '\'':
  542             goto startString;
  543         default:
  544             if (isIdentStart(m_current))
  545                 goto startIdentifierOrKeyword;
  546             if (isLineTerminator(m_current)) {
  547                 shiftLineTerminator();
  548                 m_atLineStart = true;
  549                 m_terminator = true;
                      // A line terminator right after continue/break/return/throw
                      // yields a ';' token.
  550                 if (lastTokenWasRestrKeyword()) {
  551                     token = ';';
  552                     goto doneSemicolon;
  553                 }
  554                 goto start;
  555             }
  556             goto returnError;
  557     }
  558
          // Reached by every 'break' in the switch above: a punctuator was scanned.
  559     m_atLineStart = false;
  560     goto returnToken;
  561
  562 startString: {
          // Fast path: while the string contains nothing that needs special handling,
          // nothing is copied and the identifier is made directly from the source text.
  563     int stringQuoteCharacter = m_current;
  564     shift1();
  565
  566     const UChar* stringStart = currentCharacter();
  567     while (m_current != stringQuoteCharacter) {
  568         // Fast check for characters that require special handling.
  569         // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
  570         // as possible, and lets through all common ASCII characters.
  571         if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
                  // Switch to the slow path, keeping what has been scanned so far.
  572             m_buffer16.append(stringStart, currentCharacter() - stringStart);
  573             goto inString;
  574         }
  575         shift1();
  576     }
  577     lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
  578     shift1();
  579     m_atLineStart = false;
  580     m_delimited = false;
  581     token = STRING;
  582     goto returnToken;
  583
  584 inString:
          // Slow path: characters are accumulated in m_buffer16 one at a time.
          // An unescaped line terminator or the end of input inside a string is an error.
  585     while (m_current != stringQuoteCharacter) {
  586         if (m_current == '\\')
  587             goto inStringEscapeSequence;
  588         if (UNLIKELY(isLineTerminator(m_current)))
  589             goto returnError;
  590         if (UNLIKELY(m_current == -1))
  591             goto returnError;
  592         record16(m_current);
  593         shift1();
  594     }
  595     goto doneString;
  596
  597 inStringEscapeSequence:
          // m_current is the backslash; step onto the character that follows it.
  598     shift1();
  599     if (m_current == 'x') {
  600         shift1();
  601         if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
  602             record16(convertHex(m_current, m_next1));
  603             shift2();
  604             goto inString;
  605         }
              // Not followed by two hex digits: the 'x' is kept as an ordinary character.
  606         record16('x');
  607         if (m_current == stringQuoteCharacter)
  608             goto doneString;
  609         goto inString;
  610     }
  611     if (m_current == 'u') {
  612         shift1();
  613         if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
  614             record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
  615             shift4();
  616             goto inString;
  617         }
              // Not followed by four hex digits: accepted as a plain 'u' only when the
              // string ends right here; anything else is an error.
  618         if (m_current == stringQuoteCharacter) {
  619             record16('u');
  620             goto doneString;
  621         }
  622         goto returnError;
  623     }
  624     if (isASCIIOctalDigit(m_current)) {
              // Octal escape: three digits are taken only when the first is 0-3,
              // otherwise two digits if available, otherwise one.
  625         if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
  626             record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
  627             shift3();
  628             goto inString;
  629         }
  630         if (isASCIIOctalDigit(m_next1)) {
  631             record16((m_current - '0') * 8 + m_next1 - '0');
  632             shift2();
  633             goto inString;
  634         }
  635         record16(m_current - '0');
  636         shift1();
  637         goto inString;
  638     }
  639     if (isLineTerminator(m_current)) {
              // Backslash followed by a line terminator: the terminator is consumed
              // and nothing is added to the string.
  640         shiftLineTerminator();
  641         goto inString;
  642     }
  643     if (m_current == -1)
  644         goto returnError;
  645     record16(singleEscape(m_current));
  646     shift1();
  647     goto inString;
  648 }
  649
  650 startIdentifierWithBackslash:
          // An identifier that begins with a \uXXXX escape. The decoded character must
          // be a valid identifier start; scanning then continues inside the escape loop below.
  651     shift1();
  652     if (UNLIKELY(m_current != 'u'))
  653         goto returnError;
  654     shift1();
  655     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
  656         goto returnError;
          // 'token' is used here as scratch storage for the decoded character.
  657     token = convertUnicode(m_current, m_next1, m_next2, m_next3);
  658     if (UNLIKELY(!isIdentStart(token)))
  659         goto returnError;
  660     goto inIdentifierAfterCharacterCheck;
  661
  662 startIdentifierOrKeyword: {
          // Fast path: an identifier without escapes is made directly from the source
          // text and then checked against the keyword table.
  663     const UChar* identifierStart = currentCharacter();
  664     shift1();
  665     while (isIdentPart(m_current))
  666         shift1();
  667     if (LIKELY(m_current != '\\')) {
  668         lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
  669         goto doneIdentifierOrKeyword;
  670     }
          // An escape follows: keep what was scanned and fall into the escape loop.
  671     m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
  672 }
  673
          // Each iteration handles one \uXXXX escape followed by a run of plain
          // identifier characters. Identifiers built here finish at doneIdentifier,
          // which always reports IDENT without a keyword lookup.
  674     do {
  675         shift1();
  676         if (UNLIKELY(m_current != 'u'))
  677             goto returnError;
  678         shift1();
  679         if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
  680             goto returnError;
  681         token = convertUnicode(m_current, m_next1, m_next2, m_next3);
  682         if (UNLIKELY(!isIdentPart(token)))
  683             goto returnError;
  684 inIdentifierAfterCharacterCheck:
  685         record16(token);
  686         shift4();
  687
  688         while (isIdentPart(m_current)) {
  689             record16(m_current);
  690             shift1();
  691         }
  692     } while (UNLIKELY(m_current == '\\'));
  693     goto doneIdentifier;
  694
  695 inSingleLineComment:
          // Skip to the end of the line. Reaching the end of input inside the comment
          // ends lexing with 0.
  696     while (!isLineTerminator(m_current)) {
  697         if (UNLIKELY(m_current == -1))
  698             return 0;
  699         shift1();
  700     }
  701     shiftLineTerminator();
  702     m_atLineStart = true;
  703     m_terminator = true;
  704     if (lastTokenWasRestrKeyword())
  705         goto doneSemicolon;
  706     goto start;
  707
  708 inMultiLineComment:
          // Consume the comment opener, then skip to the closing delimiter. Line
          // terminators inside the comment still advance the line number. Reaching
          // the end of input before the comment is closed is an error.
  709     shift2();
  710     while (m_current != '*' || m_next1 != '/') {
  711         if (isLineTerminator(m_current))
  712             shiftLineTerminator();
  713         else {
  714             shift1();
  715             if (UNLIKELY(m_current == -1))
  716                 goto returnError;
  717         }
  718     }
  719     shift2();
  720     m_atLineStart = false;
  721     goto start;
  722
  723 startNumberWithZeroDigit:
          // A literal starting with '0': hex (0x...), a fraction, an exponent, octal,
          // a decimal with a leading zero, or just zero.
  724     shift1();
          // OR-ing with 0x20 folds an ASCII uppercase letter to lowercase, so both
          // 'x' and 'X' match.
  725     if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
  726         shift1();
  727         goto inHex;
  728     }
  729     if (m_current == '.') {
  730         record8('0');
  731         record8('.');
  732         shift1();
  733         goto inNumberAfterDecimalPoint;
  734     }
  735     if ((m_current | 0x20) == 'e') {
  736         record8('0');
  737         record8('e');
  738         shift1();
  739         goto inExponentIndicator;
  740     }
  741     if (isASCIIOctalDigit(m_current))
  742         goto inOctal;
  743     if (isASCIIDigit(m_current))
  744         goto startNumber;
  745     lvalp->doubleValue = 0;
  746     goto doneNumeric;
  747
  748 inNumberAfterDecimalPoint:
          // The digits after '.', optionally followed by an exponent.
  749     while (isASCIIDigit(m_current)) {
  750         record8(m_current);
  751         shift1();
  752     }
  753     if ((m_current | 0x20) == 'e') {
  754         record8('e');
  755         shift1();
  756         goto inExponentIndicator;
  757     }
  758     goto doneNumber;
  759
  760 inExponentIndicator:
          // An optional sign followed by at least one digit; otherwise an error.
  761     if (m_current == '+' || m_current == '-') {
  762         record8(m_current);
  763         shift1();
  764     }
  765     if (!isASCIIDigit(m_current))
  766         goto returnError;
  767     do {
  768         record8(m_current);
  769         shift1();
  770     } while (isASCIIDigit(m_current));
  771     goto doneNumber;
  772
  773 inOctal: {
  774     do {
  775         record8(m_current);
  776         shift1();
  777     } while (isASCIIOctalDigit(m_current));
          // An 8 or 9 after the octal digits: continue at startNumber, which keeps
          // appending to m_buffer8, so the digits are converted as a decimal number.
  778     if (isASCIIDigit(m_current))
  779         goto startNumber;
  780
  781     double dval = 0;
  782
  783     const char* end = m_buffer8.end();
  784     for (const char* p = m_buffer8.data(); p < end; ++p) {
  785         dval *= 8;
  786         dval += *p - '0';
  787     }
          // NOTE(review): for large values the result is recomputed by parseIntOverflow,
          // presumably for accuracy -- confirm against its definition.
  788     if (dval >= mantissaOverflowLowerBound)
  789         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
  790
  791     m_buffer8.resize(0);
  792
  793     lvalp->doubleValue = dval;
  794     goto doneNumeric;
  795 }
  796
  797 inHex: {
          // Entered with m_current on the first hex digit; the "0x" prefix is not recorded.
  798     do {
  799         record8(m_current);
  800         shift1();
  801     } while (isASCIIHexDigit(m_current));
  802
  803     double dval = 0;
  804
  805     const char* end = m_buffer8.end();
  806     for (const char* p = m_buffer8.data(); p < end; ++p) {
  807         dval *= 16;
  808         dval += toASCIIHexValue(*p);
  809     }
          // Same large-value handling as in inOctal, with radix 16.
  810     if (dval >= mantissaOverflowLowerBound)
  811         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
  812
  813     m_buffer8.resize(0);
  814
  815     lvalp->doubleValue = dval;
  816     goto doneNumeric;
  817 }
  818
  819 startNumber:
          // Decimal literal: the integer part, then an optional fraction or exponent.
  820     record8(m_current);
  821     shift1();
  822     while (isASCIIDigit(m_current)) {
  823         record8(m_current);
  824         shift1();
  825     }
  826     if (m_current == '.') {
  827         record8('.');
  828         shift1();
  829         goto inNumberAfterDecimalPoint;
  830     }
  831     if ((m_current | 0x20) == 'e') {
  832         record8('e');
  833         shift1();
  834         goto inExponentIndicator;
  835     }
  836
  837     // Fall through into doneNumber.
  838
  839 doneNumber:
  840     // Null-terminate string for strtod.
  841     m_buffer8.append('\0');
  842     lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
  843     m_buffer8.resize(0);
  844
  845     // Fall through into doneNumeric.
  846
  847 doneNumeric:
  848     // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
  849     if (UNLIKELY(isIdentStart(m_current)))
  850         goto returnError;
  851
  852     m_atLineStart = false;
  853     m_delimited = false;
  854     token = NUMBER;
  855     goto returnToken;
  856
  857 doneSemicolon:
          // Used both for a real ';' position reached via jumps and for inserted semicolons.
  858     token = ';';
  859     m_delimited = true;
  860     goto returnToken;
  861
  862 doneIdentifier:
          // Identifier assembled in m_buffer16 (it contained at least one escape).
  863     m_atLineStart = false;
  864     m_delimited = false;
  865     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
  866     m_buffer16.resize(0);
  867     token = IDENT;
  868     goto returnToken;
  869
  870 doneIdentifierOrKeyword: {
          // lvalp->ident was already set on the fast path; a keyword table hit
          // replaces IDENT with the keyword's token value.
  871     m_atLineStart = false;
  872     m_delimited = false;
  873     m_buffer16.resize(0);
  874     const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
  875     token = entry ? entry->lexerValue() : IDENT;
  876     goto returnToken;
  877 }
  878
  879 doneString:
  880     // Atomize constant strings in case they're later used in property lookup.
          // m_current is the closing quote here; the shift below consumes it.
  881     shift1();
  882     m_atLineStart = false;
  883     m_delimited = false;
  884     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
  885     m_buffer16.resize(0);
  886     token = STRING;
  887
  888     // Fall through into returnToken.
  889
  890 returnToken: {
          // Both line fields get the line number at the end of the token; the column
          // fields carry character offsets (start and end of the token), not columns.
  891     int lineNumber = m_lineNumber;
  892     llocp->first_line = lineNumber;
  893     llocp->last_line = lineNumber;
  894     llocp->first_column = startOffset;
  895     llocp->last_column = currentOffset();
  896
  897     m_lastToken = token;
  898     return token;
  899 }
  900
  901 returnError:
          // m_buffer8 and m_buffer16 are not reset on this path.
  902     m_error = true;
  903     return -1;
  904 }
 905
 906 bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
 907 {
 908     ASSERT(m_buffer16.isEmpty());
 909
 910     bool lastWasEscape = false;
 911     bool inBrackets = false;
 912
 913     if (patternPrefix) {
 914         ASSERT(!isLineTerminator(patternPrefix));
 915         ASSERT(patternPrefix != '/');
 916         ASSERT(patternPrefix != '[');
 917         record16(patternPrefix);
 918     }
 919
 920     while (true) {
 921         int current = m_current;
 922
 923         if (isLineTerminator(current) || current == -1) {
 924             m_buffer16.resize(0);
 925             return false;
 926         }
 927
 928         shift1();
 929
 930         if (current == '/' && !lastWasEscape && !inBrackets)
 931             break;
 932
 933         record16(current);
 934
 935         if (lastWasEscape) {
 936             lastWasEscape = false;
 937             continue;
 938         }
 939
 940         switch (current) {
 941         case '[':
 942             inBrackets = true;
 943             break;
 944         case ']':
 945             inBrackets = false;
 946             break;
 947         case '\\':
 948             lastWasEscape = true;
 949             break;
 950         }
 951     }
 952
 953     pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
 954     m_buffer16.resize(0);
 955
 956     while (isIdentPart(m_current)) {
 957         record16(m_current);
 958         shift1();
 959     }
 960
 961     flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
 962     m_buffer16.resize(0);
 963
 964     return true;
 965 }
 966
 967 bool Lexer::skipRegExp()
 968 {
 969     bool lastWasEscape = false;
 970     bool inBrackets = false;
 971
 972     while (true) {
 973         int current = m_current;
 974
 975         if (isLineTerminator(current) || current == -1)
 976             return false;
 977
 978         shift1();
 979
 980         if (current == '/' && !lastWasEscape && !inBrackets)
 981             break;
 982
 983         if (lastWasEscape) {
 984             lastWasEscape = false;
 985             continue;
 986         }
 987
 988         switch (current) {
 989         case '[':
 990             inBrackets = true;
 991             break;
 992         case ']':
 993             inBrackets = false;
 994             break;
 995         case '\\':
 996             lastWasEscape = true;
 997             break;
 998         }
 999     }
1000
1001     while (isIdentPart(m_current))
1002         shift1();
1003
1004     return true;
1005 }
1006
1007 void Lexer::clear()
1008 {
1009     m_arena = 0;
1010     m_codeWithoutBOMs.clear();
1011
1012     Vector<char> newBuffer8;
1013     m_buffer8.swap(newBuffer8);
1014
1015     Vector<UChar> newBuffer16;
1016     m_buffer16.swap(newBuffer16);
1017
1018     m_isReparsing = false;
1019 }
1020
1021 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1022 {
1023     if (m_codeWithoutBOMs.isEmpty())
1024         return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1025
1026     const UChar* data = m_source->provider()->data();
1027
1028     ASSERT(openBrace < closeBrace);
1029     int i;
1030     for (i = m_source->startOffset(); i < openBrace; ++i) {
1031         if (data[i] == byteOrderMark) {
1032             openBrace++;
1033             closeBrace++;
1034         }
1035     }
1036     for (; i < closeBrace; ++i) {
1037         if (data[i] == byteOrderMark)
1038             closeBrace++;
1039     }
1040
1041     ASSERT(openBrace < closeBrace);
1042
1043     return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1044 }
1045
1046 } // namespace JSC