parser/Lexer.cpp

   1 /*
   2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
   3  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
   4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
   5  *
   6  *  This library is free software; you can redistribute it and/or
   7  *  modify it under the terms of the GNU Library General Public
   8  *  License as published by the Free Software Foundation; either
   9  *  version 2 of the License, or (at your option) any later version.
  10  *
  11  *  This library is distributed in the hope that it will be useful,
  12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  *  Library General Public License for more details.
  15  *
  16  *  You should have received a copy of the GNU Library General Public License
  17  *  along with this library; see the file COPYING.LIB.  If not, write to
  18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19  *  Boston, MA 02110-1301, USA.
  20  *
  21  */
  22
  23 #include "config.h"
  24 #include "Lexer.h"
  25
  26 #include "JSFunction.h"
  27 #include "JSGlobalObjectFunctions.h"
  28 #include "NodeInfo.h"
  29 #include "Nodes.h"
  30 #include "dtoa.h"
  31 #include <ctype.h>
  32 #include <limits.h>
  33 #include <string.h>
  34 #include <wtf/Assertions.h>
  35
  36 using namespace WTF;
  37 using namespace Unicode;
  38
  39 // We can't specify the namespace in yacc's C output, so do it here instead.
  40 using namespace JSC;
  41
  42 #include "Grammar.h"
  43 #include "Lookup.h"
  44 #include "Lexer.lut.h"
  45
  46 namespace JSC {
  47
// The U+FEFF code unit. setCode() looks for it in the source and, when it is
// present, copyCodeWithoutBOMs() lexes from a copy with every occurrence removed.
static const UChar byteOrderMark = 0xFEFF;
  49
  50 Lexer::Lexer(JSGlobalData* globalData)
  51     : m_isReparsing(false)
  52     , m_globalData(globalData)
  53     , m_keywordTable(JSC::mainTable)
  54 {
  55     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
  56     m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
  57 }
  58
// Releases the keyword lookup table that the constructor initialized from
// JSC::mainTable.
Lexer::~Lexer()
{
    m_keywordTable.deleteTable();
}
  63
// Returns the position in the code buffer that corresponds to m_current.
// The lexer keeps a 4-character window (m_current, m_next1, m_next2, m_next3)
// and m_code points just past the last character read into that window, so
// the current character sits four positions behind it.
inline const UChar* Lexer::currentCharacter() const
{
    return m_code - 4;
}
  68
  69 inline int Lexer::currentOffset() const
  70 {
  71     return currentCharacter() - m_codeStart;
  72 }
  73
  74 ALWAYS_INLINE void Lexer::shift1()
  75 {
  76     m_current = m_next1;
  77     m_next1 = m_next2;
  78     m_next2 = m_next3;
  79     if (LIKELY(m_code < m_codeEnd))
  80         m_next3 = m_code[0];
  81     else
  82         m_next3 = -1;
  83
  84     ++m_code;
  85 }
  86
  87 ALWAYS_INLINE void Lexer::shift2()
  88 {
  89     m_current = m_next2;
  90     m_next1 = m_next3;
  91     if (LIKELY(m_code + 1 < m_codeEnd)) {
  92         m_next2 = m_code[0];
  93         m_next3 = m_code[1];
  94     } else {
  95         m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
  96         m_next3 = -1;
  97     }
  98
  99     m_code += 2;
 100 }
 101
 102 ALWAYS_INLINE void Lexer::shift3()
 103 {
 104     m_current = m_next3;
 105     if (LIKELY(m_code + 2 < m_codeEnd)) {
 106         m_next1 = m_code[0];
 107         m_next2 = m_code[1];
 108         m_next3 = m_code[2];
 109     } else {
 110         m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
 111         m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
 112         m_next3 = -1;
 113     }
 114
 115     m_code += 3;
 116 }
 117
 118 ALWAYS_INLINE void Lexer::shift4()
 119 {
 120     if (LIKELY(m_code + 3 < m_codeEnd)) {
 121         m_current = m_code[0];
 122         m_next1 = m_code[1];
 123         m_next2 = m_code[2];
 124         m_next3 = m_code[3];
 125     } else {
 126         m_current = m_code < m_codeEnd ? m_code[0] : -1;
 127         m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
 128         m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
 129         m_next3 = -1;
 130     }
 131
 132     m_code += 4;
 133 }
 134
 135 void Lexer::setCode(const SourceCode& source, ParserArena& arena)
 136 {
 137     m_arena = &arena.identifierArena();
 138
 139     m_lineNumber = source.firstLine();
 140     m_delimited = false;
 141     m_lastToken = -1;
 142
 143     const UChar* data = source.provider()->data();
 144
 145     m_source = &source;
 146     m_codeStart = data;
 147     m_code = data + source.startOffset();
 148     m_codeEnd = data + source.endOffset();
 149     m_error = false;
 150     m_atLineStart = true;
 151
 152     // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
 153     // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
 154     if (source.provider()->hasBOMs()) {
 155         for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
 156             if (UNLIKELY(*p == byteOrderMark)) {
 157                 copyCodeWithoutBOMs();
 158                 break;
 159             }
 160         }
 161     }
 162
 163     // Read the first characters into the 4-character buffer.
 164     shift4();
 165     ASSERT(currentOffset() == source.startOffset());
 166 }
 167
 168 void Lexer::copyCodeWithoutBOMs()
 169 {
 170     // Note: In this case, the character offset data for debugging will be incorrect.
 171     // If it's important to correctly debug code with extraneous BOMs, then the caller
 172     // should strip the BOMs when creating the SourceProvider object and do its own
 173     // mapping of offsets within the stripped text to original text offset.
 174
 175     m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
 176     for (const UChar* p = m_code; p < m_codeEnd; ++p) {
 177         UChar c = *p;
 178         if (c != byteOrderMark)
 179             m_codeWithoutBOMs.append(c);
 180     }
 181     ptrdiff_t startDelta = m_codeStart - m_code;
 182     m_code = m_codeWithoutBOMs.data();
 183     m_codeStart = m_code + startDelta;
 184     m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
 185 }
 186
 187 void Lexer::shiftLineTerminator()
 188 {
 189     ASSERT(isLineTerminator(m_current));
 190
 191     // Allow both CRLF and LFCR.
 192     if (m_current + m_next1 == '\n' + '\r')
 193         shift2();
 194     else
 195         shift1();
 196
 197     ++m_lineNumber;
 198 }
 199
 200 ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
 201 {
 202     return &m_arena->makeIdentifier(m_globalData, characters, length);
 203 }
 204
 205 inline bool Lexer::lastTokenWasRestrKeyword() const
 206 {
 207     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
 208 }
 209
 210 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
 211 {
 212     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
 213 }
 214
 215 static inline bool isIdentStart(int c)
 216 {
 217     return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
 218 }
 219
 220 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
 221 {
 222     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
 223         | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
 224 }
 225
 226 static inline bool isIdentPart(int c)
 227 {
 228     return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
 229 }
 230
 231 static inline int singleEscape(int c)
 232 {
 233     switch (c) {
 234         case 'b':
 235             return 0x08;
 236         case 't':
 237             return 0x09;
 238         case 'n':
 239             return 0x0A;
 240         case 'v':
 241             return 0x0B;
 242         case 'f':
 243             return 0x0C;
 244         case 'r':
 245             return 0x0D;
 246         default:
 247             return c;
 248     }
 249 }
 250
 251 inline void Lexer::record8(int c)
 252 {
 253     ASSERT(c >= 0);
 254     ASSERT(c <= 0xFF);
 255     m_buffer8.append(static_cast<char>(c));
 256 }
 257
// Appends one UTF-16 code unit to the 16-bit scratch buffer.
inline void Lexer::record16(UChar c)
{
    m_buffer16.append(c);
}
 262
 263 inline void Lexer::record16(int c)
 264 {
 265     ASSERT(c >= 0);
 266     ASSERT(c <= USHRT_MAX);
 267     record16(UChar(static_cast<unsigned short>(c)));
 268 }
 269
// Scans the next token from the source installed by setCode().
//
// p1 is cast to YYSTYPE* and receives the token's semantic value (an
// identifier for IDENT/STRING, a double for NUMBER, a source offset for the
// brace tokens). p2 is cast to YYLTYPE* and receives the token's line number
// and its start/end offsets.
//
// Returns the token code, 0 when the end of the input is reached, or -1 after
// setting m_error when the input cannot be tokenized. A ';' token is
// synthesized at the end of input (unless the previous token already ended a
// statement, a line terminator was just crossed, or the lexer is reparsing)
// and when a line break follows continue/break/return/throw.
//
// The function is a goto-driven state machine; each label below is one state.
int Lexer::lex(void* p1, void* p2)
{
    ASSERT(!m_error);
    ASSERT(m_buffer8.isEmpty());
    ASSERT(m_buffer16.isEmpty());

    YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
    YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
    int token = 0;
    // Becomes true once a line terminator has been crossed during this call.
    m_terminator = false;

start:
    // Skip whitespace. Line terminators are not skipped here; they are handled
    // in the default case of the switch below because they can trigger
    // semicolon insertion.
    while (isWhiteSpace(m_current))
        shift1();

    int startOffset = currentOffset();

    if (m_current == -1) {
        if (!m_terminator && !m_delimited && !m_isReparsing) {
            // automatic semicolon insertion if program incomplete
            token = ';';
            goto doneSemicolon;
        }
        return 0;
    }

    m_delimited = false;
    // Dispatch on the current character, using up to three characters of
    // lookahead to pick the longest matching punctuator.
    switch (m_current) {
        case '>':
            if (m_next1 == '>' && m_next2 == '>') {
                if (m_next3 == '=') {
                    shift4();
                    token = URSHIFTEQUAL;
                    break;
                }
                shift3();
                token = URSHIFT;
                break;
            }
            if (m_next1 == '>') {
                if (m_next2 == '=') {
                    shift3();
                    token = RSHIFTEQUAL;
                    break;
                }
                shift2();
                token = RSHIFT;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = GE;
                break;
            }
            shift1();
            token = '>';
            break;
        case '=':
            if (m_next1 == '=') {
                if (m_next2 == '=') {
                    shift3();
                    token = STREQ;
                    break;
                }
                shift2();
                token = EQEQ;
                break;
            }
            shift1();
            token = '=';
            break;
        case '!':
            if (m_next1 == '=') {
                if (m_next2 == '=') {
                    shift3();
                    token = STRNEQ;
                    break;
                }
                shift2();
                token = NE;
                break;
            }
            shift1();
            token = '!';
            break;
        case '<':
            if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
                // <!-- marks the beginning of a line comment (for www usage)
                shift4();
                goto inSingleLineComment;
            }
            if (m_next1 == '<') {
                if (m_next2 == '=') {
                    shift3();
                    token = LSHIFTEQUAL;
                    break;
                }
                shift2();
                token = LSHIFT;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = LE;
                break;
            }
            shift1();
            token = '<';
            break;
        case '+':
            if (m_next1 == '+') {
                shift2();
                // A distinct token is returned when a line terminator
                // precedes the operator.
                if (m_terminator) {
                    token = AUTOPLUSPLUS;
                    break;
                }
                token = PLUSPLUS;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = PLUSEQUAL;
                break;
            }
            shift1();
            token = '+';
            break;
        case '-':
            if (m_next1 == '-') {
                // "-->" is treated as the start of a line comment, but only
                // when no token has been seen yet on the current line.
                if (m_atLineStart && m_next2 == '>') {
                    shift3();
                    goto inSingleLineComment;
                }
                shift2();
                // A distinct token is returned when a line terminator
                // precedes the operator.
                if (m_terminator) {
                    token = AUTOMINUSMINUS;
                    break;
                }
                token = MINUSMINUS;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = MINUSEQUAL;
                break;
            }
            shift1();
            token = '-';
            break;
        case '*':
            if (m_next1 == '=') {
                shift2();
                token = MULTEQUAL;
                break;
            }
            shift1();
            token = '*';
            break;
        case '/':
            if (m_next1 == '/') {
                shift2();
                goto inSingleLineComment;
            }
            // The "/*" itself is consumed at the inMultiLineComment label.
            if (m_next1 == '*')
                goto inMultiLineComment;
            if (m_next1 == '=') {
                shift2();
                token = DIVEQUAL;
                break;
            }
            shift1();
            token = '/';
            break;
        case '&':
            if (m_next1 == '&') {
                shift2();
                token = AND;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = ANDEQUAL;
                break;
            }
            shift1();
            token = '&';
            break;
        case '^':
            if (m_next1 == '=') {
                shift2();
                token = XOREQUAL;
                break;
            }
            shift1();
            token = '^';
            break;
        case '%':
            if (m_next1 == '=') {
                shift2();
                token = MODEQUAL;
                break;
            }
            shift1();
            token = '%';
            break;
        case '|':
            if (m_next1 == '=') {
                shift2();
                token = OREQUAL;
                break;
            }
            if (m_next1 == '|') {
                shift2();
                token = OR;
                break;
            }
            shift1();
            token = '|';
            break;
        case '.':
            // A '.' directly followed by a digit starts a numeric literal.
            if (isASCIIDigit(m_next1)) {
                record8('.');
                shift1();
                goto inNumberAfterDecimalPoint;
            }
            token = '.';
            shift1();
            break;
        case ',':
        case '~':
        case '?':
        case ':':
        case '(':
        case ')':
        case '[':
        case ']':
            // Single-character punctuators are their own token codes.
            token = m_current;
            shift1();
            break;
        case ';':
            shift1();
            m_delimited = true;
            token = ';';
            break;
        case '{':
            // Brace tokens carry their source offset as the semantic value.
            lvalp->intValue = currentOffset();
            shift1();
            token = OPENBRACE;
            break;
        case '}':
            lvalp->intValue = currentOffset();
            shift1();
            m_delimited = true;
            token = CLOSEBRACE;
            break;
        case '\\':
            goto startIdentifierWithBackslash;
        case '0':
            goto startNumberWithZeroDigit;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            goto startNumber;
        case '"':
        case '\'':
            goto startString;
        default:
            if (isIdentStart(m_current))
                goto startIdentifierOrKeyword;
            if (isLineTerminator(m_current)) {
                shiftLineTerminator();
                m_atLineStart = true;
                m_terminator = true;
                // A line break directly after continue/break/return/throw
                // ends the statement.
                if (lastTokenWasRestrKeyword()) {
                    token = ';';
                    goto doneSemicolon;
                }
                goto start;
            }
            goto returnError;
    }

    // Every punctuator case that left the switch with `break` ends up here.
    m_atLineStart = false;
    goto returnToken;

startString: {
    int stringQuoteCharacter = m_current;
    shift1();

    // Fast path: while the string contains nothing that needs special
    // handling, only advance; the identifier is then built directly from the
    // source characters without copying them into m_buffer16.
    const UChar* stringStart = currentCharacter();
    while (m_current != stringQuoteCharacter) {
        // Fast check for characters that require special handling.
        // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
        // as possible, and lets through all common ASCII characters.
        if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
            // Switch to the slow path, carrying over what was scanned so far.
            m_buffer16.append(stringStart, currentCharacter() - stringStart);
            goto inString;
        }
        shift1();
    }
    lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
    // Consume the closing quote.
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    token = STRING;
    goto returnToken;

inString:
    // Slow path: characters are accumulated in m_buffer16.
    while (m_current != stringQuoteCharacter) {
        if (m_current == '\\')
            goto inStringEscapeSequence;
        // An unescaped line terminator or the end of input inside a string
        // literal is an error.
        if (UNLIKELY(isLineTerminator(m_current)))
            goto returnError;
        if (UNLIKELY(m_current == -1))
            goto returnError;
        record16(m_current);
        shift1();
    }
    goto doneString;

inStringEscapeSequence:
    // Consume the backslash.
    shift1();
    if (m_current == 'x') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
            record16(convertHex(m_current, m_next1));
            shift2();
            goto inString;
        }
        // "\x" without two hex digits is recorded as a plain 'x'.
        record16('x');
        if (m_current == stringQuoteCharacter)
            goto doneString;
        goto inString;
    }
    if (m_current == 'u') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
            record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
            shift4();
            goto inString;
        }
        // "\u" immediately followed by the closing quote is recorded as a
        // plain 'u'; any other malformed "\u" sequence is an error.
        if (m_current == stringQuoteCharacter) {
            record16('u');
            goto doneString;
        }
        goto returnError;
    }
    if (isASCIIOctalDigit(m_current)) {
        // Octal escape: three digits when the first is 0-3, otherwise two
        // digits if available, otherwise one.
        if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
            record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
            shift3();
            goto inString;
        }
        if (isASCIIOctalDigit(m_next1)) {
            record16((m_current - '0') * 8 + m_next1 - '0');
            shift2();
            goto inString;
        }
        record16(m_current - '0');
        shift1();
        goto inString;
    }
    // A backslash followed by a line terminator: the terminator is consumed
    // and nothing is recorded.
    if (isLineTerminator(m_current)) {
        shiftLineTerminator();
        goto inString;
    }
    if (m_current == -1)
        goto returnError;
    record16(singleEscape(m_current));
    shift1();
    goto inString;
}

startIdentifierWithBackslash:
    // An identifier that begins with a \uXXXX escape. `token` temporarily
    // holds the decoded character until it is recorded at
    // inIdentifierAfterCharacterCheck.
    shift1();
    if (UNLIKELY(m_current != 'u'))
        goto returnError;
    shift1();
    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
        goto returnError;
    token = convertUnicode(m_current, m_next1, m_next2, m_next3);
    if (UNLIKELY(!isIdentStart(token)))
        goto returnError;
    goto inIdentifierAfterCharacterCheck;

startIdentifierOrKeyword: {
    // Fast path: an identifier without escapes is built directly from the
    // source characters and then looked up in the keyword table.
    const UChar* identifierStart = currentCharacter();
    shift1();
    while (isIdentPart(m_current))
        shift1();
    if (LIKELY(m_current != '\\')) {
        lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
        goto doneIdentifierOrKeyword;
    }
    // An escape follows: continue on the slow path with what was scanned so far.
    m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
}

    // Slow path: each iteration decodes one \uXXXX escape and then records
    // the run of plain identifier characters that follows it.
    do {
        shift1();
        if (UNLIKELY(m_current != 'u'))
            goto returnError;
        shift1();
        if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
            goto returnError;
        token = convertUnicode(m_current, m_next1, m_next2, m_next3);
        if (UNLIKELY(!isIdentPart(token)))
            goto returnError;
inIdentifierAfterCharacterCheck:
        record16(token);
        // Consume the four hex digits of the escape.
        shift4();

        while (isIdentPart(m_current)) {
            record16(m_current);
            shift1();
        }
    } while (UNLIKELY(m_current == '\\'));
    goto doneIdentifier;

inSingleLineComment:
    // Skip to the end of the line. Reaching the end of input inside the
    // comment returns 0 directly.
    while (!isLineTerminator(m_current)) {
        if (UNLIKELY(m_current == -1))
            return 0;
        shift1();
    }
    shiftLineTerminator();
    m_atLineStart = true;
    m_terminator = true;
    if (lastTokenWasRestrKeyword())
        goto doneSemicolon;
    goto start;

inMultiLineComment:
    // Consume the opening "/*".
    shift2();
    while (m_current != '*' || m_next1 != '/') {
        if (isLineTerminator(m_current))
            shiftLineTerminator();
        else {
            shift1();
            // An unterminated comment is an error.
            if (UNLIKELY(m_current == -1))
                goto returnError;
        }
    }
    // Consume the closing "*/".
    shift2();
    m_atLineStart = false;
    goto start;

startNumberWithZeroDigit:
    // Consume the leading '0'; it is only recorded on the paths that need it.
    shift1();
    // (c | 0x20) maps an ASCII uppercase letter to lowercase, so this accepts
    // both "0x" and "0X".
    if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
        shift1();
        goto inHex;
    }
    if (m_current == '.') {
        record8('0');
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('0');
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    if (isASCIIOctalDigit(m_current))
        goto inOctal;
    if (isASCIIDigit(m_current))
        goto startNumber;
    // A lone zero.
    lvalp->doubleValue = 0;
    goto doneNumeric;

inNumberAfterDecimalPoint:
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    goto doneNumber;

inExponentIndicator:
    // Optional sign followed by at least one digit.
    if (m_current == '+' || m_current == '-') {
        record8(m_current);
        shift1();
    }
    if (!isASCIIDigit(m_current))
        goto returnError;
    do {
        record8(m_current);
        shift1();
    } while (isASCIIDigit(m_current));
    goto doneNumber;

inOctal: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIOctalDigit(m_current));
    // A following decimal digit (8 or 9) turns the literal into a decimal
    // number; the digits recorded so far stay in m_buffer8.
    if (isASCIIDigit(m_current))
        goto startNumber;

    double dval = 0;

    const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 8;
        dval += *p - '0';
    }
    // At or above this bound the value is recomputed with parseIntOverflow.
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);

    m_buffer8.resize(0);

    lvalp->doubleValue = dval;
    goto doneNumeric;
}

inHex: {
    // The "0x" prefix has already been consumed; only the digits are recorded.
    do {
        record8(m_current);
        shift1();
    } while (isASCIIHexDigit(m_current));

    double dval = 0;

    const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 16;
        dval += toASCIIHexValue(*p);
    }
    // At or above this bound the value is recomputed with parseIntOverflow.
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);

    m_buffer8.resize(0);

    lvalp->doubleValue = dval;
    goto doneNumeric;
}

startNumber:
    record8(m_current);
    shift1();
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if (m_current == '.') {
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }

    // Fall through into doneNumber.

doneNumber:
    // Null-terminate string for strtod.
    m_buffer8.append('\0');
    lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
    m_buffer8.resize(0);

    // Fall through into doneNumeric.

doneNumeric:
    // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
    if (UNLIKELY(isIdentStart(m_current)))
        goto returnError;

    m_atLineStart = false;
    m_delimited = false;
    token = NUMBER;
    goto returnToken;

doneSemicolon:
    // Shared exit for explicit-style ';' results produced by semicolon insertion.
    token = ';';
    m_delimited = true;
    goto returnToken;

doneIdentifier:
    // Identifiers that contained an escape are always returned as IDENT; no
    // keyword lookup is performed on this path.
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = IDENT;
    goto returnToken;

doneIdentifierOrKeyword: {
    m_atLineStart = false;
    m_delimited = false;
    m_buffer16.resize(0);
    // A hit in the keyword table yields that keyword's token code.
    const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
    token = entry ? entry->lexerValue() : IDENT;
    goto returnToken;
}

doneString:
    // Atomize constant strings in case they're later used in property lookup.
    // The shift below consumes the closing quote.
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = STRING;

    // Fall through into returnToken.

returnToken: {
    // Record the token's location. Both line fields receive the line number
    // at the end of the token; the column fields hold source offsets.
    int lineNumber = m_lineNumber;
    llocp->first_line = lineNumber;
    llocp->last_line = lineNumber;
    llocp->first_column = startOffset;
    llocp->last_column = currentOffset();

    m_lastToken = token;
    return token;
}

returnError:
    m_error = true;
    return -1;
}
 904
 905 bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
 906 {
 907     ASSERT(m_buffer16.isEmpty());
 908
 909     bool lastWasEscape = false;
 910     bool inBrackets = false;
 911
 912     if (patternPrefix) {
 913         ASSERT(!isLineTerminator(patternPrefix));
 914         ASSERT(patternPrefix != '/');
 915         ASSERT(patternPrefix != '[');
 916         record16(patternPrefix);
 917     }
 918
 919     while (true) {
 920         int current = m_current;
 921
 922         if (isLineTerminator(current) || current == -1) {
 923             m_buffer16.resize(0);
 924             return false;
 925         }
 926
 927         shift1();
 928
 929         if (current == '/' && !lastWasEscape && !inBrackets)
 930             break;
 931
 932         record16(current);
 933
 934         if (lastWasEscape) {
 935             lastWasEscape = false;
 936             continue;
 937         }
 938
 939         switch (current) {
 940         case '[':
 941             inBrackets = true;
 942             break;
 943         case ']':
 944             inBrackets = false;
 945             break;
 946         case '\\':
 947             lastWasEscape = true;
 948             break;
 949         }
 950     }
 951
 952     pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
 953     m_buffer16.resize(0);
 954
 955     while (isIdentPart(m_current)) {
 956         record16(m_current);
 957         shift1();
 958     }
 959
 960     flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
 961     m_buffer16.resize(0);
 962
 963     return true;
 964 }
 965
 966 bool Lexer::skipRegExp()
 967 {
 968     bool lastWasEscape = false;
 969     bool inBrackets = false;
 970
 971     while (true) {
 972         int current = m_current;
 973
 974         if (isLineTerminator(current) || current == -1)
 975             return false;
 976
 977         shift1();
 978
 979         if (current == '/' && !lastWasEscape && !inBrackets)
 980             break;
 981
 982         if (lastWasEscape) {
 983             lastWasEscape = false;
 984             continue;
 985         }
 986
 987         switch (current) {
 988         case '[':
 989             inBrackets = true;
 990             break;
 991         case ']':
 992             inBrackets = false;
 993             break;
 994         case '\\':
 995             lastWasEscape = true;
 996             break;
 997         }
 998     }
 999
1000     while (isIdentPart(m_current))
1001         shift1();
1002
1003     return true;
1004 }
1005
1006 void Lexer::clear()
1007 {
1008     m_arena = 0;
1009     m_codeWithoutBOMs.clear();
1010
1011     Vector<char> newBuffer8;
1012     newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
1013     m_buffer8.swap(newBuffer8);
1014
1015     Vector<UChar> newBuffer16;
1016     newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
1017     m_buffer16.swap(newBuffer16);
1018
1019     m_isReparsing = false;
1020 }
1021
1022 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1023 {
1024     if (m_codeWithoutBOMs.isEmpty())
1025         return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1026
1027     const UChar* data = m_source->provider()->data();
1028
1029     ASSERT(openBrace < closeBrace);
1030
1031     int numBOMsBeforeOpenBrace = 0;
1032     int numBOMsBetweenBraces = 0;
1033
1034     int i;
1035     for (i = m_source->startOffset(); i < openBrace; ++i)
1036         numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
1037     for (; i < closeBrace; ++i)
1038         numBOMsBetweenBraces += data[i] == byteOrderMark;
1039
1040     return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
1041         closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
1042 }
1043
1044 } // namespace JSC