parser/Lexer.cpp

   1 /*
   2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
   3  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
   4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
   5  *
   6  *  This library is free software; you can redistribute it and/or
   7  *  modify it under the terms of the GNU Library General Public
   8  *  License as published by the Free Software Foundation; either
   9  *  version 2 of the License, or (at your option) any later version.
  10  *
  11  *  This library is distributed in the hope that it will be useful,
  12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  *  Library General Public License for more details.
  15  *
  16  *  You should have received a copy of the GNU Library General Public License
  17  *  along with this library; see the file COPYING.LIB.  If not, write to
  18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19  *  Boston, MA 02110-1301, USA.
  20  *
  21  */
  22
  23 #include "config.h"
  24 #include "Lexer.h"
  25
  26 #include "JSFunction.h"
  27 #include "JSGlobalObjectFunctions.h"
  28 #include "NodeInfo.h"
  29 #include "Nodes.h"
  30 #include "dtoa.h"
  31 #include <ctype.h>
  32 #include <limits.h>
  33 #include <string.h>
  34 #include <wtf/ASCIICType.h>
  35 #include <wtf/Assertions.h>
  36 #include <wtf/unicode/Unicode.h>
  37
  38 using namespace WTF;
  39 using namespace Unicode;
  40
  41 // we can't specify the namespace in yacc's C output, so do it here
  42 using namespace JSC;
  43
  44 #ifndef KDE_USE_FINAL
  45 #include "Grammar.h"
  46 #endif
  47
  48 #include "Lookup.h"
  49 #include "Lexer.lut.h"
  50
  51 // a bridge for yacc from the C world to C++
  52 int jscyylex(void* lvalp, void* llocp, void* globalData)
  53 {
  54     return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
  55 }
  56
  57 namespace JSC {
  58
  59 static bool isDecimalDigit(int);
  60
  61 Lexer::Lexer(JSGlobalData* globalData)
  62     : yylineno(1)
  63     , m_restrKeyword(false)
  64     , m_eatNextIdentifier(false)
  65     , m_stackToken(-1)
  66     , m_lastToken(-1)
  67     , m_position(0)
  68     , m_code(0)
  69     , m_length(0)
  70     , m_isReparsing(false)
  71     , m_atLineStart(true)
  72     , m_current(0)
  73     , m_next1(0)
  74     , m_next2(0)
  75     , m_next3(0)
  76     , m_currentOffset(0)
  77     , m_nextOffset1(0)
  78     , m_nextOffset2(0)
  79     , m_nextOffset3(0)
  80     , m_globalData(globalData)
  81     , m_mainTable(JSC::mainTable)
  82 {
  83     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
  84     m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
  85 }
  86
// Tears down the lexer's keyword table (m_mainTable is this lexer's own copy of
// JSC::mainTable, made in the constructor).
Lexer::~Lexer()
{
    m_mainTable.deleteTable();
}
  91
  92 void Lexer::setCode(const SourceCode& source)
  93 {
  94     yylineno = source.firstLine();
  95     m_restrKeyword = false;
  96     m_delimited = false;
  97     m_eatNextIdentifier = false;
  98     m_stackToken = -1;
  99     m_lastToken = -1;
 100
 101     m_position = source.startOffset();
 102     m_source = &source;
 103     m_code = source.provider()->data();
 104     m_length = source.endOffset();
 105     m_skipLF = false;
 106     m_skipCR = false;
 107     m_error = false;
 108     m_atLineStart = true;
 109
 110     // read first characters
 111     shift(4);
 112 }
 113
 114 void Lexer::shift(unsigned p)
 115 {
 116     // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
 117     // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
 118
 119     while (p--) {
 120         m_current = m_next1;
 121         m_next1 = m_next2;
 122         m_next2 = m_next3;
 123         m_currentOffset = m_nextOffset1;
 124         m_nextOffset1 = m_nextOffset2;
 125         m_nextOffset2 = m_nextOffset3;
 126         do {
 127             if (m_position >= m_length) {
 128                 m_nextOffset3 = m_position;
 129                 m_position++;
 130                 m_next3 = -1;
 131                 break;
 132             }
 133             m_nextOffset3 = m_position;
 134             m_next3 = m_code[m_position++];
 135         } while (m_next3 == 0xFEFF);
 136     }
 137 }
 138
 139 // called on each new line
 140 void Lexer::nextLine()
 141 {
 142     yylineno++;
 143     m_atLineStart = true;
 144 }
 145
 146 void Lexer::setDone(State s)
 147 {
 148     m_state = s;
 149     m_done = true;
 150 }
 151
 152 int Lexer::lex(void* p1, void* p2)
 153 {
 154     YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
 155     YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
 156     int token = 0;
 157     m_state = Start;
 158     unsigned short stringType = 0; // either single or double quotes
 159     m_buffer8.clear();
 160     m_buffer16.clear();
 161     m_done = false;
 162     m_terminator = false;
 163     m_skipLF = false;
 164     m_skipCR = false;
 165
 166     // did we push a token on the stack previously ?
 167     // (after an automatic semicolon insertion)
 168     if (m_stackToken >= 0) {
 169         setDone(Other);
 170         token = m_stackToken;
 171         m_stackToken = 0;
 172     }
 173     int startOffset = m_currentOffset;
 174     while (!m_done) {
 175         if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
 176             m_skipLF = false;
 177         if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
 178             m_skipCR = false;
 179         if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one
 180             m_skipLF = false;
 181             m_skipCR = false;
 182             shift(1);
 183         }
 184         switch (m_state) {
 185             case Start:
 186                 startOffset = m_currentOffset;
 187                 if (isWhiteSpace()) {
 188                     // do nothing
 189                 } else if (m_current == '/' && m_next1 == '/') {
 190                     shift(1);
 191                     m_state = InSingleLineComment;
 192                 } else if (m_current == '/' && m_next1 == '*') {
 193                     shift(1);
 194                     m_state = InMultiLineComment;
 195                 } else if (m_current == -1) {
 196                     if (!m_terminator && !m_delimited && !m_isReparsing) {
 197                         // automatic semicolon insertion if program incomplete
 198                         token = ';';
 199                         m_stackToken = 0;
 200                         setDone(Other);
 201                     } else
 202                         setDone(Eof);
 203                 } else if (isLineTerminator()) {
 204                     nextLine();
 205                     m_terminator = true;
 206                     if (m_restrKeyword) {
 207                         token = ';';
 208                         setDone(Other);
 209                     }
 210                 } else if (m_current == '"' || m_current == '\'') {
 211                     m_state = InString;
 212                     stringType = static_cast<unsigned short>(m_current);
 213                 } else if (isIdentStart(m_current)) {
 214                     record16(m_current);
 215                     m_state = InIdentifierOrKeyword;
 216                 } else if (m_current == '\\')
 217                     m_state = InIdentifierStartUnicodeEscapeStart;
 218                 else if (m_current == '0') {
 219                     record8(m_current);
 220                     m_state = InNum0;
 221                 } else if (isDecimalDigit(m_current)) {
 222                     record8(m_current);
 223                     m_state = InNum;
 224                 } else if (m_current == '.' && isDecimalDigit(m_next1)) {
 225                     record8(m_current);
 226                     m_state = InDecimal;
 227                     // <!-- marks the beginning of a line comment (for www usage)
 228                 } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
 229                     shift(3);
 230                     m_state = InSingleLineComment;
 231                     // same for -->
 232                 } else if (m_atLineStart && m_current == '-' && m_next1 == '-' &&  m_next2 == '>') {
 233                     shift(2);
 234                     m_state = InSingleLineComment;
 235                 } else {
 236                     token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
 237                     if (token != -1)
 238                         setDone(Other);
 239                     else
 240                         setDone(Bad);
 241                 }
 242                 break;
 243             case InString:
 244                 if (m_current == stringType) {
 245                     shift(1);
 246                     setDone(String);
 247                 } else if (isLineTerminator() || m_current == -1)
 248                     setDone(Bad);
 249                 else if (m_current == '\\')
 250                     m_state = InEscapeSequence;
 251                 else
 252                     record16(m_current);
 253                 break;
 254             // Escape Sequences inside of strings
 255             case InEscapeSequence:
 256                 if (isOctalDigit(m_current)) {
 257                     if (m_current >= '0' && m_current <= '3' &&
 258                         isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
 259                         record16(convertOctal(m_current, m_next1, m_next2));
 260                         shift(2);
 261                         m_state = InString;
 262                     } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
 263                         record16(convertOctal('0', m_current, m_next1));
 264                         shift(1);
 265                         m_state = InString;
 266                     } else if (isOctalDigit(m_current)) {
 267                         record16(convertOctal('0', '0', m_current));
 268                         m_state = InString;
 269                     } else
 270                         setDone(Bad);
 271                 } else if (m_current == 'x')
 272                     m_state = InHexEscape;
 273                 else if (m_current == 'u')
 274                     m_state = InUnicodeEscape;
 275                 else if (isLineTerminator()) {
 276                     nextLine();
 277                     m_state = InString;
 278                 } else {
 279                     record16(singleEscape(static_cast<unsigned short>(m_current)));
 280                     m_state = InString;
 281                 }
 282                 break;
 283             case InHexEscape:
 284                 if (isHexDigit(m_current) && isHexDigit(m_next1)) {
 285                     m_state = InString;
 286                     record16(convertHex(m_current, m_next1));
 287                     shift(1);
 288                 } else if (m_current == stringType) {
 289                     record16('x');
 290                     shift(1);
 291                     setDone(String);
 292                 } else {
 293                     record16('x');
 294                     record16(m_current);
 295                     m_state = InString;
 296                 }
 297                 break;
 298             case InUnicodeEscape:
 299                 if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
 300                     record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
 301                     shift(3);
 302                     m_state = InString;
 303                 } else if (m_current == stringType) {
 304                     record16('u');
 305                     shift(1);
 306                     setDone(String);
 307                 } else
 308                     setDone(Bad);
 309                 break;
 310             case InSingleLineComment:
 311                 if (isLineTerminator()) {
 312                     nextLine();
 313                     m_terminator = true;
 314                     if (m_restrKeyword) {
 315                         token = ';';
 316                         setDone(Other);
 317                     } else
 318                         m_state = Start;
 319                 } else if (m_current == -1)
 320                     setDone(Eof);
 321                 break;
 322             case InMultiLineComment:
 323                 if (m_current == -1)
 324                     setDone(Bad);
 325                 else if (isLineTerminator())
 326                     nextLine();
 327                 else if (m_current == '*' && m_next1 == '/') {
 328                     m_state = Start;
 329                     shift(1);
 330                 }
 331                 break;
 332             case InIdentifierOrKeyword:
 333             case InIdentifier:
 334                 if (isIdentPart(m_current))
 335                     record16(m_current);
 336                 else if (m_current == '\\')
 337                     m_state = InIdentifierPartUnicodeEscapeStart;
 338                 else
 339                     setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
 340                 break;
 341             case InNum0:
 342                 if (m_current == 'x' || m_current == 'X') {
 343                     record8(m_current);
 344                     m_state = InHex;
 345                 } else if (m_current == '.') {
 346                     record8(m_current);
 347                     m_state = InDecimal;
 348                 } else if (m_current == 'e' || m_current == 'E') {
 349                     record8(m_current);
 350                     m_state = InExponentIndicator;
 351                 } else if (isOctalDigit(m_current)) {
 352                     record8(m_current);
 353                     m_state = InOctal;
 354                 } else if (isDecimalDigit(m_current)) {
 355                     record8(m_current);
 356                     m_state = InDecimal;
 357                 } else
 358                     setDone(Number);
 359                 break;
 360             case InHex:
 361                 if (isHexDigit(m_current))
 362                     record8(m_current);
 363                 else
 364                     setDone(Hex);
 365                 break;
 366             case InOctal:
 367                 if (isOctalDigit(m_current))
 368                     record8(m_current);
 369                 else if (isDecimalDigit(m_current)) {
 370                     record8(m_current);
 371                     m_state = InDecimal;
 372                 } else
 373                     setDone(Octal);
 374                 break;
 375             case InNum:
 376                 if (isDecimalDigit(m_current))
 377                     record8(m_current);
 378                 else if (m_current == '.') {
 379                     record8(m_current);
 380                     m_state = InDecimal;
 381                 } else if (m_current == 'e' || m_current == 'E') {
 382                     record8(m_current);
 383                     m_state = InExponentIndicator;
 384                 } else
 385                     setDone(Number);
 386                 break;
 387             case InDecimal:
 388                 if (isDecimalDigit(m_current))
 389                     record8(m_current);
 390                 else if (m_current == 'e' || m_current == 'E') {
 391                     record8(m_current);
 392                     m_state = InExponentIndicator;
 393                 } else
 394                     setDone(Number);
 395                 break;
 396             case InExponentIndicator:
 397                 if (m_current == '+' || m_current == '-')
 398                     record8(m_current);
 399                 else if (isDecimalDigit(m_current)) {
 400                     record8(m_current);
 401                     m_state = InExponent;
 402                 } else
 403                     setDone(Bad);
 404                 break;
 405             case InExponent:
 406                 if (isDecimalDigit(m_current))
 407                     record8(m_current);
 408                 else
 409                     setDone(Number);
 410                 break;
 411             case InIdentifierStartUnicodeEscapeStart:
 412                 if (m_current == 'u')
 413                     m_state = InIdentifierStartUnicodeEscape;
 414                 else
 415                     setDone(Bad);
 416                 break;
 417             case InIdentifierPartUnicodeEscapeStart:
 418                 if (m_current == 'u')
 419                     m_state = InIdentifierPartUnicodeEscape;
 420                 else
 421                     setDone(Bad);
 422                 break;
 423             case InIdentifierStartUnicodeEscape:
 424                 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
 425                     setDone(Bad);
 426                     break;
 427                 }
 428                 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
 429                 shift(3);
 430                 if (!isIdentStart(token)) {
 431                     setDone(Bad);
 432                     break;
 433                 }
 434                 record16(token);
 435                 m_state = InIdentifier;
 436                 break;
 437             case InIdentifierPartUnicodeEscape:
 438                 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
 439                     setDone(Bad);
 440                     break;
 441                 }
 442                 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
 443                 shift(3);
 444                 if (!isIdentPart(token)) {
 445                     setDone(Bad);
 446                     break;
 447                 }
 448                 record16(token);
 449                 m_state = InIdentifier;
 450                 break;
 451             default:
 452                 ASSERT(!"Unhandled state in switch statement");
 453         }
 454
 455         // move on to the next character
 456         if (!m_done)
 457             shift(1);
 458         if (m_state != Start && m_state != InSingleLineComment)
 459             m_atLineStart = false;
 460     }
 461
 462     // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
 463     if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current))
 464         m_state = Bad;
 465
 466     // terminate string
 467     m_buffer8.append('\0');
 468
 469 #ifdef JSC_DEBUG_LEX
 470     fprintf(stderr, "line: %d ", lineNo());
 471     fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
 472     fprintf(stderr, "%s ", m_buffer8.data());
 473 #endif
 474
 475     double dval = 0;
 476     if (m_state == Number)
 477         dval = WTF::strtod(m_buffer8.data(), 0L);
 478     else if (m_state == Hex) { // scan hex numbers
 479         const char* p = m_buffer8.data() + 2;
 480         while (char c = *p++) {
 481             dval *= 16;
 482             dval += convertHex(c);
 483         }
 484
 485         if (dval >= mantissaOverflowLowerBound)
 486             dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
 487
 488         m_state = Number;
 489     } else if (m_state == Octal) {   // scan octal number
 490         const char* p = m_buffer8.data() + 1;
 491         while (char c = *p++) {
 492             dval *= 8;
 493             dval += c - '0';
 494         }
 495
 496         if (dval >= mantissaOverflowLowerBound)
 497             dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
 498
 499         m_state = Number;
 500     }
 501
 502 #ifdef JSC_DEBUG_LEX
 503     switch (m_state) {
 504         case Eof:
 505             printf("(EOF)\n");
 506             break;
 507         case Other:
 508             printf("(Other)\n");
 509             break;
 510         case Identifier:
 511             printf("(Identifier)/(Keyword)\n");
 512             break;
 513         case String:
 514             printf("(String)\n");
 515             break;
 516         case Number:
 517             printf("(Number)\n");
 518             break;
 519         default:
 520             printf("(unknown)");
 521     }
 522 #endif
 523
 524     if (m_state != Identifier)
 525         m_eatNextIdentifier = false;
 526
 527     m_restrKeyword = false;
 528     m_delimited = false;
 529     llocp->first_line = yylineno;
 530     llocp->last_line = yylineno;
 531     llocp->first_column = startOffset;
 532     llocp->last_column = m_currentOffset;
 533     switch (m_state) {
 534         case Eof:
 535             token = 0;
 536             break;
 537         case Other:
 538             if (token == '}' || token == ';')
 539                 m_delimited = true;
 540             break;
 541         case Identifier:
 542             // Apply anonymous-function hack below (eat the identifier).
 543             if (m_eatNextIdentifier) {
 544                 m_eatNextIdentifier = false;
 545                 token = lex(lvalp, llocp);
 546                 break;
 547             }
 548             lvalp->ident = makeIdentifier(m_buffer16);
 549             token = IDENT;
 550             break;
 551         case IdentifierOrKeyword: {
 552             lvalp->ident = makeIdentifier(m_buffer16);
 553             const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
 554             if (!entry) {
 555                 // Lookup for keyword failed, means this is an identifier.
 556                 token = IDENT;
 557                 break;
 558             }
 559             token = entry->lexerValue();
 560             // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
 561             m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
 562             if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
 563                 m_restrKeyword = true;
 564             break;
 565         }
 566         case String:
 567             // Atomize constant strings in case they're later used in property lookup.
 568             lvalp->ident = makeIdentifier(m_buffer16);
 569             token = STRING;
 570             break;
 571         case Number:
 572             lvalp->doubleValue = dval;
 573             token = NUMBER;
 574             break;
 575         case Bad:
 576 #ifdef JSC_DEBUG_LEX
 577             fprintf(stderr, "yylex: ERROR.\n");
 578 #endif
 579             m_error = true;
 580             return -1;
 581         default:
 582             ASSERT(!"unhandled numeration value in switch");
 583             m_error = true;
 584             return -1;
 585     }
 586     m_lastToken = token;
 587     return token;
 588 }
 589
 590 bool Lexer::isWhiteSpace() const
 591 {
 592     return m_current == '\t' || m_current == 0x0b || m_current == 0x0c || isSeparatorSpace(m_current);
 593 }
 594
 595 bool Lexer::isLineTerminator()
 596 {
 597     bool cr = (m_current == '\r');
 598     bool lf = (m_current == '\n');
 599     if (cr)
 600         m_skipLF = true;
 601     else if (lf)
 602         m_skipCR = true;
 603     return cr || lf || m_current == 0x2028 || m_current == 0x2029;
 604 }
 605
 606 bool Lexer::isIdentStart(int c)
 607 {
 608     return isASCIIAlpha(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)));
 609 }
 610
 611 bool Lexer::isIdentPart(int c)
 612 {
 613     return isASCIIAlphanumeric(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
 614                             | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)));
 615 }
 616
 617 static bool isDecimalDigit(int c)
 618 {
 619     return isASCIIDigit(c);
 620 }
 621
 622 bool Lexer::isHexDigit(int c)
 623 {
 624     return isASCIIHexDigit(c);
 625 }
 626
 627 bool Lexer::isOctalDigit(int c)
 628 {
 629     return isASCIIOctalDigit(c);
 630 }
 631
 632 int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
 633 {
 634     if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
 635         shift(4);
 636         return URSHIFTEQUAL;
 637     }
 638     if (c1 == '=' && c2 == '=' && c3 == '=') {
 639         shift(3);
 640         return STREQ;
 641     }
 642     if (c1 == '!' && c2 == '=' && c3 == '=') {
 643         shift(3);
 644         return STRNEQ;
 645     }
 646     if (c1 == '>' && c2 == '>' && c3 == '>') {
 647         shift(3);
 648         return URSHIFT;
 649     }
 650     if (c1 == '<' && c2 == '<' && c3 == '=') {
 651         shift(3);
 652         return LSHIFTEQUAL;
 653     }
 654     if (c1 == '>' && c2 == '>' && c3 == '=') {
 655         shift(3);
 656         return RSHIFTEQUAL;
 657     }
 658     if (c1 == '<' && c2 == '=') {
 659         shift(2);
 660         return LE;
 661     }
 662     if (c1 == '>' && c2 == '=') {
 663         shift(2);
 664         return GE;
 665     }
 666     if (c1 == '!' && c2 == '=') {
 667         shift(2);
 668         return NE;
 669     }
 670     if (c1 == '+' && c2 == '+') {
 671         shift(2);
 672         if (m_terminator)
 673             return AUTOPLUSPLUS;
 674         return PLUSPLUS;
 675     }
 676     if (c1 == '-' && c2 == '-') {
 677         shift(2);
 678         if (m_terminator)
 679             return AUTOMINUSMINUS;
 680         return MINUSMINUS;
 681     }
 682     if (c1 == '=' && c2 == '=') {
 683         shift(2);
 684         return EQEQ;
 685     }
 686     if (c1 == '+' && c2 == '=') {
 687         shift(2);
 688         return PLUSEQUAL;
 689     }
 690     if (c1 == '-' && c2 == '=') {
 691         shift(2);
 692         return MINUSEQUAL;
 693     }
 694     if (c1 == '*' && c2 == '=') {
 695         shift(2);
 696         return MULTEQUAL;
 697     }
 698     if (c1 == '/' && c2 == '=') {
 699         shift(2);
 700         return DIVEQUAL;
 701     }
 702     if (c1 == '&' && c2 == '=') {
 703         shift(2);
 704         return ANDEQUAL;
 705     }
 706     if (c1 == '^' && c2 == '=') {
 707         shift(2);
 708         return XOREQUAL;
 709     }
 710     if (c1 == '%' && c2 == '=') {
 711         shift(2);
 712         return MODEQUAL;
 713     }
 714     if (c1 == '|' && c2 == '=') {
 715         shift(2);
 716         return OREQUAL;
 717     }
 718     if (c1 == '<' && c2 == '<') {
 719         shift(2);
 720         return LSHIFT;
 721     }
 722     if (c1 == '>' && c2 == '>') {
 723         shift(2);
 724         return RSHIFT;
 725     }
 726     if (c1 == '&' && c2 == '&') {
 727         shift(2);
 728         return AND;
 729     }
 730     if (c1 == '|' && c2 == '|') {
 731         shift(2);
 732         return OR;
 733     }
 734
 735     switch (c1) {
 736         case '=':
 737         case '>':
 738         case '<':
 739         case ',':
 740         case '!':
 741         case '~':
 742         case '?':
 743         case ':':
 744         case '.':
 745         case '+':
 746         case '-':
 747         case '*':
 748         case '/':
 749         case '&':
 750         case '|':
 751         case '^':
 752         case '%':
 753         case '(':
 754         case ')':
 755         case '[':
 756         case ']':
 757         case ';':
 758             shift(1);
 759             return static_cast<int>(c1);
 760         case '{':
 761             charPos = m_currentOffset;
 762             shift(1);
 763             return OPENBRACE;
 764         case '}':
 765             charPos = m_currentOffset;
 766             shift(1);
 767             return CLOSEBRACE;
 768         default:
 769             return -1;
 770     }
 771 }
 772
 773 unsigned short Lexer::singleEscape(unsigned short c)
 774 {
 775     switch (c) {
 776         case 'b':
 777             return 0x08;
 778         case 't':
 779             return 0x09;
 780         case 'n':
 781             return 0x0A;
 782         case 'v':
 783             return 0x0B;
 784         case 'f':
 785             return 0x0C;
 786         case 'r':
 787             return 0x0D;
 788         case '"':
 789             return 0x22;
 790         case '\'':
 791             return 0x27;
 792         case '\\':
 793             return 0x5C;
 794         default:
 795             return c;
 796     }
 797 }
 798
 799 unsigned short Lexer::convertOctal(int c1, int c2, int c3)
 800 {
 801     return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
 802 }
 803
 804 unsigned char Lexer::convertHex(int c)
 805 {
 806     if (c >= '0' && c <= '9')
 807         return static_cast<unsigned char>(c - '0');
 808     if (c >= 'a' && c <= 'f')
 809         return static_cast<unsigned char>(c - 'a' + 10);
 810     return static_cast<unsigned char>(c - 'A' + 10);
 811 }
 812
 813 unsigned char Lexer::convertHex(int c1, int c2)
 814 {
 815     return ((convertHex(c1) << 4) + convertHex(c2));
 816 }
 817
 818 UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
 819 {
 820     unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
 821     unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
 822     return (highByte << 8 | lowByte);
 823 }
 824
 825 void Lexer::record8(int c)
 826 {
 827     ASSERT(c >= 0);
 828     ASSERT(c <= 0xff);
 829     m_buffer8.append(static_cast<char>(c));
 830 }
 831
 832 void Lexer::record16(int c)
 833 {
 834     ASSERT(c >= 0);
 835     ASSERT(c <= USHRT_MAX);
 836     record16(UChar(static_cast<unsigned short>(c)));
 837 }
 838
// Appends one UTF-16 code unit to m_buffer16, the buffer from which
// identifiers, string values and regular expression text are built.
void Lexer::record16(UChar c)
{
    m_buffer16.append(c);
}
 843
 844 bool Lexer::scanRegExp()
 845 {
 846     m_buffer16.clear();
 847     bool lastWasEscape = false;
 848     bool inBrackets = false;
 849
 850     while (1) {
 851         if (isLineTerminator() || m_current == -1)
 852             return false;
 853         else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {
 854             // keep track of '[' and ']'
 855             if (!lastWasEscape) {
 856                 if ( m_current == '[' && !inBrackets )
 857                     inBrackets = true;
 858                 if ( m_current == ']' && inBrackets )
 859                     inBrackets = false;
 860             }
 861             record16(m_current);
 862             lastWasEscape =
 863             !lastWasEscape && (m_current == '\\');
 864         } else { // end of regexp
 865             m_pattern = UString(m_buffer16);
 866             m_buffer16.clear();
 867             shift(1);
 868             break;
 869         }
 870         shift(1);
 871     }
 872
 873     while (isIdentPart(m_current)) {
 874         record16(m_current);
 875         shift(1);
 876     }
 877     m_flags = UString(m_buffer16);
 878
 879     return true;
 880 }
 881
 882 void Lexer::clear()
 883 {
 884     m_identifiers.clear();
 885
 886     Vector<char> newBuffer8;
 887     newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
 888     m_buffer8.swap(newBuffer8);
 889
 890     Vector<UChar> newBuffer16;
 891     newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
 892     m_buffer16.swap(newBuffer16);
 893
 894     m_isReparsing = false;
 895
 896     m_pattern = 0;
 897     m_flags = 0;
 898 }
 899
 900 } // namespace JSC