JavaScriptCore-521.tar.gz

[apple/javascriptcore.git] / parser / Lexer.cpp
diff --git a/parser/Lexer.cpp b/parser/Lexer.cpp

new file mode 100644 (file)

index 0000000..c2880dc
--- /dev/null
+++ b/parser/Lexer.cpp
@@ -0,0 +1,900 @@
+/*
+ *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
+ *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
+ *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public License
+ *  along with this library; see the file COPYING.LIB.  If not, write to
+ *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "Lexer.h"
+
+#include "JSFunction.h"
+#include "JSGlobalObjectFunctions.h"
+#include "NodeInfo.h"
+#include "Nodes.h"
+#include "dtoa.h"
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/Assertions.h>
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF;
+using namespace Unicode;
+
+// we can't specify the namespace in yacc's C output, so do it here
+using namespace JSC;
+
+#ifndef KDE_USE_FINAL
+#include "Grammar.h"
+#endif
+
+#include "Lookup.h"
+#include "Lexer.lut.h"
+
+// Bridge between the C interface yacc expects and the C++ lexer: recovers the
+// JSGlobalData from the opaque pointer and forwards the request to its Lexer.
+int jscyylex(void* lvalp, void* llocp, void* globalData)
+{
+    JSGlobalData* data = static_cast<JSGlobalData*>(globalData);
+    return data->lexer->lex(lvalp, llocp);
+}
+
+namespace JSC {
+
+static bool isDecimalDigit(int);
+
+Lexer::Lexer(JSGlobalData* globalData) // Creates a lexer with no source attached; the text is supplied later through setCode().
+    : yylineno(1)
+    , m_restrKeyword(false)
+    , m_eatNextIdentifier(false)
+    , m_stackToken(-1) // negative: no token is waiting to be returned (lex() tests m_stackToken >= 0)
+    , m_lastToken(-1)
+    , m_position(0)
+    , m_code(0) // no source buffer yet
+    , m_length(0)
+    , m_isReparsing(false)
+    , m_atLineStart(true)
+    , m_current(0) // m_current and m_next1..m_next3 form the lookahead window filled by shift()
+    , m_next1(0)
+    , m_next2(0)
+    , m_next3(0)
+    , m_currentOffset(0) // source offsets matching the four lookahead characters
+    , m_nextOffset1(0)
+    , m_nextOffset2(0)
+    , m_nextOffset3(0)
+    , m_globalData(globalData)
+    , m_mainTable(JSC::mainTable) // table lex() consults to tell keywords from plain identifiers
+{
+    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity); // pre-size the 8-bit buffer used for numeric literal text
+    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity); // pre-size the 16-bit buffer used for identifier and string text
+}
+
+Lexer::~Lexer() // Tears down the lexer's private copy of the keyword table.
+{
+    m_mainTable.deleteTable(); // presumably frees storage the table built on first lookup -- confirm in Lookup.h
+}
+
+void Lexer::setCode(const SourceCode& source)
+{
+    yylineno = source.firstLine();
+    m_restrKeyword = false;
+    m_delimited = false;
+    m_eatNextIdentifier = false;
+    m_stackToken = -1;
+    m_lastToken = -1;
+
+    m_position = source.startOffset();
+    m_source = &source;
+    m_code = source.provider()->data();
+    m_length = source.endOffset();
+    m_skipLF = false;
+    m_skipCR = false;
+    m_error = false;
+    m_atLineStart = true;
+
+    // read first characters
+    shift(4);
+}
+
+void Lexer::shift(unsigned p)
+{
+    // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
+    // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
+
+    while (p--) {
+        m_current = m_next1;
+        m_next1 = m_next2;
+        m_next2 = m_next3;
+        m_currentOffset = m_nextOffset1;
+        m_nextOffset1 = m_nextOffset2;
+        m_nextOffset2 = m_nextOffset3;
+        do {
+            if (m_position >= m_length) {
+                m_nextOffset3 = m_position;
+                m_position++;
+                m_next3 = -1;
+                break;
+            }
+            m_nextOffset3 = m_position;
+            m_next3 = m_code[m_position++];
+        } while (m_next3 == 0xFEFF);
+    }
+}
+
+// Bookkeeping for a consumed line terminator: bumps the line counter and
+// marks the lexer as being at the start of a line again.
+void Lexer::nextLine()
+{
+    m_atLineStart = true;
+    ++yylineno;
+}
+
+// Stops the scanning loop in lex() and records `s` as the resulting state.
+void Lexer::setDone(State s)
+{
+    m_done = true;
+    m_state = s;
+}
+
+int Lexer::lex(void* p1, void* p2) // Scans the next token. p1 is the parser's YYSTYPE*, p2 its YYLTYPE*; returns the token code, 0 at end of input, -1 on a lexical error.
+{
+    YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
+    YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
+    int token = 0;
+    m_state = Start;
+    unsigned short stringType = 0; // either single or double quotes
+    m_buffer8.clear(); // collects the text of numeric literals (see record8 calls)
+    m_buffer16.clear(); // collects the text of identifiers and strings (see record16 calls)
+    m_done = false;
+    m_terminator = false; // becomes true once a line terminator precedes the token
+    m_skipLF = false;
+    m_skipCR = false;
+
+    // did we push a token on the stack previously ?
+    // (after an automatic semicolon insertion)
+    if (m_stackToken >= 0) {
+        setDone(Other);
+        token = m_stackToken;
+        m_stackToken = 0; // NOTE(review): left at 0, not -1, so every later call takes this branch again and yields token 0 -- confirm intended
+    }
+    int startOffset = m_currentOffset;
+    while (!m_done) { // each pass examines m_current in the current state
+        if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
+            m_skipLF = false;
+        if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
+            m_skipCR = false;
+        if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one
+            m_skipLF = false;
+            m_skipCR = false;
+            shift(1);
+        }
+        switch (m_state) {
+            case Start:
+                startOffset = m_currentOffset; // the token begins here unless this character turns out to be skipped
+                if (isWhiteSpace()) {
+                    // do nothing
+                } else if (m_current == '/' && m_next1 == '/') {
+                    shift(1); // skips the first '/'; the shift at the bottom of the loop skips the second
+                    m_state = InSingleLineComment;
+                } else if (m_current == '/' && m_next1 == '*') {
+                    shift(1);
+                    m_state = InMultiLineComment;
+                } else if (m_current == -1) { // end of input
+                    if (!m_terminator && !m_delimited && !m_isReparsing) {
+                        // automatic semicolon insertion if program incomplete
+                        token = ';';
+                        m_stackToken = 0; // the following call then returns token 0 (end of input)
+                        setDone(Other);
+                    } else
+                        setDone(Eof);
+                } else if (isLineTerminator()) {
+                    nextLine();
+                    m_terminator = true;
+                    if (m_restrKeyword) { // line break right after continue/break/return/throw: insert ';'
+                        token = ';';
+                        setDone(Other);
+                    }
+                } else if (m_current == '"' || m_current == '\'') {
+                    m_state = InString;
+                    stringType = static_cast<unsigned short>(m_current); // remember which quote closes the string
+                } else if (isIdentStart(m_current)) {
+                    record16(m_current);
+                    m_state = InIdentifierOrKeyword;
+                } else if (m_current == '\\')
+                    m_state = InIdentifierStartUnicodeEscapeStart;
+                else if (m_current == '0') {
+                    record8(m_current);
+                    m_state = InNum0;
+                } else if (isDecimalDigit(m_current)) {
+                    record8(m_current);
+                    m_state = InNum;
+                } else if (m_current == '.' && isDecimalDigit(m_next1)) {
+                    record8(m_current);
+                    m_state = InDecimal;
+                    // <!-- marks the beginning of a line comment (for www usage)
+                } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
+                    shift(3);
+                    m_state = InSingleLineComment;
+                    // same for -->
+                } else if (m_atLineStart && m_current == '-' && m_next1 == '-' &&  m_next2 == '>') {
+                    shift(2);
+                    m_state = InSingleLineComment;
+                } else {
+                    token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3); // consumes the punctuator's characters itself
+                    if (token != -1)
+                        setDone(Other);
+                    else
+                        setDone(Bad);
+                }
+                break;
+            case InString:
+                if (m_current == stringType) {
+                    shift(1); // consume the closing quote
+                    setDone(String);
+                } else if (isLineTerminator() || m_current == -1) // unterminated string
+                    setDone(Bad);
+                else if (m_current == '\\')
+                    m_state = InEscapeSequence;
+                else
+                    record16(m_current);
+                break;
+            // Escape Sequences inside of strings
+            case InEscapeSequence:
+                if (isOctalDigit(m_current)) {
+                    if (m_current >= '0' && m_current <= '3' && // three-digit octal escape needs a first digit of 0-3
+                        isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
+                        record16(convertOctal(m_current, m_next1, m_next2));
+                        shift(2);
+                        m_state = InString;
+                    } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) { // two-digit octal escape
+                        record16(convertOctal('0', m_current, m_next1));
+                        shift(1);
+                        m_state = InString;
+                    } else if (isOctalDigit(m_current)) { // one-digit octal escape
+                        record16(convertOctal('0', '0', m_current));
+                        m_state = InString;
+                    } else
+                        setDone(Bad);
+                } else if (m_current == 'x')
+                    m_state = InHexEscape;
+                else if (m_current == 'u')
+                    m_state = InUnicodeEscape;
+                else if (isLineTerminator()) { // backslash followed by a line break: nothing is recorded
+                    nextLine();
+                    m_state = InString;
+                } else {
+                    record16(singleEscape(static_cast<unsigned short>(m_current)));
+                    m_state = InString;
+                }
+                break;
+            case InHexEscape:
+                if (isHexDigit(m_current) && isHexDigit(m_next1)) {
+                    m_state = InString;
+                    record16(convertHex(m_current, m_next1));
+                    shift(1);
+                } else if (m_current == stringType) { // "\x" directly before the closing quote: keep a literal 'x'
+                    record16('x');
+                    shift(1);
+                    setDone(String);
+                } else { // not followed by two hex digits: keep 'x' and this character literally
+                    record16('x');
+                    record16(m_current);
+                    m_state = InString;
+                }
+                break;
+            case InUnicodeEscape:
+                if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
+                    record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
+                    shift(3);
+                    m_state = InString;
+                } else if (m_current == stringType) { // "\u" directly before the closing quote: keep a literal 'u'
+                    record16('u');
+                    shift(1);
+                    setDone(String);
+                } else
+                    setDone(Bad);
+                break;
+            case InSingleLineComment:
+                if (isLineTerminator()) {
+                    nextLine();
+                    m_terminator = true;
+                    if (m_restrKeyword) { // same semicolon insertion as in the Start state
+                        token = ';';
+                        setDone(Other);
+                    } else
+                        m_state = Start;
+                } else if (m_current == -1)
+                    setDone(Eof);
+                break;
+            case InMultiLineComment:
+                if (m_current == -1) // comment never closed
+                    setDone(Bad);
+                else if (isLineTerminator())
+                    nextLine();
+                else if (m_current == '*' && m_next1 == '/') {
+                    m_state = Start;
+                    shift(1);
+                }
+                break;
+            case InIdentifierOrKeyword:
+            case InIdentifier:
+                if (isIdentPart(m_current))
+                    record16(m_current);
+                else if (m_current == '\\')
+                    m_state = InIdentifierPartUnicodeEscapeStart;
+                else
+                    setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
+                break;
+            case InNum0:
+                if (m_current == 'x' || m_current == 'X') {
+                    record8(m_current);
+                    m_state = InHex;
+                } else if (m_current == '.') {
+                    record8(m_current);
+                    m_state = InDecimal;
+                } else if (m_current == 'e' || m_current == 'E') {
+                    record8(m_current);
+                    m_state = InExponentIndicator;
+                } else if (isOctalDigit(m_current)) {
+                    record8(m_current);
+                    m_state = InOctal;
+                } else if (isDecimalDigit(m_current)) { // only 8 or 9 can reach this branch: treat as decimal
+                    record8(m_current);
+                    m_state = InDecimal;
+                } else
+                    setDone(Number);
+                break;
+            case InHex:
+                if (isHexDigit(m_current))
+                    record8(m_current);
+                else
+                    setDone(Hex);
+                break;
+            case InOctal:
+                if (isOctalDigit(m_current))
+                    record8(m_current);
+                else if (isDecimalDigit(m_current)) { // an 8 or 9 turns the literal into a decimal one
+                    record8(m_current);
+                    m_state = InDecimal;
+                } else
+                    setDone(Octal);
+                break;
+            case InNum:
+                if (isDecimalDigit(m_current))
+                    record8(m_current);
+                else if (m_current == '.') {
+                    record8(m_current);
+                    m_state = InDecimal;
+                } else if (m_current == 'e' || m_current == 'E') {
+                    record8(m_current);
+                    m_state = InExponentIndicator;
+                } else
+                    setDone(Number);
+                break;
+            case InDecimal:
+                if (isDecimalDigit(m_current))
+                    record8(m_current);
+                else if (m_current == 'e' || m_current == 'E') {
+                    record8(m_current);
+                    m_state = InExponentIndicator;
+                } else
+                    setDone(Number);
+                break;
+            case InExponentIndicator:
+                if (m_current == '+' || m_current == '-') // a sign keeps us in this state; a digit must still follow
+                    record8(m_current);
+                else if (isDecimalDigit(m_current)) {
+                    record8(m_current);
+                    m_state = InExponent;
+                } else
+                    setDone(Bad);
+                break;
+            case InExponent:
+                if (isDecimalDigit(m_current))
+                    record8(m_current);
+                else
+                    setDone(Number);
+                break;
+            case InIdentifierStartUnicodeEscapeStart:
+                if (m_current == 'u')
+                    m_state = InIdentifierStartUnicodeEscape;
+                else
+                    setDone(Bad);
+                break;
+            case InIdentifierPartUnicodeEscapeStart:
+                if (m_current == 'u')
+                    m_state = InIdentifierPartUnicodeEscape;
+                else
+                    setDone(Bad);
+                break;
+            case InIdentifierStartUnicodeEscape:
+                if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
+                    setDone(Bad);
+                    break;
+                }
+                token = convertUnicode(m_current, m_next1, m_next2, m_next3); // token doubles as scratch storage for the decoded character
+                shift(3);
+                if (!isIdentStart(token)) {
+                    setDone(Bad);
+                    break;
+                }
+                record16(token);
+                m_state = InIdentifier; // plain identifier state: the keyword lookup is skipped for escaped names
+                break;
+            case InIdentifierPartUnicodeEscape:
+                if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
+                    setDone(Bad);
+                    break;
+                }
+                token = convertUnicode(m_current, m_next1, m_next2, m_next3);
+                shift(3);
+                if (!isIdentPart(token)) {
+                    setDone(Bad);
+                    break;
+                }
+                record16(token);
+                m_state = InIdentifier;
+                break;
+            default:
+                ASSERT(!"Unhandled state in switch statement");
+        }
+
+        // move on to the next character
+        if (!m_done)
+            shift(1);
+        if (m_state != Start && m_state != InSingleLineComment) // only these two states leave the line-start flag untouched
+            m_atLineStart = false;
+    }
+
+    // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
+    if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current))
+        m_state = Bad;
+
+    // terminate string
+    m_buffer8.append('\0');
+
+#ifdef JSC_DEBUG_LEX
+    fprintf(stderr, "line: %d ", lineNo());
+    fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
+    fprintf(stderr, "%s ", m_buffer8.data());
+#endif
+
+    double dval = 0;
+    if (m_state == Number)
+        dval = WTF::strtod(m_buffer8.data(), 0L);
+    else if (m_state == Hex) { // scan hex numbers
+        const char* p = m_buffer8.data() + 2; // skip the "0x" / "0X" prefix
+        while (char c = *p++) {
+            dval *= 16;
+            dval += convertHex(c);
+        }
+
+        if (dval >= mantissaOverflowLowerBound) // presumably too large to have been accumulated exactly -- recompute; p is one past the NUL, so the length argument is the digit count
+            dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
+
+        m_state = Number;
+    } else if (m_state == Octal) {   // scan octal number
+        const char* p = m_buffer8.data() + 1; // skip the leading '0'
+        while (char c = *p++) {
+            dval *= 8;
+            dval += c - '0';
+        }
+
+        if (dval >= mantissaOverflowLowerBound) // same overflow handling as for hex literals
+            dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
+
+        m_state = Number;
+    }
+
+#ifdef JSC_DEBUG_LEX
+    switch (m_state) {
+        case Eof:
+            printf("(EOF)\n");
+            break;
+        case Other:
+            printf("(Other)\n");
+            break;
+        case Identifier:
+            printf("(Identifier)/(Keyword)\n");
+            break;
+        case String:
+            printf("(String)\n");
+            break;
+        case Number:
+            printf("(Number)\n");
+            break;
+        default:
+            printf("(unknown)");
+    }
+#endif
+
+    if (m_state != Identifier)
+        m_eatNextIdentifier = false;
+
+    m_restrKeyword = false;
+    m_delimited = false;
+    llocp->first_line = yylineno;
+    llocp->last_line = yylineno;
+    llocp->first_column = startOffset; // the "column" fields carry source offsets, not per-line columns
+    llocp->last_column = m_currentOffset;
+    switch (m_state) {
+        case Eof:
+            token = 0;
+            break;
+        case Other:
+            if (token == '}' || token == ';') // m_delimited suppresses the semicolon insertion at end of input
+                m_delimited = true;
+            break;
+        case Identifier:
+            // Apply anonymous-function hack below (eat the identifier).
+            if (m_eatNextIdentifier) {
+                m_eatNextIdentifier = false;
+                token = lex(lvalp, llocp); // drop this identifier and hand back the token that follows it
+                break;
+            }
+            lvalp->ident = makeIdentifier(m_buffer16);
+            token = IDENT;
+            break;
+        case IdentifierOrKeyword: {
+            lvalp->ident = makeIdentifier(m_buffer16);
+            const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
+            if (!entry) {
+                // Lookup for keyword failed, means this is an identifier.
+                token = IDENT;
+                break;
+            }
+            token = entry->lexerValue();
+            // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
+            m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
+            if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW) // a line break after these inserts ';' on the next call
+                m_restrKeyword = true;
+            break;
+        }
+        case String:
+            // Atomize constant strings in case they're later used in property lookup.
+            lvalp->ident = makeIdentifier(m_buffer16);
+            token = STRING;
+            break;
+        case Number:
+            lvalp->doubleValue = dval;
+            token = NUMBER;
+            break;
+        case Bad:
+#ifdef JSC_DEBUG_LEX
+            fprintf(stderr, "yylex: ERROR.\n");
+#endif
+            m_error = true;
+            return -1; // note: m_lastToken is not updated on the error paths
+        default:
+            ASSERT(!"unhandled numeration value in switch");
+            m_error = true;
+            return -1;
+    }
+    m_lastToken = token;
+    return token;
+}
+
+// Reports whether the current character is white space: tab, vertical tab,
+// form feed, or anything isSeparatorSpace() accepts.
+bool Lexer::isWhiteSpace() const
+{
+    switch (m_current) {
+        case '\t':
+        case 0x0b:
+        case 0x0c:
+            return true;
+        default:
+            return isSeparatorSpace(m_current);
+    }
+}
+
+// Reports whether the current character ends a line (CR, LF, U+2028, U+2029).
+// As a side effect a CR arms m_skipLF and an LF arms m_skipCR, so that lex()
+// can swallow the second half of a two-character line ending.
+bool Lexer::isLineTerminator()
+{
+    switch (m_current) {
+        case '\r':
+            m_skipLF = true;
+            return true;
+        case '\n':
+            m_skipCR = true;
+            return true;
+        case 0x2028:
+        case 0x2029:
+            return true;
+        default:
+            return false;
+    }
+}
+
+// Reports whether c may begin an identifier: an ASCII letter, '$', '_', or a
+// non-ASCII character whose Unicode category is one of the letter categories.
+bool Lexer::isIdentStart(int c)
+{
+    if (isASCIIAlpha(c) || c == '$' || c == '_')
+        return true;
+
+    // Remaining ASCII characters never start an identifier.
+    if (isASCII(c))
+        return false;
+
+    return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
+}
+
+bool Lexer::isIdentPart(int c)
+{
+    return isASCIIAlphanumeric(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
+                            | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)));
+}
+
+static bool isDecimalDigit(int c) // File-local helper: true when c is a decimal digit.
+{
+    return isASCIIDigit(c); // delegates to the WTF ASCII classifier
+}
+
+bool Lexer::isHexDigit(int c) // True when c is a hexadecimal digit.
+{
+    return isASCIIHexDigit(c); // delegates to the WTF ASCII classifier
+}
+
+bool Lexer::isOctalDigit(int c) // True when c is an octal digit.
+{
+    return isASCIIOctalDigit(c); // delegates to the WTF ASCII classifier
+}
+
+int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
+{
+    if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
+        shift(4);
+        return URSHIFTEQUAL;
+    }
+    if (c1 == '=' && c2 == '=' && c3 == '=') {
+        shift(3);
+        return STREQ;
+    }
+    if (c1 == '!' && c2 == '=' && c3 == '=') {
+        shift(3);
+        return STRNEQ;
+    }
+    if (c1 == '>' && c2 == '>' && c3 == '>') {
+        shift(3);
+        return URSHIFT;
+    }
+    if (c1 == '<' && c2 == '<' && c3 == '=') {
+        shift(3);
+        return LSHIFTEQUAL;
+    }
+    if (c1 == '>' && c2 == '>' && c3 == '=') {
+        shift(3);
+        return RSHIFTEQUAL;
+    }
+    if (c1 == '<' && c2 == '=') {
+        shift(2);
+        return LE;
+    }
+    if (c1 == '>' && c2 == '=') {
+        shift(2);
+        return GE;
+    }
+    if (c1 == '!' && c2 == '=') {
+        shift(2);
+        return NE;
+    }
+    if (c1 == '+' && c2 == '+') {
+        shift(2);
+        if (m_terminator)
+            return AUTOPLUSPLUS;
+        return PLUSPLUS;
+    }
+    if (c1 == '-' && c2 == '-') {
+        shift(2);
+        if (m_terminator)
+            return AUTOMINUSMINUS;
+        return MINUSMINUS;
+    }
+    if (c1 == '=' && c2 == '=') {
+        shift(2);
+        return EQEQ;
+    }
+    if (c1 == '+' && c2 == '=') {
+        shift(2);
+        return PLUSEQUAL;
+    }
+    if (c1 == '-' && c2 == '=') {
+        shift(2);
+        return MINUSEQUAL;
+    }
+    if (c1 == '*' && c2 == '=') {
+        shift(2);
+        return MULTEQUAL;
+    }
+    if (c1 == '/' && c2 == '=') {
+        shift(2);
+        return DIVEQUAL;
+    }
+    if (c1 == '&' && c2 == '=') {
+        shift(2);
+        return ANDEQUAL;
+    }
+    if (c1 == '^' && c2 == '=') {
+        shift(2);
+        return XOREQUAL;
+    }
+    if (c1 == '%' && c2 == '=') {
+        shift(2);
+        return MODEQUAL;
+    }
+    if (c1 == '|' && c2 == '=') {
+        shift(2);
+        return OREQUAL;
+    }
+    if (c1 == '<' && c2 == '<') {
+        shift(2);
+        return LSHIFT;
+    }
+    if (c1 == '>' && c2 == '>') {
+        shift(2);
+        return RSHIFT;
+    }
+    if (c1 == '&' && c2 == '&') {
+        shift(2);
+        return AND;
+    }
+    if (c1 == '|' && c2 == '|') {
+        shift(2);
+        return OR;
+    }
+
+    switch (c1) {
+        case '=':
+        case '>':
+        case '<':
+        case ',':
+        case '!':
+        case '~':
+        case '?':
+        case ':':
+        case '.':
+        case '+':
+        case '-':
+        case '*':
+        case '/':
+        case '&':
+        case '|':
+        case '^':
+        case '%':
+        case '(':
+        case ')':
+        case '[':
+        case ']':
+        case ';':
+            shift(1);
+            return static_cast<int>(c1);
+        case '{':
+            charPos = m_currentOffset;
+            shift(1);
+            return OPENBRACE;
+        case '}':
+            charPos = m_currentOffset;
+            shift(1);
+            return CLOSEBRACE;
+        default:
+            return -1;
+    }
+}
+
+unsigned short Lexer::singleEscape(unsigned short c)
+{
+    switch (c) {
+        case 'b':
+            return 0x08;
+        case 't':
+            return 0x09;
+        case 'n':
+            return 0x0A;
+        case 'v':
+            return 0x0B;
+        case 'f':
+            return 0x0C;
+        case 'r':
+            return 0x0D;
+        case '"':
+            return 0x22;
+        case '\'':
+            return 0x27;
+        case '\\':
+            return 0x5C;
+        default:
+            return c;
+    }
+}
+
+// Combines three octal digit characters (most significant first) into the
+// value they spell. The arguments are not validated here.
+unsigned short Lexer::convertOctal(int c1, int c2, int c3)
+{
+    const int high = c1 - '0';
+    const int middle = c2 - '0';
+    const int low = c3 - '0';
+    return static_cast<unsigned short>((high * 8 + middle) * 8 + low);
+}
+
+// Returns the numeric value of one hexadecimal digit character. The argument
+// is not validated: anything that is neither 0-9 nor a-f is treated as if it
+// were an upper-case digit A-F.
+unsigned char Lexer::convertHex(int c)
+{
+    int value;
+    if (c >= '0' && c <= '9')
+        value = c - '0';
+    else if (c >= 'a' && c <= 'f')
+        value = c - 'a' + 10;
+    else
+        value = c - 'A' + 10;
+    return static_cast<unsigned char>(value);
+}
+
+// Combines two hexadecimal digit characters (high nibble first) into a byte.
+unsigned char Lexer::convertHex(int c1, int c2)
+{
+    const int highNibble = convertHex(c1);
+    const int lowNibble = convertHex(c2);
+    return static_cast<unsigned char>((highNibble << 4) + lowNibble);
+}
+
+// Combines four hexadecimal digit characters (most significant first) into a
+// single 16-bit code unit, as needed for \uXXXX escapes.
+UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
+{
+    const unsigned char upper = convertHex(c1, c2);
+    const unsigned char lower = convertHex(c3, c4);
+    return (upper << 8) | lower;
+}
+
+void Lexer::record8(int c) // Appends one character to the 8-bit buffer.
+{
+    ASSERT(c >= 0); // -1 (end of input) must never be recorded
+    ASSERT(c <= 0xff); // the value has to fit into a single char
+    m_buffer8.append(static_cast<char>(c));
+}
+
+void Lexer::record16(int c) // Appends one character, given as an int, to the 16-bit buffer.
+{
+    ASSERT(c >= 0); // -1 (end of input) must never be recorded
+    ASSERT(c <= USHRT_MAX); // the value has to fit into 16 bits
+    record16(UChar(static_cast<unsigned short>(c))); // forwards to the UChar overload
+}
+
+void Lexer::record16(UChar c) // Appends one UTF-16 code unit to the 16-bit buffer.
+{
+    m_buffer16.append(c);
+}
+
+bool Lexer::scanRegExp()
+{
+    m_buffer16.clear();
+    bool lastWasEscape = false;
+    bool inBrackets = false;
+
+    while (1) {
+        if (isLineTerminator() || m_current == -1)
+            return false;
+        else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {
+            // keep track of '[' and ']'
+            if (!lastWasEscape) {
+                if ( m_current == '[' && !inBrackets )
+                    inBrackets = true;
+                if ( m_current == ']' && inBrackets )
+                    inBrackets = false;
+            }
+            record16(m_current);
+            lastWasEscape =
+            !lastWasEscape && (m_current == '\\');
+        } else { // end of regexp
+            m_pattern = UString(m_buffer16);
+            m_buffer16.clear();
+            shift(1);
+            break;
+        }
+        shift(1);
+    }
+
+    while (isIdentPart(m_current)) {
+        record16(m_current);
+        shift(1);
+    }
+    m_flags = UString(m_buffer16);
+
+    return true;
+}
+
+// Drops the data accumulated while lexing: the identifier list, both scratch
+// buffers, the reparsing flag and the last scanned regular expression.
+void Lexer::clear()
+{
+    m_isReparsing = false;
+    m_pattern = 0;
+    m_flags = 0;
+
+    m_identifiers.clear();
+
+    // Each buffer is swapped with a freshly reserved vector instead of being
+    // cleared, so the old storage goes away together with the temporary.
+    {
+        Vector<char> fresh8;
+        fresh8.reserveInitialCapacity(initialReadBufferCapacity);
+        m_buffer8.swap(fresh8);
+    }
+    {
+        Vector<UChar> fresh16;
+        fresh16.reserveInitialCapacity(initialReadBufferCapacity);
+        m_buffer16.swap(fresh16);
+    }
+}
+
+} // namespace JSC