[apple/javascriptcore.git] / parser / Lexer.h

/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved.
 *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#ifndef Lexer_h
#define Lexer_h

#include "Lookup.h"
#include "ParserArena.h"
#include "ParserTokens.h"
#include "SourceCode.h"
#include <wtf/ASCIICType.h>
#include <wtf/SegmentedVector.h>
#include <wtf/Vector.h>
#include <wtf/unicode/Unicode.h>

namespace JSC {

class Keywords {
public:
    bool isKeyword(const Identifier& ident) const
    {
        return m_keywordTable.entry(m_vm, ident);
    }
    
    const HashEntry* getKeyword(const Identifier& ident) const
    {
        return m_keywordTable.entry(m_vm, ident);
    }
    
    ~Keywords()
    {
        m_keywordTable.deleteTable();
    }
    
private:
    friend class VM;
    
    Keywords(VM*);
    
    VM* m_vm;
    const HashTable m_keywordTable;
};

// Bit flags handed to the lexer entry points through their unsigned
// lexerFlags argument; individual flags are tested with bitwise AND.
enum LexerFlags {
    LexerFlagsIgnoreReservedWords = 1,
    LexerFlagsDontBuildStrings = 2,
    // Historical misspelling ("Lexex"); retained so existing users keep compiling.
    LexexFlagsDontBuildKeywords = 4,
    // Correctly spelled alias of LexexFlagsDontBuildKeywords; prefer it in new code.
    LexerFlagsDontBuildKeywords = LexexFlagsDontBuildKeywords
};

// Lexer over source text whose code units have type T. Explicit
// specializations for T = LChar and T = UChar are provided later in this
// header for the character-classification helpers, setCodeStart() and
// makeRightSizedIdentifier().
template <typename T>
class Lexer {
    WTF_MAKE_NONCOPYABLE(Lexer);
    WTF_MAKE_FAST_ALLOCATED;

public:
    Lexer(VM*);
    ~Lexer();

    // Character classification and conversion helpers.
    static bool isWhiteSpace(T character);
    static bool isLineTerminator(T character);
    static unsigned char convertHex(int c1, int c2);
    static UChar convertUnicode(int c1, int c2, int c3, int c4);

    // Set-up performed before lexing.
    void setCode(const SourceCode&, ParserArena*);
    void setIsReparsing() { m_isReparsing = true; }
    bool isReparsing() const { return m_isReparsing; }

    // Token production and position queries. Offsets are measured in code
    // units from m_codeStart (see offsetFromSourcePtr()).
    JSTokenType lex(JSTokenData*, JSTokenLocation*, unsigned, bool strictMode);
    bool nextTokenIsColon();
    int lineNumber() const { return m_lineNumber; }
    ALWAYS_INLINE int currentOffset() const { return offsetFromSourcePtr(m_code); }
    ALWAYS_INLINE int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart); }
    void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
    int lastLineNumber() const { return m_lastLineNumber; }
    bool prevTerminator() const { return m_terminator; }
    SourceCode sourceCode(int openBrace, int closeBrace, int firstLine, unsigned startColumn);
    bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
    bool skipRegExp();

    // Error reporting and repositioning.
    bool sawError() const { return m_error; }
    String getErrorMessage() const { return m_lexErrorMessage; }
    void clear();

    // Moves the read position to |offset| and the line start to
    // |lineStartOffset| (both relative to m_codeStart), clears the error flag
    // and message, empties both scratch buffers and reloads m_current.
    void setOffset(int offset, int lineStartOffset)
    {
        m_error = false;
        m_lexErrorMessage = String();

        m_code = sourcePtrFromOffset(offset);
        m_lineStart = sourcePtrFromOffset(lineStartOffset);
        ASSERT(currentOffset() >= currentLineStartOffset());

        m_buffer8.resize(0);
        m_buffer16.resize(0);

        // m_current is the code unit at m_code, or 0 when m_code is not
        // before m_codeEnd.
        m_current = 0;
        if (LIKELY(m_code < m_codeEnd))
            m_current = *m_code;
    }
    void setLineNumber(int line)
    {
        m_lineNumber = line;
    }

    SourceProvider* sourceProvider() const { return m_source->provider(); }

    // Defined at the bottom of this header: tries an inline ASCII-identifier
    // scan and otherwise defers to lex().
    JSTokenType lexExpectIdentifier(JSTokenData*, JSTokenLocation*, unsigned, bool strictMode);

private:
    // Helpers that add characters to m_buffer8 / m_buffer16.
    void record8(int);
    void append8(const T*, size_t);
    void record16(int);
    void record16(T);
    void append16(const LChar*, size_t);
    void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); }

    ALWAYS_INLINE void shift();
    ALWAYS_INLINE bool atEnd() const;
    ALWAYS_INLINE T peek(int offset) const;

    // Result type of parseFourDigitUnicodeHex(). A non-negative m_value is a
    // valid value; -2 encodes IncompleteHex and -1 encodes InvalidHex.
    struct UnicodeHexValue {

        enum ValueType { ValidHex, IncompleteHex, InvalidHex };

        explicit UnicodeHexValue(int value)
            : m_value(value)
        {
        }
        // Any |type| other than IncompleteHex is stored as -1 (InvalidHex).
        explicit UnicodeHexValue(ValueType type)
            : m_value(type == IncompleteHex ? -2 : -1)
        {
        }

        ValueType valueType() const
        {
            if (isValid())
                return ValidHex;
            if (m_value == -2)
                return IncompleteHex;
            return InvalidHex;
        }
        bool isValid() const { return m_value >= 0; }
        // Only meaningful for a valid value (asserted).
        int value() const
        {
            ASSERT(isValid());
            return m_value;
        }

    private:
        int m_value;
    };
    UnicodeHexValue parseFourDigitUnicodeHex();
    void shiftLineTerminator();

    // Conversions between source pointers and offsets from m_codeStart.
    ALWAYS_INLINE int offsetFromSourcePtr(const T* ptr) const { return ptr - m_codeStart; }
    ALWAYS_INLINE const T* sourcePtrFromOffset(int offset) const { return m_codeStart + offset; }

    String invalidCharacterMessage() const;
    ALWAYS_INLINE const T* currentSourcePtr() const;
    // Pointer-based convenience wrapper around setOffset().
    ALWAYS_INLINE void setOffsetFromSourcePtr(const T* sourcePtr, unsigned lineStartOffset) { setOffset(offsetFromSourcePtr(sourcePtr), lineStartOffset); }

    ALWAYS_INLINE void setCodeStart(const StringImpl*);

    // Identifier construction; each forwards to m_arena (definitions below
    // the class).
    ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length);
    ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
    ALWAYS_INLINE const Identifier* makeLCharIdentifier(const LChar* characters, size_t length);
    ALWAYS_INLINE const Identifier* makeLCharIdentifier(const UChar* characters, size_t length);
    ALWAYS_INLINE const Identifier* makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars);
    ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length);

    ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;

    template <int shiftAmount> void internalShift();
    template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
    template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned lexerFlags, bool strictMode);
    template <bool shouldBuildIdentifiers> NEVER_INLINE JSTokenType parseIdentifierSlowCase(JSTokenData*, unsigned lexerFlags, bool strictMode);
    enum StringParseResult {
        StringParsedSuccessfully,
        StringUnterminated,
        StringCannotBeParsed
    };
    template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseString(JSTokenData*, bool strictMode);
    template <bool shouldBuildStrings> NEVER_INLINE StringParseResult parseStringSlowCase(JSTokenData*, bool strictMode);
    ALWAYS_INLINE void parseHex(double& returnValue);
    ALWAYS_INLINE bool parseOctal(double& returnValue);
    ALWAYS_INLINE bool parseDecimal(double& returnValue);
    ALWAYS_INLINE void parseNumberAfterDecimalPoint();
    ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
    ALWAYS_INLINE bool parseMultilineComment();

    static const size_t initialReadBufferCapacity = 32;

    int m_lineNumber;
    int m_lastLineNumber;

    // Scratch buffers; both are emptied by setOffset().
    Vector<LChar> m_buffer8;
    Vector<UChar> m_buffer16;
    bool m_terminator; // Exposed through prevTerminator().
    int m_lastToken;

    const SourceCode* m_source;
    unsigned m_sourceOffset;
    const T* m_code; // Current read position.
    const T* m_codeStart; // Origin for every offset <-> pointer conversion.
    const T* m_codeEnd; // Reads at or past this pointer yield m_current == 0.
    const T* m_codeStartPlusOffset;
    const T* m_lineStart; // Reported by currentLineStartOffset().
    bool m_isReparsing;
    bool m_atLineStart;
    bool m_error;
    String m_lexErrorMessage;

    T m_current; // Code unit at m_code, or 0 at the end of the source.

    IdentifierArena* m_arena;

    VM* m_vm;
};

// 8-bit white space test: space, horizontal tab, 0xB, 0xC and 0xA0.
template <>
ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch)
{
    switch (ch) {
    case ' ':
    case '\t':
    case 0xB:
    case 0xC:
    case 0xA0:
        return true;
    default:
        return false;
    }
}

// 16-bit white space test. Values below 256 are answered by the LChar
// specialization; anything else is white space when it is a Unicode
// separator space or U+FEFF.
template <>
ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch)
{
    if (ch < 256)
        return Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch));

    return WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF;
}

// 8-bit line terminator test: carriage return or line feed.
template <>
ALWAYS_INLINE bool Lexer<LChar>::isLineTerminator(LChar ch)
{
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}

// 16-bit line terminator test: carriage return, line feed, U+2028 or U+2029.
template <>
ALWAYS_INLINE bool Lexer<UChar>::isLineTerminator(UChar ch)
{
    switch (ch) {
    case '\r':
    case '\n':
    case 0x2028:
    case 0x2029:
        return true;
    default:
        return false;
    }
}

// Combines two hexadecimal digit characters into one byte: c1 supplies the
// high four bits and c2 the low four bits. Both arguments are handed to
// toASCIIHexValue(), so they are presumably expected to be ASCII hex digits.
template <typename T>
inline unsigned char Lexer<T>::convertHex(int c1, int c2)
{
    const int highNibble = toASCIIHexValue(c1);
    const int lowNibble = toASCIIHexValue(c2);
    return (highNibble << 4) | lowNibble;
}

// Combines four hexadecimal digit characters into one UChar: c1/c2 form the
// high byte and c3/c4 the low byte, each pair converted by convertHex().
template <typename T>
inline UChar Lexer<T>::convertUnicode(int c1, int c2, int c3, int c4)
{
    const unsigned char highByte = convertHex(c1, c2);
    const unsigned char lowByte = convertHex(c3, c4);
    return (highByte << 8) | lowByte;
}

// Asks the identifier arena for an Identifier built from |length| 8-bit
// characters and returns its address.
template <typename T>
ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const LChar* characters, size_t length)
{
    const Identifier& identifier = m_arena->makeIdentifier(m_vm, characters, length);
    return &identifier;
}

// Asks the identifier arena for an Identifier built from |length| 16-bit
// characters and returns its address.
template <typename T>
ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const UChar* characters, size_t length)
{
    const Identifier& identifier = m_arena->makeIdentifier(m_vm, characters, length);
    return &identifier;
}

// LChar lexer: the third argument is ignored and the identifier is always
// created through the arena's makeIdentifierLCharFromUChar().
template <>
ALWAYS_INLINE const Identifier* Lexer<LChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar)
{
    const Identifier& identifier = m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
    return &identifier;
}

// UChar lexer: picks the identifier representation from |orAllChars|. When
// any bit above the low eight is set, a 16-bit identifier is made; otherwise
// the arena's makeIdentifierLCharFromUChar() is used.
// NOTE(review): |orAllChars| is presumably the bitwise OR of every code unit
// in |characters| - confirm against the callers.
template <>
ALWAYS_INLINE const Identifier* Lexer<UChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars)
{
    if (orAllChars & ~0xff)
        return &m_arena->makeIdentifier(m_vm, characters, length);

    return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
}

// Points m_codeStart at the 8-bit character data of |sourceString|, which is
// asserted to be an 8-bit string.
template <>
ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString)
{
    ASSERT(sourceString->is8Bit());
    const LChar* firstCharacter = sourceString->characters8();
    m_codeStart = firstCharacter;
}

// Points m_codeStart at the 16-bit character data of |sourceString|, which is
// asserted not to be an 8-bit string.
template <>
ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString)
{
    ASSERT(!sourceString->is8Bit());
    const UChar* firstCharacter = sourceString->characters16();
    m_codeStart = firstCharacter;
}

// Forwards to the arena's makeIdentifierLCharFromUChar() for |length| 16-bit
// characters and returns the address of the resulting Identifier.
template <typename T>
ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifierLCharFromUChar(const UChar* characters, size_t length)
{
    const Identifier& identifier = m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
    return &identifier;
}

// 8-bit input: the characters are passed straight to the arena's
// makeIdentifier().
template <typename T>
ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const LChar* characters, size_t length)
{
    const Identifier& identifier = m_arena->makeIdentifier(m_vm, characters, length);
    return &identifier;
}

// 16-bit input: the identifier is created through the arena's
// makeIdentifierLCharFromUChar().
template <typename T>
ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const UChar* characters, size_t length)
{
    const Identifier& identifier = m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
    return &identifier;
}

// Inline fast path for a token expected to be an identifier.
//
// Starting at m_code, accepts one ASCII letter followed by any number of
// ASCII letters or digits. On success it advances m_code / m_current past the
// identifier, fills in |tokenData| and |tokenLocation|, records IDENT as the
// last token and returns IDENT.
//
// It defers to lex() with the same arguments, without having modified any
// lexer state, when:
//   - there is no input left,
//   - the first character is not an ASCII letter, or
//   - the character following the scanned run is non-ASCII, '\\', '_' or '$'.
//
// Callers must pass LexerFlagsIgnoreReservedWords in |lexerFlags| (asserted).
// When LexexFlagsDontBuildKeywords is set, tokenData->ident is set to 0
// instead of building an Identifier.
template <typename T>
ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSTokenData* tokenData, JSTokenLocation* tokenLocation, unsigned lexerFlags, bool strictMode)
{
    ASSERT((lexerFlags & LexerFlagsIgnoreReservedWords));

    const T* const identifierStart = m_code;
    const T* const sourceEnd = m_codeEnd;

    if (identifierStart >= sourceEnd) {
        ASSERT(identifierStart == sourceEnd);
        return lex(tokenData, tokenLocation, lexerFlags, strictMode);
    }

    if (!WTF::isASCIIAlpha(*identifierStart))
        return lex(tokenData, tokenLocation, lexerFlags, strictMode);

    // Consume the run of ASCII letters and digits after the first letter.
    const T* cursor = identifierStart + 1;
    while (cursor < sourceEnd && WTF::isASCIIAlphanumeric(*cursor))
        ++cursor;

    // Here's the shift: work out the new current character, bailing out
    // before any member is written if the run is followed by a character the
    // fast path does not handle.
    T following = 0;
    if (cursor < sourceEnd) {
        following = *cursor;
        if (!WTF::isASCII(following) || following == '\\' || following == '_' || following == '$')
            return lex(tokenData, tokenLocation, lexerFlags, strictMode);
    }
    m_current = following;
    m_code = cursor;
    ASSERT(currentOffset() >= currentLineStartOffset());

    // Create the identifier if needed
    const bool shouldBuildIdentifier = !(lexerFlags & LexexFlagsDontBuildKeywords);
    if (shouldBuildIdentifier)
        tokenData->ident = makeLCharIdentifier(identifierStart, cursor - identifierStart);
    else
        tokenData->ident = 0;

    tokenLocation->line = m_lineNumber;
    tokenLocation->lineStartOffset = currentLineStartOffset();
    tokenLocation->startOffset = offsetFromSourcePtr(identifierStart);
    tokenLocation->endOffset = currentOffset();
    ASSERT(tokenLocation->startOffset >= tokenLocation->lineStartOffset);

    m_lastToken = IDENT;
    return IDENT;
}

} // namespace JSC

#endif // Lexer_h
Commit	Line	Data
9dae56ea A	1	/*
9dae56ea A	2	* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
93a37866	3	* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved.
14957cd0	4	* Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
9dae56ea A	5	*
	6	* This library is free software; you can redistribute it and/or
	7	* modify it under the terms of the GNU Library General Public
	8	* License as published by the Free Software Foundation; either
	9	* version 2 of the License, or (at your option) any later version.
	10	*
	11	* This library is distributed in the hope that it will be useful,
	12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	* Library General Public License for more details.
	15	*
	16	* You should have received a copy of the GNU Library General Public License
	17	* along with this library; see the file COPYING.LIB. If not, write to
	18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	19	* Boston, MA 02110-1301, USA.
	20	*
	21	*/
	22
	23	#ifndef Lexer_h
	24	#define Lexer_h
	25
9dae56ea	26	#include "Lookup.h"
f9bf01c6	27	#include "ParserArena.h"
6fe7ccc8	28	#include "ParserTokens.h"
9dae56ea	29	#include "SourceCode.h"
ba379fdc A	30	#include <wtf/ASCIICType.h>
ba379fdc A	31	#include <wtf/SegmentedVector.h>
9dae56ea	32	#include <wtf/Vector.h>
ba379fdc	33	#include <wtf/unicode/Unicode.h>
9dae56ea A	34
	35	namespace JSC {
	36
6fe7ccc8 A	37	class Keywords {
	38	public:
	39	bool isKeyword(const Identifier& ident) const
ba379fdc	40	{
93a37866	41	return m_keywordTable.entry(m_vm, ident);
ba379fdc	42	}
6fe7ccc8 A	43
6fe7ccc8 A	44	const HashEntry* getKeyword(const Identifier& ident) const
ba379fdc	45	{
93a37866	46	return m_keywordTable.entry(m_vm, ident);
ba379fdc	47	}
6fe7ccc8 A	48
6fe7ccc8 A	49	~Keywords()
ba379fdc	50	{
6fe7ccc8	51	m_keywordTable.deleteTable();
ba379fdc	52	}
6fe7ccc8 A	53
6fe7ccc8 A	54	private:
93a37866	55	friend class VM;
6fe7ccc8	56
93a37866	57	Keywords(VM*);
6fe7ccc8	58
93a37866	59	VM* m_vm;
6fe7ccc8 A	60	const HashTable m_keywordTable;
	61	};
	62
	63	enum LexerFlags {
	64	LexerFlagsIgnoreReservedWords = 1,
	65	LexerFlagsDontBuildStrings = 2,
	66	LexexFlagsDontBuildKeywords = 4
	67	};
	68
	69	template <typename T>
	70	class Lexer {
	71	WTF_MAKE_NONCOPYABLE(Lexer);
	72	WTF_MAKE_FAST_ALLOCATED;
	73
	74	public:
93a37866	75	Lexer(VM*);
6fe7ccc8 A	76	~Lexer();
	77
	78	// Character manipulation functions.
	79	static bool isWhiteSpace(T character);
	80	static bool isLineTerminator(T character);
	81	static unsigned char convertHex(int c1, int c2);
	82	static UChar convertUnicode(int c1, int c2, int c3, int c4);
	83
	84	// Functions to set up parsing.
	85	void setCode(const SourceCode&, ParserArena*);
	86	void setIsReparsing() { m_isReparsing = true; }
	87	bool isReparsing() const { return m_isReparsing; }
	88
93a37866	89	JSTokenType lex(JSTokenData, JSTokenLocation, unsigned, bool strictMode);
6fe7ccc8 A	90	bool nextTokenIsColon();
6fe7ccc8 A	91	int lineNumber() const { return m_lineNumber; }
93a37866 A	92	ALWAYS_INLINE int currentOffset() const { return offsetFromSourcePtr(m_code); }
93a37866 A	93	ALWAYS_INLINE int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart); }
6fe7ccc8 A	94	void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
	95	int lastLineNumber() const { return m_lastLineNumber; }
	96	bool prevTerminator() const { return m_terminator; }
93a37866	97	SourceCode sourceCode(int openBrace, int closeBrace, int firstLine, unsigned startColumn);
6fe7ccc8 A	98	bool scanRegExp(const Identifier& pattern, const Identifier& flags, UChar patternPrefix = 0);
	99	bool skipRegExp();
	100
	101	// Functions for use after parsing.
	102	bool sawError() const { return m_error; }
93a37866	103	String getErrorMessage() const { return m_lexErrorMessage; }
6fe7ccc8	104	void clear();
93a37866	105	void setOffset(int offset, int lineStartOffset)
ba379fdc	106	{
6fe7ccc8	107	m_error = 0;
93a37866 A	108	m_lexErrorMessage = String();
	109
	110	m_code = sourcePtrFromOffset(offset);
	111	m_lineStart = sourcePtrFromOffset(lineStartOffset);
	112	ASSERT(currentOffset() >= currentLineStartOffset());
	113
6fe7ccc8 A	114	m_buffer8.resize(0);
	115	m_buffer16.resize(0);
	116	if (LIKELY(m_code < m_codeEnd))
	117	m_current = *m_code;
	118	else
	119	m_current = 0;
ba379fdc	120	}
6fe7ccc8	121	void setLineNumber(int line)
14957cd0	122	{
6fe7ccc8	123	m_lineNumber = line;
14957cd0	124	}
ba379fdc	125
6fe7ccc8 A	126	SourceProvider* sourceProvider() const { return m_source->provider(); }
6fe7ccc8 A	127
93a37866	128	JSTokenType lexExpectIdentifier(JSTokenData, JSTokenLocation, unsigned, bool strictMode);
6fe7ccc8 A	129
	130	private:
	131	void record8(int);
	132	void append8(const T*, size_t);
	133	void record16(int);
	134	void record16(T);
	135	void append16(const LChar*, size_t);
	136	void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); }
	137
	138	ALWAYS_INLINE void shift();
	139	ALWAYS_INLINE bool atEnd() const;
	140	ALWAYS_INLINE T peek(int offset) const;
93a37866 A	141	struct UnicodeHexValue {
	142
	143	enum ValueType { ValidHex, IncompleteHex, InvalidHex };
	144
	145	explicit UnicodeHexValue(int value)
	146	: m_value(value)
	147	{
	148	}
	149	explicit UnicodeHexValue(ValueType type)
	150	: m_value(type == IncompleteHex ? -2 : -1)
	151	{
	152	}
	153
	154	ValueType valueType() const
	155	{
	156	if (m_value >= 0)
	157	return ValidHex;
	158	return m_value == -2 ? IncompleteHex : InvalidHex;
	159	}
	160	bool isValid() const { return m_value >= 0; }
	161	int value() const
	162	{
	163	ASSERT(m_value >= 0);
	164	return m_value;
	165	}
	166
	167	private:
	168	int m_value;
	169	};
	170	UnicodeHexValue parseFourDigitUnicodeHex();
6fe7ccc8 A	171	void shiftLineTerminator();
6fe7ccc8 A	172
93a37866 A	173	ALWAYS_INLINE int offsetFromSourcePtr(const T* ptr) const { return ptr - m_codeStart; }
	174	ALWAYS_INLINE const T* sourcePtrFromOffset(int offset) const { return m_codeStart + offset; }
	175
	176	String invalidCharacterMessage() const;
	177	ALWAYS_INLINE const T* currentSourcePtr() const;
	178	ALWAYS_INLINE void setOffsetFromSourcePtr(const T* sourcePtr, unsigned lineStartOffset) { setOffset(offsetFromSourcePtr(sourcePtr), lineStartOffset); }
6fe7ccc8 A	179
	180	ALWAYS_INLINE void setCodeStart(const StringImpl*);
	181
	182	ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length);
	183	ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
93a37866 A	184	ALWAYS_INLINE const Identifier* makeLCharIdentifier(const LChar* characters, size_t length);
	185	ALWAYS_INLINE const Identifier* makeLCharIdentifier(const UChar* characters, size_t length);
	186	ALWAYS_INLINE const Identifier* makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars);
6fe7ccc8 A	187	ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length);
	188
	189	ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
	190
	191	template <int shiftAmount> void internalShift();
	192	template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
	193	template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned lexerFlags, bool strictMode);
	194	template <bool shouldBuildIdentifiers> NEVER_INLINE JSTokenType parseIdentifierSlowCase(JSTokenData*, unsigned lexerFlags, bool strictMode);
93a37866 A	195	enum StringParseResult {
	196	StringParsedSuccessfully,
	197	StringUnterminated,
	198	StringCannotBeParsed
	199	};
	200	template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseString(JSTokenData*, bool strictMode);
	201	template <bool shouldBuildStrings> NEVER_INLINE StringParseResult parseStringSlowCase(JSTokenData*, bool strictMode);
6fe7ccc8 A	202	ALWAYS_INLINE void parseHex(double& returnValue);
	203	ALWAYS_INLINE bool parseOctal(double& returnValue);
	204	ALWAYS_INLINE bool parseDecimal(double& returnValue);
	205	ALWAYS_INLINE void parseNumberAfterDecimalPoint();
	206	ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
	207	ALWAYS_INLINE bool parseMultilineComment();
	208
	209	static const size_t initialReadBufferCapacity = 32;
	210
	211	int m_lineNumber;
	212	int m_lastLineNumber;
	213
	214	Vector<LChar> m_buffer8;
	215	Vector<UChar> m_buffer16;
	216	bool m_terminator;
	217	int m_lastToken;
	218
	219	const SourceCode* m_source;
93a37866	220	unsigned m_sourceOffset;
6fe7ccc8 A	221	const T* m_code;
	222	const T* m_codeStart;
	223	const T* m_codeEnd;
93a37866 A	224	const T* m_codeStartPlusOffset;
93a37866 A	225	const T* m_lineStart;
6fe7ccc8 A	226	bool m_isReparsing;
	227	bool m_atLineStart;
	228	bool m_error;
93a37866	229	String m_lexErrorMessage;
6fe7ccc8 A	230
	231	T m_current;
	232
	233	IdentifierArena* m_arena;
	234
93a37866	235	VM* m_vm;
6fe7ccc8 A	236	};
	237
	238	template <>
	239	ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch)
	240	{
	241	return ch == ' ' \|\| ch == '\t' \|\| ch == 0xB \|\| ch == 0xC \|\| ch == 0xA0;
	242	}
	243
	244	template <>
	245	ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch)
	246	{
	247	return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (WTF::Unicode::isSeparatorSpace(ch) \|\| ch == 0xFEFF);
	248	}
	249
	250	template <>
	251	ALWAYS_INLINE bool Lexer<LChar>::isLineTerminator(LChar ch)
	252	{
	253	return ch == '\r' \|\| ch == '\n';
	254	}
	255
	256	template <>
	257	ALWAYS_INLINE bool Lexer<UChar>::isLineTerminator(UChar ch)
	258	{
	259	return ch == '\r' \|\| ch == '\n' \|\| (ch & ~1) == 0x2028;
	260	}
	261
	262	template <typename T>
	263	inline unsigned char Lexer<T>::convertHex(int c1, int c2)
	264	{
	265	return (toASCIIHexValue(c1) << 4) \| toASCIIHexValue(c2);
	266	}
	267
	268	template <typename T>
	269	inline UChar Lexer<T>::convertUnicode(int c1, int c2, int c3, int c4)
	270	{
	271	return (convertHex(c1, c2) << 8) \| convertHex(c3, c4);
	272	}
	273
	274	template <typename T>
	275	ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const LChar* characters, size_t length)
	276	{
93a37866	277	return &m_arena->makeIdentifier(m_vm, characters, length);
6fe7ccc8 A	278	}
	279
	280	template <typename T>
	281	ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const UChar* characters, size_t length)
	282	{
93a37866 A	283	return &m_arena->makeIdentifier(m_vm, characters, length);
	284	}
	285
	286	template <>
	287	ALWAYS_INLINE const Identifier* Lexer<LChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar)
	288	{
	289	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	290	}
	291
	292	template <>
	293	ALWAYS_INLINE const Identifier* Lexer<UChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars)
	294	{
	295	if (!(orAllChars & ~0xff))
	296	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	297
	298	return &m_arena->makeIdentifier(m_vm, characters, length);
6fe7ccc8 A	299	}
	300
	301	template <>
	302	ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString)
	303	{
	304	ASSERT(sourceString->is8Bit());
	305	m_codeStart = sourceString->characters8();
	306	}
	307
	308	template <>
	309	ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString)
	310	{
	311	ASSERT(!sourceString->is8Bit());
	312	m_codeStart = sourceString->characters16();
	313	}
	314
	315	template <typename T>
	316	ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifierLCharFromUChar(const UChar* characters, size_t length)
	317	{
93a37866 A	318	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	319	}
	320
	321	template <typename T>
	322	ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const LChar* characters, size_t length)
	323	{
	324	return &m_arena->makeIdentifier(m_vm, characters, length);
	325	}
	326
	327	template <typename T>
	328	ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const UChar* characters, size_t length)
	329	{
	330	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
6fe7ccc8 A	331	}
	332
	333	template <typename T>
93a37866	334	ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSTokenData* tokenData, JSTokenLocation* tokenLocation, unsigned lexerFlags, bool strictMode)
6fe7ccc8 A	335	{
	336	ASSERT((lexerFlags & LexerFlagsIgnoreReservedWords));
	337	const T* start = m_code;
	338	const T* ptr = start;
	339	const T* end = m_codeEnd;
	340	if (ptr >= end) {
	341	ASSERT(ptr == end);
	342	goto slowCase;
	343	}
	344	if (!WTF::isASCIIAlpha(*ptr))
	345	goto slowCase;
	346	++ptr;
	347	while (ptr < end) {
	348	if (!WTF::isASCIIAlphanumeric(*ptr))
	349	break;
14957cd0	350	++ptr;
f9bf01c6 A	351	}
f9bf01c6 A	352
6fe7ccc8 A	353	// Here's the shift
	354	if (ptr < end) {
	355	if ((!WTF::isASCII(ptr)) \|\| (ptr == '\\') \|\| (ptr == '_') \|\| (ptr == '$'))
	356	goto slowCase;
	357	m_current = *ptr;
	358	} else
	359	m_current = 0;
	360
	361	m_code = ptr;
93a37866	362	ASSERT(currentOffset() >= currentLineStartOffset());
6fe7ccc8 A	363
	364	// Create the identifier if needed
	365	if (lexerFlags & LexexFlagsDontBuildKeywords)
	366	tokenData->ident = 0;
	367	else
93a37866 A	368	tokenData->ident = makeLCharIdentifier(start, ptr - start);
	369	tokenLocation->line = m_lineNumber;
	370	tokenLocation->lineStartOffset = currentLineStartOffset();
	371	tokenLocation->startOffset = offsetFromSourcePtr(start);
	372	tokenLocation->endOffset = currentOffset();
	373	ASSERT(tokenLocation->startOffset >= tokenLocation->lineStartOffset);
6fe7ccc8 A	374	m_lastToken = IDENT;
	375	return IDENT;
	376
	377	slowCase:
93a37866	378	return lex(tokenData, tokenLocation, lexerFlags, strictMode);
6fe7ccc8 A	379	}
6fe7ccc8 A	380
9dae56ea A	381	} // namespace JSC
	382
	383	#endif // Lexer_h