/*
* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
- * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
#include "Lookup.h"
#include "ParserArena.h"
+#include "ParserTokens.h"
#include "SourceCode.h"
#include <wtf/ASCIICType.h>
#include <wtf/SegmentedVector.h>
#include <wtf/Vector.h>
-#include <wtf/unicode/Unicode.h>
namespace JSC {
- class RegExp;
+class Keywords { // Keyword lookup helper: every query goes through m_keywordTable.entry() with the stored VM reference.
+public:
+ bool isKeyword(const Identifier& ident) const // Result of the table lookup converted to bool.
+ {
+ return m_keywordTable.entry(m_vm, ident);
+ }
+
+ const HashTableValue* getKeyword(const Identifier& ident) const // Same lookup as isKeyword(), but returns the table entry pointer itself.
+ {
+ return m_keywordTable.entry(m_vm, ident);
+ }
+
+ ~Keywords() // Calls deleteTable() on the keyword table when the object is destroyed.
+ {
+ m_keywordTable.deleteTable();
+ }
+
+private:
+ friend class VM; // The constructor is private, so construction is limited to this class and its friend VM.
+
+ explicit Keywords(VM&);
+
+ VM& m_vm; // Passed as the first argument of every entry() lookup.
+ const HashTable m_keywordTable;
+};
- class Lexer : public Noncopyable {
- public:
- // Character manipulation functions.
- static bool isWhiteSpace(int character);
- static bool isLineTerminator(int character);
- static unsigned char convertHex(int c1, int c2);
- static UChar convertUnicode(int c1, int c2, int c3, int c4);
+enum LexerFlags { // Distinct bit values (1, 2, 4) so they can be combined in the unsigned flags argument and tested with '&'.
+ LexerFlagsIgnoreReservedWords = 1,
+ LexerFlagsDontBuildStrings = 2,
+ LexexFlagsDontBuildKeywords = 4 // NOTE(review): "Lexex" looks like a typo for "Lexer"; the name is referenced as spelled, so a rename must update every user.
+};
- // Functions to set up parsing.
- void setCode(const SourceCode&, ParserArena&);
- void setIsReparsing() { m_isReparsing = true; }
+template <typename T> // T is the source character type; members below are specialized for LChar and UChar.
+class Lexer {
+ WTF_MAKE_NONCOPYABLE(Lexer);
+ WTF_MAKE_FAST_ALLOCATED;
- // Functions for the parser itself.
- int lex(void* lvalp, void* llocp);
- int lineNumber() const { return m_lineNumber; }
- bool prevTerminator() const { return m_terminator; }
- SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
- bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
- bool skipRegExp();
+public:
+ Lexer(VM*, JSParserStrictness);
+ ~Lexer();
- // Functions for use after parsing.
- bool sawError() const { return m_error; }
- void clear();
+ // Character manipulation functions.
+ static bool isWhiteSpace(T character);
+ static bool isLineTerminator(T character);
+ static unsigned char convertHex(int c1, int c2);
+ static UChar convertUnicode(int c1, int c2, int c3, int c4);
- private:
- friend class JSGlobalData;
+ // Functions to set up parsing.
+ void setCode(const SourceCode&, ParserArena*);
+ void setIsReparsing() { m_isReparsing = true; }
+ bool isReparsing() const { return m_isReparsing; }
- Lexer(JSGlobalData*);
- ~Lexer();
+ JSTokenType lex(JSToken*, unsigned, bool strictMode); // The unsigned argument is the lexer flags mask (see how lexExpectIdentifier forwards it).
+ bool nextTokenIsColon();
+ int lineNumber() const { return m_lineNumber; }
+ ALWAYS_INLINE int currentOffset() const { return offsetFromSourcePtr(m_code); } // Distance of m_code from m_codeStart.
+ ALWAYS_INLINE int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart); } // Distance of m_lineStart from m_codeStart.
+ ALWAYS_INLINE JSTextPosition currentPosition() const // Bundles line number, current offset and line-start offset.
+ {
+ return JSTextPosition(m_lineNumber, currentOffset(), currentLineStartOffset());
+ }
+ JSTextPosition positionBeforeLastNewline() const { return m_positionBeforeLastNewline; }
+ void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
+ int lastLineNumber() const { return m_lastLineNumber; }
+ bool prevTerminator() const { return m_terminator; }
+ bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
+ bool skipRegExp();
- void shift1();
- void shift2();
- void shift3();
- void shift4();
- void shiftLineTerminator();
+ // Functions for use after parsing.
+ bool sawError() const { return m_error; }
+ String getErrorMessage() const { return m_lexErrorMessage; }
+ void clear();
+ void setOffset(int offset, int lineStartOffset) // Repositions the lexer: clears the error state, moves m_code/m_lineStart, empties both buffers and reloads m_current.
+ {
+ m_error = 0;
+ m_lexErrorMessage = String();
- void record8(int);
- void record16(int);
- void record16(UChar);
+ m_code = sourcePtrFromOffset(offset);
+ m_lineStart = sourcePtrFromOffset(lineStartOffset);
+ ASSERT(currentOffset() >= currentLineStartOffset()); // The line start may not lie after the current position.
- void copyCodeWithoutBOMs();
+ m_buffer8.resize(0);
+ m_buffer16.resize(0);
+ if (LIKELY(m_code < m_codeEnd))
+ m_current = *m_code;
+ else
+ m_current = 0; // Positioned at or past m_codeEnd: 0 stands in for the current character.
+ }
+ void setLineNumber(int line)
+ {
+ m_lineNumber = line;
+ }
- int currentOffset() const;
- const UChar* currentCharacter() const;
+ SourceProvider* sourceProvider() const { return m_source->provider(); }
- const Identifier* makeIdentifier(const UChar* characters, size_t length);
+ JSTokenType lexExpectIdentifier(JSToken*, unsigned, bool strictMode); // Identifier fast path; defined inline further down in this header.
- bool lastTokenWasRestrKeyword() const;
+private:
+ void record8(int);
+ void append8(const T*, size_t);
+ void record16(int);
+ void record16(T);
+ void append16(const LChar*, size_t);
+ void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); }
- static const size_t initialReadBufferCapacity = 32;
+ ALWAYS_INLINE void shift();
+ ALWAYS_INLINE bool atEnd() const;
+ ALWAYS_INLINE T peek(int offset) const;
+ struct UnicodeHexValue { // Holds either a non-negative value or a negative sentinel: -2 for IncompleteHex, -1 for InvalidHex.
+
+ enum ValueType { ValidHex, IncompleteHex, InvalidHex };
+
+ explicit UnicodeHexValue(int value)
+ : m_value(value)
+ {
+ }
+ explicit UnicodeHexValue(ValueType type) // Any type other than IncompleteHex is stored as -1, which valueType() reports as InvalidHex.
+ : m_value(type == IncompleteHex ? -2 : -1)
+ {
+ }
- int m_lineNumber;
+ ValueType valueType() const // Decodes the sentinel encoding back into a ValueType.
+ {
+ if (m_value >= 0)
+ return ValidHex;
+ return m_value == -2 ? IncompleteHex : InvalidHex;
+ }
+ bool isValid() const { return m_value >= 0; }
+ int value() const // Only meaningful for a valid value; asserts m_value >= 0.
+ {
+ ASSERT(m_value >= 0);
+ return m_value;
+ }
+
+ private:
+ int m_value;
+ };
+ UnicodeHexValue parseFourDigitUnicodeHex();
+ void shiftLineTerminator();
- Vector<char> m_buffer8;
- Vector<UChar> m_buffer16;
- bool m_terminator;
- bool m_delimited; // encountered delimiter like "'" and "}" on last run
- int m_lastToken;
+ ALWAYS_INLINE int offsetFromSourcePtr(const T* ptr) const { return ptr - m_codeStart; } // Pointer -> offset, measured in T units from m_codeStart.
+ ALWAYS_INLINE const T* sourcePtrFromOffset(int offset) const { return m_codeStart + offset; } // Inverse of offsetFromSourcePtr().
- const SourceCode* m_source;
- const UChar* m_code;
- const UChar* m_codeStart;
- const UChar* m_codeEnd;
- bool m_isReparsing;
- bool m_atLineStart;
- bool m_error;
+ String invalidCharacterMessage() const;
+ ALWAYS_INLINE const T* currentSourcePtr() const;
+ ALWAYS_INLINE void setOffsetFromSourcePtr(const T* sourcePtr, unsigned lineStartOffset) { setOffset(offsetFromSourcePtr(sourcePtr), lineStartOffset); } // Pointer-based convenience wrapper around setOffset().
- // current and following unicode characters (int to allow for -1 for end-of-file marker)
- int m_current;
- int m_next1;
- int m_next2;
- int m_next3;
-
- IdentifierArena* m_arena;
+ ALWAYS_INLINE void setCodeStart(const StringImpl*);
- JSGlobalData* m_globalData;
+ ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length);
+ ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
+ ALWAYS_INLINE const Identifier* makeLCharIdentifier(const LChar* characters, size_t length);
+ ALWAYS_INLINE const Identifier* makeLCharIdentifier(const UChar* characters, size_t length);
+ ALWAYS_INLINE const Identifier* makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars);
+ ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length);
- const HashTable m_keywordTable;
+ ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
- Vector<UChar> m_codeWithoutBOMs;
+ template <int shiftAmount> void internalShift();
+ template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
+ template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned lexerFlags, bool strictMode);
+ template <bool shouldBuildIdentifiers> NEVER_INLINE JSTokenType parseIdentifierSlowCase(JSTokenData*, unsigned lexerFlags, bool strictMode);
+ enum StringParseResult { // Result type returned by parseString() and parseStringSlowCase().
+ StringParsedSuccessfully,
+ StringUnterminated,
+ StringCannotBeParsed
};
+ template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseString(JSTokenData*, bool strictMode);
+ template <bool shouldBuildStrings> NEVER_INLINE StringParseResult parseStringSlowCase(JSTokenData*, bool strictMode);
+ ALWAYS_INLINE void parseHex(double& returnValue);
+ ALWAYS_INLINE bool parseOctal(double& returnValue);
+ ALWAYS_INLINE bool parseDecimal(double& returnValue);
+ ALWAYS_INLINE void parseNumberAfterDecimalPoint();
+ ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
+ ALWAYS_INLINE bool parseMultilineComment();
- inline bool Lexer::isWhiteSpace(int ch)
- {
- return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch);
- }
+ static const size_t initialReadBufferCapacity = 32;
- inline bool Lexer::isLineTerminator(int ch)
- {
- return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
- }
+ int m_lineNumber;
+ int m_lastLineNumber;
- inline unsigned char Lexer::convertHex(int c1, int c2)
- {
- return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
- }
+ Vector<LChar> m_buffer8;
+ Vector<UChar> m_buffer16;
+ bool m_terminator;
+ int m_lastToken;
- inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
- {
- return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
+ const SourceCode* m_source;
+ unsigned m_sourceOffset;
+ const T* m_code; // Current read position; offsets are measured from m_codeStart.
+ const T* m_codeStart;
+ const T* m_codeEnd; // End bound: setOffset() only dereferences m_code while it is below this pointer.
+ const T* m_codeStartPlusOffset;
+ const T* m_lineStart;
+ JSTextPosition m_positionBeforeLastNewline;
+ bool m_isReparsing;
+ bool m_atLineStart;
+ bool m_error;
+ String m_lexErrorMessage;
+
+ T m_current; // Character at m_code, or 0 when m_code is at or past m_codeEnd.
+
+ IdentifierArena* m_arena;
+
+ VM* m_vm;
+ bool m_parsingBuiltinFunction;
+};
+
+template <> // 8-bit specialization: space, tab, 0xB, 0xC and 0xA0 count as whitespace.
+ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch)
+{
+ return ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC || ch == 0xA0;
+}
+
+template <> // 16-bit specialization: values below 256 reuse the LChar rule; others qualify via u_charType() == U_SPACE_SEPARATOR, or by being 0x180E or 0xFEFF.
+ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch)
+{
+ // 0x180E used to be in Zs category before Unicode 6.3, and EcmaScript says that we should keep treating it as such.
+ return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (u_charType(ch) == U_SPACE_SEPARATOR || ch == 0x180E || ch == 0xFEFF);
+}
+
+template <> // 8-bit specialization: only '\r' and '\n' are line terminators.
+ALWAYS_INLINE bool Lexer<LChar>::isLineTerminator(LChar ch)
+{
+ return ch == '\r' || ch == '\n';
+}
+
+template <> // 16-bit specialization: '\r', '\n', and also 0x2028 / 0x2029.
+ALWAYS_INLINE bool Lexer<UChar>::isLineTerminator(UChar ch)
+{
+ return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028; // Clearing the low bit makes one comparison match both 0x2028 and 0x2029.
+}
+
+template <typename T> // Combines two hex digit characters into one byte via toASCIIHexValue(): c1 gives the high nibble, c2 the low nibble.
+inline unsigned char Lexer<T>::convertHex(int c1, int c2)
+{
+ return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
+}
+
+template <typename T> // Builds a UChar from four hex digit characters: convertHex(c1, c2) is the high byte, convertHex(c3, c4) the low byte.
+inline UChar Lexer<T>::convertUnicode(int c1, int c2, int c3, int c4)
+{
+ return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
+}
+
+template <typename T> // LChar overload: forwards to m_arena->makeIdentifier() with the lexer's VM and returns the address of the result.
+ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const LChar* characters, size_t length)
+{
+ return &m_arena->makeIdentifier(m_vm, characters, length);
+}
+
+template <typename T> // UChar overload: forwards to m_arena->makeIdentifier() with the lexer's VM and returns the address of the result.
+ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const UChar* characters, size_t length)
+{
+ return &m_arena->makeIdentifier(m_vm, characters, length);
+}
+
+template <> // 8-bit lexer: the third (unnamed) argument is ignored and the LChar-from-UChar arena path is always taken.
+ALWAYS_INLINE const Identifier* Lexer<LChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar)
+{
+ return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
+}
+
+template <> // 16-bit lexer: picks the arena call based on orAllChars (presumably the bitwise OR of all the characters; confirm against callers).
+ALWAYS_INLINE const Identifier* Lexer<UChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars)
+{
+ if (!(orAllChars & ~0xff)) // No bit above the low byte is set, so the LChar-from-UChar path is used.
+ return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
+
+ return &m_arena->makeIdentifier(m_vm, characters, length);
+}
+
+template <> // 8-bit lexer: points m_codeStart at the string's characters8() data.
+ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString)
+{
+ ASSERT(sourceString->is8Bit()); // An LChar lexer must only be given an 8-bit string.
+ m_codeStart = sourceString->characters8();
+}
+
+template <> // 16-bit lexer: points m_codeStart at the string's characters16() data.
+ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString)
+{
+ ASSERT(!sourceString->is8Bit()); // A UChar lexer must only be given a non-8-bit string.
+ m_codeStart = sourceString->characters16();
+}
+
+template <typename T> // Forwards to m_arena->makeIdentifierLCharFromUChar() with the lexer's VM and returns the address of the result.
+ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifierLCharFromUChar(const UChar* characters, size_t length)
+{
+ return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
+}
+
+template <typename T> // LChar overload: the characters are passed straight to m_arena->makeIdentifier().
+ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const LChar* characters, size_t length)
+{
+ return &m_arena->makeIdentifier(m_vm, characters, length);
+}
+
+template <typename T> // UChar overload: routes through m_arena->makeIdentifierLCharFromUChar() instead of makeIdentifier().
+ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const UChar* characters, size_t length)
+{
+ return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
+}
+
+#if ASSERT_DISABLED // With assertions disabled the check is an inline stub that accepts every identifier.
+ALWAYS_INLINE bool isSafeBuiltinIdentifier(VM&, const Identifier*) { return true; }
+#else // With assertions enabled only the declaration is visible here; the definition is not part of this section.
+bool isSafeBuiltinIdentifier(VM&, const Identifier*);
+#endif
+
+template <typename T> // Fast path for a token expected to be a plain ASCII identifier; every other case is delegated to lex().
+ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
+{
+ JSTokenData* tokenData = &tokenRecord->m_data;
+ JSTokenLocation* tokenLocation = &tokenRecord->m_location;
+ ASSERT((lexerFlags & LexerFlagsIgnoreReservedWords)); // Callers must set LexerFlagsIgnoreReservedWords.
+ const T* start = m_code;
+ const T* ptr = start;
+ const T* end = m_codeEnd;
+ JSTextPosition startPosition = currentPosition();
+ if (ptr >= end) { // Nothing left to scan.
+ ASSERT(ptr == end);
+ goto slowCase;
+ }
+ if (!WTF::isASCIIAlpha(*ptr)) // The fast path requires an ASCII letter as the first character.
+ goto slowCase;
+ ++ptr;
+ while (ptr < end) { // Consume the run of ASCII alphanumerics that follows.
+ if (!WTF::isASCIIAlphanumeric(*ptr))
+ break;
+ ++ptr;
}
- // A bridge for yacc from the C world to the C++ world.
- inline int jscyylex(void* lvalp, void* llocp, void* globalData)
- {
- return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
+ // Here's the shift
+ if (ptr < end) {
+ if ((!WTF::isASCII(*ptr)) || (*ptr == '\\') || (*ptr == '_') || (*ptr == '$')) // The run stopped at a non-ASCII character, '\\', '_' or '$'; hand the whole token to lex().
+ goto slowCase;
+ m_current = *ptr;
+ } else
+ m_current = 0; // Reached m_codeEnd: 0 stands in for the current character.
+
+ m_code = ptr; // First lexer position update; every slowCase jump happens before this line.
+ ASSERT(currentOffset() >= currentLineStartOffset());
+
+ // Create the identifier if needed
+ if (lexerFlags & LexexFlagsDontBuildKeywords // Flag set: leave ident null, except in assert-enabled builds while parsing a builtin function.
+#if !ASSERT_DISABLED
+ && !m_parsingBuiltinFunction
+#endif
+ )
+ tokenData->ident = 0;
+ else
+ tokenData->ident = makeLCharIdentifier(start, ptr - start);
+
+ tokenLocation->line = m_lineNumber;
+ tokenLocation->lineStartOffset = currentLineStartOffset();
+ tokenLocation->startOffset = offsetFromSourcePtr(start);
+ tokenLocation->endOffset = currentOffset();
+ ASSERT(tokenLocation->startOffset >= tokenLocation->lineStartOffset);
+ tokenRecord->m_startPosition = startPosition;
+ tokenRecord->m_endPosition = currentPosition();
+#if !ASSERT_DISABLED
+ if (m_parsingBuiltinFunction) { // Assert-enabled builds only: return ERRORTOK for identifiers that isSafeBuiltinIdentifier() rejects.
+ if (!isSafeBuiltinIdentifier(*m_vm, tokenData->ident))
+ return ERRORTOK;
}
+#endif
+
+ m_lastToken = IDENT;
+ return IDENT;
+
+slowCase: // Reached before any member has been modified, so lex() starts from the original position.
+ return lex(tokenRecord, lexerFlags, strictMode);
+}
} // namespace JSC