/*
* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
- * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
#ifndef Lexer_h
#define Lexer_h
-#include "Identifier.h"
#include "Lookup.h"
-#include "SegmentedVector.h"
+#include "ParserArena.h"
#include "SourceCode.h"
+#include <wtf/ASCIICType.h>
+#include <wtf/SegmentedVector.h>
#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
namespace JSC {
class RegExp;
- class Lexer : Noncopyable {
+ class Lexer : public Noncopyable {
public:
- void setCode(const SourceCode&);
- void setIsReparsing() { m_isReparsing = true; }
- int lex(void* lvalp, void* llocp);
+ // Character manipulation functions.
+ static bool isWhiteSpace(int character);
+ static bool isLineTerminator(int character);
+ static unsigned char convertHex(int c1, int c2);
+ static UChar convertUnicode(int c1, int c2, int c3, int c4);
- int lineNo() const { return yylineno; }
+ // Functions to set up parsing.
+ void setCode(const SourceCode&, ParserArena&);
+ void setIsReparsing() { m_isReparsing = true; }
+ // Functions for the parser itself.
+ int lex(void* lvalp, void* llocp);
+ int lineNumber() const { return m_lineNumber; }
bool prevTerminator() const { return m_terminator; }
+ SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
+ bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
+ bool skipRegExp();
- enum State {
- Start,
- IdentifierOrKeyword,
- Identifier,
- InIdentifierOrKeyword,
- InIdentifier,
- InIdentifierStartUnicodeEscapeStart,
- InIdentifierStartUnicodeEscape,
- InIdentifierPartUnicodeEscapeStart,
- InIdentifierPartUnicodeEscape,
- InSingleLineComment,
- InMultiLineComment,
- InNum,
- InNum0,
- InHex,
- InOctal,
- InDecimal,
- InExponentIndicator,
- InExponent,
- Hex,
- Octal,
- Number,
- String,
- Eof,
- InString,
- InEscapeSequence,
- InHexEscape,
- InUnicodeEscape,
- Other,
- Bad
- };
-
- bool scanRegExp();
- const UString& pattern() const { return m_pattern; }
- const UString& flags() const { return m_flags; }
-
- static unsigned char convertHex(int);
- static unsigned char convertHex(int c1, int c2);
- static UChar convertUnicode(int c1, int c2, int c3, int c4);
- static bool isIdentStart(int);
- static bool isIdentPart(int);
- static bool isHexDigit(int);
-
+ // Functions for use after parsing.
bool sawError() const { return m_error; }
-
void clear();
- SourceCode sourceCode(int openBrace, int closeBrace, int firstLine) { return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); }
private:
friend class JSGlobalData;
+
Lexer(JSGlobalData*);
~Lexer();
- void setDone(State);
- void shift(unsigned int p);
- void nextLine();
- int lookupKeyword(const char *);
-
- bool isWhiteSpace() const;
- bool isLineTerminator();
- static bool isOctalDigit(int);
-
- ALWAYS_INLINE int matchPunctuator(int& charPos, int c1, int c2, int c3, int c4);
- static unsigned short singleEscape(unsigned short);
- static unsigned short convertOctal(int c1, int c2, int c3);
+ void shift1();
+ void shift2();
+ void shift3();
+ void shift4();
+ void shiftLineTerminator();
void record8(int);
void record16(int);
void record16(UChar);
- JSC::Identifier* makeIdentifier(const Vector<UChar>& buffer)
- {
- m_identifiers.append(JSC::Identifier(m_globalData, buffer.data(), buffer.size()));
- return &m_identifiers.last();
- }
+ void copyCodeWithoutBOMs();
+
+ int currentOffset() const;
+ const UChar* currentCharacter() const;
+
+ const Identifier* makeIdentifier(const UChar* characters, size_t length);
+
+ bool lastTokenWasRestrKeyword() const;
static const size_t initialReadBufferCapacity = 32;
- static const size_t initialIdentifierTableCapacity = 64;
- int yylineno;
- int yycolumn;
+ int m_lineNumber;
- bool m_done;
Vector<char> m_buffer8;
Vector<UChar> m_buffer16;
bool m_terminator;
- bool m_restrKeyword;
bool m_delimited; // encountered delimiter like "'" and "}" on last run
- bool m_skipLF;
- bool m_skipCR;
- bool m_eatNextIdentifier;
- int m_stackToken;
int m_lastToken;
- State m_state;
- unsigned int m_position;
const SourceCode* m_source;
const UChar* m_code;
- unsigned int m_length;
+ const UChar* m_codeStart;
+ const UChar* m_codeEnd;
bool m_isReparsing;
- int m_atLineStart;
+ bool m_atLineStart;
bool m_error;
// current and following unicode characters (int to allow for -1 for end-of-file marker)
int m_next2;
int m_next3;
- int m_currentOffset;
- int m_nextOffset1;
- int m_nextOffset2;
- int m_nextOffset3;
-
- SegmentedVector<JSC::Identifier, initialIdentifierTableCapacity> m_identifiers;
+ IdentifierArena* m_arena;
JSGlobalData* m_globalData;
- UString m_pattern;
- UString m_flags;
+ const HashTable m_keywordTable;
- const HashTable m_mainTable;
+ Vector<UChar> m_codeWithoutBOMs;
};
+ inline bool Lexer::isWhiteSpace(int ch)
+ {
+ return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch);
+ }
+
+ inline bool Lexer::isLineTerminator(int ch)
+ {
+ return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
+ }
+
+ inline unsigned char Lexer::convertHex(int c1, int c2)
+ {
+ return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
+ }
+
+ inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
+ {
+ return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
+ }
+
+ // A bridge for yacc from the C world to the C++ world.
+ inline int jscyylex(void* lvalp, void* llocp, void* globalData)
+ {
+ return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
+ }
+
} // namespace JSC
#endif // Lexer_h