]>
Commit | Line | Data |
---|---|---|
9dae56ea A |
1 | /* |
2 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) | |
ba379fdc | 3 | * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
9dae56ea A |
4 | * |
5 | * This library is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU Library General Public | |
7 | * License as published by the Free Software Foundation; either | |
8 | * version 2 of the License, or (at your option) any later version. | |
9 | * | |
10 | * This library is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * Library General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Library General Public License | |
16 | * along with this library; see the file COPYING.LIB. If not, write to | |
17 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
18 | * Boston, MA 02110-1301, USA. | |
19 | * | |
20 | */ | |
21 | ||
22 | #ifndef Lexer_h | |
23 | #define Lexer_h | |
24 | ||
9dae56ea | 25 | #include "Lookup.h" |
f9bf01c6 | 26 | #include "ParserArena.h" |
9dae56ea | 27 | #include "SourceCode.h" |
ba379fdc A |
28 | #include <wtf/ASCIICType.h> |
29 | #include <wtf/SegmentedVector.h> | |
9dae56ea | 30 | #include <wtf/Vector.h> |
ba379fdc | 31 | #include <wtf/unicode/Unicode.h> |
9dae56ea A |
32 | |
33 | namespace JSC { | |
34 | ||
35 | class RegExp; | |
36 | ||
f9bf01c6 | 37 | class Lexer : public Noncopyable { |
9dae56ea | 38 | public: |
ba379fdc A |
39 | // Character manipulation functions. |
40 | static bool isWhiteSpace(int character); | |
41 | static bool isLineTerminator(int character); | |
42 | static unsigned char convertHex(int c1, int c2); | |
43 | static UChar convertUnicode(int c1, int c2, int c3, int c4); | |
44 | ||
45 | // Functions to set up parsing. | |
f9bf01c6 | 46 | void setCode(const SourceCode&, ParserArena&); |
9dae56ea | 47 | void setIsReparsing() { m_isReparsing = true; } |
9dae56ea | 48 | |
ba379fdc A |
49 | // Functions for the parser itself. |
50 | int lex(void* lvalp, void* llocp); | |
51 | int lineNumber() const { return m_lineNumber; } | |
9dae56ea | 52 | bool prevTerminator() const { return m_terminator; } |
ba379fdc | 53 | SourceCode sourceCode(int openBrace, int closeBrace, int firstLine); |
f9bf01c6 A |
54 | bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0); |
55 | bool skipRegExp(); | |
9dae56ea | 56 | |
ba379fdc | 57 | // Functions for use after parsing. |
9dae56ea | 58 | bool sawError() const { return m_error; } |
9dae56ea | 59 | void clear(); |
9dae56ea A |
60 | |
61 | private: | |
62 | friend class JSGlobalData; | |
ba379fdc | 63 | |
9dae56ea A |
64 | Lexer(JSGlobalData*); |
65 | ~Lexer(); | |
66 | ||
ba379fdc A |
67 | void shift1(); |
68 | void shift2(); | |
69 | void shift3(); | |
70 | void shift4(); | |
71 | void shiftLineTerminator(); | |
9dae56ea A |
72 | |
73 | void record8(int); | |
74 | void record16(int); | |
75 | void record16(UChar); | |
76 | ||
ba379fdc A |
77 | void copyCodeWithoutBOMs(); |
78 | ||
79 | int currentOffset() const; | |
80 | const UChar* currentCharacter() const; | |
81 | ||
f9bf01c6 | 82 | const Identifier* makeIdentifier(const UChar* characters, size_t length); |
ba379fdc A |
83 | |
84 | bool lastTokenWasRestrKeyword() const; | |
9dae56ea A |
85 | |
86 | static const size_t initialReadBufferCapacity = 32; | |
9dae56ea | 87 | |
ba379fdc | 88 | int m_lineNumber; |
9dae56ea | 89 | |
9dae56ea A |
90 | Vector<char> m_buffer8; |
91 | Vector<UChar> m_buffer16; | |
92 | bool m_terminator; | |
9dae56ea | 93 | bool m_delimited; // encountered delimiter like "'" and "}" on last run |
9dae56ea A |
94 | int m_lastToken; |
95 | ||
9dae56ea A |
96 | const SourceCode* m_source; |
97 | const UChar* m_code; | |
ba379fdc A |
98 | const UChar* m_codeStart; |
99 | const UChar* m_codeEnd; | |
9dae56ea | 100 | bool m_isReparsing; |
ba379fdc | 101 | bool m_atLineStart; |
9dae56ea A |
102 | bool m_error; |
103 | ||
104 | // current and following unicode characters (int to allow for -1 for end-of-file marker) | |
105 | int m_current; | |
106 | int m_next1; | |
107 | int m_next2; | |
108 | int m_next3; | |
109 | ||
f9bf01c6 | 110 | IdentifierArena* m_arena; |
9dae56ea A |
111 | |
112 | JSGlobalData* m_globalData; | |
113 | ||
ba379fdc A |
114 | const HashTable m_keywordTable; |
115 | ||
116 | Vector<UChar> m_codeWithoutBOMs; | |
9dae56ea A |
117 | }; |
118 | ||
ba379fdc A |
119 | inline bool Lexer::isWhiteSpace(int ch) |
120 | { | |
121 | return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch); | |
122 | } | |
123 | ||
124 | inline bool Lexer::isLineTerminator(int ch) | |
125 | { | |
126 | return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028; | |
127 | } | |
128 | ||
129 | inline unsigned char Lexer::convertHex(int c1, int c2) | |
130 | { | |
131 | return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2); | |
132 | } | |
133 | ||
134 | inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) | |
135 | { | |
136 | return (convertHex(c1, c2) << 8) | convertHex(c3, c4); | |
137 | } | |
138 | ||
f9bf01c6 A |
139 | // A bridge for yacc from the C world to the C++ world. |
140 | inline int jscyylex(void* lvalp, void* llocp, void* globalData) | |
141 | { | |
142 | return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp); | |
143 | } | |
144 | ||
9dae56ea A |
145 | } // namespace JSC |
146 | ||
147 | #endif // Lexer_h |