]>
Commit | Line | Data |
---|---|---|
9dae56ea A |
1 | /* |
2 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) | |
3 | * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. | |
4 | * | |
5 | * This library is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU Library General Public | |
7 | * License as published by the Free Software Foundation; either | |
8 | * version 2 of the License, or (at your option) any later version. | |
9 | * | |
10 | * This library is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * Library General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Library General Public License | |
16 | * along with this library; see the file COPYING.LIB. If not, write to | |
17 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
18 | * Boston, MA 02110-1301, USA. | |
19 | * | |
20 | */ | |
21 | ||
22 | #ifndef Lexer_h | |
23 | #define Lexer_h | |
24 | ||
25 | #include "Identifier.h" | |
26 | #include "Lookup.h" | |
27 | #include "SegmentedVector.h" | |
28 | #include "SourceCode.h" | |
29 | #include <wtf/Vector.h> | |
30 | ||
31 | namespace JSC { | |
32 | ||
33 | class RegExp; | |
34 | ||
35 | class Lexer : Noncopyable { | |
36 | public: | |
37 | void setCode(const SourceCode&); | |
38 | void setIsReparsing() { m_isReparsing = true; } | |
39 | int lex(void* lvalp, void* llocp); | |
40 | ||
41 | int lineNo() const { return yylineno; } | |
42 | ||
43 | bool prevTerminator() const { return m_terminator; } | |
44 | ||
45 | enum State { | |
46 | Start, | |
47 | IdentifierOrKeyword, | |
48 | Identifier, | |
49 | InIdentifierOrKeyword, | |
50 | InIdentifier, | |
51 | InIdentifierStartUnicodeEscapeStart, | |
52 | InIdentifierStartUnicodeEscape, | |
53 | InIdentifierPartUnicodeEscapeStart, | |
54 | InIdentifierPartUnicodeEscape, | |
55 | InSingleLineComment, | |
56 | InMultiLineComment, | |
57 | InNum, | |
58 | InNum0, | |
59 | InHex, | |
60 | InOctal, | |
61 | InDecimal, | |
62 | InExponentIndicator, | |
63 | InExponent, | |
64 | Hex, | |
65 | Octal, | |
66 | Number, | |
67 | String, | |
68 | Eof, | |
69 | InString, | |
70 | InEscapeSequence, | |
71 | InHexEscape, | |
72 | InUnicodeEscape, | |
73 | Other, | |
74 | Bad | |
75 | }; | |
76 | ||
77 | bool scanRegExp(); | |
78 | const UString& pattern() const { return m_pattern; } | |
79 | const UString& flags() const { return m_flags; } | |
80 | ||
81 | static unsigned char convertHex(int); | |
82 | static unsigned char convertHex(int c1, int c2); | |
83 | static UChar convertUnicode(int c1, int c2, int c3, int c4); | |
84 | static bool isIdentStart(int); | |
85 | static bool isIdentPart(int); | |
86 | static bool isHexDigit(int); | |
87 | ||
88 | bool sawError() const { return m_error; } | |
89 | ||
90 | void clear(); | |
91 | SourceCode sourceCode(int openBrace, int closeBrace, int firstLine) { return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); } | |
92 | ||
93 | private: | |
94 | friend class JSGlobalData; | |
95 | Lexer(JSGlobalData*); | |
96 | ~Lexer(); | |
97 | ||
98 | void setDone(State); | |
99 | void shift(unsigned int p); | |
100 | void nextLine(); | |
101 | int lookupKeyword(const char *); | |
102 | ||
103 | bool isWhiteSpace() const; | |
104 | bool isLineTerminator(); | |
105 | static bool isOctalDigit(int); | |
106 | ||
107 | ALWAYS_INLINE int matchPunctuator(int& charPos, int c1, int c2, int c3, int c4); | |
108 | static unsigned short singleEscape(unsigned short); | |
109 | static unsigned short convertOctal(int c1, int c2, int c3); | |
110 | ||
111 | void record8(int); | |
112 | void record16(int); | |
113 | void record16(UChar); | |
114 | ||
115 | JSC::Identifier* makeIdentifier(const Vector<UChar>& buffer) | |
116 | { | |
117 | m_identifiers.append(JSC::Identifier(m_globalData, buffer.data(), buffer.size())); | |
118 | return &m_identifiers.last(); | |
119 | } | |
120 | ||
121 | static const size_t initialReadBufferCapacity = 32; | |
122 | static const size_t initialIdentifierTableCapacity = 64; | |
123 | ||
124 | int yylineno; | |
125 | int yycolumn; | |
126 | ||
127 | bool m_done; | |
128 | Vector<char> m_buffer8; | |
129 | Vector<UChar> m_buffer16; | |
130 | bool m_terminator; | |
131 | bool m_restrKeyword; | |
132 | bool m_delimited; // encountered delimiter like "'" and "}" on last run | |
133 | bool m_skipLF; | |
134 | bool m_skipCR; | |
135 | bool m_eatNextIdentifier; | |
136 | int m_stackToken; | |
137 | int m_lastToken; | |
138 | ||
139 | State m_state; | |
140 | unsigned int m_position; | |
141 | const SourceCode* m_source; | |
142 | const UChar* m_code; | |
143 | unsigned int m_length; | |
144 | bool m_isReparsing; | |
145 | int m_atLineStart; | |
146 | bool m_error; | |
147 | ||
148 | // current and following unicode characters (int to allow for -1 for end-of-file marker) | |
149 | int m_current; | |
150 | int m_next1; | |
151 | int m_next2; | |
152 | int m_next3; | |
153 | ||
154 | int m_currentOffset; | |
155 | int m_nextOffset1; | |
156 | int m_nextOffset2; | |
157 | int m_nextOffset3; | |
158 | ||
159 | SegmentedVector<JSC::Identifier, initialIdentifierTableCapacity> m_identifiers; | |
160 | ||
161 | JSGlobalData* m_globalData; | |
162 | ||
163 | UString m_pattern; | |
164 | UString m_flags; | |
165 | ||
166 | const HashTable m_mainTable; | |
167 | }; | |
168 | ||
169 | } // namespace JSC | |
170 | ||
171 | #endif // Lexer_h |