]> git.saurik.com Git - apple/javascriptcore.git/blob - parser/Lexer.h
46501aa882be672ffe34b680ec4cd19fb25973ed
[apple/javascriptcore.git] / parser / Lexer.h
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #ifndef Lexer_h
24 #define Lexer_h
25
26 #include "JSParser.h"
27 #include "Lookup.h"
28 #include "ParserArena.h"
29 #include "SourceCode.h"
30 #include <wtf/ASCIICType.h>
31 #include <wtf/AlwaysInline.h>
32 #include <wtf/SegmentedVector.h>
33 #include <wtf/Vector.h>
34 #include <wtf/unicode/Unicode.h>
35
36 namespace JSC {
37
38 class RegExp;
39
40 class Lexer {
41 WTF_MAKE_NONCOPYABLE(Lexer); WTF_MAKE_FAST_ALLOCATED;
42 public:
43 // Character manipulation functions.
44 static bool isWhiteSpace(int character);
45 static bool isLineTerminator(int character);
46 static unsigned char convertHex(int c1, int c2);
47 static UChar convertUnicode(int c1, int c2, int c3, int c4);
48
49 // Functions to set up parsing.
50 void setCode(const SourceCode&, ParserArena&);
51 void setIsReparsing() { m_isReparsing = true; }
52 bool isReparsing() const { return m_isReparsing; }
53
54 // Functions for the parser itself.
55 enum LexType {
56 IgnoreReservedWords = 1,
57 DontBuildStrings = 2,
58 DontBuildKeywords = 4
59 };
60 JSTokenType lex(JSTokenData*, JSTokenInfo*, unsigned, bool strictMode);
61 bool nextTokenIsColon();
62 int lineNumber() const { return m_lineNumber; }
63 void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
64 int lastLineNumber() const { return m_lastLineNumber; }
65 bool prevTerminator() const { return m_terminator; }
66 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
67 bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
68 bool skipRegExp();
69
70 // Functions for use after parsing.
71 bool sawError() const { return m_error; }
72 void clear();
73 int currentOffset() { return m_code - m_codeStart; }
74 void setOffset(int offset)
75 {
76 m_error = 0;
77 m_code = m_codeStart + offset;
78 m_buffer8.resize(0);
79 m_buffer16.resize(0);
80 // Faster than an if-else sequence
81 m_current = -1;
82 if (LIKELY(m_code < m_codeEnd))
83 m_current = *m_code;
84 }
85 void setLineNumber(int line)
86 {
87 m_lineNumber = line;
88 }
89
90 SourceProvider* sourceProvider() const { return m_source->provider(); }
91
92 JSTokenType lexExpectIdentifier(JSTokenData*, JSTokenInfo*, unsigned, bool strictMode);
93
94 bool isKeyword(const Identifier&);
95
96 private:
97 friend class JSGlobalData;
98
99 Lexer(JSGlobalData*);
100 ~Lexer();
101
102 void record8(int);
103 void record16(int);
104 void record16(UChar);
105
106 void copyCodeWithoutBOMs();
107
108 ALWAYS_INLINE void shift();
109 ALWAYS_INLINE int peek(int offset);
110 int getUnicodeCharacter();
111 void shiftLineTerminator();
112
113 ALWAYS_INLINE const UChar* currentCharacter() const;
114 ALWAYS_INLINE int currentOffset() const;
115
116 ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
117
118 ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
119
120 enum ShiftType { DoBoundsCheck, DoNotBoundsCheck };
121 template <int shiftAmount, ShiftType shouldBoundsCheck> void internalShift();
122 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
123 template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned);
124 template <bool shouldBuildStrings> ALWAYS_INLINE bool parseString(JSTokenData*, bool strictMode);
125 ALWAYS_INLINE void parseHex(double& returnValue);
126 ALWAYS_INLINE bool parseOctal(double& returnValue);
127 ALWAYS_INLINE bool parseDecimal(double& returnValue);
128 ALWAYS_INLINE void parseNumberAfterDecimalPoint();
129 ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
130 ALWAYS_INLINE bool parseMultilineComment();
131
132 static const size_t initialReadBufferCapacity = 32;
133
134 int m_lineNumber;
135 int m_lastLineNumber;
136
137 Vector<char> m_buffer8;
138 Vector<UChar> m_buffer16;
139 bool m_terminator;
140 bool m_delimited; // encountered delimiter like "'" and "}" on last run
141 int m_lastToken;
142
143 const SourceCode* m_source;
144 const UChar* m_code;
145 const UChar* m_codeStart;
146 const UChar* m_codeEnd;
147 bool m_isReparsing;
148 bool m_atLineStart;
149 bool m_error;
150
151 // current and following unicode characters (int to allow for -1 for end-of-file marker)
152 int m_current;
153
154 IdentifierArena* m_arena;
155
156 JSGlobalData* m_globalData;
157
158 const HashTable m_keywordTable;
159 };
160
161 ALWAYS_INLINE bool Lexer::isWhiteSpace(int ch)
162 {
163 return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : (WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF);
164 }
165
166 ALWAYS_INLINE bool Lexer::isLineTerminator(int ch)
167 {
168 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
169 }
170
171 inline unsigned char Lexer::convertHex(int c1, int c2)
172 {
173 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
174 }
175
176 inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
177 {
178 return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
179 }
180
181 ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
182 {
183 return &m_arena->makeIdentifier(m_globalData, characters, length);
184 }
185
186 ALWAYS_INLINE JSTokenType Lexer::lexExpectIdentifier(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexType, bool strictMode)
187 {
188 ASSERT((lexType & IgnoreReservedWords));
189 const UChar* start = m_code;
190 const UChar* ptr = start;
191 const UChar* end = m_codeEnd;
192 if (ptr >= end) {
193 ASSERT(ptr == end);
194 goto slowCase;
195 }
196 if (!WTF::isASCIIAlpha(*ptr))
197 goto slowCase;
198 ++ptr;
199 while (ptr < end) {
200 if (!WTF::isASCIIAlphanumeric(*ptr))
201 break;
202 ++ptr;
203 }
204
205 // Here's the shift
206 if (ptr < end) {
207 if ((!WTF::isASCII(*ptr)) || (*ptr == '\\') || (*ptr == '_') || (*ptr == '$'))
208 goto slowCase;
209 m_current = *ptr;
210 } else
211 m_current = -1;
212
213 m_code = ptr;
214
215 // Create the identifier if needed
216 if (lexType & DontBuildKeywords)
217 tokenData->ident = 0;
218 else
219 tokenData->ident = makeIdentifier(start, ptr - start);
220 tokenInfo->line = m_lineNumber;
221 tokenInfo->startOffset = start - m_codeStart;
222 tokenInfo->endOffset = currentOffset();
223 m_lastToken = IDENT;
224 return IDENT;
225
226 slowCase:
227 return lex(tokenData, tokenInfo, lexType, strictMode);
228 }
229
230 } // namespace JSC
231
232 #endif // Lexer_h