]> git.saurik.com Git - apple/javascriptcore.git/blame - parser/Lexer.h
JavaScriptCore-1097.13.tar.gz
[apple/javascriptcore.git] / parser / Lexer.h
CommitLineData
9dae56ea
A
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
6fe7ccc8 3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012 Apple Inc. All rights reserved.
14957cd0 4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
9dae56ea
A
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef Lexer_h
24#define Lexer_h
25
9dae56ea 26#include "Lookup.h"
f9bf01c6 27#include "ParserArena.h"
6fe7ccc8 28#include "ParserTokens.h"
9dae56ea 29#include "SourceCode.h"
ba379fdc 30#include <wtf/ASCIICType.h>
14957cd0 31#include <wtf/AlwaysInline.h>
ba379fdc 32#include <wtf/SegmentedVector.h>
9dae56ea 33#include <wtf/Vector.h>
ba379fdc 34#include <wtf/unicode/Unicode.h>
9dae56ea
A
35
36namespace JSC {
37
6fe7ccc8
A
38class Keywords {
39public:
40 bool isKeyword(const Identifier& ident) const
ba379fdc 41 {
6fe7ccc8 42 return m_keywordTable.entry(m_globalData, ident);
ba379fdc 43 }
6fe7ccc8
A
44
45 const HashEntry* getKeyword(const Identifier& ident) const
ba379fdc 46 {
6fe7ccc8 47 return m_keywordTable.entry(m_globalData, ident);
ba379fdc 48 }
6fe7ccc8
A
49
50 ~Keywords()
ba379fdc 51 {
6fe7ccc8 52 m_keywordTable.deleteTable();
ba379fdc 53 }
6fe7ccc8
A
54
55private:
56 friend class JSGlobalData;
57
58 Keywords(JSGlobalData*);
59
60 JSGlobalData* m_globalData;
61 const HashTable m_keywordTable;
62};
63
// Bit flags passed to the lexer as the `unsigned` flags argument of
// Lexer::lex() / Lexer::lexExpectIdentifier(). Values are distinct bits so
// they can be OR-ed together.
enum LexerFlags {
    LexerFlagsIgnoreReservedWords = 1,
    LexerFlagsDontBuildStrings = 2,
    // Historical misspelling ("Lexex"); kept so existing callers keep compiling.
    LexexFlagsDontBuildKeywords = 4,
    // Correctly spelled alias for the flag above; prefer this in new code.
    LexerFlagsDontBuildKeywords = LexexFlagsDontBuildKeywords
};
69
70template <typename T>
71class Lexer {
72 WTF_MAKE_NONCOPYABLE(Lexer);
73 WTF_MAKE_FAST_ALLOCATED;
74
75public:
76 Lexer(JSGlobalData*);
77 ~Lexer();
78
79 // Character manipulation functions.
80 static bool isWhiteSpace(T character);
81 static bool isLineTerminator(T character);
82 static unsigned char convertHex(int c1, int c2);
83 static UChar convertUnicode(int c1, int c2, int c3, int c4);
84
85 // Functions to set up parsing.
86 void setCode(const SourceCode&, ParserArena*);
87 void setIsReparsing() { m_isReparsing = true; }
88 bool isReparsing() const { return m_isReparsing; }
89
90 JSTokenType lex(JSTokenData*, JSTokenInfo*, unsigned, bool strictMode);
91 bool nextTokenIsColon();
92 int lineNumber() const { return m_lineNumber; }
93 void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
94 int lastLineNumber() const { return m_lastLineNumber; }
95 bool prevTerminator() const { return m_terminator; }
96 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
97 bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
98 bool skipRegExp();
99
100 // Functions for use after parsing.
101 bool sawError() const { return m_error; }
102 UString getErrorMessage() const { return m_lexErrorMessage; }
103 void clear();
104 void setOffset(int offset)
ba379fdc 105 {
6fe7ccc8
A
106 m_error = 0;
107 m_lexErrorMessage = UString();
108 m_code = m_codeStart + offset;
109 m_buffer8.resize(0);
110 m_buffer16.resize(0);
111 if (LIKELY(m_code < m_codeEnd))
112 m_current = *m_code;
113 else
114 m_current = 0;
ba379fdc 115 }
6fe7ccc8 116 void setLineNumber(int line)
14957cd0 117 {
6fe7ccc8 118 m_lineNumber = line;
14957cd0 119 }
ba379fdc 120
6fe7ccc8
A
121 SourceProvider* sourceProvider() const { return m_source->provider(); }
122
123 JSTokenType lexExpectIdentifier(JSTokenData*, JSTokenInfo*, unsigned, bool strictMode);
124
125private:
126 void record8(int);
127 void append8(const T*, size_t);
128 void record16(int);
129 void record16(T);
130 void append16(const LChar*, size_t);
131 void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); }
132
133 ALWAYS_INLINE void shift();
134 ALWAYS_INLINE bool atEnd() const;
135 ALWAYS_INLINE T peek(int offset) const;
136 int parseFourDigitUnicodeHex();
137 void shiftLineTerminator();
138
139 UString invalidCharacterMessage() const;
140 ALWAYS_INLINE const T* currentCharacter() const;
141 ALWAYS_INLINE int currentOffset() const { return m_code - m_codeStart; }
142 ALWAYS_INLINE void setOffsetFromCharOffset(const T* charOffset) { setOffset(charOffset - m_codeStart); }
143
144 ALWAYS_INLINE void setCodeStart(const StringImpl*);
145
146 ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length);
147 ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
148 ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length);
149
150 ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
151
152 template <int shiftAmount> void internalShift();
153 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
154 template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned lexerFlags, bool strictMode);
155 template <bool shouldBuildIdentifiers> NEVER_INLINE JSTokenType parseIdentifierSlowCase(JSTokenData*, unsigned lexerFlags, bool strictMode);
156 template <bool shouldBuildStrings> ALWAYS_INLINE bool parseString(JSTokenData*, bool strictMode);
157 template <bool shouldBuildStrings> NEVER_INLINE bool parseStringSlowCase(JSTokenData*, bool strictMode);
158 ALWAYS_INLINE void parseHex(double& returnValue);
159 ALWAYS_INLINE bool parseOctal(double& returnValue);
160 ALWAYS_INLINE bool parseDecimal(double& returnValue);
161 ALWAYS_INLINE void parseNumberAfterDecimalPoint();
162 ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
163 ALWAYS_INLINE bool parseMultilineComment();
164
165 static const size_t initialReadBufferCapacity = 32;
166
167 int m_lineNumber;
168 int m_lastLineNumber;
169
170 Vector<LChar> m_buffer8;
171 Vector<UChar> m_buffer16;
172 bool m_terminator;
173 int m_lastToken;
174
175 const SourceCode* m_source;
176 const T* m_code;
177 const T* m_codeStart;
178 const T* m_codeEnd;
179 bool m_isReparsing;
180 bool m_atLineStart;
181 bool m_error;
182 UString m_lexErrorMessage;
183
184 T m_current;
185
186 IdentifierArena* m_arena;
187
188 JSGlobalData* m_globalData;
189};
190
191template <>
192ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch)
193{
194 return ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC || ch == 0xA0;
195}
196
197template <>
198ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch)
199{
200 return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF);
201}
202
203template <>
204ALWAYS_INLINE bool Lexer<LChar>::isLineTerminator(LChar ch)
205{
206 return ch == '\r' || ch == '\n';
207}
208
209template <>
210ALWAYS_INLINE bool Lexer<UChar>::isLineTerminator(UChar ch)
211{
212 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
213}
214
215template <typename T>
216inline unsigned char Lexer<T>::convertHex(int c1, int c2)
217{
218 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
219}
220
221template <typename T>
222inline UChar Lexer<T>::convertUnicode(int c1, int c2, int c3, int c4)
223{
224 return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
225}
226
227template <typename T>
228ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const LChar* characters, size_t length)
229{
230 return &m_arena->makeIdentifier(m_globalData, characters, length);
231}
232
233template <typename T>
234ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const UChar* characters, size_t length)
235{
236 return &m_arena->makeIdentifier(m_globalData, characters, length);
237}
238
239template <>
240ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString)
241{
242 ASSERT(sourceString->is8Bit());
243 m_codeStart = sourceString->characters8();
244}
245
246template <>
247ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString)
248{
249 ASSERT(!sourceString->is8Bit());
250 m_codeStart = sourceString->characters16();
251}
252
253template <typename T>
254ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifierLCharFromUChar(const UChar* characters, size_t length)
255{
256 return &m_arena->makeIdentifierLCharFromUChar(m_globalData, characters, length);
257}
258
259template <typename T>
260ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexerFlags, bool strictMode)
261{
262 ASSERT((lexerFlags & LexerFlagsIgnoreReservedWords));
263 const T* start = m_code;
264 const T* ptr = start;
265 const T* end = m_codeEnd;
266 if (ptr >= end) {
267 ASSERT(ptr == end);
268 goto slowCase;
269 }
270 if (!WTF::isASCIIAlpha(*ptr))
271 goto slowCase;
272 ++ptr;
273 while (ptr < end) {
274 if (!WTF::isASCIIAlphanumeric(*ptr))
275 break;
14957cd0 276 ++ptr;
f9bf01c6
A
277 }
278
6fe7ccc8
A
279 // Here's the shift
280 if (ptr < end) {
281 if ((!WTF::isASCII(*ptr)) || (*ptr == '\\') || (*ptr == '_') || (*ptr == '$'))
282 goto slowCase;
283 m_current = *ptr;
284 } else
285 m_current = 0;
286
287 m_code = ptr;
288
289 // Create the identifier if needed
290 if (lexerFlags & LexexFlagsDontBuildKeywords)
291 tokenData->ident = 0;
292 else
293 tokenData->ident = makeIdentifier(start, ptr - start);
294 tokenInfo->line = m_lineNumber;
295 tokenInfo->startOffset = start - m_codeStart;
296 tokenInfo->endOffset = currentOffset();
297 m_lastToken = IDENT;
298 return IDENT;
299
300slowCase:
301 return lex(tokenData, tokenInfo, lexerFlags, strictMode);
302}
303
9dae56ea
A
304} // namespace JSC
305
306#endif // Lexer_h