]> git.saurik.com Git - apple/javascriptcore.git/blame - parser/Lexer.h
JavaScriptCore-1218.33.tar.gz
[apple/javascriptcore.git] / parser / Lexer.h
CommitLineData
9dae56ea
A
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
93a37866 3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved.
14957cd0 4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
9dae56ea
A
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef Lexer_h
24#define Lexer_h
25
9dae56ea 26#include "Lookup.h"
f9bf01c6 27#include "ParserArena.h"
6fe7ccc8 28#include "ParserTokens.h"
9dae56ea 29#include "SourceCode.h"
ba379fdc
A
30#include <wtf/ASCIICType.h>
31#include <wtf/SegmentedVector.h>
9dae56ea 32#include <wtf/Vector.h>
ba379fdc 33#include <wtf/unicode/Unicode.h>
9dae56ea
A
34
35namespace JSC {
36
6fe7ccc8
A
37class Keywords {
38public:
39 bool isKeyword(const Identifier& ident) const
ba379fdc 40 {
93a37866 41 return m_keywordTable.entry(m_vm, ident);
ba379fdc 42 }
6fe7ccc8
A
43
44 const HashEntry* getKeyword(const Identifier& ident) const
ba379fdc 45 {
93a37866 46 return m_keywordTable.entry(m_vm, ident);
ba379fdc 47 }
6fe7ccc8
A
48
49 ~Keywords()
ba379fdc 50 {
6fe7ccc8 51 m_keywordTable.deleteTable();
ba379fdc 52 }
6fe7ccc8
A
53
54private:
93a37866 55 friend class VM;
6fe7ccc8 56
93a37866 57 Keywords(VM*);
6fe7ccc8 58
93a37866 59 VM* m_vm;
6fe7ccc8
A
60 const HashTable m_keywordTable;
61};
62
// Bit flags that are OR-ed together into the `unsigned` flags argument of
// Lexer::lex() and Lexer::lexExpectIdentifier(), and tested there with `&`.
enum LexerFlags {
    // Lexer::lexExpectIdentifier() asserts that this bit is set.
    LexerFlagsIgnoreReservedWords = 1,
    // NOTE(review): presumably suppresses building string token values;
    // the code that consumes this bit is not in this header.
    LexerFlagsDontBuildStrings = 2,
    // When set, Lexer::lexExpectIdentifier() stores a null identifier in the
    // token data instead of creating one.
    // The spelling "Lexex" is a historical typo; it is kept so existing users
    // of the name keep compiling.
    LexexFlagsDontBuildKeywords = 4,
    // Correctly spelled alias of the flag above (same value).
    LexerFlagsDontBuildKeywords = LexexFlagsDontBuildKeywords
};
68
69template <typename T>
70class Lexer {
71 WTF_MAKE_NONCOPYABLE(Lexer);
72 WTF_MAKE_FAST_ALLOCATED;
73
74public:
93a37866 75 Lexer(VM*);
6fe7ccc8
A
76 ~Lexer();
77
78 // Character manipulation functions.
79 static bool isWhiteSpace(T character);
80 static bool isLineTerminator(T character);
81 static unsigned char convertHex(int c1, int c2);
82 static UChar convertUnicode(int c1, int c2, int c3, int c4);
83
84 // Functions to set up parsing.
85 void setCode(const SourceCode&, ParserArena*);
86 void setIsReparsing() { m_isReparsing = true; }
87 bool isReparsing() const { return m_isReparsing; }
88
93a37866 89 JSTokenType lex(JSTokenData*, JSTokenLocation*, unsigned, bool strictMode);
6fe7ccc8
A
90 bool nextTokenIsColon();
91 int lineNumber() const { return m_lineNumber; }
93a37866
A
92 ALWAYS_INLINE int currentOffset() const { return offsetFromSourcePtr(m_code); }
93 ALWAYS_INLINE int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart); }
6fe7ccc8
A
94 void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
95 int lastLineNumber() const { return m_lastLineNumber; }
96 bool prevTerminator() const { return m_terminator; }
93a37866 97 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine, unsigned startColumn);
6fe7ccc8
A
98 bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
99 bool skipRegExp();
100
101 // Functions for use after parsing.
102 bool sawError() const { return m_error; }
93a37866 103 String getErrorMessage() const { return m_lexErrorMessage; }
6fe7ccc8 104 void clear();
93a37866 105 void setOffset(int offset, int lineStartOffset)
ba379fdc 106 {
6fe7ccc8 107 m_error = 0;
93a37866
A
108 m_lexErrorMessage = String();
109
110 m_code = sourcePtrFromOffset(offset);
111 m_lineStart = sourcePtrFromOffset(lineStartOffset);
112 ASSERT(currentOffset() >= currentLineStartOffset());
113
6fe7ccc8
A
114 m_buffer8.resize(0);
115 m_buffer16.resize(0);
116 if (LIKELY(m_code < m_codeEnd))
117 m_current = *m_code;
118 else
119 m_current = 0;
ba379fdc 120 }
6fe7ccc8 121 void setLineNumber(int line)
14957cd0 122 {
6fe7ccc8 123 m_lineNumber = line;
14957cd0 124 }
ba379fdc 125
6fe7ccc8
A
126 SourceProvider* sourceProvider() const { return m_source->provider(); }
127
93a37866 128 JSTokenType lexExpectIdentifier(JSTokenData*, JSTokenLocation*, unsigned, bool strictMode);
6fe7ccc8
A
129
130private:
131 void record8(int);
132 void append8(const T*, size_t);
133 void record16(int);
134 void record16(T);
135 void append16(const LChar*, size_t);
136 void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); }
137
138 ALWAYS_INLINE void shift();
139 ALWAYS_INLINE bool atEnd() const;
140 ALWAYS_INLINE T peek(int offset) const;
93a37866
A
141 struct UnicodeHexValue {
142
143 enum ValueType { ValidHex, IncompleteHex, InvalidHex };
144
145 explicit UnicodeHexValue(int value)
146 : m_value(value)
147 {
148 }
149 explicit UnicodeHexValue(ValueType type)
150 : m_value(type == IncompleteHex ? -2 : -1)
151 {
152 }
153
154 ValueType valueType() const
155 {
156 if (m_value >= 0)
157 return ValidHex;
158 return m_value == -2 ? IncompleteHex : InvalidHex;
159 }
160 bool isValid() const { return m_value >= 0; }
161 int value() const
162 {
163 ASSERT(m_value >= 0);
164 return m_value;
165 }
166
167 private:
168 int m_value;
169 };
170 UnicodeHexValue parseFourDigitUnicodeHex();
6fe7ccc8
A
171 void shiftLineTerminator();
172
93a37866
A
173 ALWAYS_INLINE int offsetFromSourcePtr(const T* ptr) const { return ptr - m_codeStart; }
174 ALWAYS_INLINE const T* sourcePtrFromOffset(int offset) const { return m_codeStart + offset; }
175
176 String invalidCharacterMessage() const;
177 ALWAYS_INLINE const T* currentSourcePtr() const;
178 ALWAYS_INLINE void setOffsetFromSourcePtr(const T* sourcePtr, unsigned lineStartOffset) { setOffset(offsetFromSourcePtr(sourcePtr), lineStartOffset); }
6fe7ccc8
A
179
180 ALWAYS_INLINE void setCodeStart(const StringImpl*);
181
182 ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length);
183 ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
93a37866
A
184 ALWAYS_INLINE const Identifier* makeLCharIdentifier(const LChar* characters, size_t length);
185 ALWAYS_INLINE const Identifier* makeLCharIdentifier(const UChar* characters, size_t length);
186 ALWAYS_INLINE const Identifier* makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars);
6fe7ccc8
A
187 ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length);
188
189 ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
190
191 template <int shiftAmount> void internalShift();
192 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
193 template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned lexerFlags, bool strictMode);
194 template <bool shouldBuildIdentifiers> NEVER_INLINE JSTokenType parseIdentifierSlowCase(JSTokenData*, unsigned lexerFlags, bool strictMode);
93a37866
A
195 enum StringParseResult {
196 StringParsedSuccessfully,
197 StringUnterminated,
198 StringCannotBeParsed
199 };
200 template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseString(JSTokenData*, bool strictMode);
201 template <bool shouldBuildStrings> NEVER_INLINE StringParseResult parseStringSlowCase(JSTokenData*, bool strictMode);
6fe7ccc8
A
202 ALWAYS_INLINE void parseHex(double& returnValue);
203 ALWAYS_INLINE bool parseOctal(double& returnValue);
204 ALWAYS_INLINE bool parseDecimal(double& returnValue);
205 ALWAYS_INLINE void parseNumberAfterDecimalPoint();
206 ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
207 ALWAYS_INLINE bool parseMultilineComment();
208
209 static const size_t initialReadBufferCapacity = 32;
210
211 int m_lineNumber;
212 int m_lastLineNumber;
213
214 Vector<LChar> m_buffer8;
215 Vector<UChar> m_buffer16;
216 bool m_terminator;
217 int m_lastToken;
218
219 const SourceCode* m_source;
93a37866 220 unsigned m_sourceOffset;
6fe7ccc8
A
221 const T* m_code;
222 const T* m_codeStart;
223 const T* m_codeEnd;
93a37866
A
224 const T* m_codeStartPlusOffset;
225 const T* m_lineStart;
6fe7ccc8
A
226 bool m_isReparsing;
227 bool m_atLineStart;
228 bool m_error;
93a37866 229 String m_lexErrorMessage;
6fe7ccc8
A
230
231 T m_current;
232
233 IdentifierArena* m_arena;
234
93a37866 235 VM* m_vm;
6fe7ccc8
A
236};
237
238template <>
239ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch)
240{
241 return ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC || ch == 0xA0;
242}
243
244template <>
245ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch)
246{
247 return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF);
248}
249
250template <>
251ALWAYS_INLINE bool Lexer<LChar>::isLineTerminator(LChar ch)
252{
253 return ch == '\r' || ch == '\n';
254}
255
256template <>
257ALWAYS_INLINE bool Lexer<UChar>::isLineTerminator(UChar ch)
258{
259 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
260}
261
262template <typename T>
263inline unsigned char Lexer<T>::convertHex(int c1, int c2)
264{
265 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
266}
267
268template <typename T>
269inline UChar Lexer<T>::convertUnicode(int c1, int c2, int c3, int c4)
270{
271 return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
272}
273
274template <typename T>
275ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const LChar* characters, size_t length)
276{
93a37866 277 return &m_arena->makeIdentifier(m_vm, characters, length);
6fe7ccc8
A
278}
279
280template <typename T>
281ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const UChar* characters, size_t length)
282{
93a37866
A
283 return &m_arena->makeIdentifier(m_vm, characters, length);
284}
285
286template <>
287ALWAYS_INLINE const Identifier* Lexer<LChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar)
288{
289 return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
290}
291
292template <>
293ALWAYS_INLINE const Identifier* Lexer<UChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars)
294{
295 if (!(orAllChars & ~0xff))
296 return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
297
298 return &m_arena->makeIdentifier(m_vm, characters, length);
6fe7ccc8
A
299}
300
301template <>
302ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString)
303{
304 ASSERT(sourceString->is8Bit());
305 m_codeStart = sourceString->characters8();
306}
307
308template <>
309ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString)
310{
311 ASSERT(!sourceString->is8Bit());
312 m_codeStart = sourceString->characters16();
313}
314
315template <typename T>
316ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifierLCharFromUChar(const UChar* characters, size_t length)
317{
93a37866
A
318 return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
319}
320
321template <typename T>
322ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const LChar* characters, size_t length)
323{
324 return &m_arena->makeIdentifier(m_vm, characters, length);
325}
326
327template <typename T>
328ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const UChar* characters, size_t length)
329{
330 return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
6fe7ccc8
A
331}
332
333template <typename T>
93a37866 334ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSTokenData* tokenData, JSTokenLocation* tokenLocation, unsigned lexerFlags, bool strictMode)
6fe7ccc8
A
335{
336 ASSERT((lexerFlags & LexerFlagsIgnoreReservedWords));
337 const T* start = m_code;
338 const T* ptr = start;
339 const T* end = m_codeEnd;
340 if (ptr >= end) {
341 ASSERT(ptr == end);
342 goto slowCase;
343 }
344 if (!WTF::isASCIIAlpha(*ptr))
345 goto slowCase;
346 ++ptr;
347 while (ptr < end) {
348 if (!WTF::isASCIIAlphanumeric(*ptr))
349 break;
14957cd0 350 ++ptr;
f9bf01c6
A
351 }
352
6fe7ccc8
A
353 // Here's the shift
354 if (ptr < end) {
355 if ((!WTF::isASCII(*ptr)) || (*ptr == '\\') || (*ptr == '_') || (*ptr == '$'))
356 goto slowCase;
357 m_current = *ptr;
358 } else
359 m_current = 0;
360
361 m_code = ptr;
93a37866 362 ASSERT(currentOffset() >= currentLineStartOffset());
6fe7ccc8
A
363
364 // Create the identifier if needed
365 if (lexerFlags & LexexFlagsDontBuildKeywords)
366 tokenData->ident = 0;
367 else
93a37866
A
368 tokenData->ident = makeLCharIdentifier(start, ptr - start);
369 tokenLocation->line = m_lineNumber;
370 tokenLocation->lineStartOffset = currentLineStartOffset();
371 tokenLocation->startOffset = offsetFromSourcePtr(start);
372 tokenLocation->endOffset = currentOffset();
373 ASSERT(tokenLocation->startOffset >= tokenLocation->lineStartOffset);
6fe7ccc8
A
374 m_lastToken = IDENT;
375 return IDENT;
376
377slowCase:
93a37866 378 return lex(tokenData, tokenLocation, lexerFlags, strictMode);
6fe7ccc8
A
379}
380
9dae56ea
A
381} // namespace JSC
382
383#endif // Lexer_h