JavaScriptCore-7600.1.4.16.1.tar.gz
[apple/javascriptcore.git] / parser / Lexer.h
CommitLineData
9dae56ea
A
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
93a37866 3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved.
14957cd0 4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
9dae56ea
A
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef Lexer_h
24#define Lexer_h
25
9dae56ea 26#include "Lookup.h"
f9bf01c6 27#include "ParserArena.h"
6fe7ccc8 28#include "ParserTokens.h"
9dae56ea 29#include "SourceCode.h"
ba379fdc
A
30#include <wtf/ASCIICType.h>
31#include <wtf/SegmentedVector.h>
9dae56ea
A
32#include <wtf/Vector.h>
33
34namespace JSC {
35
6fe7ccc8
A
36class Keywords {
37public:
38 bool isKeyword(const Identifier& ident) const
ba379fdc 39 {
93a37866 40 return m_keywordTable.entry(m_vm, ident);
ba379fdc 41 }
6fe7ccc8 42
81345200 43 const HashTableValue* getKeyword(const Identifier& ident) const
ba379fdc 44 {
93a37866 45 return m_keywordTable.entry(m_vm, ident);
ba379fdc 46 }
6fe7ccc8
A
47
48 ~Keywords()
ba379fdc 49 {
6fe7ccc8 50 m_keywordTable.deleteTable();
ba379fdc 51 }
6fe7ccc8
A
52
53private:
93a37866 54 friend class VM;
6fe7ccc8 55
81345200 56 explicit Keywords(VM&);
6fe7ccc8 57
81345200 58 VM& m_vm;
6fe7ccc8
A
59 const HashTable m_keywordTable;
60};
61
// Bit flags passed to the lexer as an unsigned mask.
// Note: "LexexFlagsDontBuildKeywords" is spelled this way on purpose; the
// name is referenced by existing code and must not be changed here.
enum LexerFlags {
    LexerFlagsIgnoreReservedWords = 1 << 0,
    LexerFlagsDontBuildStrings = 1 << 1,
    LexexFlagsDontBuildKeywords = 1 << 2
};
67
68template <typename T>
69class Lexer {
70 WTF_MAKE_NONCOPYABLE(Lexer);
71 WTF_MAKE_FAST_ALLOCATED;
72
73public:
81345200 74 Lexer(VM*, JSParserStrictness);
6fe7ccc8
A
75 ~Lexer();
76
77 // Character manipulation functions.
78 static bool isWhiteSpace(T character);
79 static bool isLineTerminator(T character);
80 static unsigned char convertHex(int c1, int c2);
81 static UChar convertUnicode(int c1, int c2, int c3, int c4);
82
83 // Functions to set up parsing.
84 void setCode(const SourceCode&, ParserArena*);
85 void setIsReparsing() { m_isReparsing = true; }
86 bool isReparsing() const { return m_isReparsing; }
87
81345200 88 JSTokenType lex(JSToken*, unsigned, bool strictMode);
6fe7ccc8
A
89 bool nextTokenIsColon();
90 int lineNumber() const { return m_lineNumber; }
93a37866
A
91 ALWAYS_INLINE int currentOffset() const { return offsetFromSourcePtr(m_code); }
92 ALWAYS_INLINE int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart); }
81345200
A
93 ALWAYS_INLINE JSTextPosition currentPosition() const
94 {
95 return JSTextPosition(m_lineNumber, currentOffset(), currentLineStartOffset());
96 }
97 JSTextPosition positionBeforeLastNewline() const { return m_positionBeforeLastNewline; }
6fe7ccc8
A
98 void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
99 int lastLineNumber() const { return m_lastLineNumber; }
100 bool prevTerminator() const { return m_terminator; }
6fe7ccc8
A
101 bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
102 bool skipRegExp();
103
104 // Functions for use after parsing.
105 bool sawError() const { return m_error; }
93a37866 106 String getErrorMessage() const { return m_lexErrorMessage; }
6fe7ccc8 107 void clear();
93a37866 108 void setOffset(int offset, int lineStartOffset)
ba379fdc 109 {
6fe7ccc8 110 m_error = 0;
93a37866
A
111 m_lexErrorMessage = String();
112
113 m_code = sourcePtrFromOffset(offset);
114 m_lineStart = sourcePtrFromOffset(lineStartOffset);
115 ASSERT(currentOffset() >= currentLineStartOffset());
116
6fe7ccc8
A
117 m_buffer8.resize(0);
118 m_buffer16.resize(0);
119 if (LIKELY(m_code < m_codeEnd))
120 m_current = *m_code;
121 else
122 m_current = 0;
ba379fdc 123 }
6fe7ccc8 124 void setLineNumber(int line)
14957cd0 125 {
6fe7ccc8 126 m_lineNumber = line;
14957cd0 127 }
ba379fdc 128
6fe7ccc8
A
129 SourceProvider* sourceProvider() const { return m_source->provider(); }
130
81345200 131 JSTokenType lexExpectIdentifier(JSToken*, unsigned, bool strictMode);
6fe7ccc8
A
132
133private:
134 void record8(int);
135 void append8(const T*, size_t);
136 void record16(int);
137 void record16(T);
138 void append16(const LChar*, size_t);
139 void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); }
140
141 ALWAYS_INLINE void shift();
142 ALWAYS_INLINE bool atEnd() const;
143 ALWAYS_INLINE T peek(int offset) const;
93a37866
A
144 struct UnicodeHexValue {
145
146 enum ValueType { ValidHex, IncompleteHex, InvalidHex };
147
148 explicit UnicodeHexValue(int value)
149 : m_value(value)
150 {
151 }
152 explicit UnicodeHexValue(ValueType type)
153 : m_value(type == IncompleteHex ? -2 : -1)
154 {
155 }
156
157 ValueType valueType() const
158 {
159 if (m_value >= 0)
160 return ValidHex;
161 return m_value == -2 ? IncompleteHex : InvalidHex;
162 }
163 bool isValid() const { return m_value >= 0; }
164 int value() const
165 {
166 ASSERT(m_value >= 0);
167 return m_value;
168 }
169
170 private:
171 int m_value;
172 };
173 UnicodeHexValue parseFourDigitUnicodeHex();
6fe7ccc8
A
174 void shiftLineTerminator();
175
93a37866
A
176 ALWAYS_INLINE int offsetFromSourcePtr(const T* ptr) const { return ptr - m_codeStart; }
177 ALWAYS_INLINE const T* sourcePtrFromOffset(int offset) const { return m_codeStart + offset; }
178
179 String invalidCharacterMessage() const;
180 ALWAYS_INLINE const T* currentSourcePtr() const;
181 ALWAYS_INLINE void setOffsetFromSourcePtr(const T* sourcePtr, unsigned lineStartOffset) { setOffset(offsetFromSourcePtr(sourcePtr), lineStartOffset); }
6fe7ccc8
A
182
183 ALWAYS_INLINE void setCodeStart(const StringImpl*);
184
185 ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length);
186 ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
93a37866
A
187 ALWAYS_INLINE const Identifier* makeLCharIdentifier(const LChar* characters, size_t length);
188 ALWAYS_INLINE const Identifier* makeLCharIdentifier(const UChar* characters, size_t length);
189 ALWAYS_INLINE const Identifier* makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars);
6fe7ccc8
A
190 ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length);
191
192 ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
193
194 template <int shiftAmount> void internalShift();
195 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
196 template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned lexerFlags, bool strictMode);
197 template <bool shouldBuildIdentifiers> NEVER_INLINE JSTokenType parseIdentifierSlowCase(JSTokenData*, unsigned lexerFlags, bool strictMode);
93a37866
A
198 enum StringParseResult {
199 StringParsedSuccessfully,
200 StringUnterminated,
201 StringCannotBeParsed
202 };
203 template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseString(JSTokenData*, bool strictMode);
204 template <bool shouldBuildStrings> NEVER_INLINE StringParseResult parseStringSlowCase(JSTokenData*, bool strictMode);
6fe7ccc8
A
205 ALWAYS_INLINE void parseHex(double& returnValue);
206 ALWAYS_INLINE bool parseOctal(double& returnValue);
207 ALWAYS_INLINE bool parseDecimal(double& returnValue);
208 ALWAYS_INLINE void parseNumberAfterDecimalPoint();
209 ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
210 ALWAYS_INLINE bool parseMultilineComment();
211
212 static const size_t initialReadBufferCapacity = 32;
213
214 int m_lineNumber;
215 int m_lastLineNumber;
216
217 Vector<LChar> m_buffer8;
218 Vector<UChar> m_buffer16;
219 bool m_terminator;
220 int m_lastToken;
221
222 const SourceCode* m_source;
93a37866 223 unsigned m_sourceOffset;
6fe7ccc8
A
224 const T* m_code;
225 const T* m_codeStart;
226 const T* m_codeEnd;
93a37866
A
227 const T* m_codeStartPlusOffset;
228 const T* m_lineStart;
81345200 229 JSTextPosition m_positionBeforeLastNewline;
6fe7ccc8
A
230 bool m_isReparsing;
231 bool m_atLineStart;
232 bool m_error;
93a37866 233 String m_lexErrorMessage;
6fe7ccc8
A
234
235 T m_current;
236
237 IdentifierArena* m_arena;
238
93a37866 239 VM* m_vm;
81345200 240 bool m_parsingBuiltinFunction;
6fe7ccc8
A
241};
242
243template <>
244ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch)
245{
246 return ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC || ch == 0xA0;
247}
248
249template <>
250ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch)
251{
81345200
A
252 // 0x180E used to be in Zs category before Unicode 6.3, and EcmaScript says that we should keep treating it as such.
253 return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (u_charType(ch) == U_SPACE_SEPARATOR || ch == 0x180E || ch == 0xFEFF);
6fe7ccc8
A
254}
255
256template <>
257ALWAYS_INLINE bool Lexer<LChar>::isLineTerminator(LChar ch)
258{
259 return ch == '\r' || ch == '\n';
260}
261
262template <>
263ALWAYS_INLINE bool Lexer<UChar>::isLineTerminator(UChar ch)
264{
265 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
266}
267
268template <typename T>
269inline unsigned char Lexer<T>::convertHex(int c1, int c2)
270{
271 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
272}
273
274template <typename T>
275inline UChar Lexer<T>::convertUnicode(int c1, int c2, int c3, int c4)
276{
277 return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
278}
279
280template <typename T>
281ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const LChar* characters, size_t length)
282{
93a37866 283 return &m_arena->makeIdentifier(m_vm, characters, length);
6fe7ccc8
A
284}
285
286template <typename T>
287ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const UChar* characters, size_t length)
288{
93a37866
A
289 return &m_arena->makeIdentifier(m_vm, characters, length);
290}
291
292template <>
293ALWAYS_INLINE const Identifier* Lexer<LChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar)
294{
295 return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
296}
297
298template <>
299ALWAYS_INLINE const Identifier* Lexer<UChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars)
300{
301 if (!(orAllChars & ~0xff))
302 return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
303
304 return &m_arena->makeIdentifier(m_vm, characters, length);
6fe7ccc8
A
305}
306
307template <>
308ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString)
309{
310 ASSERT(sourceString->is8Bit());
311 m_codeStart = sourceString->characters8();
312}
313
314template <>
315ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString)
316{
317 ASSERT(!sourceString->is8Bit());
318 m_codeStart = sourceString->characters16();
319}
320
321template <typename T>
322ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifierLCharFromUChar(const UChar* characters, size_t length)
323{
93a37866
A
324 return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
325}
326
327template <typename T>
328ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const LChar* characters, size_t length)
329{
330 return &m_arena->makeIdentifier(m_vm, characters, length);
331}
332
333template <typename T>
334ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const UChar* characters, size_t length)
335{
336 return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
6fe7ccc8
A
337}
338
81345200
A
339#if ASSERT_DISABLED
340ALWAYS_INLINE bool isSafeBuiltinIdentifier(VM&, const Identifier*) { return true; }
341#else
342bool isSafeBuiltinIdentifier(VM&, const Identifier*);
343#endif
344
6fe7ccc8 345template <typename T>
81345200 346ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
6fe7ccc8 347{
81345200
A
348 JSTokenData* tokenData = &tokenRecord->m_data;
349 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
6fe7ccc8
A
350 ASSERT((lexerFlags & LexerFlagsIgnoreReservedWords));
351 const T* start = m_code;
352 const T* ptr = start;
353 const T* end = m_codeEnd;
81345200 354 JSTextPosition startPosition = currentPosition();
6fe7ccc8
A
355 if (ptr >= end) {
356 ASSERT(ptr == end);
357 goto slowCase;
358 }
359 if (!WTF::isASCIIAlpha(*ptr))
360 goto slowCase;
361 ++ptr;
362 while (ptr < end) {
363 if (!WTF::isASCIIAlphanumeric(*ptr))
364 break;
14957cd0 365 ++ptr;
f9bf01c6
A
366 }
367
6fe7ccc8
A
368 // Here's the shift
369 if (ptr < end) {
370 if ((!WTF::isASCII(*ptr)) || (*ptr == '\\') || (*ptr == '_') || (*ptr == '$'))
371 goto slowCase;
372 m_current = *ptr;
373 } else
374 m_current = 0;
375
376 m_code = ptr;
93a37866 377 ASSERT(currentOffset() >= currentLineStartOffset());
6fe7ccc8
A
378
379 // Create the identifier if needed
81345200
A
380 if (lexerFlags & LexexFlagsDontBuildKeywords
381#if !ASSERT_DISABLED
382 && !m_parsingBuiltinFunction
383#endif
384 )
6fe7ccc8
A
385 tokenData->ident = 0;
386 else
93a37866 387 tokenData->ident = makeLCharIdentifier(start, ptr - start);
81345200 388
93a37866
A
389 tokenLocation->line = m_lineNumber;
390 tokenLocation->lineStartOffset = currentLineStartOffset();
391 tokenLocation->startOffset = offsetFromSourcePtr(start);
392 tokenLocation->endOffset = currentOffset();
393 ASSERT(tokenLocation->startOffset >= tokenLocation->lineStartOffset);
81345200
A
394 tokenRecord->m_startPosition = startPosition;
395 tokenRecord->m_endPosition = currentPosition();
396#if !ASSERT_DISABLED
397 if (m_parsingBuiltinFunction) {
398 if (!isSafeBuiltinIdentifier(*m_vm, tokenData->ident))
399 return ERRORTOK;
400 }
401#endif
402
6fe7ccc8
A
403 m_lastToken = IDENT;
404 return IDENT;
405
406slowCase:
81345200 407 return lex(tokenRecord, lexerFlags, strictMode);
6fe7ccc8
A
408}
409
9dae56ea
A
410} // namespace JSC
411
412#endif // Lexer_h