2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
28 #include "JSFunction.h"
30 #include "JSGlobalObjectFunctions.h"
31 #include "Identifier.h"
38 #include <wtf/Assertions.h>
41 using namespace Unicode
;
43 #include "KeywordLookup.h"
44 #include "Lexer.lut.h"
// Keywords constructor: takes a VM pointer and initializes m_keywordTable
// with JSC::mainTable.
// NOTE(review): the other member initializers and the constructor body are
// not visible in this view.
49 Keywords::Keywords(VM
* vm
)
51 , m_keywordTable(JSC::mainTable
)
56 // Types for the main switch
58 // The first three types are fixed, and also used for identifying
59 // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
60 CharacterIdentifierStart
,
65 CharacterLineTerminator
,
66 CharacterExclamationMark
,
70 CharacterCloseBracket
,
94 // Other types (only one so far)
99 static const unsigned short typesOfLatin1Characters
[256] = {
100 /* 0 - Null */ CharacterInvalid
,
101 /* 1 - Start of Heading */ CharacterInvalid
,
102 /* 2 - Start of Text */ CharacterInvalid
,
103 /* 3 - End of Text */ CharacterInvalid
,
104 /* 4 - End of Transm. */ CharacterInvalid
,
105 /* 5 - Enquiry */ CharacterInvalid
,
106 /* 6 - Acknowledgment */ CharacterInvalid
,
107 /* 7 - Bell */ CharacterInvalid
,
108 /* 8 - Back Space */ CharacterInvalid
,
109 /* 9 - Horizontal Tab */ CharacterWhiteSpace
,
110 /* 10 - Line Feed */ CharacterLineTerminator
,
111 /* 11 - Vertical Tab */ CharacterWhiteSpace
,
112 /* 12 - Form Feed */ CharacterWhiteSpace
,
113 /* 13 - Carriage Return */ CharacterLineTerminator
,
114 /* 14 - Shift Out */ CharacterInvalid
,
115 /* 15 - Shift In */ CharacterInvalid
,
116 /* 16 - Data Line Escape */ CharacterInvalid
,
117 /* 17 - Device Control 1 */ CharacterInvalid
,
118 /* 18 - Device Control 2 */ CharacterInvalid
,
119 /* 19 - Device Control 3 */ CharacterInvalid
,
120 /* 20 - Device Control 4 */ CharacterInvalid
,
121 /* 21 - Negative Ack. */ CharacterInvalid
,
122 /* 22 - Synchronous Idle */ CharacterInvalid
,
123 /* 23 - End of Transmit */ CharacterInvalid
,
124 /* 24 - Cancel */ CharacterInvalid
,
125 /* 25 - End of Medium */ CharacterInvalid
,
126 /* 26 - Substitute */ CharacterInvalid
,
127 /* 27 - Escape */ CharacterInvalid
,
128 /* 28 - File Separator */ CharacterInvalid
,
129 /* 29 - Group Separator */ CharacterInvalid
,
130 /* 30 - Record Separator */ CharacterInvalid
,
131 /* 31 - Unit Separator */ CharacterInvalid
,
132 /* 32 - Space */ CharacterWhiteSpace
,
133 /* 33 - ! */ CharacterExclamationMark
,
134 /* 34 - " */ CharacterQuote
,
135 /* 35 - # */ CharacterInvalid
,
136 /* 36 - $ */ CharacterIdentifierStart
,
137 /* 37 - % */ CharacterModulo
,
138 /* 38 - & */ CharacterAnd
,
139 /* 39 - ' */ CharacterQuote
,
140 /* 40 - ( */ CharacterOpenParen
,
141 /* 41 - ) */ CharacterCloseParen
,
142 /* 42 - * */ CharacterMultiply
,
143 /* 43 - + */ CharacterAdd
,
144 /* 44 - , */ CharacterComma
,
145 /* 45 - - */ CharacterSub
,
146 /* 46 - . */ CharacterDot
,
147 /* 47 - / */ CharacterSlash
,
148 /* 48 - 0 */ CharacterZero
,
149 /* 49 - 1 */ CharacterNumber
,
150 /* 50 - 2 */ CharacterNumber
,
151 /* 51 - 3 */ CharacterNumber
,
152 /* 52 - 4 */ CharacterNumber
,
153 /* 53 - 5 */ CharacterNumber
,
154 /* 54 - 6 */ CharacterNumber
,
155 /* 55 - 7 */ CharacterNumber
,
156 /* 56 - 8 */ CharacterNumber
,
157 /* 57 - 9 */ CharacterNumber
,
158 /* 58 - : */ CharacterColon
,
159 /* 59 - ; */ CharacterSemicolon
,
160 /* 60 - < */ CharacterLess
,
161 /* 61 - = */ CharacterEqual
,
162 /* 62 - > */ CharacterGreater
,
163 /* 63 - ? */ CharacterQuestion
,
164 /* 64 - @ */ CharacterInvalid
,
165 /* 65 - A */ CharacterIdentifierStart
,
166 /* 66 - B */ CharacterIdentifierStart
,
167 /* 67 - C */ CharacterIdentifierStart
,
168 /* 68 - D */ CharacterIdentifierStart
,
169 /* 69 - E */ CharacterIdentifierStart
,
170 /* 70 - F */ CharacterIdentifierStart
,
171 /* 71 - G */ CharacterIdentifierStart
,
172 /* 72 - H */ CharacterIdentifierStart
,
173 /* 73 - I */ CharacterIdentifierStart
,
174 /* 74 - J */ CharacterIdentifierStart
,
175 /* 75 - K */ CharacterIdentifierStart
,
176 /* 76 - L */ CharacterIdentifierStart
,
177 /* 77 - M */ CharacterIdentifierStart
,
178 /* 78 - N */ CharacterIdentifierStart
,
179 /* 79 - O */ CharacterIdentifierStart
,
180 /* 80 - P */ CharacterIdentifierStart
,
181 /* 81 - Q */ CharacterIdentifierStart
,
182 /* 82 - R */ CharacterIdentifierStart
,
183 /* 83 - S */ CharacterIdentifierStart
,
184 /* 84 - T */ CharacterIdentifierStart
,
185 /* 85 - U */ CharacterIdentifierStart
,
186 /* 86 - V */ CharacterIdentifierStart
,
187 /* 87 - W */ CharacterIdentifierStart
,
188 /* 88 - X */ CharacterIdentifierStart
,
189 /* 89 - Y */ CharacterIdentifierStart
,
190 /* 90 - Z */ CharacterIdentifierStart
,
191 /* 91 - [ */ CharacterOpenBracket
,
192 /* 92 - \ */ CharacterBackSlash
,
193 /* 93 - ] */ CharacterCloseBracket
,
194 /* 94 - ^ */ CharacterXor
,
195 /* 95 - _ */ CharacterIdentifierStart
,
196 /* 96 - ` */ CharacterInvalid
,
197 /* 97 - a */ CharacterIdentifierStart
,
198 /* 98 - b */ CharacterIdentifierStart
,
199 /* 99 - c */ CharacterIdentifierStart
,
200 /* 100 - d */ CharacterIdentifierStart
,
201 /* 101 - e */ CharacterIdentifierStart
,
202 /* 102 - f */ CharacterIdentifierStart
,
203 /* 103 - g */ CharacterIdentifierStart
,
204 /* 104 - h */ CharacterIdentifierStart
,
205 /* 105 - i */ CharacterIdentifierStart
,
206 /* 106 - j */ CharacterIdentifierStart
,
207 /* 107 - k */ CharacterIdentifierStart
,
208 /* 108 - l */ CharacterIdentifierStart
,
209 /* 109 - m */ CharacterIdentifierStart
,
210 /* 110 - n */ CharacterIdentifierStart
,
211 /* 111 - o */ CharacterIdentifierStart
,
212 /* 112 - p */ CharacterIdentifierStart
,
213 /* 113 - q */ CharacterIdentifierStart
,
214 /* 114 - r */ CharacterIdentifierStart
,
215 /* 115 - s */ CharacterIdentifierStart
,
216 /* 116 - t */ CharacterIdentifierStart
,
217 /* 117 - u */ CharacterIdentifierStart
,
218 /* 118 - v */ CharacterIdentifierStart
,
219 /* 119 - w */ CharacterIdentifierStart
,
220 /* 120 - x */ CharacterIdentifierStart
,
221 /* 121 - y */ CharacterIdentifierStart
,
222 /* 122 - z */ CharacterIdentifierStart
,
223 /* 123 - { */ CharacterOpenBrace
,
224 /* 124 - | */ CharacterOr
,
225 /* 125 - } */ CharacterCloseBrace
,
226 /* 126 - ~ */ CharacterTilde
,
227 /* 127 - Delete */ CharacterInvalid
,
228 /* 128 - Cc category */ CharacterInvalid
,
229 /* 129 - Cc category */ CharacterInvalid
,
230 /* 130 - Cc category */ CharacterInvalid
,
231 /* 131 - Cc category */ CharacterInvalid
,
232 /* 132 - Cc category */ CharacterInvalid
,
233 /* 133 - Cc category */ CharacterInvalid
,
234 /* 134 - Cc category */ CharacterInvalid
,
235 /* 135 - Cc category */ CharacterInvalid
,
236 /* 136 - Cc category */ CharacterInvalid
,
237 /* 137 - Cc category */ CharacterInvalid
,
238 /* 138 - Cc category */ CharacterInvalid
,
239 /* 139 - Cc category */ CharacterInvalid
,
240 /* 140 - Cc category */ CharacterInvalid
,
241 /* 141 - Cc category */ CharacterInvalid
,
242 /* 142 - Cc category */ CharacterInvalid
,
243 /* 143 - Cc category */ CharacterInvalid
,
244 /* 144 - Cc category */ CharacterInvalid
,
245 /* 145 - Cc category */ CharacterInvalid
,
246 /* 146 - Cc category */ CharacterInvalid
,
247 /* 147 - Cc category */ CharacterInvalid
,
248 /* 148 - Cc category */ CharacterInvalid
,
249 /* 149 - Cc category */ CharacterInvalid
,
250 /* 150 - Cc category */ CharacterInvalid
,
251 /* 151 - Cc category */ CharacterInvalid
,
252 /* 152 - Cc category */ CharacterInvalid
,
253 /* 153 - Cc category */ CharacterInvalid
,
254 /* 154 - Cc category */ CharacterInvalid
,
255 /* 155 - Cc category */ CharacterInvalid
,
256 /* 156 - Cc category */ CharacterInvalid
,
257 /* 157 - Cc category */ CharacterInvalid
,
258 /* 158 - Cc category */ CharacterInvalid
,
259 /* 159 - Cc category */ CharacterInvalid
,
260 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace
,
261 /* 161 - Po category */ CharacterInvalid
,
262 /* 162 - Sc category */ CharacterInvalid
,
263 /* 163 - Sc category */ CharacterInvalid
,
264 /* 164 - Sc category */ CharacterInvalid
,
265 /* 165 - Sc category */ CharacterInvalid
,
266 /* 166 - So category */ CharacterInvalid
,
267 /* 167 - So category */ CharacterInvalid
,
268 /* 168 - Sk category */ CharacterInvalid
,
269 /* 169 - So category */ CharacterInvalid
,
270 /* 170 - Ll category */ CharacterIdentifierStart
,
271 /* 171 - Pi category */ CharacterInvalid
,
272 /* 172 - Sm category */ CharacterInvalid
,
273 /* 173 - Cf category */ CharacterInvalid
,
274 /* 174 - So category */ CharacterInvalid
,
275 /* 175 - Sk category */ CharacterInvalid
,
276 /* 176 - So category */ CharacterInvalid
,
277 /* 177 - Sm category */ CharacterInvalid
,
278 /* 178 - No category */ CharacterInvalid
,
279 /* 179 - No category */ CharacterInvalid
,
280 /* 180 - Sk category */ CharacterInvalid
,
281 /* 181 - Ll category */ CharacterIdentifierStart
,
282 /* 182 - So category */ CharacterInvalid
,
283 /* 183 - Po category */ CharacterInvalid
,
284 /* 184 - Sk category */ CharacterInvalid
,
285 /* 185 - No category */ CharacterInvalid
,
286 /* 186 - Ll category */ CharacterIdentifierStart
,
287 /* 187 - Pf category */ CharacterInvalid
,
288 /* 188 - No category */ CharacterInvalid
,
289 /* 189 - No category */ CharacterInvalid
,
290 /* 190 - No category */ CharacterInvalid
,
291 /* 191 - Po category */ CharacterInvalid
,
292 /* 192 - Lu category */ CharacterIdentifierStart
,
293 /* 193 - Lu category */ CharacterIdentifierStart
,
294 /* 194 - Lu category */ CharacterIdentifierStart
,
295 /* 195 - Lu category */ CharacterIdentifierStart
,
296 /* 196 - Lu category */ CharacterIdentifierStart
,
297 /* 197 - Lu category */ CharacterIdentifierStart
,
298 /* 198 - Lu category */ CharacterIdentifierStart
,
299 /* 199 - Lu category */ CharacterIdentifierStart
,
300 /* 200 - Lu category */ CharacterIdentifierStart
,
301 /* 201 - Lu category */ CharacterIdentifierStart
,
302 /* 202 - Lu category */ CharacterIdentifierStart
,
303 /* 203 - Lu category */ CharacterIdentifierStart
,
304 /* 204 - Lu category */ CharacterIdentifierStart
,
305 /* 205 - Lu category */ CharacterIdentifierStart
,
306 /* 206 - Lu category */ CharacterIdentifierStart
,
307 /* 207 - Lu category */ CharacterIdentifierStart
,
308 /* 208 - Lu category */ CharacterIdentifierStart
,
309 /* 209 - Lu category */ CharacterIdentifierStart
,
310 /* 210 - Lu category */ CharacterIdentifierStart
,
311 /* 211 - Lu category */ CharacterIdentifierStart
,
312 /* 212 - Lu category */ CharacterIdentifierStart
,
313 /* 213 - Lu category */ CharacterIdentifierStart
,
314 /* 214 - Lu category */ CharacterIdentifierStart
,
315 /* 215 - Sm category */ CharacterInvalid
,
316 /* 216 - Lu category */ CharacterIdentifierStart
,
317 /* 217 - Lu category */ CharacterIdentifierStart
,
318 /* 218 - Lu category */ CharacterIdentifierStart
,
319 /* 219 - Lu category */ CharacterIdentifierStart
,
320 /* 220 - Lu category */ CharacterIdentifierStart
,
321 /* 221 - Lu category */ CharacterIdentifierStart
,
322 /* 222 - Lu category */ CharacterIdentifierStart
,
323 /* 223 - Ll category */ CharacterIdentifierStart
,
324 /* 224 - Ll category */ CharacterIdentifierStart
,
325 /* 225 - Ll category */ CharacterIdentifierStart
,
326 /* 226 - Ll category */ CharacterIdentifierStart
,
327 /* 227 - Ll category */ CharacterIdentifierStart
,
328 /* 228 - Ll category */ CharacterIdentifierStart
,
329 /* 229 - Ll category */ CharacterIdentifierStart
,
330 /* 230 - Ll category */ CharacterIdentifierStart
,
331 /* 231 - Ll category */ CharacterIdentifierStart
,
332 /* 232 - Ll category */ CharacterIdentifierStart
,
333 /* 233 - Ll category */ CharacterIdentifierStart
,
334 /* 234 - Ll category */ CharacterIdentifierStart
,
335 /* 235 - Ll category */ CharacterIdentifierStart
,
336 /* 236 - Ll category */ CharacterIdentifierStart
,
337 /* 237 - Ll category */ CharacterIdentifierStart
,
338 /* 238 - Ll category */ CharacterIdentifierStart
,
339 /* 239 - Ll category */ CharacterIdentifierStart
,
340 /* 240 - Ll category */ CharacterIdentifierStart
,
341 /* 241 - Ll category */ CharacterIdentifierStart
,
342 /* 242 - Ll category */ CharacterIdentifierStart
,
343 /* 243 - Ll category */ CharacterIdentifierStart
,
344 /* 244 - Ll category */ CharacterIdentifierStart
,
345 /* 245 - Ll category */ CharacterIdentifierStart
,
346 /* 246 - Ll category */ CharacterIdentifierStart
,
347 /* 247 - Sm category */ CharacterInvalid
,
348 /* 248 - Ll category */ CharacterIdentifierStart
,
349 /* 249 - Ll category */ CharacterIdentifierStart
,
350 /* 250 - Ll category */ CharacterIdentifierStart
,
351 /* 251 - Ll category */ CharacterIdentifierStart
,
352 /* 252 - Ll category */ CharacterIdentifierStart
,
353 /* 253 - Ll category */ CharacterIdentifierStart
,
354 /* 254 - Ll category */ CharacterIdentifierStart
,
355 /* 255 - Ll category */ CharacterIdentifierStart
// This table provides the character that results from \X, where X is the ASCII code (0-127) of the
// escaped character and is used directly as the table index. A table value of 0 means that more
// processing needs to be done.
360 static const LChar singleCharacterEscapeValuesForASCII
[128] = {
362 /* 1 - Start of Heading */ 0,
363 /* 2 - Start of Text */ 0,
364 /* 3 - End of Text */ 0,
365 /* 4 - End of Transm. */ 0,
367 /* 6 - Acknowledgment */ 0,
369 /* 8 - Back Space */ 0,
370 /* 9 - Horizontal Tab */ 0,
371 /* 10 - Line Feed */ 0,
372 /* 11 - Vertical Tab */ 0,
373 /* 12 - Form Feed */ 0,
374 /* 13 - Carriage Return */ 0,
375 /* 14 - Shift Out */ 0,
376 /* 15 - Shift In */ 0,
377 /* 16 - Data Line Escape */ 0,
378 /* 17 - Device Control 1 */ 0,
379 /* 18 - Device Control 2 */ 0,
380 /* 19 - Device Control 3 */ 0,
381 /* 20 - Device Control 4 */ 0,
382 /* 21 - Negative Ack. */ 0,
383 /* 22 - Synchronous Idle */ 0,
384 /* 23 - End of Transmit */ 0,
386 /* 25 - End of Medium */ 0,
387 /* 26 - Substitute */ 0,
389 /* 28 - File Separator */ 0,
390 /* 29 - Group Separator */ 0,
391 /* 30 - Record Separator */ 0,
392 /* 31 - Unit Separator */ 0,
393 /* 32 - Space */ ' ',
// Lexer constructor: takes a VM pointer and starts with m_isReparsing cleared.
// NOTE(review): the remaining member initializers and the constructor body
// are not visible in this view.
491 template <typename T
>
492 Lexer
<T
>::Lexer(VM
* vm
)
493 : m_isReparsing(false)
498 template <typename T
>
503 template <typename T
>
504 String Lexer
<T
>::invalidCharacterMessage() const
508 return "Invalid character: '\\0'";
510 return "Invalid character: '\\n'";
512 return "Invalid character: '\\v'";
514 return "Invalid character: '\\r'";
516 return "Invalid character: '#'";
518 return "Invalid character: '@'";
520 return "Invalid character: '`'";
522 return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current
)).impl();
// Accessor for the current read position in the source buffer. Asserts that
// the cursor (m_code) has not run past m_codeEnd.
// NOTE(review): the return statement is not visible in this view — presumably
// it returns m_code; confirm.
526 template <typename T
>
527 ALWAYS_INLINE
const T
* Lexer
<T
>::currentSourcePtr() const
529 ASSERT(m_code
<= m_codeEnd
);
// Points the lexer at a new piece of source text and resets its scanning state.
//   source: supplies the text (through its provider), the first line number
//           and the start/end offsets of the range to lex.
//   arena:  ParserArena whose identifierArena() is stored in m_arena.
// NOTE(review): some lines of this definition (braces and a few statements)
// are missing from this view; the comments cover only what is visible.
533 template <typename T
>
534 void Lexer
<T
>::setCode(const SourceCode
& source
, ParserArena
* arena
)
536 m_arena
= &arena
->identifierArena();
538 m_lineNumber
= source
.firstLine();
541 const String
& sourceString
= source
.provider()->source();
// Only a non-null provider string is used to set the code start.
543 if (!sourceString
.isNull())
544 setCodeStart(sourceString
.impl());
// The cursor starts at the range's start offset and the end marker sits at its
// end offset, both measured from m_codeStart.
549 m_sourceOffset
= source
.startOffset();
550 m_codeStartPlusOffset
= m_codeStart
+ source
.startOffset();
551 m_code
= m_codeStartPlusOffset
;
552 m_codeEnd
= m_codeStart
+ source
.endOffset();
554 m_atLineStart
= true;
555 m_lineStart
= m_code
;
556 m_lexErrorMessage
= String();
// Pre-size the scratch buffers; m_buffer16 is reserved at half the number of
// code units in the range.
558 m_buffer8
.reserveInitialCapacity(initialReadBufferCapacity
);
559 m_buffer16
.reserveInitialCapacity((m_codeEnd
- m_code
) / 2);
561 if (LIKELY(m_code
< m_codeEnd
))
565 ASSERT(currentOffset() == source
.startOffset());
// Advances the read cursor by the compile-time constant shiftAmount and
// asserts that the resulting offset is not before the current line start.
// NOTE(review): the statement following the ASSERT is not visible in this
// view — presumably it reloads m_current from the new position; confirm.
568 template <typename T
>
569 template <int shiftAmount
> ALWAYS_INLINE
void Lexer
<T
>::internalShift()
571 m_code
+= shiftAmount
;
572 ASSERT(currentOffset() >= currentLineStartOffset());
// Moves the lexer on to the next character.
// NOTE(review): only the bounds check (m_code < m_codeEnd) and the original
// timing remark are visible in this view; the cursor update and the reload of
// m_current are presumably in the omitted lines — confirm.
576 template <typename T
>
577 ALWAYS_INLINE
void Lexer
<T
>::shift()
579 // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
582 if (LIKELY(m_code
< m_codeEnd
))
586 template <typename T
>
587 ALWAYS_INLINE
bool Lexer
<T
>::atEnd() const
589 ASSERT(!m_current
|| m_code
< m_codeEnd
);
590 return UNLIKELY(UNLIKELY(!m_current
) && m_code
== m_codeEnd
);
593 template <typename T
>
594 ALWAYS_INLINE T Lexer
<T
>::peek(int offset
) const
596 ASSERT(offset
> 0 && offset
< 5);
597 const T
* code
= m_code
+ offset
;
598 return (code
< m_codeEnd
) ? *code
: 0;
// Parses the four hex digits of a \uXXXX escape, using m_current plus the three
// following characters (char1..char3), and combines them with convertUnicode.
// Returns a UnicodeHexValue wrapping the parsed value. When any of the four
// characters is not a hex digit, returns IncompleteHex if m_code + 4 reaches or
// passes m_codeEnd, and InvalidHex otherwise.
// NOTE(review): the declarations of char1..char3 and the statements between
// computing `result` and returning it are not visible in this view.
601 template <typename T
>
602 typename Lexer
<T
>::UnicodeHexValue Lexer
<T
>::parseFourDigitUnicodeHex()
608 if (UNLIKELY(!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(char1
) || !isASCIIHexDigit(char2
) || !isASCIIHexDigit(char3
)))
609 return UnicodeHexValue((m_code
+ 4) >= m_codeEnd
? UnicodeHexValue::IncompleteHex
: UnicodeHexValue::InvalidHex
);
611 int result
= convertUnicode(m_current
, char1
, char2
, char3
);
616 return UnicodeHexValue(result
);
// Consumes a line terminator; the current character is asserted to be one.
// A two-character terminator is recognized in either order (CR LF or LF CR) by
// comparing the sum of the previous and current characters with '\n' + '\r'.
// NOTE(review): the declaration of `prev`, the shift calls and any line
// bookkeeping are not visible in this view.
619 template <typename T
>
620 void Lexer
<T
>::shiftLineTerminator()
622 ASSERT(isLineTerminator(m_current
));
627 // Allow both CRLF and LFCR.
628 if (prev
+ m_current
== '\n' + '\r')
634 template <typename T
>
635 ALWAYS_INLINE
bool Lexer
<T
>::lastTokenWasRestrKeyword() const
637 return m_lastToken
== CONTINUE
|| m_lastToken
== BREAK
|| m_lastToken
== RETURN
|| m_lastToken
== THROW
;
640 static NEVER_INLINE
bool isNonLatin1IdentStart(int c
)
642 return category(c
) & (Letter_Uppercase
| Letter_Lowercase
| Letter_Titlecase
| Letter_Modifier
| Letter_Other
);
// Overloads that report whether a character lies in the Latin-1 range; they are
// used to choose between the table-driven and the category-based identifier
// checks.
// NOTE(review): both bodies are missing from this view. Presumably the LChar
// overload is trivially true (its parameter is unnamed) and the UChar overload
// performs a range check — confirm against the full file.
645 static ALWAYS_INLINE
bool isLatin1(LChar
)
650 static ALWAYS_INLINE
bool isLatin1(UChar c
)
655 static inline bool isIdentStart(LChar c
)
657 return typesOfLatin1Characters
[c
] == CharacterIdentifierStart
;
660 static inline bool isIdentStart(UChar c
)
662 return isLatin1(c
) ? isIdentStart(static_cast<LChar
>(c
)) : isNonLatin1IdentStart(c
);
665 static NEVER_INLINE
bool isNonLatin1IdentPart(int c
)
667 return (category(c
) & (Letter_Uppercase
| Letter_Lowercase
| Letter_Titlecase
| Letter_Modifier
| Letter_Other
668 | Mark_NonSpacing
| Mark_SpacingCombining
| Number_DecimalDigit
| Punctuation_Connector
)) || c
== 0x200C || c
== 0x200D;
671 static ALWAYS_INLINE
bool isIdentPart(LChar c
)
673 // Character types are divided into two groups depending on whether they can be part of an
674 // identifier or not. Those whose type value is less or equal than CharacterNumber can be
675 // part of an identifier. (See the CharacterType definition for more details.)
676 return typesOfLatin1Characters
[c
] <= CharacterNumber
;
679 static ALWAYS_INLINE
bool isIdentPart(UChar c
)
681 return isLatin1(c
) ? isIdentPart(static_cast<LChar
>(c
)) : isNonLatin1IdentPart(c
);
// Looks up the replacement for the single-character escape \c in
// singleCharacterEscapeValuesForASCII, indexing the table with c itself.
// The ASSERT checks that c is inside the table before the lookup.
// NOTE(review): some lines of this definition are missing from this view —
// there may be a guard and a fallback return for c outside the table; confirm.
684 static inline LChar
singleEscape(int c
)
687 ASSERT(static_cast<size_t>(c
) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII
));
688 return singleCharacterEscapeValuesForASCII
[c
];
// Appends c, narrowed to LChar, to the 8-bit scratch buffer m_buffer8.
// NOTE(review): lines preceding the append are missing from this view
// (presumably range assertions on c) — confirm.
693 template <typename T
>
694 inline void Lexer
<T
>::record8(int c
)
698 m_buffer8
.append(static_cast<LChar
>(c
));
// Overload set (generic template, UChar, LChar) called from append8 for each
// character before it is stored in the 8-bit buffer.
// NOTE(review): all three bodies are missing from this view; judging by the
// name they assert that the value fits in 8 bits — confirm against the full file.
701 template <typename T
>
702 inline void assertCharIsIn8BitRange(T c
)
710 inline void assertCharIsIn8BitRange(UChar c
)
717 inline void assertCharIsIn8BitRange(LChar
)
// Appends `length` characters starting at p to the 8-bit scratch buffer.
// The buffer is grown by `length` up front, rawBuffer is pointed at the newly
// added tail, and each character is passed to assertCharIsIn8BitRange inside
// the loop.
//   p:      first character to append.
//   length: number of characters to append.
// NOTE(review): the loop's load and store statements are missing from this view.
721 template <typename T
>
722 inline void Lexer
<T
>::append8(const T
* p
, size_t length
)
724 size_t currentSize
= m_buffer8
.size();
725 m_buffer8
.grow(currentSize
+ length
);
726 LChar
* rawBuffer
= m_buffer8
.data() + currentSize
;
728 for (size_t i
= 0; i
< length
; i
++) {
730 assertCharIsIn8BitRange(c
);
// Appends `length` 8-bit characters starting at p to the 16-bit scratch buffer.
// The buffer is grown by `length` up front and rawBuffer is pointed at the
// newly added tail, which the loop then walks.
//   p:      first LChar to append.
//   length: number of characters to append.
// NOTE(review): the loop body is missing from this view.
735 template <typename T
>
736 inline void Lexer
<T
>::append16(const LChar
* p
, size_t length
)
738 size_t currentSize
= m_buffer16
.size();
739 m_buffer16
.grow(currentSize
+ length
);
740 UChar
* rawBuffer
= m_buffer16
.data() + currentSize
;
742 for (size_t i
= 0; i
< length
; i
++)
746 template <typename T
>
747 inline void Lexer
<T
>::record16(T c
)
749 m_buffer16
.append(c
);
// Appends c, narrowed to UChar, to the 16-bit scratch buffer m_buffer16.
// The visible ASSERT checks the upper bound (c <= USHRT_MAX).
// NOTE(review): a line before that ASSERT is missing from this view
// (presumably a lower-bound assertion) — confirm.
752 template <typename T
>
753 inline void Lexer
<T
>::record16(int c
)
756 ASSERT(c
<= static_cast<int>(USHRT_MAX
));
757 m_buffer16
.append(static_cast<UChar
>(c
));
// Fast-path identifier scanner for 8-bit (LChar) source.
//   tokenData:  receives the identifier in tokenData->ident (0 when
//               shouldCreateIdentifier is false).
//   lexerFlags: LexerFlagsIgnoreReservedWords disables keyword recognition.
//   strictMode: when false, a RESERVED_IF_STRICT keyword is reported as IDENT.
// Returns the token type: a keyword token or, per the visible returns, IDENT.
// NOTE(review): some lines of this definition (braces, else branches, the scan
// loop body and the trailing returns) are missing from this view; the comments
// cover only what is visible.
761 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType Lexer
<LChar
>::parseIdentifier(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
)
763 const ptrdiff_t remaining
= m_codeEnd
- m_code
;
// With at least maxTokenLength characters left, try the keyword parser first.
764 if ((remaining
>= maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
)) {
765 JSTokenType keyword
= parseKeyword
<shouldCreateIdentifier
>(tokenData
);
766 if (keyword
!= IDENT
) {
767 ASSERT((!shouldCreateIdentifier
) || tokenData
->ident
);
768 return keyword
== RESERVED_IF_STRICT
&& !strictMode
? IDENT
: keyword
;
// Remember where the identifier starts so the slow case can rewind to it.
772 const LChar
* identifierStart
= currentSourcePtr();
773 unsigned identifierLineStart
= currentLineStartOffset();
775 while (isIdentPart(m_current
))
// A backslash means the identifier contains an escape: rewind and let the
// slow case handle it.
778 if (UNLIKELY(m_current
== '\\')) {
779 setOffsetFromSourcePtr(identifierStart
, identifierLineStart
);
780 return parseIdentifierSlowCase
<shouldCreateIdentifier
>(tokenData
, lexerFlags
, strictMode
);
783 const Identifier
* ident
= 0;
785 if (shouldCreateIdentifier
) {
786 int identifierLength
= currentSourcePtr() - identifierStart
;
787 ident
= makeIdentifier(identifierStart
, identifierLength
);
789 tokenData
->ident
= ident
;
791 tokenData
->ident
= 0;
// Near the end of input the keyword parser was skipped above, so the keyword
// table is consulted here instead.
793 if (UNLIKELY((remaining
< maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
))) {
794 ASSERT(shouldCreateIdentifier
);
795 if (remaining
< maxTokenLength
) {
796 const HashEntry
* entry
= m_vm
->keywords
->getKeyword(*ident
);
797 ASSERT((remaining
< maxTokenLength
) || !entry
);
800 JSTokenType token
= static_cast<JSTokenType
>(entry
->lexerValue());
801 return (token
!= RESERVED_IF_STRICT
) || strictMode
? token
: IDENT
;
// Fast-path identifier scanner for 16-bit (UChar) source. Mirrors the LChar
// version, and additionally ORs every scanned character into orAllChars so
// that an all-8-bit identifier can be detected with one mask test.
//   tokenData:  receives the identifier in tokenData->ident (0 when
//               shouldCreateIdentifier is false).
//   lexerFlags: LexerFlagsIgnoreReservedWords disables keyword recognition.
//   strictMode: when false, a RESERVED_IF_STRICT keyword is reported as IDENT.
// Returns the token type: a keyword token or, per the visible returns, IDENT.
// NOTE(review): some lines of this definition (braces, else branches, the shift
// inside the scan loop, the statement that sets isAll8Bit and the trailing
// returns) are missing from this view.
810 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType Lexer
<UChar
>::parseIdentifier(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
)
812 const ptrdiff_t remaining
= m_codeEnd
- m_code
;
// With at least maxTokenLength characters left, try the keyword parser first.
813 if ((remaining
>= maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
)) {
814 JSTokenType keyword
= parseKeyword
<shouldCreateIdentifier
>(tokenData
);
815 if (keyword
!= IDENT
) {
816 ASSERT((!shouldCreateIdentifier
) || tokenData
->ident
);
817 return keyword
== RESERVED_IF_STRICT
&& !strictMode
? IDENT
: keyword
;
// Remember where the identifier starts so the slow case can rewind to it.
821 const UChar
* identifierStart
= currentSourcePtr();
822 int identifierLineStart
= currentLineStartOffset();
824 UChar orAllChars
= 0;
826 while (isIdentPart(m_current
)) {
827 orAllChars
|= m_current
;
// A backslash means the identifier contains an escape: rewind and let the
// slow case handle it.
831 if (UNLIKELY(m_current
== '\\')) {
832 setOffsetFromSourcePtr(identifierStart
, identifierLineStart
);
833 return parseIdentifierSlowCase
<shouldCreateIdentifier
>(tokenData
, lexerFlags
, strictMode
);
836 bool isAll8Bit
= false;
// No bit above 0xff was set in any scanned character.
838 if (!(orAllChars
& ~0xff))
841 const Identifier
* ident
= 0;
843 if (shouldCreateIdentifier
) {
844 int identifierLength
= currentSourcePtr() - identifierStart
;
// Two construction paths: an LChar-backed identifier built from the UChar
// source, or a plain UChar identifier (the selecting condition is not visible).
846 ident
= makeIdentifierLCharFromUChar(identifierStart
, identifierLength
);
848 ident
= makeIdentifier(identifierStart
, identifierLength
);
850 tokenData
->ident
= ident
;
852 tokenData
->ident
= 0;
// Near the end of input the keyword parser was skipped above, so the keyword
// table is consulted here instead.
854 if (UNLIKELY((remaining
< maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
))) {
855 ASSERT(shouldCreateIdentifier
);
856 if (remaining
< maxTokenLength
) {
857 const HashEntry
* entry
= m_vm
->keywords
->getKeyword(*ident
);
858 ASSERT((remaining
< maxTokenLength
) || !entry
);
861 JSTokenType token
= static_cast<JSTokenType
>(entry
->lexerValue());
862 return (token
!= RESERVED_IF_STRICT
) || strictMode
? token
: IDENT
;
// Slow-path identifier scanner that understands \uXXXX escapes.
// Plain runs of identifier characters are taken straight from the source; once
// an escape is seen (bufferRequired), the pieces are accumulated in m_buffer16
// and the identifier is built from that buffer instead.
//   tokenData:  receives the identifier in tokenData->ident (0 when
//               shouldCreateIdentifier is false).
//   lexerFlags: LexerFlagsIgnoreReservedWords disables keyword recognition.
//   strictMode: when false, a RESERVED_IF_STRICT keyword is reported as IDENT.
// Returns the token type, or one of the *_IDENTIFIER_*ESCAPE_ERRORTOK values
// when an escape is malformed, unterminated or yields a character that is not
// allowed at that position.
// NOTE(review): some lines of this definition (the loop header, braces, shifts
// and the trailing returns) are missing from this view.
870 template <typename T
>
871 template <bool shouldCreateIdentifier
> JSTokenType Lexer
<T
>::parseIdentifierSlowCase(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
)
873 const ptrdiff_t remaining
= m_codeEnd
- m_code
;
874 const T
* identifierStart
= currentSourcePtr();
875 bool bufferRequired
= false;
878 if (LIKELY(isIdentPart(m_current
))) {
882 if (LIKELY(m_current
!= '\\'))
885 // \uXXXX unicode characters.
886 bufferRequired
= true;
// Flush the plain characters scanned since the last escape into the buffer.
887 if (identifierStart
!= currentSourcePtr())
888 m_buffer16
.append(identifierStart
, currentSourcePtr() - identifierStart
);
890 if (UNLIKELY(m_current
!= 'u'))
891 return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK
: INVALID_IDENTIFIER_ESCAPE_ERRORTOK
;
893 UnicodeHexValue character
= parseFourDigitUnicodeHex();
894 if (UNLIKELY(!character
.isValid()))
895 return character
.valueType() == UnicodeHexValue::IncompleteHex
? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
: INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
;
896 UChar ucharacter
= static_cast<UChar
>(character
.value());
// The escaped character must be a valid identifier start when the buffer is
// still empty, and a valid identifier part otherwise.
897 if (UNLIKELY(m_buffer16
.size() ? !isIdentPart(ucharacter
) : !isIdentStart(ucharacter
)))
898 return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
;
899 if (shouldCreateIdentifier
)
900 record16(ucharacter
);
901 identifierStart
= currentSourcePtr();
904 int identifierLength
;
905 const Identifier
* ident
= 0;
906 if (shouldCreateIdentifier
) {
// No escape seen: build the identifier directly from the source text.
907 if (!bufferRequired
) {
908 identifierLength
= currentSourcePtr() - identifierStart
;
909 ident
= makeIdentifier(identifierStart
, identifierLength
);
// Escape seen: append the trailing plain run and build from the buffer.
911 if (identifierStart
!= currentSourcePtr())
912 m_buffer16
.append(identifierStart
, currentSourcePtr() - identifierStart
);
913 ident
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size());
916 tokenData
->ident
= ident
;
918 tokenData
->ident
= 0;
920 if (LIKELY(!bufferRequired
&& !(lexerFlags
& LexerFlagsIgnoreReservedWords
))) {
921 ASSERT(shouldCreateIdentifier
);
922 // Keywords must not be recognized if there was an \uXXXX in the identifier.
923 if (remaining
< maxTokenLength
) {
924 const HashEntry
* entry
= m_vm
->keywords
->getKeyword(*ident
);
925 ASSERT((remaining
< maxTokenLength
) || !entry
);
928 JSTokenType token
= static_cast<JSTokenType
>(entry
->lexerValue());
929 return (token
!= RESERVED_IF_STRICT
) || strictMode
? token
: IDENT
;
// Leave the scratch buffer empty for the next token.
934 m_buffer16
.resize(0);
938 static ALWAYS_INLINE
bool characterRequiresParseStringSlowCase(LChar character
)
940 return character
< 0xE;
943 static ALWAYS_INLINE
bool characterRequiresParseStringSlowCase(UChar character
)
945 return character
< 0xE || character
> 0xFF;
// Fast-path string literal scanner that collects the string in the 8-bit
// buffer m_buffer8. The opening quote is the current character on entry and
// scanning runs until the matching quote.
// When something is met that this path does not handle — an escape other than
// the ones visible below, or a character for which
// characterRequiresParseStringSlowCase() is true — the offset and line number
// saved on entry are restored and the work is handed to parseStringSlowCase.
//   tokenData:  receives the string in tokenData->ident (0 when
//               shouldBuildStrings is false).
//   strictMode: forwarded to parseStringSlowCase.
// Returns StringParsedSuccessfully, StringUnterminated or StringCannotBeParsed
// (or whatever parseStringSlowCase returns).
// NOTE(review): some lines of this definition (braces, shifts, a few branches)
// are missing from this view; the comments cover only what is visible.
948 template <typename T
>
949 template <bool shouldBuildStrings
> ALWAYS_INLINE typename Lexer
<T
>::StringParseResult Lexer
<T
>::parseString(JSTokenData
* tokenData
, bool strictMode
)
// Saved so the lexer can be rewound before falling back to the slow case.
951 int startingOffset
= currentOffset();
952 int startingLineStartOffset
= currentLineStartOffset();
953 int startingLineNumber
= lineNumber();
954 T stringQuoteCharacter
= m_current
;
957 const T
* stringStart
= currentSourcePtr();
959 while (m_current
!= stringQuoteCharacter
) {
960 if (UNLIKELY(m_current
== '\\')) {
// Flush the plain characters scanned so far before handling the escape.
961 if (stringStart
!= currentSourcePtr() && shouldBuildStrings
)
962 append8(stringStart
, currentSourcePtr() - stringStart
);
965 LChar escape
= singleEscape(m_current
);
967 // Most common escape sequences first
969 if (shouldBuildStrings
)
972 } else if (UNLIKELY(isLineTerminator(m_current
)))
973 shiftLineTerminator();
974 else if (m_current
== 'x') {
976 if (!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(peek(1))) {
977 m_lexErrorMessage
= "\\x can only be followed by a hex character sequence";
// Reported as unterminated when the input ends at, or one hex digit after,
// the current position.
978 return (atEnd() || (isASCIIHexDigit(m_current
) && (m_code
+ 1 == m_codeEnd
))) ? StringUnterminated
: StringCannotBeParsed
;
982 if (shouldBuildStrings
)
983 record8(convertHex(prev
, m_current
));
// Rewind to the start of the literal and redo it on the slow path.
986 setOffset(startingOffset
, startingLineStartOffset
);
987 setLineNumber(startingLineNumber
);
989 return parseStringSlowCase
<shouldBuildStrings
>(tokenData
, strictMode
);
991 stringStart
= currentSourcePtr();
995 if (UNLIKELY(characterRequiresParseStringSlowCase(m_current
))) {
996 setOffset(startingOffset
, startingLineStartOffset
);
997 setLineNumber(startingLineNumber
);
999 return parseStringSlowCase
<shouldBuildStrings
>(tokenData
, strictMode
);
// Closing quote reached: flush the final plain run and build the identifier.
1005 if (currentSourcePtr() != stringStart
&& shouldBuildStrings
)
1006 append8(stringStart
, currentSourcePtr() - stringStart
);
1007 if (shouldBuildStrings
) {
1008 tokenData
->ident
= makeIdentifier(m_buffer8
.data(), m_buffer8
.size());
1009 m_buffer8
.resize(0);
1011 tokenData
->ident
= 0;
1013 return StringParsedSuccessfully
;
// General string literal scanner that collects the string in the 16-bit buffer
// m_buffer16. The opening quote is the current character on entry. The visible
// branches handle single-character escapes, escaped line terminators, \xHH,
// \uXXXX, the strict-mode '\0' rule, non-strict octal escapes and any other
// escaped character.
//   tokenData:  receives the string in tokenData->ident (0 when
//               shouldBuildStrings is false).
//   strictMode: selects between the strict '\0'-only rule and octal escapes.
// Returns StringParsedSuccessfully, StringUnterminated or StringCannotBeParsed;
// on failure m_lexErrorMessage holds the reason.
// NOTE(review): some lines of this definition (braces, shifts, else branches
// and a few record16 calls) are missing from this view; the comments cover only
// what is visible.
1016 template <typename T
>
1017 template <bool shouldBuildStrings
> typename Lexer
<T
>::StringParseResult Lexer
<T
>::parseStringSlowCase(JSTokenData
* tokenData
, bool strictMode
)
1019 T stringQuoteCharacter
= m_current
;
1022 const T
* stringStart
= currentSourcePtr();
1024 while (m_current
!= stringQuoteCharacter
) {
1025 if (UNLIKELY(m_current
== '\\')) {
// Flush the plain characters scanned so far before handling the escape.
1026 if (stringStart
!= currentSourcePtr() && shouldBuildStrings
)
1027 append16(stringStart
, currentSourcePtr() - stringStart
);
1030 LChar escape
= singleEscape(m_current
);
1032 // Most common escape sequences first
1034 if (shouldBuildStrings
)
1037 } else if (UNLIKELY(isLineTerminator(m_current
)))
1038 shiftLineTerminator();
1039 else if (m_current
== 'x') {
1041 if (!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(peek(1))) {
1042 m_lexErrorMessage
= "\\x can only be followed by a hex character sequence";
1043 return StringCannotBeParsed
;
1047 if (shouldBuildStrings
)
1048 record16(convertHex(prev
, m_current
));
1050 } else if (m_current
== 'u') {
1052 UnicodeHexValue character
= parseFourDigitUnicodeHex();
1053 if (character
.isValid()) {
1054 if (shouldBuildStrings
)
1055 record16(character
.value());
// An invalid \u escape directly followed by the closing quote gets its own
// branch (its body is not fully visible here).
1056 } else if (m_current
== stringQuoteCharacter
) {
1057 if (shouldBuildStrings
)
1060 m_lexErrorMessage
= "\\u can only be followed by a Unicode character sequence";
1061 return character
.valueType() == UnicodeHexValue::IncompleteHex
? StringUnterminated
: StringCannotBeParsed
;
1063 } else if (strictMode
&& isASCIIDigit(m_current
)) {
1064 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1065 int character1
= m_current
;
1067 if (character1
!= '0' || isASCIIDigit(m_current
)) {
1068 m_lexErrorMessage
= "The only valid numeric escape in strict mode is '\\0'";
1069 return StringCannotBeParsed
;
1071 if (shouldBuildStrings
)
1073 } else if (!strictMode
&& isASCIIOctalDigit(m_current
)) {
1074 // Octal character sequences
1075 T character1
= m_current
;
1077 if (isASCIIOctalDigit(m_current
)) {
1078 // Two octal characters
1079 T character2
= m_current
;
// A third octal digit is only consumed when the first digit is 0-3.
1081 if (character1
>= '0' && character1
<= '3' && isASCIIOctalDigit(m_current
)) {
1082 if (shouldBuildStrings
)
1083 record16((character1
- '0') * 64 + (character2
- '0') * 8 + m_current
- '0');
1086 if (shouldBuildStrings
)
1087 record16((character1
- '0') * 8 + character2
- '0');
1090 if (shouldBuildStrings
)
1091 record16(character1
- '0');
// Any other escaped character stands for itself.
1093 } else if (!atEnd()) {
1094 if (shouldBuildStrings
)
1095 record16(m_current
);
1098 m_lexErrorMessage
= "Unterminated string constant";
1099 return StringUnterminated
;
1102 stringStart
= currentSourcePtr();
1105 // Fast check for characters that require special handling.
1106 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1107 // as possible, and lets through all common ASCII characters.
1108 if (UNLIKELY(((static_cast<unsigned>(m_current
) - 0xE) & 0x2000))) {
1109 // New-line or end of input is not allowed
1110 if (atEnd() || isLineTerminator(m_current
)) {
1111 m_lexErrorMessage
= "Unexpected EOF";
1112 return atEnd() ? StringUnterminated
: StringCannotBeParsed
;
1114 // Anything else is just a normal character
// Closing quote reached: flush the final plain run and build the identifier.
1119 if (currentSourcePtr() != stringStart
&& shouldBuildStrings
)
1120 append16(stringStart
, currentSourcePtr() - stringStart
);
1121 if (shouldBuildStrings
)
1122 tokenData
->ident
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size());
1124 tokenData
->ident
= 0;
// Leave the scratch buffer empty for the next token.
1126 m_buffer16
.resize(0);
1127 return StringParsedSuccessfully
;
// Parses a hexadecimal literal into returnValue.
// Digits are first accumulated in the 32-bit hexValue while the digit budget
// (maximumDigits) lasts. If the literal fits, hexValue is the result.
// Otherwise the eight nibbles accumulated so far are written to m_buffer8 as
// ASCII hex digits, the remaining digits are consumed, and the value is
// computed from the buffer with parseIntOverflow(..., 16).
//   returnValue: receives the parsed value.
// NOTE(review): some lines of this definition (braces, shifts, the budget
// decrement, the early return and the digit-range test in the nibble loop) are
// missing from this view.
1130 template <typename T
>
1131 ALWAYS_INLINE
void Lexer
<T
>::parseHex(double& returnValue
)
1133 // Optimization: most hexadecimal values fit into 4 bytes.
1134 uint32_t hexValue
= 0;
1135 int maximumDigits
= 7;
1137 // Shift out the 'x' prefix.
1141 hexValue
= (hexValue
<< 4) + toASCIIHexValue(m_current
);
1144 } while (isASCIIHexDigit(m_current
) && maximumDigits
>= 0);
// The digit budget was not exhausted, so the value fits in hexValue.
1146 if (maximumDigits
>= 0) {
1147 returnValue
= hexValue
;
1151 // No more place in the hexValue buffer.
1152 // The values are shifted out and placed into the m_buffer8 vector.
1153 for (int i
= 0; i
< 8; ++i
) {
// Take the top nibble and emit it as an ASCII hex digit.
1154 int digit
= hexValue
>> 28;
1156 record8(digit
+ '0');
1158 record8(digit
- 10 + 'a');
1162 while (isASCIIHexDigit(m_current
)) {
1167 returnValue
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 16);
// Lexes the digits of an octal literal and stores the numeric value in
// returnValue.
//
// Fast path: digits are accumulated into the 32-bit octalValue and also saved
// in the temporary digits buffer. Slow path: parseIntOverflow() computes the
// value in radix 8 from the characters in m_buffer8.
//
// The function returns bool and lex() tests the result at 1594.
//
// NOTE(review): the declaration of digits, the decrement of maximumDigits, the
// bodies of the later loops and every return statement fall on lines missing
// from this listing (the embedded numbering skips) -- confirm the exact
// true/false contract against the full file.
1170 template <typename T
>
1171 ALWAYS_INLINE
bool Lexer
<T
>::parseOctal(double& returnValue
)
1173 // Optimization: most octal values fit into 4 bytes.
1174 uint32_t octalValue
= 0;
1175 int maximumDigits
= 9;
1176 // Temporary buffer for the digits. Makes easier
1177 // to reconstruct the input characters when needed.
// Accumulate the digit value and remember the raw character in the slot
// indexed by maximumDigits.
1181 octalValue
= octalValue
* 8 + (m_current
- '0');
1182 digits
[maximumDigits
] = m_current
;
1185 } while (isASCIIOctalDigit(m_current
) && maximumDigits
>= 0);
// Fast path requires that the next character is not a decimal digit at all
// (so not 8 or 9 either) and that maximumDigits is still >= 0.
1187 if (!isASCIIDigit(m_current
) && maximumDigits
>= 0) {
1188 returnValue
= octalValue
;
// Visits the slots above maximumDigits, starting from index 9 (loop body not
// visible; presumably copies the saved digits into m_buffer8).
1192 for (int i
= 9; i
> maximumDigits
; --i
)
// Remaining octal digits (loop body not visible in this listing).
1195 while (isASCIIOctalDigit(m_current
)) {
// A decimal digit here can only be 8 or 9, since octal digits were consumed by
// the loop above; the action taken (1201) is not visible in this listing.
1200 if (isASCIIDigit(m_current
))
1203 returnValue
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 8);
// Lexes the integer digits of a decimal literal.
//
// Fast path (only attempted when m_buffer8 is empty): digits are accumulated
// into the 32-bit decimalValue, and that value is stored in returnValue when
// maximumDigits is still >= 0 and the next character is neither '.' nor an
// exponent indicator.
//
// The function returns bool; lex() enters its '.' / exponent / parseDouble
// handling when the result is false (see 1607).
//
// NOTE(review): the declaration of digits, the decrement of maximumDigits, the
// loop bodies and every return statement fall on lines missing from this
// listing (the embedded numbering skips) -- confirm against the full file.
1207 template <typename T
>
1208 ALWAYS_INLINE
bool Lexer
<T
>::parseDecimal(double& returnValue
)
1210 // Optimization: most decimal values fit into 4 bytes.
1211 uint32_t decimalValue
= 0;
1213 // Since parseOctal may be executed before parseDecimal,
1214 // the m_buffer8 may hold ascii digits.
1215 if (!m_buffer8
.size()) {
1216 int maximumDigits
= 9;
1217 // Temporary buffer for the digits. Makes easier
1218 // to reconstruct the input characters when needed.
// Accumulate the digit value and remember the raw character in the slot
// indexed by maximumDigits.
1222 decimalValue
= decimalValue
* 10 + (m_current
- '0');
1223 digits
[maximumDigits
] = m_current
;
1226 } while (isASCIIDigit(m_current
) && maximumDigits
>= 0);
// OR-ing with 0x20 folds ASCII upper case onto lower case, so the last test
// rejects both e and E.
1228 if (maximumDigits
>= 0 && m_current
!= '.' && (m_current
| 0x20) != 'e') {
1229 returnValue
= decimalValue
;
// Visits the slots above maximumDigits, starting from index 9 (loop body not
// visible; presumably copies the saved digits into m_buffer8).
1233 for (int i
= 9; i
> maximumDigits
; --i
)
// Remaining decimal digits (loop body not visible in this listing).
1237 while (isASCIIDigit(m_current
)) {
// Handles the part of a numeric literal that follows the decimal point: loops
// for as long as m_current is an ASCII digit.
//
// NOTE(review): line 1248 and the loop body (1250-1251) are missing from this
// listing, so what is recorded for each digit is not visible here.
1245 template <typename T
>
1246 ALWAYS_INLINE
void Lexer
<T
>::parseNumberAfterDecimalPoint()
1249 while (isASCIIDigit(m_current
)) {
// Handles the exponent part of a numeric literal: an optional '+' or '-' sign,
// a check that a digit follows, then a loop over the exponent digits.
//
// The function returns bool; lex() reports "Non-number found after exponent
// indicator" when the result is false (see 1614-1615).
//
// NOTE(review): the statements inside each branch and every return statement
// fall on lines missing from this listing -- presumably the missing-digit
// check at 1265 is what yields false; confirm against the full file.
1255 template <typename T
>
1256 ALWAYS_INLINE
bool Lexer
<T
>::parseNumberAfterExponentIndicator()
// Optional sign of the exponent.
1260 if (m_current
== '+' || m_current
== '-') {
// At least one digit is required at this point.
1265 if (!isASCIIDigit(m_current
))
1271 } while (isASCIIDigit(m_current
));
// Scans the body of a multiline comment.
//
// The visible lines look for a '*' followed by '/', and consume line
// terminators via shiftLineTerminator() while setting m_terminator, so a
// comment that spans lines is remembered as having contained a line break.
//
// The function returns bool; lex() reports "Multiline comment was not closed
// properly" on the path taken when the result is not true (see 1468-1471).
//
// NOTE(review): the enclosing loop, the end-of-input check and every return
// statement fall on lines missing from this listing -- confirm against the
// full file.
1275 template <typename T
>
1276 ALWAYS_INLINE
bool Lexer
<T
>::parseMultilineComment()
// Handles runs of '*' characters while looking for the closing slash.
1279 while (UNLIKELY(m_current
== '*')) {
1281 if (m_current
== '/') {
// Line terminators inside the comment are consumed and recorded.
1290 if (isLineTerminator(m_current
)) {
1291 shiftLineTerminator();
1292 m_terminator
= true;
// Looks ahead in the source to report whether the next character that is
// neither whitespace nor a line terminator is ':'.
//
// The scan uses a local pointer initialised from m_code, so m_code itself is
// not assigned in the visible lines. Only isWhiteSpace and isLineTerminator
// characters are skipped; reaching m_codeEnd yields false.
//
// NOTE(review): the loop body (1303) is missing from this listing; presumably
// it advances the local pointer.
1298 template <typename T
>
1299 bool Lexer
<T
>::nextTokenIsColon()
1301 const T
* code
= m_code
;
1302 while (code
< m_codeEnd
&& (isWhiteSpace(*code
) || isLineTerminator(*code
)))
1305 return code
< m_codeEnd
&& *code
== ':';
// Main tokenizer entry point: scans one token starting at m_current and
// returns its JSTokenType.
//
//   tokenData     - receives the token payload: doubleValue for numeric
//                   literals, line / offset / lineStartOffset for braces, and
//                   it is passed on to parseString and parseIdentifier.
//   tokenLocation - receives startOffset when the token begins, and line,
//                   endOffset and lineStartOffset on exit.
//   lexerFlags    - bit flags; LexerFlagsDontBuildStrings and
//                   LexexFlagsDontBuildKeywords select which template variant
//                   of parseString / parseIdentifier is called.
//   strictMode    - forwarded to parseString and parseIdentifier.
//
// On the error paths visible here m_lexErrorMessage is set together with an
// error token, and the final exit path asserts that the token carries
// ErrorTokenFlag.
//
// NOTE(review): the embedded line numbers skip throughout this function, so
// braces, labels, several case labels, shift() calls and goto / return
// statements are not visible in this listing. The comments added below
// describe only what the visible lines show.
1308 template <typename T
>
1309 JSTokenType Lexer
<T
>::lex(JSTokenData
* tokenData
, JSTokenLocation
* tokenLocation
, unsigned lexerFlags
, bool strictMode
)
// Both scratch buffers are expected to be empty at the start of every token.
1312 ASSERT(m_buffer8
.isEmpty());
1313 ASSERT(m_buffer16
.isEmpty());
1315 JSTokenType token
= ERRORTOK
;
// Reset for this call; set to true below whenever a line terminator is consumed.
1316 m_terminator
= false;
// Leading whitespace loop (body not visible in this listing).
1319 while (isWhiteSpace(m_current
))
// Record where the token begins.
1325 tokenLocation
->startOffset
= currentOffset();
1326 ASSERT(currentOffset() >= currentLineStartOffset());
// Classify the current character. Latin-1 characters use the
// typesOfLatin1Characters lookup table; anything else is an identifier start,
// a line terminator, or invalid.
1329 if (LIKELY(isLatin1(m_current
)))
1330 type
= static_cast<CharacterType
>(typesOfLatin1Characters
[m_current
]);
1331 else if (isNonLatin1IdentStart(m_current
))
1332 type
= CharacterIdentifierStart
;
1333 else if (isLineTerminator(m_current
))
1334 type
= CharacterLineTerminator
;
1336 type
= CharacterInvalid
;
1339 case CharacterGreater
:
1341 if (m_current
== '>') {
1343 if (m_current
== '>') {
1345 if (m_current
== '=') {
1347 token
= URSHIFTEQUAL
;
1353 if (m_current
== '=') {
1355 token
= RSHIFTEQUAL
;
1361 if (m_current
== '=') {
1368 case CharacterEqual
:
1370 if (m_current
== '=') {
1372 if (m_current
== '=') {
1384 if (m_current
== '!' && peek(1) == '-' && peek(2) == '-') {
1385 // <!-- marks the beginning of a line comment (for www usage)
1386 goto inSingleLineComment
;
1388 if (m_current
== '<') {
1390 if (m_current
== '=') {
1392 token
= LSHIFTEQUAL
;
1398 if (m_current
== '=') {
1405 case CharacterExclamationMark
:
1407 if (m_current
== '=') {
1409 if (m_current
== '=') {
1417 token
= EXCLAMATION
;
1421 if (m_current
== '+') {
// AUTOPLUSPLUS is chosen when m_terminator is set, i.e. a line terminator was
// consumed earlier in this call.
1423 token
= (!m_terminator
) ? PLUSPLUS
: AUTOPLUSPLUS
;
1426 if (m_current
== '=') {
1435 if (m_current
== '-') {
// A '>' at this point is treated as the start of a single-line comment, but
// only when m_atLineStart is set (presumably this matches "-->").
1437 if (m_atLineStart
&& m_current
== '>') {
1439 goto inSingleLineComment
;
// Same line-terminator distinction as for the increment operator above.
1441 token
= (!m_terminator
) ? MINUSMINUS
: AUTOMINUSMINUS
;
1444 if (m_current
== '=') {
1451 case CharacterMultiply
:
1453 if (m_current
== '=') {
1460 case CharacterSlash
:
1462 if (m_current
== '/') {
1464 goto inSingleLineComment
;
1466 if (m_current
== '*') {
1468 if (parseMultilineComment())
1470 m_lexErrorMessage
= "Multiline comment was not closed properly";
1471 token
= UNTERMINATED_MULTILINE_COMMENT_ERRORTOK
;
1474 if (m_current
== '=') {
1483 if (m_current
== '&') {
1488 if (m_current
== '=') {
1497 if (m_current
== '=') {
1504 case CharacterModulo
:
1506 if (m_current
== '=') {
1515 if (m_current
== '=') {
1520 if (m_current
== '|') {
1527 case CharacterOpenParen
:
1531 case CharacterCloseParen
:
1535 case CharacterOpenBracket
:
1536 token
= OPENBRACKET
;
1539 case CharacterCloseBracket
:
1540 token
= CLOSEBRACKET
;
1543 case CharacterComma
:
1547 case CharacterColon
:
1551 case CharacterQuestion
:
1555 case CharacterTilde
:
1559 case CharacterSemicolon
:
1563 case CharacterOpenBrace
:
// Braces additionally record their own line, offset and line start in tokenData.
1564 tokenData
->line
= lineNumber();
1565 tokenData
->offset
= currentOffset();
1566 tokenData
->lineStartOffset
= currentLineStartOffset();
1567 ASSERT(tokenData
->offset
>= tokenData
->lineStartOffset
);
1571 case CharacterCloseBrace
:
1572 tokenData
->line
= lineNumber();
1573 tokenData
->offset
= currentOffset();
1574 tokenData
->lineStartOffset
= currentLineStartOffset();
1575 ASSERT(tokenData
->offset
>= tokenData
->lineStartOffset
);
1581 if (!isASCIIDigit(m_current
)) {
1585 goto inNumberAfterDecimalPoint
;
// OR-ing with 0x20 folds ASCII case, so both x and X are accepted; a hex digit
// must follow for the literal to be parsed as hexadecimal.
1588 if ((m_current
| 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
1589 parseHex(tokenData
->doubleValue
);
1593 if (isASCIIOctalDigit(m_current
)) {
1594 if (parseOctal(tokenData
->doubleValue
)) {
// NOTE(review): the message speaks of octal "escapes" while the token set on
// the next line is INVALID_OCTAL_NUMBER_ERRORTOK; the guarding condition
// (1595) is not visible here -- confirm the wording is intended.
1596 m_lexErrorMessage
= "Octal escapes are forbidden in strict mode";
1597 token
= INVALID_OCTAL_NUMBER_ERRORTOK
;
1604 // Fall through into CharacterNumber
1605 case CharacterNumber
:
// token can already be NUMBER here when control fell through from the case
// above; decimal parsing is then skipped.
1606 if (LIKELY(token
!= NUMBER
)) {
1607 if (!parseDecimal(tokenData
->doubleValue
)) {
1608 if (m_current
== '.') {
1610 inNumberAfterDecimalPoint
:
1611 parseNumberAfterDecimalPoint();
// Exponent indicator, matched case-insensitively via the 0x20 fold.
1613 if ((m_current
| 0x20) == 'e') {
1614 if (!parseNumberAfterExponentIndicator()) {
1615 m_lexErrorMessage
= "Non-number found after exponent indicator";
1616 token
= atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK
: INVALID_NUMERIC_LITERAL_ERRORTOK
;
// The characters collected in m_buffer8 are converted by parseDouble;
// parsedLength is an out argument that is not read in the visible lines.
1620 size_t parsedLength
;
1621 tokenData
->doubleValue
= parseDouble(m_buffer8
.data(), m_buffer8
.size(), parsedLength
);
1626 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
// NOTE(review): the message assigned below talks about digits after a decimal
// point, but the condition tested is an identifier start directly after the
// literal. The text looks copied from another check; it is left unchanged here
// because it is runtime output.
1627 if (UNLIKELY(isIdentStart(m_current
))) {
1628 m_lexErrorMessage
= "At least one digit must occur after a decimal point";
1629 token
= atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK
: INVALID_NUMERIC_LITERAL_ERRORTOK
;
// Leave the 8-bit scratch buffer empty for the next token.
1632 m_buffer8
.resize(0);
1634 case CharacterQuote
:
// The flag selects parseString<false>; otherwise parseString<true> is used.
// Both branches map a failed result onto the same pair of error tokens.
1635 if (lexerFlags
& LexerFlagsDontBuildStrings
) {
1636 StringParseResult result
= parseString
<false>(tokenData
, strictMode
);
1637 if (UNLIKELY(result
!= StringParsedSuccessfully
)) {
1638 token
= result
== StringUnterminated
? UNTERMINATED_STRING_LITERAL_ERRORTOK
: INVALID_STRING_LITERAL_ERRORTOK
;
1642 StringParseResult result
= parseString
<true>(tokenData
, strictMode
);
1643 if (UNLIKELY(result
!= StringParsedSuccessfully
)) {
1644 token
= result
== StringUnterminated
? UNTERMINATED_STRING_LITERAL_ERRORTOK
: INVALID_STRING_LITERAL_ERRORTOK
;
1651 case CharacterIdentifierStart
:
1652 ASSERT(isIdentStart(m_current
));
1653 // Fall through into CharacterBackSlash.
1654 case CharacterBackSlash
:
// NOTE(review): the flag name is spelled "Lexex..." here; it is declared
// outside this listing, so presumably the declaration carries the same
// spelling -- confirm before renaming.
1655 if (lexerFlags
& LexexFlagsDontBuildKeywords
)
1656 token
= parseIdentifier
<false>(tokenData
, lexerFlags
, strictMode
);
1658 token
= parseIdentifier
<true>(tokenData
, lexerFlags
, strictMode
);
1660 case CharacterLineTerminator
:
1661 ASSERT(isLineTerminator(m_current
));
// Consume the terminator and mark the start of a new line.
1662 shiftLineTerminator();
1663 m_atLineStart
= true;
1664 m_terminator
= true;
1665 m_lineStart
= m_code
;
1667 case CharacterInvalid
:
1668 m_lexErrorMessage
= invalidCharacterMessage();
// NOTE(review): the assignment after RELEASE_ASSERT_NOT_REACHED only matters
// if that macro can return -- TODO confirm.
1672 RELEASE_ASSERT_NOT_REACHED();
1673 m_lexErrorMessage
= "Internal Error";
1678 m_atLineStart
= false;
1681 inSingleLineComment
:
// Single-line comment: loop until a line terminator is reached (the loop body,
// including any end-of-input handling, is not visible in this listing).
1682 while (!isLineTerminator(m_current
)) {
1687 shiftLineTerminator();
1688 m_atLineStart
= true;
1689 m_terminator
= true;
1690 m_lineStart
= m_code
;
1691 if (!lastTokenWasRestrKeyword())
1695 // Fall through into returnToken.
// Exit path that fills in the end of the token location and remembers the
// token in m_lastToken.
1698 tokenLocation
->line
= m_lineNumber
;
1699 tokenLocation
->endOffset
= currentOffset();
1700 tokenLocation
->lineStartOffset
= currentLineStartOffset();
1701 ASSERT(tokenLocation
->endOffset
>= tokenLocation
->lineStartOffset
);
1702 m_lastToken
= token
;
// Second exit path: same location bookkeeping, no m_lastToken update in the
// visible lines, and the token is required to carry ErrorTokenFlag.
1707 tokenLocation
->line
= m_lineNumber
;
1708 tokenLocation
->endOffset
= currentOffset();
1709 tokenLocation
->lineStartOffset
= currentLineStartOffset();
1710 ASSERT(tokenLocation
->endOffset
>= tokenLocation
->lineStartOffset
);
1711 RELEASE_ASSERT(token
& ErrorTokenFlag
);
// orCharacter<T>(accumulator, character): helper used by scanRegExp to fold
// every recorded character into a single UChar accumulator.
//
// The LChar version has an empty body, so the accumulator is left untouched.
// The UChar version ORs the character into the accumulator. scanRegExp passes
// the accumulated value to makeRightSizedIdentifier (presumably so that it can
// tell whether any character needs more than 8 bits -- TODO confirm).
//
// NOTE(review): lines 1717-1718 and 1720-1721 are missing from this listing;
// presumably they hold the "template <>" headers of the two specializations.
1715 template <typename T
>
1716 static inline void orCharacter(UChar
&, UChar
);
// LChar: nothing to accumulate.
1719 inline void orCharacter
<LChar
>(UChar
&, UChar
) { }
// UChar: accumulate the bits of every character seen.
1722 inline void orCharacter
<UChar
>(UChar
& orAccumulator
, UChar character
)
1724 orAccumulator
|= character
;
// Scans a regular expression literal and produces identifiers for its pattern
// and its flags.
//
//   pattern       - set from the characters collected in m_buffer16 up to the
//                   terminating '/'.
//   flags         - set from the identifier-part characters that follow.
//   patternPrefix - when non-zero, recorded as the first pattern character; it
//                   must not be a line terminator, '/' or '['.
//
// A '/' only ends the pattern when it is neither escaped (lastWasEscape) nor
// inside a character class (inBrackets). A line terminator or end of input
// inside the pattern empties m_buffer16 (presumably followed by returning
// false -- the return statements are not visible in this listing).
//
// NOTE(review): the loop header, the declaration of prev, the switch that
// maintains inBrackets and every return statement fall on lines missing from
// this listing.
1727 template <typename T
>
1728 bool Lexer
<T
>::scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
)
1730 ASSERT(m_buffer16
.isEmpty());
1732 bool lastWasEscape
= false;
1733 bool inBrackets
= false;
// OR of all recorded characters, handed to makeRightSizedIdentifier below.
1734 UChar charactersOredTogether
= 0;
1736 if (patternPrefix
) {
1737 ASSERT(!isLineTerminator(patternPrefix
));
1738 ASSERT(patternPrefix
!= '/');
1739 ASSERT(patternPrefix
!= '[');
1740 record16(patternPrefix
);
// An unterminated pattern discards whatever was collected so far.
1744 if (isLineTerminator(m_current
) || atEnd()) {
1745 m_buffer16
.resize(0);
// Unescaped '/' outside a character class ends the pattern.
1753 if (prev
== '/' && !lastWasEscape
&& !inBrackets
)
1757 orCharacter
<T
>(charactersOredTogether
, prev
);
// The character after an escape is taken literally; just clear the flag.
1759 if (lastWasEscape
) {
1760 lastWasEscape
= false;
1772 lastWasEscape
= true;
1777 pattern
= makeRightSizedIdentifier(m_buffer16
.data(), m_buffer16
.size(), charactersOredTogether
);
// Reuse the buffer and the accumulator for the flags.
1779 m_buffer16
.resize(0);
1780 charactersOredTogether
= 0;
1782 while (isIdentPart(m_current
)) {
1783 record16(m_current
);
1784 orCharacter
<T
>(charactersOredTogether
, m_current
);
1788 flags
= makeRightSizedIdentifier(m_buffer16
.data(), m_buffer16
.size(), charactersOredTogether
);
// Leave the scratch buffer empty for the next token.
1789 m_buffer16
.resize(0);
// Skips over a regular expression literal without collecting its characters.
//
// The visible checks mirror scanRegExp: a line terminator or end of input
// inside the pattern is a failure condition, an unescaped '/' outside a
// character class ends the pattern, and trailing identifier-part characters
// (the flags) are then skipped.
//
// NOTE(review): the loop header, the declaration of prev, the switch that
// maintains inBrackets, the body of the flags loop and every return statement
// fall on lines missing from this listing.
1794 template <typename T
>
1795 bool Lexer
<T
>::skipRegExp()
1797 bool lastWasEscape
= false;
1798 bool inBrackets
= false;
1801 if (isLineTerminator(m_current
) || atEnd())
// Unescaped '/' outside a character class ends the pattern.
1808 if (prev
== '/' && !lastWasEscape
&& !inBrackets
)
// The character after an escape is taken literally; just clear the flag.
1811 if (lastWasEscape
) {
1812 lastWasEscape
= false;
1824 lastWasEscape
= true;
// Flags following the closing slash.
1829 while (isIdentPart(m_current
))
// Resets the lexer's scratch state: both character buffers are swapped with
// freshly constructed empty vectors and m_isReparsing is cleared.
//
// Swapping with a new local vector (rather than calling resize(0) as the
// token paths do) presumably also gives up the storage the buffers had grown
// to -- TODO confirm against Vector::swap.
//
// NOTE(review): line 1838 is missing from this listing, so any other state
// cleared there is not visible here.
1835 template <typename T
>
1836 void Lexer
<T
>::clear()
1840 Vector
<LChar
> newBuffer8
;
1841 m_buffer8
.swap(newBuffer8
);
1843 Vector
<UChar
> newBuffer16
;
1844 m_buffer16
.swap(newBuffer16
);
1846 m_isReparsing
= false;
// Builds a SourceCode object covering a brace-delimited range of the current
// source provider.
//
//   openBrace   - index of the '{' character in the provider source (asserted).
//   closeBrace  - index of the matching '}' character (asserted).
//   firstLine   - passed through to SourceCode.
//   startColumn - passed through to SourceCode.
//
// closeBrace + 1 is one past the closing brace, so the brace itself is
// included (presumably SourceCode takes an exclusive end offset -- TODO
// confirm).
1849 template <typename T
>
1850 SourceCode Lexer
<T
>::sourceCode(int openBrace
, int closeBrace
, int firstLine
, unsigned startColumn
)
1852 ASSERT(m_source
->provider()->source()[openBrace
] == '{');
1853 ASSERT(m_source
->provider()->source()[closeBrace
] == '}');
1854 return SourceCode(m_source
->provider(), openBrace
, closeBrace
+ 1, firstLine
, startColumn
);
// Explicit instantiations of Lexer<T>, one for LChar and one for UChar.
1857 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
1858 template class Lexer
<LChar
>;
1859 template class Lexer
<UChar
>;