2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
28 #include "JSFunctionInlines.h"
30 #include "BuiltinNames.h"
31 #include "JSGlobalObjectFunctions.h"
32 #include "Identifier.h"
35 #include "JSCInlines.h"
40 #include <wtf/Assertions.h>
42 #include "KeywordLookup.h"
43 #include "Lexer.lut.h"
// Keywords constructor (only part of it is present in this listing).
// The visible member initializer binds m_keywordTable to JSC::mainTable.
// NOTE(review): mainTable presumably comes from the generated Lexer.lut.h included above — confirm.
48 Keywords::Keywords(VM
& vm
)
50 , m_keywordTable(JSC::mainTable
)
55 // Types for the main switch
57 // The first three types are fixed, and also used for identifying
58 // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
59 CharacterIdentifierStart
,
64 CharacterLineTerminator
,
65 CharacterExclamationMark
,
69 CharacterCloseBracket
,
93 // Other types (only one so far)
95 CharacterPrivateIdentifierStart
// Character type of every Latin-1 code point (0-255); the index is the code point itself.
// isIdentStart(LChar) compares an entry against CharacterIdentifierStart and
// isIdentPart(LChar) tests "entry <= CharacterNumber", so both rely on the
// ordering of the character type values as well as on the entries below.
99 static const unsigned short typesOfLatin1Characters
[256] = {
100 /* 0 - Null */ CharacterInvalid
,
101 /* 1 - Start of Heading */ CharacterInvalid
,
102 /* 2 - Start of Text */ CharacterInvalid
,
103 /* 3 - End of Text */ CharacterInvalid
,
104 /* 4 - End of Transm. */ CharacterInvalid
,
105 /* 5 - Enquiry */ CharacterInvalid
,
106 /* 6 - Acknowledgment */ CharacterInvalid
,
107 /* 7 - Bell */ CharacterInvalid
,
108 /* 8 - Back Space */ CharacterInvalid
,
109 /* 9 - Horizontal Tab */ CharacterWhiteSpace
,
110 /* 10 - Line Feed */ CharacterLineTerminator
,
111 /* 11 - Vertical Tab */ CharacterWhiteSpace
,
112 /* 12 - Form Feed */ CharacterWhiteSpace
,
113 /* 13 - Carriage Return */ CharacterLineTerminator
,
114 /* 14 - Shift Out */ CharacterInvalid
,
115 /* 15 - Shift In */ CharacterInvalid
,
116 /* 16 - Data Line Escape */ CharacterInvalid
,
117 /* 17 - Device Control 1 */ CharacterInvalid
,
118 /* 18 - Device Control 2 */ CharacterInvalid
,
119 /* 19 - Device Control 3 */ CharacterInvalid
,
120 /* 20 - Device Control 4 */ CharacterInvalid
,
121 /* 21 - Negative Ack. */ CharacterInvalid
,
122 /* 22 - Synchronous Idle */ CharacterInvalid
,
123 /* 23 - End of Transmit */ CharacterInvalid
,
124 /* 24 - Cancel */ CharacterInvalid
,
125 /* 25 - End of Medium */ CharacterInvalid
,
126 /* 26 - Substitute */ CharacterInvalid
,
127 /* 27 - Escape */ CharacterInvalid
,
128 /* 28 - File Separator */ CharacterInvalid
,
129 /* 29 - Group Separator */ CharacterInvalid
,
130 /* 30 - Record Separator */ CharacterInvalid
,
131 /* 31 - Unit Separator */ CharacterInvalid
,
132 /* 32 - Space */ CharacterWhiteSpace
,
133 /* 33 - ! */ CharacterExclamationMark
,
134 /* 34 - " */ CharacterQuote
,
135 /* 35 - # */ CharacterInvalid
,
136 /* 36 - $ */ CharacterIdentifierStart
,
137 /* 37 - % */ CharacterModulo
,
138 /* 38 - & */ CharacterAnd
,
139 /* 39 - ' */ CharacterQuote
,
140 /* 40 - ( */ CharacterOpenParen
,
141 /* 41 - ) */ CharacterCloseParen
,
142 /* 42 - * */ CharacterMultiply
,
143 /* 43 - + */ CharacterAdd
,
144 /* 44 - , */ CharacterComma
,
145 /* 45 - - */ CharacterSub
,
146 /* 46 - . */ CharacterDot
,
147 /* 47 - / */ CharacterSlash
,
148 /* 48 - 0 */ CharacterZero
,
149 /* 49 - 1 */ CharacterNumber
,
150 /* 50 - 2 */ CharacterNumber
,
151 /* 51 - 3 */ CharacterNumber
,
152 /* 52 - 4 */ CharacterNumber
,
153 /* 53 - 5 */ CharacterNumber
,
154 /* 54 - 6 */ CharacterNumber
,
155 /* 55 - 7 */ CharacterNumber
,
156 /* 56 - 8 */ CharacterNumber
,
157 /* 57 - 9 */ CharacterNumber
,
158 /* 58 - : */ CharacterColon
,
159 /* 59 - ; */ CharacterSemicolon
,
160 /* 60 - < */ CharacterLess
,
161 /* 61 - = */ CharacterEqual
,
162 /* 62 - > */ CharacterGreater
,
163 /* 63 - ? */ CharacterQuestion
,
164 /* 64 - @ */ CharacterPrivateIdentifierStart
,
165 /* 65 - A */ CharacterIdentifierStart
,
166 /* 66 - B */ CharacterIdentifierStart
,
167 /* 67 - C */ CharacterIdentifierStart
,
168 /* 68 - D */ CharacterIdentifierStart
,
169 /* 69 - E */ CharacterIdentifierStart
,
170 /* 70 - F */ CharacterIdentifierStart
,
171 /* 71 - G */ CharacterIdentifierStart
,
172 /* 72 - H */ CharacterIdentifierStart
,
173 /* 73 - I */ CharacterIdentifierStart
,
174 /* 74 - J */ CharacterIdentifierStart
,
175 /* 75 - K */ CharacterIdentifierStart
,
176 /* 76 - L */ CharacterIdentifierStart
,
177 /* 77 - M */ CharacterIdentifierStart
,
178 /* 78 - N */ CharacterIdentifierStart
,
179 /* 79 - O */ CharacterIdentifierStart
,
180 /* 80 - P */ CharacterIdentifierStart
,
181 /* 81 - Q */ CharacterIdentifierStart
,
182 /* 82 - R */ CharacterIdentifierStart
,
183 /* 83 - S */ CharacterIdentifierStart
,
184 /* 84 - T */ CharacterIdentifierStart
,
185 /* 85 - U */ CharacterIdentifierStart
,
186 /* 86 - V */ CharacterIdentifierStart
,
187 /* 87 - W */ CharacterIdentifierStart
,
188 /* 88 - X */ CharacterIdentifierStart
,
189 /* 89 - Y */ CharacterIdentifierStart
,
190 /* 90 - Z */ CharacterIdentifierStart
,
191 /* 91 - [ */ CharacterOpenBracket
,
192 /* 92 - \ */ CharacterBackSlash
,
193 /* 93 - ] */ CharacterCloseBracket
,
194 /* 94 - ^ */ CharacterXor
,
195 /* 95 - _ */ CharacterIdentifierStart
,
196 /* 96 - ` */ CharacterInvalid
,
197 /* 97 - a */ CharacterIdentifierStart
,
198 /* 98 - b */ CharacterIdentifierStart
,
199 /* 99 - c */ CharacterIdentifierStart
,
200 /* 100 - d */ CharacterIdentifierStart
,
201 /* 101 - e */ CharacterIdentifierStart
,
202 /* 102 - f */ CharacterIdentifierStart
,
203 /* 103 - g */ CharacterIdentifierStart
,
204 /* 104 - h */ CharacterIdentifierStart
,
205 /* 105 - i */ CharacterIdentifierStart
,
206 /* 106 - j */ CharacterIdentifierStart
,
207 /* 107 - k */ CharacterIdentifierStart
,
208 /* 108 - l */ CharacterIdentifierStart
,
209 /* 109 - m */ CharacterIdentifierStart
,
210 /* 110 - n */ CharacterIdentifierStart
,
211 /* 111 - o */ CharacterIdentifierStart
,
212 /* 112 - p */ CharacterIdentifierStart
,
213 /* 113 - q */ CharacterIdentifierStart
,
214 /* 114 - r */ CharacterIdentifierStart
,
215 /* 115 - s */ CharacterIdentifierStart
,
216 /* 116 - t */ CharacterIdentifierStart
,
217 /* 117 - u */ CharacterIdentifierStart
,
218 /* 118 - v */ CharacterIdentifierStart
,
219 /* 119 - w */ CharacterIdentifierStart
,
220 /* 120 - x */ CharacterIdentifierStart
,
221 /* 121 - y */ CharacterIdentifierStart
,
222 /* 122 - z */ CharacterIdentifierStart
,
223 /* 123 - { */ CharacterOpenBrace
,
224 /* 124 - | */ CharacterOr
,
225 /* 125 - } */ CharacterCloseBrace
,
226 /* 126 - ~ */ CharacterTilde
,
227 /* 127 - Delete */ CharacterInvalid
,
228 /* 128 - Cc category */ CharacterInvalid
,
229 /* 129 - Cc category */ CharacterInvalid
,
230 /* 130 - Cc category */ CharacterInvalid
,
231 /* 131 - Cc category */ CharacterInvalid
,
232 /* 132 - Cc category */ CharacterInvalid
,
233 /* 133 - Cc category */ CharacterInvalid
,
234 /* 134 - Cc category */ CharacterInvalid
,
235 /* 135 - Cc category */ CharacterInvalid
,
236 /* 136 - Cc category */ CharacterInvalid
,
237 /* 137 - Cc category */ CharacterInvalid
,
238 /* 138 - Cc category */ CharacterInvalid
,
239 /* 139 - Cc category */ CharacterInvalid
,
240 /* 140 - Cc category */ CharacterInvalid
,
241 /* 141 - Cc category */ CharacterInvalid
,
242 /* 142 - Cc category */ CharacterInvalid
,
243 /* 143 - Cc category */ CharacterInvalid
,
244 /* 144 - Cc category */ CharacterInvalid
,
245 /* 145 - Cc category */ CharacterInvalid
,
246 /* 146 - Cc category */ CharacterInvalid
,
247 /* 147 - Cc category */ CharacterInvalid
,
248 /* 148 - Cc category */ CharacterInvalid
,
249 /* 149 - Cc category */ CharacterInvalid
,
250 /* 150 - Cc category */ CharacterInvalid
,
251 /* 151 - Cc category */ CharacterInvalid
,
252 /* 152 - Cc category */ CharacterInvalid
,
253 /* 153 - Cc category */ CharacterInvalid
,
254 /* 154 - Cc category */ CharacterInvalid
,
255 /* 155 - Cc category */ CharacterInvalid
,
256 /* 156 - Cc category */ CharacterInvalid
,
257 /* 157 - Cc category */ CharacterInvalid
,
258 /* 158 - Cc category */ CharacterInvalid
,
259 /* 159 - Cc category */ CharacterInvalid
,
260 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace
,
261 /* 161 - Po category */ CharacterInvalid
,
262 /* 162 - Sc category */ CharacterInvalid
,
263 /* 163 - Sc category */ CharacterInvalid
,
264 /* 164 - Sc category */ CharacterInvalid
,
265 /* 165 - Sc category */ CharacterInvalid
,
266 /* 166 - So category */ CharacterInvalid
,
267 /* 167 - So category */ CharacterInvalid
,
268 /* 168 - Sk category */ CharacterInvalid
,
269 /* 169 - So category */ CharacterInvalid
,
270 /* 170 - Ll category */ CharacterIdentifierStart
,
271 /* 171 - Pi category */ CharacterInvalid
,
272 /* 172 - Sm category */ CharacterInvalid
,
273 /* 173 - Cf category */ CharacterInvalid
,
274 /* 174 - So category */ CharacterInvalid
,
275 /* 175 - Sk category */ CharacterInvalid
,
276 /* 176 - So category */ CharacterInvalid
,
277 /* 177 - Sm category */ CharacterInvalid
,
278 /* 178 - No category */ CharacterInvalid
,
279 /* 179 - No category */ CharacterInvalid
,
280 /* 180 - Sk category */ CharacterInvalid
,
281 /* 181 - Ll category */ CharacterIdentifierStart
,
282 /* 182 - So category */ CharacterInvalid
,
283 /* 183 - Po category */ CharacterInvalid
,
284 /* 184 - Sk category */ CharacterInvalid
,
285 /* 185 - No category */ CharacterInvalid
,
286 /* 186 - Ll category */ CharacterIdentifierStart
,
287 /* 187 - Pf category */ CharacterInvalid
,
288 /* 188 - No category */ CharacterInvalid
,
289 /* 189 - No category */ CharacterInvalid
,
290 /* 190 - No category */ CharacterInvalid
,
291 /* 191 - Po category */ CharacterInvalid
,
292 /* 192 - Lu category */ CharacterIdentifierStart
,
293 /* 193 - Lu category */ CharacterIdentifierStart
,
294 /* 194 - Lu category */ CharacterIdentifierStart
,
295 /* 195 - Lu category */ CharacterIdentifierStart
,
296 /* 196 - Lu category */ CharacterIdentifierStart
,
297 /* 197 - Lu category */ CharacterIdentifierStart
,
298 /* 198 - Lu category */ CharacterIdentifierStart
,
299 /* 199 - Lu category */ CharacterIdentifierStart
,
300 /* 200 - Lu category */ CharacterIdentifierStart
,
301 /* 201 - Lu category */ CharacterIdentifierStart
,
302 /* 202 - Lu category */ CharacterIdentifierStart
,
303 /* 203 - Lu category */ CharacterIdentifierStart
,
304 /* 204 - Lu category */ CharacterIdentifierStart
,
305 /* 205 - Lu category */ CharacterIdentifierStart
,
306 /* 206 - Lu category */ CharacterIdentifierStart
,
307 /* 207 - Lu category */ CharacterIdentifierStart
,
308 /* 208 - Lu category */ CharacterIdentifierStart
,
309 /* 209 - Lu category */ CharacterIdentifierStart
,
310 /* 210 - Lu category */ CharacterIdentifierStart
,
311 /* 211 - Lu category */ CharacterIdentifierStart
,
312 /* 212 - Lu category */ CharacterIdentifierStart
,
313 /* 213 - Lu category */ CharacterIdentifierStart
,
314 /* 214 - Lu category */ CharacterIdentifierStart
,
315 /* 215 - Sm category */ CharacterInvalid
,
316 /* 216 - Lu category */ CharacterIdentifierStart
,
317 /* 217 - Lu category */ CharacterIdentifierStart
,
318 /* 218 - Lu category */ CharacterIdentifierStart
,
319 /* 219 - Lu category */ CharacterIdentifierStart
,
320 /* 220 - Lu category */ CharacterIdentifierStart
,
321 /* 221 - Lu category */ CharacterIdentifierStart
,
322 /* 222 - Lu category */ CharacterIdentifierStart
,
323 /* 223 - Ll category */ CharacterIdentifierStart
,
324 /* 224 - Ll category */ CharacterIdentifierStart
,
325 /* 225 - Ll category */ CharacterIdentifierStart
,
326 /* 226 - Ll category */ CharacterIdentifierStart
,
327 /* 227 - Ll category */ CharacterIdentifierStart
,
328 /* 228 - Ll category */ CharacterIdentifierStart
,
329 /* 229 - Ll category */ CharacterIdentifierStart
,
330 /* 230 - Ll category */ CharacterIdentifierStart
,
331 /* 231 - Ll category */ CharacterIdentifierStart
,
332 /* 232 - Ll category */ CharacterIdentifierStart
,
333 /* 233 - Ll category */ CharacterIdentifierStart
,
334 /* 234 - Ll category */ CharacterIdentifierStart
,
335 /* 235 - Ll category */ CharacterIdentifierStart
,
336 /* 236 - Ll category */ CharacterIdentifierStart
,
337 /* 237 - Ll category */ CharacterIdentifierStart
,
338 /* 238 - Ll category */ CharacterIdentifierStart
,
339 /* 239 - Ll category */ CharacterIdentifierStart
,
340 /* 240 - Ll category */ CharacterIdentifierStart
,
341 /* 241 - Ll category */ CharacterIdentifierStart
,
342 /* 242 - Ll category */ CharacterIdentifierStart
,
343 /* 243 - Ll category */ CharacterIdentifierStart
,
344 /* 244 - Ll category */ CharacterIdentifierStart
,
345 /* 245 - Ll category */ CharacterIdentifierStart
,
346 /* 246 - Ll category */ CharacterIdentifierStart
,
347 /* 247 - Sm category */ CharacterInvalid
,
348 /* 248 - Ll category */ CharacterIdentifierStart
,
349 /* 249 - Ll category */ CharacterIdentifierStart
,
350 /* 250 - Ll category */ CharacterIdentifierStart
,
351 /* 251 - Ll category */ CharacterIdentifierStart
,
352 /* 252 - Ll category */ CharacterIdentifierStart
,
353 /* 253 - Ll category */ CharacterIdentifierStart
,
354 /* 254 - Ll category */ CharacterIdentifierStart
,
355 /* 255 - Ll category */ CharacterIdentifierStart
358 // This table provides the character that results from \X, where X is an ASCII character and
359 // the table index is X's character code. A table value of 0 means that more processing needs to be done.
// singleEscape() indexes this table directly with the character that follows the backslash.
360 static const LChar singleCharacterEscapeValuesForASCII
[128] = {
362 /* 1 - Start of Heading */ 0,
363 /* 2 - Start of Text */ 0,
364 /* 3 - End of Text */ 0,
365 /* 4 - End of Transm. */ 0,
367 /* 6 - Acknowledgment */ 0,
369 /* 8 - Back Space */ 0,
370 /* 9 - Horizontal Tab */ 0,
371 /* 10 - Line Feed */ 0,
372 /* 11 - Vertical Tab */ 0,
373 /* 12 - Form Feed */ 0,
374 /* 13 - Carriage Return */ 0,
375 /* 14 - Shift Out */ 0,
376 /* 15 - Shift In */ 0,
377 /* 16 - Data Line Escape */ 0,
378 /* 17 - Device Control 1 */ 0,
379 /* 18 - Device Control 2 */ 0,
380 /* 19 - Device Control 3 */ 0,
381 /* 20 - Device Control 4 */ 0,
382 /* 21 - Negative Ack. */ 0,
383 /* 22 - Synchronous Idle */ 0,
384 /* 23 - End of Transmit */ 0,
386 /* 25 - End of Medium */ 0,
387 /* 26 - Substitute */ 0,
389 /* 28 - File Separator */ 0,
390 /* 29 - Group Separator */ 0,
391 /* 30 - Record Separator */ 0,
392 /* 31 - Unit Separator */ 0,
393 /* 32 - Space */ ' ',
// Lexer constructor (only part of the initializer list is present in this listing).
// Visible initializers: m_isReparsing starts out false, and m_parsingBuiltinFunction
// is true exactly when the requested strictness is JSParseBuiltin.
491 template <typename T
>
492 Lexer
<T
>::Lexer(VM
* vm
, JSParserStrictness strictness
)
493 : m_isReparsing(false)
495 , m_parsingBuiltinFunction(strictness
== JSParseBuiltin
)
499 template <typename T
>
// Returns the error message used when the lexer meets a character it cannot accept.
// A handful of characters get a fixed, readable message; the final return reports
// any other character as a \u escape built from m_current.
504 template <typename T
>
505 String Lexer
<T
>::invalidCharacterMessage() const
509 return "Invalid character: '\\0'";
511 return "Invalid character: '\\n'";
513 return "Invalid character: '\\v'";
515 return "Invalid character: '\\r'";
517 return "Invalid character: '#'";
519 return "Invalid character: '@'";
521 return "Invalid character: '`'";
// \u escapes are written with four hexadecimal digits, so the code unit is formatted
// with %04x. The previous %04u printed it in decimal (U+2028 came out as '\u8232').
523 return String::format("Invalid character '\\u%04x'", static_cast<unsigned>(m_current
)).impl();
// currentSourcePtr(): the only statement visible in this listing is the sanity
// check that the read pointer m_code has not run past m_codeEnd.
// NOTE(review): the return statement is not shown here; presumably it returns m_code — confirm.
527 template <typename T
>
528 ALWAYS_INLINE
const T
* Lexer
<T
>::currentSourcePtr() const
530 ASSERT(m_code
<= m_codeEnd
);
// Points the lexer at `source` and resets the per-source scanning state.
//   source - supplies the text (via its provider), the first line number and the
//            start/end offsets of the range to scan.
//   arena  - its identifierArena() becomes the lexer's identifier arena.
// After this call m_code sits at m_codeStart + startOffset and m_codeEnd at
// m_codeStart + endOffset; the final assertion checks currentOffset() agrees
// with source.startOffset().
534 template <typename T
>
535 void Lexer
<T
>::setCode(const SourceCode
& source
, ParserArena
* arena
)
537 m_arena
= &arena
->identifierArena();
539 m_lineNumber
= source
.firstLine();
542 const String
& sourceString
= source
.provider()->source();
// A null source string leaves the code start untouched by this statement.
544 if (!sourceString
.isNull())
545 setCodeStart(sourceString
.impl());
550 m_sourceOffset
= source
.startOffset();
551 m_codeStartPlusOffset
= m_codeStart
+ source
.startOffset();
552 m_code
= m_codeStartPlusOffset
;
553 m_codeEnd
= m_codeStart
+ source
.endOffset();
555 m_atLineStart
= true;
556 m_lineStart
= m_code
;
// Any error message left over from a previous source is discarded.
557 m_lexErrorMessage
= String();
// Pre-size both scratch buffers; the 16-bit one reserves half the length of the range to scan.
559 m_buffer8
.reserveInitialCapacity(initialReadBufferCapacity
);
560 m_buffer16
.reserveInitialCapacity((m_codeEnd
- m_code
) / 2);
562 if (LIKELY(m_code
< m_codeEnd
))
566 ASSERT(currentOffset() == source
.startOffset());
// Moves the read pointer m_code forward by the compile-time constant shiftAmount.
// The assertion checks that the resulting offset is not before the start of the current line.
569 template <typename T
>
570 template <int shiftAmount
> ALWAYS_INLINE
void Lexer
<T
>::internalShift()
572 m_code
+= shiftAmount
;
573 ASSERT(currentOffset() >= currentLineStartOffset());
// shift(): only the end-of-input guard (m_code < m_codeEnd) is present in this listing;
// the statements around it are not shown.
577 template <typename T
>
578 ALWAYS_INLINE
void Lexer
<T
>::shift()
580 // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
583 if (LIKELY(m_code
< m_codeEnd
))
// Returns true when the lexer has consumed all input: m_current is 0 and m_code equals m_codeEnd.
// The assertion states the invariant relied on here: a non-zero m_current implies
// m_code is still before m_codeEnd.
587 template <typename T
>
588 ALWAYS_INLINE
bool Lexer
<T
>::atEnd() const
590 ASSERT(!m_current
|| m_code
< m_codeEnd
);
// m_current is tested first, so the pointer comparison only runs when the current character is 0.
591 return UNLIKELY(UNLIKELY(!m_current
) && m_code
== m_codeEnd
);
// Returns the character `offset` positions past m_code without moving the read pointer,
// or 0 when that position is at or beyond m_codeEnd.
// `offset` must be between 1 and 4 inclusive (asserted).
594 template <typename T
>
595 ALWAYS_INLINE T Lexer
<T
>::peek(int offset
) const
597 ASSERT(offset
> 0 && offset
< 5);
598 const T
* code
= m_code
+ offset
;
599 return (code
< m_codeEnd
) ? *code
: 0;
// Reads the four hex digits of a \uXXXX escape: m_current plus char1, char2 and char3
// (their definitions are not part of this listing).
// Returns:
//   - IncompleteHex if any of the four is not a hex digit and m_code + 4 reaches or passes m_codeEnd,
//   - InvalidHex if any of the four is not a hex digit and enough input remained,
//   - otherwise a UnicodeHexValue wrapping convertUnicode() of the four digits.
602 template <typename T
>
603 typename Lexer
<T
>::UnicodeHexValue Lexer
<T
>::parseFourDigitUnicodeHex()
609 if (UNLIKELY(!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(char1
) || !isASCIIHexDigit(char2
) || !isASCIIHexDigit(char3
)))
610 return UnicodeHexValue((m_code
+ 4) >= m_codeEnd
? UnicodeHexValue::IncompleteHex
: UnicodeHexValue::InvalidHex
);
612 int result
= convertUnicode(m_current
, char1
, char2
, char3
);
617 return UnicodeHexValue(result
);
// Handles a line terminator at the current position.
// Precondition (asserted): m_current is a line terminator.
// The position before the newline is saved in m_positionBeforeLastNewline.
620 template <typename T
>
621 void Lexer
<T
>::shiftLineTerminator()
623 ASSERT(isLineTerminator(m_current
));
625 m_positionBeforeLastNewline
= currentPosition();
629 // Allow both CRLF and LFCR.
// Comparing the sum of the two characters matches the pair in either order with a single test.
630 if (prev
+ m_current
== '\n' + '\r')
// Returns true when the most recent token (m_lastToken) was CONTINUE, BREAK, RETURN or THROW.
636 template <typename T
>
637 ALWAYS_INLINE
bool Lexer
<T
>::lastTokenWasRestrKeyword() const
639 return m_lastToken
== CONTINUE
|| m_lastToken
== BREAK
|| m_lastToken
== RETURN
|| m_lastToken
== THROW
;
// Identifier-start test for characters outside Latin-1 (see isIdentStart(UChar), which
// calls this only when isLatin1(c) is false). The result is non-zero when the
// general-category mask of c intersects U_GC_L_MASK.
642 static NEVER_INLINE
bool isNonLatin1IdentStart(UChar c
)
644 return U_GET_GC_MASK(c
) & U_GC_L_MASK
;
// isLatin1 overload for LChar. The parameter is unnamed, so the result cannot depend on its value.
// (The body is not part of this listing.)
647 static ALWAYS_INLINE
bool isLatin1(LChar
)
// isLatin1 overload for UChar; isIdentStart(UChar) and isIdentPart(UChar) use it to decide
// whether the Latin-1 table lookup applies. (The body is not part of this listing.)
652 static ALWAYS_INLINE
bool isLatin1(UChar c
)
// Returns true when the Latin-1 character c may start an identifier, i.e. its entry
// in typesOfLatin1Characters is CharacterIdentifierStart.
657 static inline bool isIdentStart(LChar c
)
659 return typesOfLatin1Characters
[c
] == CharacterIdentifierStart
;
// 16-bit overload: Latin-1 characters go through the table-based LChar overload,
// everything else is classified by isNonLatin1IdentStart().
662 static inline bool isIdentStart(UChar c
)
664 return isLatin1(c
) ? isIdentStart(static_cast<LChar
>(c
)) : isNonLatin1IdentStart(c
);
// Identifier-part test for characters outside Latin-1 (see isIdentPart(UChar)).
// Accepts a character whose general-category mask intersects any of the L, Mn, Mc, Nd
// or Pc masks, and additionally the two code points 0x200C and 0x200D.
667 static NEVER_INLINE
bool isNonLatin1IdentPart(int c
)
669 return (U_GET_GC_MASK(c
) & (U_GC_L_MASK
| U_GC_MN_MASK
| U_GC_MC_MASK
| U_GC_ND_MASK
| U_GC_PC_MASK
)) || c
== 0x200C || c
== 0x200D;
// Returns true when the Latin-1 character c may appear inside an identifier.
672 static ALWAYS_INLINE
bool isIdentPart(LChar c
)
674 // Character types are divided into two groups depending on whether they can be part of an
675 // identifier or not. Those whose type value is less than or equal to CharacterNumber can be
676 // part of an identifier. (See the CharacterType definition for more details.)
677 return typesOfLatin1Characters
[c
] <= CharacterNumber
;
// 16-bit overload: Latin-1 characters go through the table-based LChar overload,
// everything else is classified by isNonLatin1IdentPart().
680 static ALWAYS_INLINE
bool isIdentPart(UChar c
)
682 return isLatin1(c
) ? isIdentPart(static_cast<LChar
>(c
)) : isNonLatin1IdentPart(c
);
// Looks up the value of the escape sequence \c in singleCharacterEscapeValuesForASCII.
// The assertion requires c to be a valid index into that table.
// NOTE(review): any handling of c outside the table is not visible in this listing.
685 static inline LChar
singleEscape(int c
)
688 ASSERT(static_cast<size_t>(c
) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII
));
689 return singleCharacterEscapeValuesForASCII
[c
];
// Appends c, narrowed to LChar, to the 8-bit scratch buffer m_buffer8.
694 template <typename T
>
695 inline void Lexer
<T
>::record8(int c
)
699 m_buffer8
.append(static_cast<LChar
>(c
));
// assertCharIsIn8BitRange: a generic template plus UChar and LChar overloads. append8()
// calls it on each character it processes. Only the signatures appear in this listing;
// the LChar overload leaves its parameter unnamed.
702 template <typename T
>
703 inline void assertCharIsIn8BitRange(T c
)
711 inline void assertCharIsIn8BitRange(UChar c
)
718 inline void assertCharIsIn8BitRange(LChar
)
// Appends `length` characters starting at p to the 8-bit scratch buffer m_buffer8.
// The buffer is grown once up front; rawBuffer then points at the first newly added slot.
// Each character handled by the loop is checked with assertCharIsIn8BitRange().
722 template <typename T
>
723 inline void Lexer
<T
>::append8(const T
* p
, size_t length
)
725 size_t currentSize
= m_buffer8
.size();
726 m_buffer8
.grow(currentSize
+ length
);
727 LChar
* rawBuffer
= m_buffer8
.data() + currentSize
;
729 for (size_t i
= 0; i
< length
; i
++) {
731 assertCharIsIn8BitRange(c
);
// Appends `length` 8-bit characters starting at p to the 16-bit scratch buffer m_buffer16.
// The buffer is grown once up front; rawBuffer then points at the first newly added slot.
736 template <typename T
>
737 inline void Lexer
<T
>::append16(const LChar
* p
, size_t length
)
739 size_t currentSize
= m_buffer16
.size();
740 m_buffer16
.grow(currentSize
+ length
);
741 UChar
* rawBuffer
= m_buffer16
.data() + currentSize
;
743 for (size_t i
= 0; i
< length
; i
++)
// Appends one character of the lexer's own character type to the 16-bit scratch buffer m_buffer16.
747 template <typename T
>
748 inline void Lexer
<T
>::record16(T c
)
750 m_buffer16
.append(c
);
// Appends an int-valued character to m_buffer16 after narrowing it to UChar.
// The assertion rejects values above USHRT_MAX, which would not survive the narrowing.
753 template <typename T
>
754 inline void Lexer
<T
>::record16(int c
)
757 ASSERT(c
<= static_cast<int>(USHRT_MAX
));
758 m_buffer16
.append(static_cast<UChar
>(c
));
// Decides whether `ident` may be used inside a builtin function. The visible checks
// compare it against the public names of call and apply, and against eval and Function.
// parseIdentifier() reports "The use of '...' is disallowed in builtin functions." when
// this returns false for an identifier that is not a private name.
// NOTE(review): the return statements are not part of this listing; presumably each
// match above returns false — confirm.
762 bool isSafeBuiltinIdentifier(VM
& vm
, const Identifier
* ident
)
766 /* Just block any use of suspicious identifiers. This is intended to
767 * be used as a safety net while implementing builtins.
769 if (*ident
== vm
.propertyNames
->builtinNames().callPublicName())
771 if (*ident
== vm
.propertyNames
->builtinNames().applyPublicName())
773 if (*ident
== vm
.propertyNames
->eval
)
775 if (*ident
== vm
.propertyNames
->Function
)
// Lexes an identifier or keyword from 8-bit (LChar) source.
//   tokenData  - receives the resulting Identifier pointer in tokenData->ident (0 when none is created).
//   lexerFlags - LexerFlagsIgnoreReservedWords disables keyword recognition.
//   strictMode - RESERVED_IF_STRICT keywords are returned as such only in strict mode; otherwise as IDENT.
// Several statements of this function are not part of this listing; the comments below
// describe only what the visible lines do.
782 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType Lexer
<LChar
>::parseIdentifier(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
)
784 const ptrdiff_t remaining
= m_codeEnd
- m_code
;
// With at least maxTokenLength characters left, parseKeyword() is tried first.
785 if ((remaining
>= maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
)) {
786 JSTokenType keyword
= parseKeyword
<shouldCreateIdentifier
>(tokenData
);
787 if (keyword
!= IDENT
) {
788 ASSERT((!shouldCreateIdentifier
) || tokenData
->ident
);
789 return keyword
== RESERVED_IF_STRICT
&& !strictMode
? IDENT
: keyword
;
// A leading '@' marks a private name, but only while lexing a builtin function.
793 bool isPrivateName
= m_current
== '@' && m_parsingBuiltinFunction
;
797 const LChar
* identifierStart
= currentSourcePtr();
798 unsigned identifierLineStart
= currentLineStartOffset();
800 while (isIdentPart(m_current
))
// A backslash means the identifier contains an escape: rewind to its start and use the slow path.
803 if (UNLIKELY(m_current
== '\\')) {
804 setOffsetFromSourcePtr(identifierStart
, identifierLineStart
);
805 return parseIdentifierSlowCase
<shouldCreateIdentifier
>(tokenData
, lexerFlags
, strictMode
);
808 const Identifier
* ident
= 0;
// When lexing a builtin the identifier is always materialized, because the safety check below needs it.
810 if (shouldCreateIdentifier
|| m_parsingBuiltinFunction
) {
811 int identifierLength
= currentSourcePtr() - identifierStart
;
812 ident
= makeIdentifier(identifierStart
, identifierLength
);
813 if (m_parsingBuiltinFunction
) {
814 if (!isSafeBuiltinIdentifier(*m_vm
, ident
) && !isPrivateName
) {
815 m_lexErrorMessage
= makeString("The use of '", ident
->string(), "' is disallowed in builtin functions.");
819 ident
= m_vm
->propertyNames
->getPrivateName(*ident
);
820 else if (*ident
== m_vm
->propertyNames
->undefinedKeyword
)
821 tokenData
->ident
= &m_vm
->propertyNames
->undefinedPrivateName
;
823 return INVALID_PRIVATE_NAME_ERRORTOK
;
825 tokenData
->ident
= ident
;
827 tokenData
->ident
= 0;
// Fewer than maxTokenLength characters remained, so parseKeyword() was skipped above;
// the identifier is looked up in the keyword table instead.
829 if (UNLIKELY((remaining
< maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
)) && !isPrivateName
) {
830 ASSERT(shouldCreateIdentifier
);
831 if (remaining
< maxTokenLength
) {
832 const HashTableValue
* entry
= m_vm
->keywords
->getKeyword(*ident
);
833 ASSERT((remaining
< maxTokenLength
) || !entry
);
836 JSTokenType token
= static_cast<JSTokenType
>(entry
->lexerValue());
837 return (token
!= RESERVED_IF_STRICT
) || strictMode
? token
: IDENT
;
// Lexes an identifier or keyword from 16-bit (UChar) source.
//   tokenData  - receives the resulting Identifier pointer in tokenData->ident (0 when none is created).
//   lexerFlags - LexerFlagsIgnoreReservedWords disables keyword recognition.
//   strictMode - RESERVED_IF_STRICT keywords are returned as such only in strict mode; otherwise as IDENT.
// While scanning, all characters are OR-ed into orAllChars so that an identifier made
// only of 8-bit characters can be detected afterwards (no bits above 0xff set).
// Several statements of this function are not part of this listing; the comments below
// describe only what the visible lines do.
846 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType Lexer
<UChar
>::parseIdentifier(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
)
848 const ptrdiff_t remaining
= m_codeEnd
- m_code
;
// With at least maxTokenLength characters left, parseKeyword() is tried first.
849 if ((remaining
>= maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
)) {
850 JSTokenType keyword
= parseKeyword
<shouldCreateIdentifier
>(tokenData
);
851 if (keyword
!= IDENT
) {
852 ASSERT((!shouldCreateIdentifier
) || tokenData
->ident
);
853 return keyword
== RESERVED_IF_STRICT
&& !strictMode
? IDENT
: keyword
;
// A leading '@' marks a private name, but only while lexing a builtin function.
857 bool isPrivateName
= m_current
== '@' && m_parsingBuiltinFunction
;
861 const UChar
* identifierStart
= currentSourcePtr();
862 int identifierLineStart
= currentLineStartOffset();
864 UChar orAllChars
= 0;
866 while (isIdentPart(m_current
)) {
867 orAllChars
|= m_current
;
// A backslash means the identifier contains an escape: rewind to its start and use the slow path.
871 if (UNLIKELY(m_current
== '\\')) {
872 ASSERT(!isPrivateName
);
873 setOffsetFromSourcePtr(identifierStart
, identifierLineStart
);
874 return parseIdentifierSlowCase
<shouldCreateIdentifier
>(tokenData
, lexerFlags
, strictMode
);
877 bool isAll8Bit
= false;
879 if (!(orAllChars
& ~0xff))
882 const Identifier
* ident
= 0;
// When lexing a builtin the identifier is always materialized, because the safety check below needs it.
884 if (shouldCreateIdentifier
|| m_parsingBuiltinFunction
) {
885 int identifierLength
= currentSourcePtr() - identifierStart
;
887 ident
= makeIdentifierLCharFromUChar(identifierStart
, identifierLength
);
889 ident
= makeIdentifier(identifierStart
, identifierLength
);
890 if (m_parsingBuiltinFunction
) {
891 if (!isSafeBuiltinIdentifier(*m_vm
, ident
) && !isPrivateName
) {
892 m_lexErrorMessage
= makeString("The use of '", ident
->string(), "' is disallowed in builtin functions.");
896 ident
= m_vm
->propertyNames
->getPrivateName(*ident
);
897 else if (*ident
== m_vm
->propertyNames
->undefinedKeyword
)
898 tokenData
->ident
= &m_vm
->propertyNames
->undefinedPrivateName
;
900 return INVALID_PRIVATE_NAME_ERRORTOK
;
902 tokenData
->ident
= ident
;
904 tokenData
->ident
= 0;
// Fewer than maxTokenLength characters remained, so parseKeyword() was skipped above;
// the identifier is looked up in the keyword table instead.
906 if (UNLIKELY((remaining
< maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
)) && !isPrivateName
) {
907 ASSERT(shouldCreateIdentifier
);
908 if (remaining
< maxTokenLength
) {
909 const HashTableValue
* entry
= m_vm
->keywords
->getKeyword(*ident
);
910 ASSERT((remaining
< maxTokenLength
) || !entry
);
913 JSTokenType token
= static_cast<JSTokenType
>(entry
->lexerValue());
914 return (token
!= RESERVED_IF_STRICT
) || strictMode
? token
: IDENT
;
// Slow path for identifiers that may contain \uXXXX escapes; both parseIdentifier()
// specializations fall back to it when they meet a backslash.
// Plain runs of identifier characters are copied into m_buffer16 around each escape.
// The decoded escape must be a valid identifier-start character when the buffer is
// still empty, and a valid identifier-part character otherwise.
// Error tokens returned by the visible lines:
//   - UNTERMINATED_/INVALID_IDENTIFIER_ESCAPE_ERRORTOK when the backslash is not followed by 'u',
//   - UNTERMINATED_/INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK for a bad \uXXXX sequence.
// m_buffer16 is emptied again before the function finishes.
922 template <typename T
>
923 template <bool shouldCreateIdentifier
> JSTokenType Lexer
<T
>::parseIdentifierSlowCase(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
)
925 const ptrdiff_t remaining
= m_codeEnd
- m_code
;
926 const T
* identifierStart
= currentSourcePtr();
927 bool bufferRequired
= false;
930 if (LIKELY(isIdentPart(m_current
))) {
934 if (LIKELY(m_current
!= '\\'))
937 // \uXXXX unicode characters.
938 bufferRequired
= true;
// Flush the plain characters seen since the previous escape (or since the start).
939 if (identifierStart
!= currentSourcePtr())
940 m_buffer16
.append(identifierStart
, currentSourcePtr() - identifierStart
);
942 if (UNLIKELY(m_current
!= 'u'))
943 return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK
: INVALID_IDENTIFIER_ESCAPE_ERRORTOK
;
945 UnicodeHexValue character
= parseFourDigitUnicodeHex();
946 if (UNLIKELY(!character
.isValid()))
947 return character
.valueType() == UnicodeHexValue::IncompleteHex
? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
: INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
;
948 UChar ucharacter
= static_cast<UChar
>(character
.value());
949 if (UNLIKELY(m_buffer16
.size() ? !isIdentPart(ucharacter
) : !isIdentStart(ucharacter
)))
950 return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
;
951 if (shouldCreateIdentifier
)
952 record16(ucharacter
);
953 identifierStart
= currentSourcePtr();
956 int identifierLength
;
957 const Identifier
* ident
= 0;
958 if (shouldCreateIdentifier
) {
// No escape was seen: the identifier can be built straight from the source text.
959 if (!bufferRequired
) {
960 identifierLength
= currentSourcePtr() - identifierStart
;
961 ident
= makeIdentifier(identifierStart
, identifierLength
);
963 if (identifierStart
!= currentSourcePtr())
964 m_buffer16
.append(identifierStart
, currentSourcePtr() - identifierStart
);
965 ident
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size());
968 tokenData
->ident
= ident
;
970 tokenData
->ident
= 0;
972 if (LIKELY(!bufferRequired
&& !(lexerFlags
& LexerFlagsIgnoreReservedWords
))) {
973 ASSERT(shouldCreateIdentifier
);
974 // Keywords must not be recognized if there was an \uXXXX in the identifier.
975 if (remaining
< maxTokenLength
) {
976 const HashTableValue
* entry
= m_vm
->keywords
->getKeyword(*ident
);
977 ASSERT((remaining
< maxTokenLength
) || !entry
);
980 JSTokenType token
= static_cast<JSTokenType
>(entry
->lexerValue());
981 return (token
!= RESERVED_IF_STRICT
) || strictMode
? token
: IDENT
;
986 m_buffer16
.resize(0);
// 8-bit overload: characters below 0xE send parseString() to parseStringSlowCase().
990 static ALWAYS_INLINE
bool characterRequiresParseStringSlowCase(LChar character
)
992 return character
< 0xE;
// 16-bit overload: characters below 0xE, and characters above 0xFF (which do not fit
// the 8-bit buffer used by parseString()), send parseString() to parseStringSlowCase().
995 static ALWAYS_INLINE
bool characterRequiresParseStringSlowCase(UChar character
)
997 return character
< 0xE || character
> 0xFF;
// Fast path for string literals. The string is accumulated in the 8-bit buffer
// m_buffer8 (only when shouldBuildStrings is set) and stored in tokenData->ident.
// The starting offset, line start and line number are remembered so that the lexer
// can rewind and restart in parseStringSlowCase() when it meets
//   - an escape sequence not handled by the visible branches here, or
//   - a character for which characterRequiresParseStringSlowCase() is true.
// Returns StringParsedSuccessfully on success. A malformed \x escape sets
// m_lexErrorMessage and returns StringUnterminated when the input ended inside the
// escape, StringCannotBeParsed otherwise.
// Several statements of this function are not part of this listing.
1000 template <typename T
>
1001 template <bool shouldBuildStrings
> ALWAYS_INLINE typename Lexer
<T
>::StringParseResult Lexer
<T
>::parseString(JSTokenData
* tokenData
, bool strictMode
)
1003 int startingOffset
= currentOffset();
1004 int startingLineStartOffset
= currentLineStartOffset();
1005 int startingLineNumber
= lineNumber();
1006 T stringQuoteCharacter
= m_current
;
1009 const T
* stringStart
= currentSourcePtr();
1011 while (m_current
!= stringQuoteCharacter
) {
1012 if (UNLIKELY(m_current
== '\\')) {
// Flush the plain characters collected since the previous escape.
1013 if (stringStart
!= currentSourcePtr() && shouldBuildStrings
)
1014 append8(stringStart
, currentSourcePtr() - stringStart
);
1017 LChar escape
= singleEscape(m_current
);
1019 // Most common escape sequences first
1021 if (shouldBuildStrings
)
1024 } else if (UNLIKELY(isLineTerminator(m_current
)))
1025 shiftLineTerminator();
1026 else if (m_current
== 'x') {
1028 if (!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(peek(1))) {
1029 m_lexErrorMessage
= "\\x can only be followed by a hex character sequence";
1030 return (atEnd() || (isASCIIHexDigit(m_current
) && (m_code
+ 1 == m_codeEnd
))) ? StringUnterminated
: StringCannotBeParsed
;
1034 if (shouldBuildStrings
)
1035 record8(convertHex(prev
, m_current
));
// Rewind to the start of the literal, drop what was buffered, and restart on the slow path.
1038 setOffset(startingOffset
, startingLineStartOffset
);
1039 setLineNumber(startingLineNumber
);
1040 m_buffer8
.resize(0);
1041 return parseStringSlowCase
<shouldBuildStrings
>(tokenData
, strictMode
);
1043 stringStart
= currentSourcePtr();
1047 if (UNLIKELY(characterRequiresParseStringSlowCase(m_current
))) {
1048 setOffset(startingOffset
, startingLineStartOffset
);
1049 setLineNumber(startingLineNumber
);
1050 m_buffer8
.resize(0);
1051 return parseStringSlowCase
<shouldBuildStrings
>(tokenData
, strictMode
);
// Flush the trailing run of plain characters before the closing quote.
1057 if (currentSourcePtr() != stringStart
&& shouldBuildStrings
)
1058 append8(stringStart
, currentSourcePtr() - stringStart
);
1059 if (shouldBuildStrings
) {
1060 tokenData
->ident
= makeIdentifier(m_buffer8
.data(), m_buffer8
.size());
1061 m_buffer8
.resize(0);
1063 tokenData
->ident
= 0;
1065 return StringParsedSuccessfully
;
// General path for string literals; parseString() falls back to it. The string is
// accumulated in the 16-bit buffer m_buffer16 (only when shouldBuildStrings is set)
// and stored in tokenData->ident; the buffer is emptied before a successful return.
// Escapes handled by the visible branches: single-character escapes via singleEscape(),
// escaped line terminators, \xHH, \uXXXX, the strict-mode numeric escape check, and
// (outside strict mode) octal escapes of one to three digits.
// Returns StringParsedSuccessfully, StringUnterminated or StringCannotBeParsed; the
// error returns set m_lexErrorMessage first.
// Several statements of this function are not part of this listing.
1068 template <typename T
>
1069 template <bool shouldBuildStrings
> typename Lexer
<T
>::StringParseResult Lexer
<T
>::parseStringSlowCase(JSTokenData
* tokenData
, bool strictMode
)
1071 T stringQuoteCharacter
= m_current
;
1074 const T
* stringStart
= currentSourcePtr();
1076 while (m_current
!= stringQuoteCharacter
) {
1077 if (UNLIKELY(m_current
== '\\')) {
// Flush the plain characters collected since the previous escape.
1078 if (stringStart
!= currentSourcePtr() && shouldBuildStrings
)
1079 append16(stringStart
, currentSourcePtr() - stringStart
);
1082 LChar escape
= singleEscape(m_current
);
1084 // Most common escape sequences first
1086 if (shouldBuildStrings
)
1089 } else if (UNLIKELY(isLineTerminator(m_current
)))
1090 shiftLineTerminator();
1091 else if (m_current
== 'x') {
1093 if (!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(peek(1))) {
1094 m_lexErrorMessage
= "\\x can only be followed by a hex character sequence";
1095 return StringCannotBeParsed
;
1099 if (shouldBuildStrings
)
1100 record16(convertHex(prev
, m_current
));
1102 } else if (m_current
== 'u') {
1104 UnicodeHexValue character
= parseFourDigitUnicodeHex();
1105 if (character
.isValid()) {
1106 if (shouldBuildStrings
)
1107 record16(character
.value());
1108 } else if (m_current
== stringQuoteCharacter
) {
1109 if (shouldBuildStrings
)
1112 m_lexErrorMessage
= "\\u can only be followed by a Unicode character sequence";
1113 return character
.valueType() == UnicodeHexValue::IncompleteHex
? StringUnterminated
: StringCannotBeParsed
;
1115 } else if (strictMode
&& isASCIIDigit(m_current
)) {
1116 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1117 int character1
= m_current
;
1119 if (character1
!= '0' || isASCIIDigit(m_current
)) {
1120 m_lexErrorMessage
= "The only valid numeric escape in strict mode is '\\0'";
1121 return StringCannotBeParsed
;
1123 if (shouldBuildStrings
)
1125 } else if (!strictMode
&& isASCIIOctalDigit(m_current
)) {
1126 // Octal character sequences
1127 T character1
= m_current
;
1129 if (isASCIIOctalDigit(m_current
)) {
1130 // Two octal characters
1131 T character2
= m_current
;
// A third octal digit is only accepted when the first digit is 0-3.
1133 if (character1
>= '0' && character1
<= '3' && isASCIIOctalDigit(m_current
)) {
1134 if (shouldBuildStrings
)
1135 record16((character1
- '0') * 64 + (character2
- '0') * 8 + m_current
- '0');
1138 if (shouldBuildStrings
)
1139 record16((character1
- '0') * 8 + character2
- '0');
1142 if (shouldBuildStrings
)
1143 record16(character1
- '0');
1145 } else if (!atEnd()) {
1146 if (shouldBuildStrings
)
1147 record16(m_current
);
1150 m_lexErrorMessage
= "Unterminated string constant";
1151 return StringUnterminated
;
1154 stringStart
= currentSourcePtr();
1157 // Fast check for characters that require special handling.
1158 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1159 // as possible, and lets through all common ASCII characters.
1160 if (UNLIKELY(((static_cast<unsigned>(m_current
) - 0xE) & 0x2000))) {
1161 // New-line or end of input is not allowed
1162 if (atEnd() || isLineTerminator(m_current
)) {
1163 m_lexErrorMessage
= "Unexpected EOF";
1164 return atEnd() ? StringUnterminated
: StringCannotBeParsed
;
1166 // Anything else is just a normal character
// Flush the trailing run of plain characters before the closing quote.
1171 if (currentSourcePtr() != stringStart
&& shouldBuildStrings
)
1172 append16(stringStart
, currentSourcePtr() - stringStart
);
1173 if (shouldBuildStrings
)
1174 tokenData
->ident
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size());
1176 tokenData
->ident
= 0;
1178 m_buffer16
.resize(0);
1179 return StringParsedSuccessfully
;
// Parses the digits of a hexadecimal literal and stores the result in returnValue.
// Digits are first accumulated in the 32-bit hexValue. If the digit budget
// (maximumDigits) is not exhausted, hexValue is the result. Otherwise the eight
// digits already held in hexValue are written back to m_buffer8 as lowercase hex text
// and the whole buffer is converted with parseIntOverflow() in base 16.
// Several statements of this function are not part of this listing.
1182 template <typename T
>
1183 ALWAYS_INLINE
void Lexer
<T
>::parseHex(double& returnValue
)
1185 // Optimization: most hexadecimal values fit into 4 bytes.
1186 uint32_t hexValue
= 0;
1187 int maximumDigits
= 7;
1189 // Shift out the 'x' prefix.
1193 hexValue
= (hexValue
<< 4) + toASCIIHexValue(m_current
);
1196 } while (isASCIIHexDigit(m_current
) && maximumDigits
>= 0);
1198 if (maximumDigits
>= 0) {
1199 returnValue
= hexValue
;
1203 // No more place in the hexValue buffer.
1204 // The values are shifted out and placed into the m_buffer8 vector.
1205 for (int i
= 0; i
< 8; ++i
) {
// The top four bits of hexValue are the next digit to emit.
1206 int digit
= hexValue
>> 28;
1208 record8(digit
+ '0');
1210 record8(digit
- 10 + 'a');
1214 while (isASCIIHexDigit(m_current
)) {
1219 returnValue
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 16);
// Parses a run of octal digits and, when successful, stores the numeric
// result in returnValue.
//
// Fast path: digits are folded into a 32-bit accumulator and also saved in
// a temporary `digits` array (declared on a line not shown). If the run ends
// on something that is not a decimal digit and the digit budget is not
// exhausted, the accumulator is the result.
// Slow path: the saved digits are replayed (presumably into m_buffer8), the
// remaining octal digits are consumed, and the value is computed by
// parseIntOverflow() in base 8.
//
// NOTE(review): the meaning of the bool result is not visible in this
// listing. The caller in lex() treats a true result as "an octal literal
// was parsed"; the isASCIIDigit() check near the end suggests that a
// trailing '8' or '9' makes the function bail out so the text can be
// re-read as decimal — confirm against the full source.
1222 template <typename T
>
1223 ALWAYS_INLINE
bool Lexer
<T
>::parseOctal(double& returnValue
)
1225 // Optimization: most octal values fit into 4 bytes.
1226 uint32_t octalValue
= 0;
1227 int maximumDigits
= 9;
1228 // Temporary buffer for the digits. Makes easier
1229 // to reconstruct the input characters when needed.
// Accumulate the digit numerically and keep its character for replay.
1233 octalValue
= octalValue
* 8 + (m_current
- '0');
1234 digits
[maximumDigits
] = m_current
;
1237 } while (isASCIIOctalDigit(m_current
) && maximumDigits
>= 0);
// Fast path: the run ended on a non-digit and the accumulator is complete.
1239 if (!isASCIIDigit(m_current
) && maximumDigits
>= 0) {
1240 returnValue
= octalValue
;
// Replay the saved digits in input order: they were stored from index 9
// downwards, so i walks from 9 down to maximumDigits + 1.
1244 for (int i
= 9; i
> maximumDigits
; --i
)
// Consume the octal digits beyond the fast-path budget.
1247 while (isASCIIOctalDigit(m_current
)) {
// A decimal digit here can only be '8' or '9', i.e. not an octal literal.
1252 if (isASCIIDigit(m_current
))
// Convert the full digit string collected in m_buffer8, base 8.
1255 returnValue
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 8);
// Parses the integer part of a decimal literal.
//
// Fast path (only taken when m_buffer8 is empty): digits are folded into a
// 32-bit accumulator and saved in a temporary `digits` array (declared on a
// line not shown). If the digit budget is not exhausted and the literal has
// neither a fraction ('.') nor an exponent ('e'/'E'), the accumulator is
// stored in returnValue.
// Otherwise the saved digits are replayed and the remaining digits are
// consumed (presumably into m_buffer8) so the caller can finish the literal.
//
// NOTE(review): the return statements are not visible in this listing. The
// caller in lex() continues with fraction/exponent parsing and parseDouble()
// when the result is false, so presumably true means "returnValue is final".
1259 template <typename T
>
1260 ALWAYS_INLINE
bool Lexer
<T
>::parseDecimal(double& returnValue
)
1262 // Optimization: most decimal values fit into 4 bytes.
1263 uint32_t decimalValue
= 0;
1265 // Since parseOctal may be executed before parseDecimal,
1266 // the m_buffer8 may hold ascii digits.
1267 if (!m_buffer8
.size()) {
1268 int maximumDigits
= 9;
1269 // Temporary buffer for the digits. Makes easier
1270 // to reconstruct the input characters when needed.
// Accumulate the digit numerically and keep its character for replay.
1274 decimalValue
= decimalValue
* 10 + (m_current
- '0');
1275 digits
[maximumDigits
] = m_current
;
1278 } while (isASCIIDigit(m_current
) && maximumDigits
>= 0);
// OR-ing with 0x20 folds ASCII 'E' onto 'e', so both spellings are caught.
1280 if (maximumDigits
>= 0 && m_current
!= '.' && (m_current
| 0x20) != 'e') {
1281 returnValue
= decimalValue
;
// Replay the saved digits in input order (stored from index 9 downwards).
1285 for (int i
= 9; i
> maximumDigits
; --i
)
// Consume whatever decimal digits remain.
1289 while (isASCIIDigit(m_current
)) {
// Consumes the run of ASCII digits that forms the fractional part of a
// numeric literal.
// NOTE(review): the statements before the loop and the loop body are not
// shown in this listing; presumably the '.' and each digit are recorded
// into m_buffer8 for the later parseDouble() call in lex() — confirm.
1297 template <typename T
>
1298 ALWAYS_INLINE
void Lexer
<T
>::parseNumberAfterDecimalPoint()
1301 while (isASCIIDigit(m_current
)) {
// Parses the exponent part of a numeric literal: an optional sign followed
// by at least one ASCII digit.
// The caller in lex() reports "Non-number found after exponent indicator"
// when this returns false.
// NOTE(review): the return statements and the recording of characters are
// on lines not shown in this listing.
1307 template <typename T
>
1308 ALWAYS_INLINE
bool Lexer
<T
>::parseNumberAfterExponentIndicator()
// Optional sign.
1312 if (m_current
== '+' || m_current
== '-') {
// At least one digit is mandatory after the indicator (and sign).
1317 if (!isASCIIDigit(m_current
))
// Remaining exponent digits.
1323 } while (isASCIIDigit(m_current
));
// Scans the body of a multi-line comment looking for its closing "*/".
// The caller in lex() reports "Multiline comment was not closed properly"
// when this returns false.
// Line terminators met inside the comment are consumed with
// shiftLineTerminator() and set m_terminator, so the comment counts as a
// line break for the token that follows it.
// NOTE(review): the enclosing loop, the end-of-input check and the return
// statements are on lines not shown in this listing.
1327 template <typename T
>
1328 ALWAYS_INLINE
bool Lexer
<T
>::parseMultilineComment()
// A '*' may start the terminator; the loop handles runs such as "**/".
1331 while (UNLIKELY(m_current
== '*')) {
1333 if (m_current
== '/') {
1342 if (isLineTerminator(m_current
)) {
1343 shiftLineTerminator();
1344 m_terminator
= true;
// Looks ahead, without modifying lexer state, to tell whether the next
// character after any white space and line terminators is ':'.
// Works on a local copy of m_code and never reads at or past m_codeEnd.
// Comments are not skipped by the visible code.
// NOTE(review): the loop body is not shown in this listing; presumably it
// advances the local pointer by one.
1350 template <typename T
>
1351 bool Lexer
<T
>::nextTokenIsColon()
1353 const T
* code
= m_code
;
1354 while (code
< m_codeEnd
&& (isWhiteSpace(*code
) || isLineTerminator(*code
)))
// False when the input ended before any other character was found.
1357 return code
< m_codeEnd
&& *code
== ':';
// Scans the next token from the input and returns its type.
//
// tokenRecord  Receives the token payload (m_data), its source location
//              (m_location) and its start/end positions.
// lexerFlags   Bit set; LexerFlagsDontBuildStrings selects
//              parseString<false> and LexexFlagsDontBuildKeywords selects
//              parseIdentifier<false>.
// strictMode   Forwarded to parseString()/parseIdentifier().
//
// Both scratch buffers (m_buffer8, m_buffer16) must be empty on entry.
// The function dispatches on a CharacterType computed for m_current and has
// two epilogues: a normal one that also updates m_lastToken, and an error
// one that asserts the token carries ErrorTokenFlag.
//
// NOTE(review): this listing omits many source lines (braces, labels,
// shift() calls, break/goto statements and several case labels). Comments
// below describe only what the visible fragments establish and are hedged
// wherever they rely on lines that are not shown.
1360 template <typename T
>
1361 JSTokenType Lexer
<T
>::lex(JSToken
* tokenRecord
, unsigned lexerFlags
, bool strictMode
)
1363 JSTokenData
* tokenData
= &tokenRecord
->m_data
;
1364 JSTokenLocation
* tokenLocation
= &tokenRecord
->m_location
;
1366 ASSERT(m_buffer8
.isEmpty());
1367 ASSERT(m_buffer16
.isEmpty());
// Default result until a branch below picks a real token type.
1369 JSTokenType token
= ERRORTOK
;
// Set again whenever a line terminator is consumed before the token.
1370 m_terminator
= false;
// Skip leading white space.
1373 while (isWhiteSpace(m_current
))
// Record where the token starts.
1379 tokenLocation
->startOffset
= currentOffset();
1380 ASSERT(currentOffset() >= currentLineStartOffset());
1381 tokenRecord
->m_startPosition
= currentPosition();
// Classify the current character: Latin-1 characters go through the lookup
// table; others are identifier starts, line terminators, or invalid.
1384 if (LIKELY(isLatin1(m_current
)))
1385 type
= static_cast<CharacterType
>(typesOfLatin1Characters
[m_current
]);
1386 else if (isNonLatin1IdentStart(m_current
))
1387 type
= CharacterIdentifierStart
;
1388 else if (isLineTerminator(m_current
))
1389 type
= CharacterLineTerminator
;
1391 type
= CharacterInvalid
;
// '>' family: the nested checks distinguish '>', '>>', '>>>' and their
// '=' forms.
1394 case CharacterGreater
:
1396 if (m_current
== '>') {
1398 if (m_current
== '>') {
1400 if (m_current
== '=') {
1402 token
= URSHIFTEQUAL
;
1408 if (m_current
== '=') {
1410 token
= RSHIFTEQUAL
;
1416 if (m_current
== '=') {
// '=' family: up to two further '=' characters are examined.
1423 case CharacterEqual
:
1425 if (m_current
== '=') {
1427 if (m_current
== '=') {
// Presumably the '<' case (its case label is not shown in this listing).
1439 if (m_current
== '!' && peek(1) == '-' && peek(2) == '-') {
1440 // <!-- marks the beginning of a line comment (for www usage)
1441 goto inSingleLineComment
;
1443 if (m_current
== '<') {
1445 if (m_current
== '=') {
1447 token
= LSHIFTEQUAL
;
1453 if (m_current
== '=') {
// '!' family: up to two following '=' characters are examined.
1460 case CharacterExclamationMark
:
1462 if (m_current
== '=') {
1464 if (m_current
== '=') {
1472 token
= EXCLAMATION
;
// Presumably the '+' case (case label not shown). A "++" that follows a
// line terminator is reported as AUTOPLUSPLUS instead of PLUSPLUS.
1476 if (m_current
== '+') {
1478 token
= (!m_terminator
) ? PLUSPLUS
: AUTOPLUSPLUS
;
1481 if (m_current
== '=') {
// Presumably the '-' case (case label not shown). "-->" at the start of a
// line is treated as a single-line comment; otherwise "--" follows the same
// line-terminator rule as "++" above.
1490 if (m_current
== '-') {
1492 if (m_atLineStart
&& m_current
== '>') {
1494 goto inSingleLineComment
;
1496 token
= (!m_terminator
) ? MINUSMINUS
: AUTOMINUSMINUS
;
1499 if (m_current
== '=') {
1506 case CharacterMultiply
:
1508 if (m_current
== '=') {
// '/' starts a single-line comment ("//"), a multi-line comment ("/*"),
// or a division operator (with or without '=').
1515 case CharacterSlash
:
1517 if (m_current
== '/') {
1519 goto inSingleLineComment
;
1521 if (m_current
== '*') {
1523 if (parseMultilineComment())
1525 m_lexErrorMessage
= "Multiline comment was not closed properly";
1526 token
= UNTERMINATED_MULTILINE_COMMENT_ERRORTOK
;
1529 if (m_current
== '=') {
// Presumably the '&' case (case label not shown).
1538 if (m_current
== '&') {
1543 if (m_current
== '=') {
// Case label not shown in this listing.
1552 if (m_current
== '=') {
1559 case CharacterModulo
:
1561 if (m_current
== '=') {
// Presumably the '|' case (case label not shown).
1570 if (m_current
== '=') {
1575 if (m_current
== '|') {
// Single-character punctuators; most of their token assignments are on
// lines not shown in this listing.
1582 case CharacterOpenParen
:
1586 case CharacterCloseParen
:
1590 case CharacterOpenBracket
:
1591 token
= OPENBRACKET
;
1594 case CharacterCloseBracket
:
1595 token
= CLOSEBRACKET
;
1598 case CharacterComma
:
1602 case CharacterColon
:
1606 case CharacterQuestion
:
1610 case CharacterTilde
:
1614 case CharacterSemicolon
:
// Braces additionally store their line, offset and line-start offset in
// the token data.
1618 case CharacterOpenBrace
:
1619 tokenData
->line
= lineNumber();
1620 tokenData
->offset
= currentOffset();
1621 tokenData
->lineStartOffset
= currentLineStartOffset();
1622 ASSERT(tokenData
->offset
>= tokenData
->lineStartOffset
);
1626 case CharacterCloseBrace
:
1627 tokenData
->line
= lineNumber();
1628 tokenData
->offset
= currentOffset();
1629 tokenData
->lineStartOffset
= currentLineStartOffset();
1630 ASSERT(tokenData
->offset
>= tokenData
->lineStartOffset
);
// Presumably the '.' case (case label not shown): a '.' followed by a digit
// begins a fractional numeric literal; two consecutive dots are checked
// for separately.
1636 if (!isASCIIDigit(m_current
)) {
1637 if (UNLIKELY((m_current
== '.') && (peek(1) == '.'))) {
1646 goto inNumberAfterDecimalPoint
;
// Presumably the leading-'0' case (case label not shown): "0x"/"0X"
// introduces a hexadecimal literal. OR-ing with 0x20 folds 'X' onto 'x'.
1649 if ((m_current
| 0x20) == 'x') {
1650 if (!isASCIIHexDigit(peek(1))) {
1651 m_lexErrorMessage
= "No hexadecimal digits after '0x'";
1652 token
= INVALID_HEX_NUMBER_ERRORTOK
;
1655 parseHex(tokenData
->doubleValue
);
1656 if (isIdentStart(m_current
)) {
1657 m_lexErrorMessage
= "No space between hexadecimal literal and identifier";
1658 token
= INVALID_HEX_NUMBER_ERRORTOK
;
// parseHex() may have used m_buffer8 on its slow path; reset it.
1662 m_buffer8
.resize(0);
// A '0' followed by an octal digit is tried as an octal literal.
// NOTE(review): the message below speaks of octal "escapes" although this
// path handles an octal numeric literal (INVALID_OCTAL_NUMBER_ERRORTOK);
// the condition guarding it is on a line not shown — confirm the wording.
1667 if (isASCIIOctalDigit(m_current
)) {
1668 if (parseOctal(tokenData
->doubleValue
)) {
1670 m_lexErrorMessage
= "Octal escapes are forbidden in strict mode";
1671 token
= INVALID_OCTAL_NUMBER_ERRORTOK
;
// Decimal literals. The token != NUMBER test skips this work when an
// earlier branch already produced a NUMBER (presumably by falling through
// from the case above).
1678 case CharacterNumber
:
1679 if (LIKELY(token
!= NUMBER
)) {
1680 if (!parseDecimal(tokenData
->doubleValue
)) {
1681 if (m_current
== '.') {
// Entry point for literals that begin with '.', jumped to from above.
1683 inNumberAfterDecimalPoint
:
1684 parseNumberAfterDecimalPoint();
1686 if ((m_current
| 0x20) == 'e') {
1687 if (!parseNumberAfterExponentIndicator()) {
1688 m_lexErrorMessage
= "Non-number found after exponent indicator";
1689 token
= atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK
: INVALID_NUMERIC_LITERAL_ERRORTOK
;
// Slow path: convert the characters collected in m_buffer8.
1693 size_t parsedLength
;
1694 tokenData
->doubleValue
= parseDouble(m_buffer8
.data(), m_buffer8
.size(), parsedLength
);
1699 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
// NOTE(review): the message assigned below talks about a missing digit
// after a decimal point, which does not match the condition being tested
// (an identifier start right after the literal) — likely a copy/paste slip.
1700 if (UNLIKELY(isIdentStart(m_current
))) {
1701 m_lexErrorMessage
= "At least one digit must occur after a decimal point";
1702 token
= atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK
: INVALID_NUMERIC_LITERAL_ERRORTOK
;
1705 m_buffer8
.resize(0);
// String literals: the template argument says whether the string contents
// are built; both variants map a failure result to the same error tokens.
1707 case CharacterQuote
:
1708 if (lexerFlags
& LexerFlagsDontBuildStrings
) {
1709 StringParseResult result
= parseString
<false>(tokenData
, strictMode
);
1710 if (UNLIKELY(result
!= StringParsedSuccessfully
)) {
1711 token
= result
== StringUnterminated
? UNTERMINATED_STRING_LITERAL_ERRORTOK
: INVALID_STRING_LITERAL_ERRORTOK
;
1715 StringParseResult result
= parseString
<true>(tokenData
, strictMode
);
1716 if (UNLIKELY(result
!= StringParsedSuccessfully
)) {
1717 token
= result
== StringUnterminated
? UNTERMINATED_STRING_LITERAL_ERRORTOK
: INVALID_STRING_LITERAL_ERRORTOK
;
// Identifiers and keywords. A backslash takes the same parseIdentifier()
// path (presumably for identifiers beginning with an escape sequence).
// The flag name is spelled "Lexex..." (sic) where it is declared.
1724 case CharacterIdentifierStart
:
1725 ASSERT(isIdentStart(m_current
));
1727 case CharacterBackSlash
:
1729 if (lexerFlags
& LexexFlagsDontBuildKeywords
)
1730 token
= parseIdentifier
<false>(tokenData
, lexerFlags
, strictMode
);
1732 token
= parseIdentifier
<true>(tokenData
, lexerFlags
, strictMode
);
// Line terminators are consumed and remembered for the next token; the
// statement that follows (presumably a jump back to the start of scanning)
// is not shown.
1734 case CharacterLineTerminator
:
1735 ASSERT(isLineTerminator(m_current
));
1736 shiftLineTerminator();
1737 m_atLineStart
= true;
1738 m_terminator
= true;
1739 m_lineStart
= m_code
;
// Private identifier starts are given special treatment only while parsing
// a builtin function; the statements involved are not shown.
1741 case CharacterPrivateIdentifierStart
:
1742 if (m_parsingBuiltinFunction
)
1746 case CharacterInvalid
:
1747 m_lexErrorMessage
= invalidCharacterMessage();
// Not expected to be reached: every CharacterType should be handled above.
1751 RELEASE_ASSERT_NOT_REACHED();
1752 m_lexErrorMessage
= "Internal Error";
// A token was produced, so the lexer is no longer at the start of a line.
1757 m_atLineStart
= false;
// Skip the rest of the line, then consume its terminator and update the
// line bookkeeping the same way the CharacterLineTerminator case does.
1760 inSingleLineComment
:
1761 while (!isLineTerminator(m_current
)) {
1766 shiftLineTerminator();
1767 m_atLineStart
= true;
1768 m_terminator
= true;
1769 m_lineStart
= m_code
;
1770 if (!lastTokenWasRestrKeyword())
1774 // Fall through into returnToken.
// Normal epilogue: finish the location record and remember the token.
1777 tokenLocation
->line
= m_lineNumber
;
1778 tokenLocation
->endOffset
= currentOffset();
1779 tokenLocation
->lineStartOffset
= currentLineStartOffset();
1780 ASSERT(tokenLocation
->endOffset
>= tokenLocation
->lineStartOffset
);
1781 tokenRecord
->m_endPosition
= currentPosition();
1782 m_lastToken
= token
;
// Error epilogue: same location bookkeeping, but m_lastToken is not
// updated in the visible lines and the token must carry ErrorTokenFlag.
1787 tokenLocation
->line
= m_lineNumber
;
1788 tokenLocation
->endOffset
= currentOffset();
1789 tokenLocation
->lineStartOffset
= currentLineStartOffset();
1790 ASSERT(tokenLocation
->endOffset
>= tokenLocation
->lineStartOffset
);
1791 tokenRecord
->m_endPosition
= currentPosition();
1792 RELEASE_ASSERT(token
& ErrorTokenFlag
);
// Helper that ORs a character into an accumulator, specialised on the
// lexer's character type T:
//  - for LChar the body is empty, so the accumulator is left untouched;
//  - for UChar the character is OR-ed into the accumulator.
// scanRegExp() passes the accumulated value to makeRightSizedIdentifier().
// NOTE(review): presumably the accumulator lets that function tell whether
// any character needs more than 8 bits — confirm against its definition.
1796 template <typename T
>
1797 static inline void orCharacter(UChar
&, UChar
);
// LChar specialisation: nothing to accumulate.
1800 inline void orCharacter
<LChar
>(UChar
&, UChar
) { }
// UChar specialisation: fold the character into the accumulator.
1803 inline void orCharacter
<UChar
>(UChar
& orAccumulator
, UChar character
)
1805 orAccumulator
|= character
;
// Scans a regular-expression literal and produces identifiers for its
// pattern and its flags.
//
// pattern        Out: identifier built from the pattern characters.
// flags          Out: identifier built from the identifier-part characters
//                that follow the pattern.
// patternPrefix  If non-zero, a character recorded ahead of the scanned
//                pattern; it must not be a line terminator, '/' or '['.
//
// m_buffer16 must be empty on entry and is reset to size 0 at every visible
// exit point. A line terminator or end of input inside the pattern aborts
// the scan. An unescaped '/' outside brackets ends the pattern.
//
// NOTE(review): the main loop, the bracket tracking and the return
// statements are on lines not shown in this listing.
1808 template <typename T
>
1809 bool Lexer
<T
>::scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
)
1811 ASSERT(m_buffer16
.isEmpty());
// lastWasEscape: previous character was an unconsumed backslash.
// inBrackets: the scan is inside a bracketed part of the pattern (its
// updates are not shown).
1813 bool lastWasEscape
= false;
1814 bool inBrackets
= false;
// OR of the characters seen so far (only accumulated for the UChar lexer).
1815 UChar charactersOredTogether
= 0;
1817 if (patternPrefix
) {
1818 ASSERT(!isLineTerminator(patternPrefix
));
1819 ASSERT(patternPrefix
!= '/');
1820 ASSERT(patternPrefix
!= '[');
1821 record16(patternPrefix
);
// Unterminated literal: discard what was collected.
1825 if (isLineTerminator(m_current
) || atEnd()) {
1826 m_buffer16
.resize(0);
// `prev` is the character just consumed (declared on a line not shown).
1834 if (prev
== '/' && !lastWasEscape
&& !inBrackets
)
1838 orCharacter
<T
>(charactersOredTogether
, prev
);
// An escaped character never changes the scanning state.
1840 if (lastWasEscape
) {
1841 lastWasEscape
= false;
1853 lastWasEscape
= true;
1858 pattern
= makeRightSizedIdentifier(m_buffer16
.data(), m_buffer16
.size(), charactersOredTogether
);
// Reuse the buffer and the accumulator for the flags.
1860 m_buffer16
.resize(0);
1861 charactersOredTogether
= 0;
1863 while (isIdentPart(m_current
)) {
1864 record16(m_current
);
1865 orCharacter
<T
>(charactersOredTogether
, m_current
);
1869 flags
= makeRightSizedIdentifier(m_buffer16
.data(), m_buffer16
.size(), charactersOredTogether
);
1870 m_buffer16
.resize(0);
// Skips over a regular-expression literal without building its pattern or
// flags. It uses the same escape/bracket state tracking as scanRegExp()
// but records nothing in the visible lines.
// A line terminator or end of input inside the pattern aborts the scan; an
// unescaped '/' outside brackets ends the pattern, after which the
// identifier-part characters that follow are consumed.
// NOTE(review): the main loop, the bracket tracking and the return
// statements are on lines not shown in this listing.
1875 template <typename T
>
1876 bool Lexer
<T
>::skipRegExp()
1878 bool lastWasEscape
= false;
1879 bool inBrackets
= false;
// Unterminated literal.
1882 if (isLineTerminator(m_current
) || atEnd())
// `prev` is the character just consumed (declared on a line not shown).
1889 if (prev
== '/' && !lastWasEscape
&& !inBrackets
)
// An escaped character never changes the scanning state.
1892 if (lastWasEscape
) {
1893 lastWasEscape
= false;
1905 lastWasEscape
= true;
// Skip the flag characters.
1910 while (isIdentPart(m_current
))
// Resets the lexer's scratch state: both character buffers are replaced by
// empty vectors and the reparsing flag is cleared.
// Each buffer is swapped with a freshly constructed Vector, so the old
// contents end up in the temporary. Presumably this is done to release the
// buffers' storage rather than merely set their size to zero, as the
// resize(0) calls elsewhere in this file do.
// NOTE(review): lines 1918-1920 of the original are not shown in this
// listing and may reset further members.
1916 template <typename T
>
1917 void Lexer
<T
>::clear()
1921 Vector
<LChar
> newBuffer8
;
1922 m_buffer8
.swap(newBuffer8
);
1924 Vector
<UChar
> newBuffer16
;
1925 m_buffer16
.swap(newBuffer16
);
1927 m_isReparsing
= false;
1930 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
// One instantiation per character type: LChar and UChar.
1931 template class Lexer
<LChar
>;
1932 template class Lexer
<UChar
>;