2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
28 #include "JSFunctionInlines.h"
30 #include "BuiltinNames.h"
31 #include "JSGlobalObjectFunctions.h"
32 #include "Identifier.h"
34 #include "JSCInlines.h"
39 #include <wtf/Assertions.h>
41 #include "KeywordLookup.h"
42 #include "Lexer.lut.h"
47 Keywords::Keywords(VM
& vm
)
49 , m_keywordTable(JSC::mainTable
)
54 // Types for the main switch
56 // The first three types are fixed, and also used for identifying
57 // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
58 CharacterIdentifierStart
,
63 CharacterLineTerminator
,
64 CharacterExclamationMark
,
68 CharacterCloseBracket
,
93 // Other types (only one so far)
95 CharacterPrivateIdentifierStart
99 static const unsigned short typesOfLatin1Characters
[256] = {
100 /* 0 - Null */ CharacterInvalid
,
101 /* 1 - Start of Heading */ CharacterInvalid
,
102 /* 2 - Start of Text */ CharacterInvalid
,
103 /* 3 - End of Text */ CharacterInvalid
,
104 /* 4 - End of Transm. */ CharacterInvalid
,
105 /* 5 - Enquiry */ CharacterInvalid
,
106 /* 6 - Acknowledgment */ CharacterInvalid
,
107 /* 7 - Bell */ CharacterInvalid
,
108 /* 8 - Back Space */ CharacterInvalid
,
109 /* 9 - Horizontal Tab */ CharacterWhiteSpace
,
110 /* 10 - Line Feed */ CharacterLineTerminator
,
111 /* 11 - Vertical Tab */ CharacterWhiteSpace
,
112 /* 12 - Form Feed */ CharacterWhiteSpace
,
113 /* 13 - Carriage Return */ CharacterLineTerminator
,
114 /* 14 - Shift Out */ CharacterInvalid
,
115 /* 15 - Shift In */ CharacterInvalid
,
116 /* 16 - Data Line Escape */ CharacterInvalid
,
117 /* 17 - Device Control 1 */ CharacterInvalid
,
118 /* 18 - Device Control 2 */ CharacterInvalid
,
119 /* 19 - Device Control 3 */ CharacterInvalid
,
120 /* 20 - Device Control 4 */ CharacterInvalid
,
121 /* 21 - Negative Ack. */ CharacterInvalid
,
122 /* 22 - Synchronous Idle */ CharacterInvalid
,
123 /* 23 - End of Transmit */ CharacterInvalid
,
124 /* 24 - Cancel */ CharacterInvalid
,
125 /* 25 - End of Medium */ CharacterInvalid
,
126 /* 26 - Substitute */ CharacterInvalid
,
127 /* 27 - Escape */ CharacterInvalid
,
128 /* 28 - File Separator */ CharacterInvalid
,
129 /* 29 - Group Separator */ CharacterInvalid
,
130 /* 30 - Record Separator */ CharacterInvalid
,
131 /* 31 - Unit Separator */ CharacterInvalid
,
132 /* 32 - Space */ CharacterWhiteSpace
,
133 /* 33 - ! */ CharacterExclamationMark
,
134 /* 34 - " */ CharacterQuote
,
135 /* 35 - # */ CharacterInvalid
,
136 /* 36 - $ */ CharacterIdentifierStart
,
137 /* 37 - % */ CharacterModulo
,
138 /* 38 - & */ CharacterAnd
,
139 /* 39 - ' */ CharacterQuote
,
140 /* 40 - ( */ CharacterOpenParen
,
141 /* 41 - ) */ CharacterCloseParen
,
142 /* 42 - * */ CharacterMultiply
,
143 /* 43 - + */ CharacterAdd
,
144 /* 44 - , */ CharacterComma
,
145 /* 45 - - */ CharacterSub
,
146 /* 46 - . */ CharacterDot
,
147 /* 47 - / */ CharacterSlash
,
148 /* 48 - 0 */ CharacterZero
,
149 /* 49 - 1 */ CharacterNumber
,
150 /* 50 - 2 */ CharacterNumber
,
151 /* 51 - 3 */ CharacterNumber
,
152 /* 52 - 4 */ CharacterNumber
,
153 /* 53 - 5 */ CharacterNumber
,
154 /* 54 - 6 */ CharacterNumber
,
155 /* 55 - 7 */ CharacterNumber
,
156 /* 56 - 8 */ CharacterNumber
,
157 /* 57 - 9 */ CharacterNumber
,
158 /* 58 - : */ CharacterColon
,
159 /* 59 - ; */ CharacterSemicolon
,
160 /* 60 - < */ CharacterLess
,
161 /* 61 - = */ CharacterEqual
,
162 /* 62 - > */ CharacterGreater
,
163 /* 63 - ? */ CharacterQuestion
,
164 /* 64 - @ */ CharacterPrivateIdentifierStart
,
165 /* 65 - A */ CharacterIdentifierStart
,
166 /* 66 - B */ CharacterIdentifierStart
,
167 /* 67 - C */ CharacterIdentifierStart
,
168 /* 68 - D */ CharacterIdentifierStart
,
169 /* 69 - E */ CharacterIdentifierStart
,
170 /* 70 - F */ CharacterIdentifierStart
,
171 /* 71 - G */ CharacterIdentifierStart
,
172 /* 72 - H */ CharacterIdentifierStart
,
173 /* 73 - I */ CharacterIdentifierStart
,
174 /* 74 - J */ CharacterIdentifierStart
,
175 /* 75 - K */ CharacterIdentifierStart
,
176 /* 76 - L */ CharacterIdentifierStart
,
177 /* 77 - M */ CharacterIdentifierStart
,
178 /* 78 - N */ CharacterIdentifierStart
,
179 /* 79 - O */ CharacterIdentifierStart
,
180 /* 80 - P */ CharacterIdentifierStart
,
181 /* 81 - Q */ CharacterIdentifierStart
,
182 /* 82 - R */ CharacterIdentifierStart
,
183 /* 83 - S */ CharacterIdentifierStart
,
184 /* 84 - T */ CharacterIdentifierStart
,
185 /* 85 - U */ CharacterIdentifierStart
,
186 /* 86 - V */ CharacterIdentifierStart
,
187 /* 87 - W */ CharacterIdentifierStart
,
188 /* 88 - X */ CharacterIdentifierStart
,
189 /* 89 - Y */ CharacterIdentifierStart
,
190 /* 90 - Z */ CharacterIdentifierStart
,
191 /* 91 - [ */ CharacterOpenBracket
,
192 /* 92 - \ */ CharacterBackSlash
,
193 /* 93 - ] */ CharacterCloseBracket
,
194 /* 94 - ^ */ CharacterXor
,
195 /* 95 - _ */ CharacterIdentifierStart
,
196 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
197 /* 96 - ` */ CharacterBackQuote
,
199 /* 96 - ` */ CharacterInvalid
,
201 /* 97 - a */ CharacterIdentifierStart
,
202 /* 98 - b */ CharacterIdentifierStart
,
203 /* 99 - c */ CharacterIdentifierStart
,
204 /* 100 - d */ CharacterIdentifierStart
,
205 /* 101 - e */ CharacterIdentifierStart
,
206 /* 102 - f */ CharacterIdentifierStart
,
207 /* 103 - g */ CharacterIdentifierStart
,
208 /* 104 - h */ CharacterIdentifierStart
,
209 /* 105 - i */ CharacterIdentifierStart
,
210 /* 106 - j */ CharacterIdentifierStart
,
211 /* 107 - k */ CharacterIdentifierStart
,
212 /* 108 - l */ CharacterIdentifierStart
,
213 /* 109 - m */ CharacterIdentifierStart
,
214 /* 110 - n */ CharacterIdentifierStart
,
215 /* 111 - o */ CharacterIdentifierStart
,
216 /* 112 - p */ CharacterIdentifierStart
,
217 /* 113 - q */ CharacterIdentifierStart
,
218 /* 114 - r */ CharacterIdentifierStart
,
219 /* 115 - s */ CharacterIdentifierStart
,
220 /* 116 - t */ CharacterIdentifierStart
,
221 /* 117 - u */ CharacterIdentifierStart
,
222 /* 118 - v */ CharacterIdentifierStart
,
223 /* 119 - w */ CharacterIdentifierStart
,
224 /* 120 - x */ CharacterIdentifierStart
,
225 /* 121 - y */ CharacterIdentifierStart
,
226 /* 122 - z */ CharacterIdentifierStart
,
227 /* 123 - { */ CharacterOpenBrace
,
228 /* 124 - | */ CharacterOr
,
229 /* 125 - } */ CharacterCloseBrace
,
230 /* 126 - ~ */ CharacterTilde
,
231 /* 127 - Delete */ CharacterInvalid
,
232 /* 128 - Cc category */ CharacterInvalid
,
233 /* 129 - Cc category */ CharacterInvalid
,
234 /* 130 - Cc category */ CharacterInvalid
,
235 /* 131 - Cc category */ CharacterInvalid
,
236 /* 132 - Cc category */ CharacterInvalid
,
237 /* 133 - Cc category */ CharacterInvalid
,
238 /* 134 - Cc category */ CharacterInvalid
,
239 /* 135 - Cc category */ CharacterInvalid
,
240 /* 136 - Cc category */ CharacterInvalid
,
241 /* 137 - Cc category */ CharacterInvalid
,
242 /* 138 - Cc category */ CharacterInvalid
,
243 /* 139 - Cc category */ CharacterInvalid
,
244 /* 140 - Cc category */ CharacterInvalid
,
245 /* 141 - Cc category */ CharacterInvalid
,
246 /* 142 - Cc category */ CharacterInvalid
,
247 /* 143 - Cc category */ CharacterInvalid
,
248 /* 144 - Cc category */ CharacterInvalid
,
249 /* 145 - Cc category */ CharacterInvalid
,
250 /* 146 - Cc category */ CharacterInvalid
,
251 /* 147 - Cc category */ CharacterInvalid
,
252 /* 148 - Cc category */ CharacterInvalid
,
253 /* 149 - Cc category */ CharacterInvalid
,
254 /* 150 - Cc category */ CharacterInvalid
,
255 /* 151 - Cc category */ CharacterInvalid
,
256 /* 152 - Cc category */ CharacterInvalid
,
257 /* 153 - Cc category */ CharacterInvalid
,
258 /* 154 - Cc category */ CharacterInvalid
,
259 /* 155 - Cc category */ CharacterInvalid
,
260 /* 156 - Cc category */ CharacterInvalid
,
261 /* 157 - Cc category */ CharacterInvalid
,
262 /* 158 - Cc category */ CharacterInvalid
,
263 /* 159 - Cc category */ CharacterInvalid
,
264 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace
,
265 /* 161 - Po category */ CharacterInvalid
,
266 /* 162 - Sc category */ CharacterInvalid
,
267 /* 163 - Sc category */ CharacterInvalid
,
268 /* 164 - Sc category */ CharacterInvalid
,
269 /* 165 - Sc category */ CharacterInvalid
,
270 /* 166 - So category */ CharacterInvalid
,
271 /* 167 - So category */ CharacterInvalid
,
272 /* 168 - Sk category */ CharacterInvalid
,
273 /* 169 - So category */ CharacterInvalid
,
274 /* 170 - Ll category */ CharacterIdentifierStart
,
275 /* 171 - Pi category */ CharacterInvalid
,
276 /* 172 - Sm category */ CharacterInvalid
,
277 /* 173 - Cf category */ CharacterInvalid
,
278 /* 174 - So category */ CharacterInvalid
,
279 /* 175 - Sk category */ CharacterInvalid
,
280 /* 176 - So category */ CharacterInvalid
,
281 /* 177 - Sm category */ CharacterInvalid
,
282 /* 178 - No category */ CharacterInvalid
,
283 /* 179 - No category */ CharacterInvalid
,
284 /* 180 - Sk category */ CharacterInvalid
,
285 /* 181 - Ll category */ CharacterIdentifierStart
,
286 /* 182 - So category */ CharacterInvalid
,
287 /* 183 - Po category */ CharacterInvalid
,
288 /* 184 - Sk category */ CharacterInvalid
,
289 /* 185 - No category */ CharacterInvalid
,
290 /* 186 - Ll category */ CharacterIdentifierStart
,
291 /* 187 - Pf category */ CharacterInvalid
,
292 /* 188 - No category */ CharacterInvalid
,
293 /* 189 - No category */ CharacterInvalid
,
294 /* 190 - No category */ CharacterInvalid
,
295 /* 191 - Po category */ CharacterInvalid
,
296 /* 192 - Lu category */ CharacterIdentifierStart
,
297 /* 193 - Lu category */ CharacterIdentifierStart
,
298 /* 194 - Lu category */ CharacterIdentifierStart
,
299 /* 195 - Lu category */ CharacterIdentifierStart
,
300 /* 196 - Lu category */ CharacterIdentifierStart
,
301 /* 197 - Lu category */ CharacterIdentifierStart
,
302 /* 198 - Lu category */ CharacterIdentifierStart
,
303 /* 199 - Lu category */ CharacterIdentifierStart
,
304 /* 200 - Lu category */ CharacterIdentifierStart
,
305 /* 201 - Lu category */ CharacterIdentifierStart
,
306 /* 202 - Lu category */ CharacterIdentifierStart
,
307 /* 203 - Lu category */ CharacterIdentifierStart
,
308 /* 204 - Lu category */ CharacterIdentifierStart
,
309 /* 205 - Lu category */ CharacterIdentifierStart
,
310 /* 206 - Lu category */ CharacterIdentifierStart
,
311 /* 207 - Lu category */ CharacterIdentifierStart
,
312 /* 208 - Lu category */ CharacterIdentifierStart
,
313 /* 209 - Lu category */ CharacterIdentifierStart
,
314 /* 210 - Lu category */ CharacterIdentifierStart
,
315 /* 211 - Lu category */ CharacterIdentifierStart
,
316 /* 212 - Lu category */ CharacterIdentifierStart
,
317 /* 213 - Lu category */ CharacterIdentifierStart
,
318 /* 214 - Lu category */ CharacterIdentifierStart
,
319 /* 215 - Sm category */ CharacterInvalid
,
320 /* 216 - Lu category */ CharacterIdentifierStart
,
321 /* 217 - Lu category */ CharacterIdentifierStart
,
322 /* 218 - Lu category */ CharacterIdentifierStart
,
323 /* 219 - Lu category */ CharacterIdentifierStart
,
324 /* 220 - Lu category */ CharacterIdentifierStart
,
325 /* 221 - Lu category */ CharacterIdentifierStart
,
326 /* 222 - Lu category */ CharacterIdentifierStart
,
327 /* 223 - Ll category */ CharacterIdentifierStart
,
328 /* 224 - Ll category */ CharacterIdentifierStart
,
329 /* 225 - Ll category */ CharacterIdentifierStart
,
330 /* 226 - Ll category */ CharacterIdentifierStart
,
331 /* 227 - Ll category */ CharacterIdentifierStart
,
332 /* 228 - Ll category */ CharacterIdentifierStart
,
333 /* 229 - Ll category */ CharacterIdentifierStart
,
334 /* 230 - Ll category */ CharacterIdentifierStart
,
335 /* 231 - Ll category */ CharacterIdentifierStart
,
336 /* 232 - Ll category */ CharacterIdentifierStart
,
337 /* 233 - Ll category */ CharacterIdentifierStart
,
338 /* 234 - Ll category */ CharacterIdentifierStart
,
339 /* 235 - Ll category */ CharacterIdentifierStart
,
340 /* 236 - Ll category */ CharacterIdentifierStart
,
341 /* 237 - Ll category */ CharacterIdentifierStart
,
342 /* 238 - Ll category */ CharacterIdentifierStart
,
343 /* 239 - Ll category */ CharacterIdentifierStart
,
344 /* 240 - Ll category */ CharacterIdentifierStart
,
345 /* 241 - Ll category */ CharacterIdentifierStart
,
346 /* 242 - Ll category */ CharacterIdentifierStart
,
347 /* 243 - Ll category */ CharacterIdentifierStart
,
348 /* 244 - Ll category */ CharacterIdentifierStart
,
349 /* 245 - Ll category */ CharacterIdentifierStart
,
350 /* 246 - Ll category */ CharacterIdentifierStart
,
351 /* 247 - Sm category */ CharacterInvalid
,
352 /* 248 - Ll category */ CharacterIdentifierStart
,
353 /* 249 - Ll category */ CharacterIdentifierStart
,
354 /* 250 - Ll category */ CharacterIdentifierStart
,
355 /* 251 - Ll category */ CharacterIdentifierStart
,
356 /* 252 - Ll category */ CharacterIdentifierStart
,
357 /* 253 - Ll category */ CharacterIdentifierStart
,
358 /* 254 - Ll category */ CharacterIdentifierStart
,
359 /* 255 - Ll category */ CharacterIdentifierStart
362 // This table provides the character that results from \X, where X is an ASCII character whose
363 // code is used directly as the index into the table. A table value of 0 means that more processing needs to be done.
364 static const LChar singleCharacterEscapeValuesForASCII
[128] = {
366 /* 1 - Start of Heading */ 0,
367 /* 2 - Start of Text */ 0,
368 /* 3 - End of Text */ 0,
369 /* 4 - End of Transm. */ 0,
371 /* 6 - Acknowledgment */ 0,
373 /* 8 - Back Space */ 0,
374 /* 9 - Horizontal Tab */ 0,
375 /* 10 - Line Feed */ 0,
376 /* 11 - Vertical Tab */ 0,
377 /* 12 - Form Feed */ 0,
378 /* 13 - Carriage Return */ 0,
379 /* 14 - Shift Out */ 0,
380 /* 15 - Shift In */ 0,
381 /* 16 - Data Line Escape */ 0,
382 /* 17 - Device Control 1 */ 0,
383 /* 18 - Device Control 2 */ 0,
384 /* 19 - Device Control 3 */ 0,
385 /* 20 - Device Control 4 */ 0,
386 /* 21 - Negative Ack. */ 0,
387 /* 22 - Synchronous Idle */ 0,
388 /* 23 - End of Transmit */ 0,
390 /* 25 - End of Medium */ 0,
391 /* 26 - Substitute */ 0,
393 /* 28 - File Separator */ 0,
394 /* 29 - Group Separator */ 0,
395 /* 30 - Record Separator */ 0,
396 /* 31 - Unit Separator */ 0,
397 /* 32 - Space */ ' ',
495 template <typename T
>
496 Lexer
<T
>::Lexer(VM
* vm
, JSParserBuiltinMode builtinMode
)
497 : m_isReparsing(false)
499 , m_parsingBuiltinFunction(builtinMode
== JSParserBuiltinMode::Builtin
)
503 static inline JSTokenType
tokenTypeForIntegerLikeToken(double doubleValue
)
505 if ((doubleValue
|| !std::signbit(doubleValue
)) && static_cast<int64_t>(doubleValue
) == doubleValue
)
510 template <typename T
>
515 template <typename T
>
516 String Lexer
<T
>::invalidCharacterMessage() const
520 return ASCIILiteral("Invalid character: '\\0'");
522 return ASCIILiteral("Invalid character: '\\n'");
524 return ASCIILiteral("Invalid character: '\\v'");
526 return ASCIILiteral("Invalid character: '\\r'");
528 return ASCIILiteral("Invalid character: '#'");
530 return ASCIILiteral("Invalid character: '@'");
532 return ASCIILiteral("Invalid character: '`'");
534 return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current
));
538 template <typename T
>
539 ALWAYS_INLINE
const T
* Lexer
<T
>::currentSourcePtr() const
541 ASSERT(m_code
<= m_codeEnd
);
545 template <typename T
>
546 void Lexer
<T
>::setCode(const SourceCode
& source
, ParserArena
* arena
)
548 m_arena
= &arena
->identifierArena();
550 m_lineNumber
= source
.firstLine();
553 const String
& sourceString
= source
.provider()->source();
555 if (!sourceString
.isNull())
556 setCodeStart(sourceString
.impl());
561 m_sourceOffset
= source
.startOffset();
562 m_codeStartPlusOffset
= m_codeStart
+ source
.startOffset();
563 m_code
= m_codeStartPlusOffset
;
564 m_codeEnd
= m_codeStart
+ source
.endOffset();
566 m_atLineStart
= true;
567 m_lineStart
= m_code
;
568 m_lexErrorMessage
= String();
570 m_buffer8
.reserveInitialCapacity(initialReadBufferCapacity
);
571 m_buffer16
.reserveInitialCapacity((m_codeEnd
- m_code
) / 2);
572 m_bufferForRawTemplateString16
.reserveInitialCapacity(initialReadBufferCapacity
);
574 if (LIKELY(m_code
< m_codeEnd
))
578 ASSERT(currentOffset() == source
.startOffset());
581 template <typename T
>
582 template <int shiftAmount
> ALWAYS_INLINE
void Lexer
<T
>::internalShift()
584 m_code
+= shiftAmount
;
585 ASSERT(currentOffset() >= currentLineStartOffset());
589 template <typename T
>
590 ALWAYS_INLINE
void Lexer
<T
>::shift()
592 // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
595 if (LIKELY(m_code
< m_codeEnd
))
599 template <typename T
>
600 ALWAYS_INLINE
bool Lexer
<T
>::atEnd() const
602 ASSERT(!m_current
|| m_code
< m_codeEnd
);
603 return UNLIKELY(UNLIKELY(!m_current
) && m_code
== m_codeEnd
);
606 template <typename T
>
607 ALWAYS_INLINE T Lexer
<T
>::peek(int offset
) const
609 ASSERT(offset
> 0 && offset
< 5);
610 const T
* code
= m_code
+ offset
;
611 return (code
< m_codeEnd
) ? *code
: 0;
614 struct ParsedUnicodeEscapeValue
{
615 ParsedUnicodeEscapeValue(UChar32 value
)
621 enum SpecialValueType
{ Incomplete
= -2, Invalid
= -1 };
622 ParsedUnicodeEscapeValue(SpecialValueType type
)
627 bool isValid() const { return m_value
>= 0; }
628 bool isIncomplete() const { return m_value
== Incomplete
; }
630 UChar32
value() const
640 template<typename CharacterType
> ParsedUnicodeEscapeValue Lexer
<CharacterType
>::parseUnicodeEscape()
642 if (m_current
== '{') {
644 UChar32 codePoint
= 0;
646 if (!isASCIIHexDigit(m_current
))
647 return m_current
? ParsedUnicodeEscapeValue::Invalid
: ParsedUnicodeEscapeValue::Incomplete
;
648 codePoint
= (codePoint
<< 4) | toASCIIHexValue(m_current
);
649 if (codePoint
> UCHAR_MAX_VALUE
)
650 return ParsedUnicodeEscapeValue::Invalid
;
652 } while (m_current
!= '}');
657 auto character2
= peek(1);
658 auto character3
= peek(2);
659 auto character4
= peek(3);
660 if (UNLIKELY(!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(character2
) || !isASCIIHexDigit(character3
) || !isASCIIHexDigit(character4
)))
661 return (m_code
+ 4) >= m_codeEnd
? ParsedUnicodeEscapeValue::Incomplete
: ParsedUnicodeEscapeValue::Invalid
;
662 auto result
= convertUnicode(m_current
, character2
, character3
, character4
);
670 template <typename T
>
671 void Lexer
<T
>::shiftLineTerminator()
673 ASSERT(isLineTerminator(m_current
));
675 m_positionBeforeLastNewline
= currentPosition();
679 // Allow both CRLF and LFCR.
680 if (prev
+ m_current
== '\n' + '\r')
686 template <typename T
>
687 ALWAYS_INLINE
bool Lexer
<T
>::lastTokenWasRestrKeyword() const
689 return m_lastToken
== CONTINUE
|| m_lastToken
== BREAK
|| m_lastToken
== RETURN
|| m_lastToken
== THROW
;
692 static NEVER_INLINE
bool isNonLatin1IdentStart(UChar c
)
694 return U_GET_GC_MASK(c
) & U_GC_L_MASK
;
697 static ALWAYS_INLINE
bool isLatin1(LChar
)
702 static ALWAYS_INLINE
bool isLatin1(UChar c
)
707 static ALWAYS_INLINE
bool isLatin1(UChar32 c
)
712 static inline bool isIdentStart(LChar c
)
714 return typesOfLatin1Characters
[c
] == CharacterIdentifierStart
;
717 static inline bool isIdentStart(UChar32 c
)
719 return isLatin1(c
) ? isIdentStart(static_cast<LChar
>(c
)) : isNonLatin1IdentStart(c
);
722 static NEVER_INLINE
bool isNonLatin1IdentPart(UChar32 c
)
724 // FIXME: ES6 says this should be based on the Unicode property ID_Continue now instead.
725 return (U_GET_GC_MASK(c
) & (U_GC_L_MASK
| U_GC_MN_MASK
| U_GC_MC_MASK
| U_GC_ND_MASK
| U_GC_PC_MASK
)) || c
== 0x200C || c
== 0x200D;
728 static ALWAYS_INLINE
bool isIdentPart(LChar c
)
730 // Character types are divided into two groups depending on whether they can be part of an
731 // identifier or not. Those whose type value is less or equal than CharacterNumber can be
732 // part of an identifier. (See the CharacterType definition for more details.)
733 return typesOfLatin1Characters
[c
] <= CharacterNumber
;
736 static ALWAYS_INLINE
bool isIdentPart(UChar32 c
)
738 return isLatin1(c
) ? isIdentPart(static_cast<LChar
>(c
)) : isNonLatin1IdentPart(c
);
741 static ALWAYS_INLINE
bool isIdentPart(UChar c
)
743 return isIdentPart(static_cast<UChar32
>(c
));
746 template<typename CharacterType
> ALWAYS_INLINE
bool isIdentPartIncludingEscapeTemplate(const CharacterType
* code
, const CharacterType
* codeEnd
)
748 if (isIdentPart(code
[0]))
751 // Shortest sequence handled below is \u{0}, which is 5 characters.
752 if (!(code
[0] == '\\' && codeEnd
- code
>= 5 && code
[1] == 'u'))
755 if (code
[2] == '{') {
756 UChar32 codePoint
= 0;
757 const CharacterType
* pointer
;
758 for (pointer
= &code
[3]; pointer
< codeEnd
; ++pointer
) {
759 auto digit
= *pointer
;
760 if (!isASCIIHexDigit(digit
))
762 codePoint
= (codePoint
<< 4) | toASCIIHexValue(digit
);
763 if (codePoint
> UCHAR_MAX_VALUE
)
766 return isIdentPart(codePoint
) && pointer
< codeEnd
&& *pointer
== '}';
769 // Shortest sequence handled below is \uXXXX, which is 6 characters.
770 if (codeEnd
- code
< 6)
773 auto character1
= code
[2];
774 auto character2
= code
[3];
775 auto character3
= code
[4];
776 auto character4
= code
[5];
777 return isASCIIHexDigit(character1
) && isASCIIHexDigit(character2
) && isASCIIHexDigit(character3
) && isASCIIHexDigit(character4
)
778 && isIdentPart(Lexer
<LChar
>::convertUnicode(character1
, character2
, character3
, character4
));
781 static ALWAYS_INLINE
bool isIdentPartIncludingEscape(const LChar
* code
, const LChar
* codeEnd
)
783 return isIdentPartIncludingEscapeTemplate(code
, codeEnd
);
786 static ALWAYS_INLINE
bool isIdentPartIncludingEscape(const UChar
* code
, const UChar
* codeEnd
)
788 return isIdentPartIncludingEscapeTemplate(code
, codeEnd
);
791 static inline LChar
singleEscape(int c
)
794 ASSERT(static_cast<size_t>(c
) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII
));
795 return singleCharacterEscapeValuesForASCII
[c
];
800 template <typename T
>
801 inline void Lexer
<T
>::record8(int c
)
805 m_buffer8
.append(static_cast<LChar
>(c
));
808 template <typename T
>
809 inline void assertCharIsIn8BitRange(T c
)
817 inline void assertCharIsIn8BitRange(UChar c
)
824 inline void assertCharIsIn8BitRange(LChar
)
828 template <typename T
>
829 inline void Lexer
<T
>::append8(const T
* p
, size_t length
)
831 size_t currentSize
= m_buffer8
.size();
832 m_buffer8
.grow(currentSize
+ length
);
833 LChar
* rawBuffer
= m_buffer8
.data() + currentSize
;
835 for (size_t i
= 0; i
< length
; i
++) {
837 assertCharIsIn8BitRange(c
);
842 template <typename T
>
843 inline void Lexer
<T
>::append16(const LChar
* p
, size_t length
)
845 size_t currentSize
= m_buffer16
.size();
846 m_buffer16
.grow(currentSize
+ length
);
847 UChar
* rawBuffer
= m_buffer16
.data() + currentSize
;
849 for (size_t i
= 0; i
< length
; i
++)
853 template <typename T
>
854 inline void Lexer
<T
>::record16(T c
)
856 m_buffer16
.append(c
);
859 template <typename T
>
860 inline void Lexer
<T
>::record16(int c
)
863 ASSERT(c
<= static_cast<int>(USHRT_MAX
));
864 m_buffer16
.append(static_cast<UChar
>(c
));
867 template<typename CharacterType
> inline void Lexer
<CharacterType
>::recordUnicodeCodePoint(UChar32 codePoint
)
869 ASSERT(codePoint
>= 0);
870 ASSERT(codePoint
<= UCHAR_MAX_VALUE
);
871 if (U_IS_BMP(codePoint
))
874 UChar codeUnits
[2] = { U16_LEAD(codePoint
), U16_TRAIL(codePoint
) };
875 append16(codeUnits
, 2);
880 bool isSafeBuiltinIdentifier(VM
& vm
, const Identifier
* ident
)
884 /* Just block any use of suspicious identifiers. This is intended to
885 * be used as a safety net while implementing builtins.
887 // FIXME: How can a debug-only assertion be a safety net?
888 if (*ident
== vm
.propertyNames
->builtinNames().callPublicName())
890 if (*ident
== vm
.propertyNames
->builtinNames().applyPublicName())
892 if (*ident
== vm
.propertyNames
->eval
)
894 if (*ident
== vm
.propertyNames
->Function
)
901 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType Lexer
<LChar
>::parseIdentifier(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
)
903 const ptrdiff_t remaining
= m_codeEnd
- m_code
;
904 if ((remaining
>= maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
)) {
905 JSTokenType keyword
= parseKeyword
<shouldCreateIdentifier
>(tokenData
);
906 if (keyword
!= IDENT
) {
907 ASSERT((!shouldCreateIdentifier
) || tokenData
->ident
);
908 return keyword
== RESERVED_IF_STRICT
&& !strictMode
? IDENT
: keyword
;
912 bool isPrivateName
= m_current
== '@' && m_parsingBuiltinFunction
;
916 const LChar
* identifierStart
= currentSourcePtr();
917 unsigned identifierLineStart
= currentLineStartOffset();
919 while (isIdentPart(m_current
))
922 if (UNLIKELY(m_current
== '\\')) {
923 setOffsetFromSourcePtr(identifierStart
, identifierLineStart
);
924 return parseIdentifierSlowCase
<shouldCreateIdentifier
>(tokenData
, lexerFlags
, strictMode
);
927 const Identifier
* ident
= 0;
929 if (shouldCreateIdentifier
|| m_parsingBuiltinFunction
) {
930 int identifierLength
= currentSourcePtr() - identifierStart
;
931 ident
= makeIdentifier(identifierStart
, identifierLength
);
932 if (m_parsingBuiltinFunction
) {
933 if (!isSafeBuiltinIdentifier(*m_vm
, ident
) && !isPrivateName
) {
934 m_lexErrorMessage
= makeString("The use of '", ident
->string(), "' is disallowed in builtin functions.");
938 ident
= m_vm
->propertyNames
->getPrivateName(*ident
);
939 else if (*ident
== m_vm
->propertyNames
->undefinedKeyword
)
940 tokenData
->ident
= &m_vm
->propertyNames
->undefinedPrivateName
;
942 return INVALID_PRIVATE_NAME_ERRORTOK
;
944 tokenData
->ident
= ident
;
946 tokenData
->ident
= 0;
948 if (UNLIKELY((remaining
< maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
)) && !isPrivateName
) {
949 ASSERT(shouldCreateIdentifier
);
950 if (remaining
< maxTokenLength
) {
951 const HashTableValue
* entry
= m_vm
->keywords
->getKeyword(*ident
);
952 ASSERT((remaining
< maxTokenLength
) || !entry
);
955 JSTokenType token
= static_cast<JSTokenType
>(entry
->lexerValue());
956 return (token
!= RESERVED_IF_STRICT
) || strictMode
? token
: IDENT
;
965 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType Lexer
<UChar
>::parseIdentifier(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
)
967 const ptrdiff_t remaining
= m_codeEnd
- m_code
;
968 if ((remaining
>= maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
)) {
969 JSTokenType keyword
= parseKeyword
<shouldCreateIdentifier
>(tokenData
);
970 if (keyword
!= IDENT
) {
971 ASSERT((!shouldCreateIdentifier
) || tokenData
->ident
);
972 return keyword
== RESERVED_IF_STRICT
&& !strictMode
? IDENT
: keyword
;
976 bool isPrivateName
= m_current
== '@' && m_parsingBuiltinFunction
;
980 const UChar
* identifierStart
= currentSourcePtr();
981 int identifierLineStart
= currentLineStartOffset();
983 UChar orAllChars
= 0;
985 while (isIdentPart(m_current
)) {
986 orAllChars
|= m_current
;
990 if (UNLIKELY(m_current
== '\\')) {
991 ASSERT(!isPrivateName
);
992 setOffsetFromSourcePtr(identifierStart
, identifierLineStart
);
993 return parseIdentifierSlowCase
<shouldCreateIdentifier
>(tokenData
, lexerFlags
, strictMode
);
996 bool isAll8Bit
= false;
998 if (!(orAllChars
& ~0xff))
1001 const Identifier
* ident
= 0;
1003 if (shouldCreateIdentifier
|| m_parsingBuiltinFunction
) {
1004 int identifierLength
= currentSourcePtr() - identifierStart
;
1006 ident
= makeIdentifierLCharFromUChar(identifierStart
, identifierLength
);
1008 ident
= makeIdentifier(identifierStart
, identifierLength
);
1009 if (m_parsingBuiltinFunction
) {
1010 if (!isSafeBuiltinIdentifier(*m_vm
, ident
) && !isPrivateName
) {
1011 m_lexErrorMessage
= makeString("The use of '", ident
->string(), "' is disallowed in builtin functions.");
1015 ident
= m_vm
->propertyNames
->getPrivateName(*ident
);
1016 else if (*ident
== m_vm
->propertyNames
->undefinedKeyword
)
1017 tokenData
->ident
= &m_vm
->propertyNames
->undefinedPrivateName
;
1019 return INVALID_PRIVATE_NAME_ERRORTOK
;
1021 tokenData
->ident
= ident
;
1023 tokenData
->ident
= 0;
1025 if (UNLIKELY((remaining
< maxTokenLength
) && !(lexerFlags
& LexerFlagsIgnoreReservedWords
)) && !isPrivateName
) {
1026 ASSERT(shouldCreateIdentifier
);
1027 if (remaining
< maxTokenLength
) {
1028 const HashTableValue
* entry
= m_vm
->keywords
->getKeyword(*ident
);
1029 ASSERT((remaining
< maxTokenLength
) || !entry
);
1032 JSTokenType token
= static_cast<JSTokenType
>(entry
->lexerValue());
1033 return (token
!= RESERVED_IF_STRICT
) || strictMode
? token
: IDENT
;
1041 template<typename CharacterType
> template<bool shouldCreateIdentifier
> JSTokenType Lexer
<CharacterType
>::parseIdentifierSlowCase(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
)
1043 auto identifierStart
= currentSourcePtr();
1044 bool bufferRequired
= false;
1047 if (LIKELY(isIdentPart(m_current
))) {
1051 if (LIKELY(m_current
!= '\\'))
1054 // \uXXXX unicode characters.
1055 bufferRequired
= true;
1056 if (identifierStart
!= currentSourcePtr())
1057 m_buffer16
.append(identifierStart
, currentSourcePtr() - identifierStart
);
1059 if (UNLIKELY(m_current
!= 'u'))
1060 return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK
: INVALID_IDENTIFIER_ESCAPE_ERRORTOK
;
1062 auto character
= parseUnicodeEscape();
1063 if (UNLIKELY(!character
.isValid()))
1064 return character
.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
: INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
;
1065 if (UNLIKELY(m_buffer16
.size() ? !isIdentPart(character
.value()) : !isIdentStart(character
.value())))
1066 return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
;
1067 if (shouldCreateIdentifier
)
1068 recordUnicodeCodePoint(character
.value());
1069 identifierStart
= currentSourcePtr();
1072 int identifierLength
;
1073 const Identifier
* ident
= nullptr;
1074 if (shouldCreateIdentifier
) {
1075 if (!bufferRequired
) {
1076 identifierLength
= currentSourcePtr() - identifierStart
;
1077 ident
= makeIdentifier(identifierStart
, identifierLength
);
1079 if (identifierStart
!= currentSourcePtr())
1080 m_buffer16
.append(identifierStart
, currentSourcePtr() - identifierStart
);
1081 ident
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size());
1084 tokenData
->ident
= ident
;
1086 tokenData
->ident
= nullptr;
1088 m_buffer16
.shrink(0);
1090 if (LIKELY(!(lexerFlags
& LexerFlagsIgnoreReservedWords
))) {
1091 ASSERT(shouldCreateIdentifier
);
1092 const HashTableValue
* entry
= m_vm
->keywords
->getKeyword(*ident
);
1095 JSTokenType token
= static_cast<JSTokenType
>(entry
->lexerValue());
1096 return (token
!= RESERVED_IF_STRICT
) || strictMode
? token
: IDENT
;
1102 static ALWAYS_INLINE
bool characterRequiresParseStringSlowCase(LChar character
)
1104 return character
< 0xE;
// UChar overload of the fast-path filter that parseString applies to each
// ordinary character: returns true when the character's value is below 0xE
// or above 0xFF.
1107 static ALWAYS_INLINE
bool characterRequiresParseStringSlowCase(UChar character
)
1109 return character
< 0xE || character
> 0xFF;
// Fast-path scanner for a quoted string literal.
//
// Scans until m_current equals the quote character that was current on entry,
// collecting characters through append8/record8 when shouldBuildStrings is
// true. Two visible places rewind to the saved starting offset and line
// number, empty m_buffer8, and hand the whole literal to parseStringSlowCase.
//
// tokenData  - receives the resulting identifier in tokenData->ident.
// strictMode - only forwarded to parseStringSlowCase in the visible code.
// Returns StringParsedSuccessfully, StringUnterminated or
// StringCannotBeParsed, or whatever parseStringSlowCase returns.
//
// NOTE(review): some original lines of this function (braces, else branches,
// statements without identifiers) are not visible in this view; the comments
// below describe only the visible statements.
1112 template <typename T
>
1113 template <bool shouldBuildStrings
> ALWAYS_INLINE typename Lexer
<T
>::StringParseResult Lexer
<T
>::parseString(JSTokenData
* tokenData
, bool strictMode
)
// Remember where the literal starts so the scan can be rewound before the
// slow-case fallback.
1115 int startingOffset
= currentOffset();
1116 int startingLineStartOffset
= currentLineStartOffset();
1117 int startingLineNumber
= lineNumber();
// The character current on entry is the terminator tested by the loop below.
1118 T stringQuoteCharacter
= m_current
;
1121 const T
* stringStart
= currentSourcePtr();
1123 while (m_current
!= stringQuoteCharacter
) {
1124 if (UNLIKELY(m_current
== '\\')) {
// Flush the pending run of plain characters before handling the escape.
1125 if (stringStart
!= currentSourcePtr() && shouldBuildStrings
)
1126 append8(stringStart
, currentSourcePtr() - stringStart
);
1129 LChar escape
= singleEscape(m_current
);
1131 // Most common escape sequences first.
1133 if (shouldBuildStrings
)
1136 } else if (UNLIKELY(isLineTerminator(m_current
)))
1137 shiftLineTerminator();
1138 else if (m_current
== 'x') {
// Both the current and the next character must be hex digits.
1140 if (!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(peek(1))) {
1141 m_lexErrorMessage
= ASCIILiteral("\\x can only be followed by a hex character sequence");
// Unterminated when at end of input, or when the current character is a hex
// digit and m_code + 1 == m_codeEnd (presumably: it is the last character of
// the input - confirm); otherwise the literal cannot be parsed.
1142 return (atEnd() || (isASCIIHexDigit(m_current
) && (m_code
+ 1 == m_codeEnd
))) ? StringUnterminated
: StringCannotBeParsed
;
1146 if (shouldBuildStrings
)
1147 record8(convertHex(prev
, m_current
));
// Fallback: rewind to the start of the literal, drop anything buffered so
// far, and redo the literal in parseStringSlowCase. (The condition selecting
// this path is on a line not visible in this view.)
1150 setOffset(startingOffset
, startingLineStartOffset
);
1151 setLineNumber(startingLineNumber
);
1152 m_buffer8
.shrink(0);
1153 return parseStringSlowCase
<shouldBuildStrings
>(tokenData
, strictMode
);
1155 stringStart
= currentSourcePtr();
// Characters flagged by characterRequiresParseStringSlowCase trigger the same
// rewind-and-retry fallback.
1159 if (UNLIKELY(characterRequiresParseStringSlowCase(m_current
))) {
1160 setOffset(startingOffset
, startingLineStartOffset
);
1161 setLineNumber(startingLineNumber
);
1162 m_buffer8
.shrink(0);
1163 return parseStringSlowCase
<shouldBuildStrings
>(tokenData
, strictMode
);
// Flush the trailing run of plain characters.
1169 if (currentSourcePtr() != stringStart
&& shouldBuildStrings
)
1170 append8(stringStart
, currentSourcePtr() - stringStart
);
// When building strings, the identifier is made from m_buffer8 and the buffer
// is emptied; the assignment of 0 below presumably belongs to the
// non-building branch (its else line is not visible).
1171 if (shouldBuildStrings
) {
1172 tokenData
->ident
= makeIdentifier(m_buffer8
.data(), m_buffer8
.size());
1173 m_buffer8
.shrink(0);
1175 tokenData
->ident
= 0;
1177 return StringParsedSuccessfully
;
// Parses the part of an escape sequence that follows the backslash for the
// escape kinds visible below: 'x' hex escapes, 'u' unicode escapes, numeric
// escapes and octal sequences. Called in this file from parseStringSlowCase
// and parseTemplateLiteral.
//
// escapeParseMode      - compared against EscapeParseMode::String in the 'u'
//                        branch.
// strictMode           - its use is on a line not visible in this view; the
//                        comment in the numeric-escape branch suggests it
//                        gates that check (TODO confirm).
// stringQuoteCharacter - compared against m_current in the 'u' branch.
// Returns StringParsedSuccessfully, StringCannotBeParsed or
// StringUnterminated; on the visible failure paths m_lexErrorMessage is set
// first.
//
// Decoded characters are recorded (record16 / recordUnicodeCodePoint) only
// when shouldBuildStrings is true.
1180 template <typename T
>
1181 template <bool shouldBuildStrings
> ALWAYS_INLINE
auto Lexer
<T
>::parseComplexEscape(EscapeParseMode escapeParseMode
, bool strictMode
, T stringQuoteCharacter
) -> StringParseResult
1183 if (m_current
== 'x') {
// Both the current and the next character must be hex digits.
1185 if (!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(peek(1))) {
1186 m_lexErrorMessage
= ASCIILiteral("\\x can only be followed by a hex character sequence");
1187 return StringCannotBeParsed
;
1191 if (shouldBuildStrings
)
1192 record16(convertHex(prev
, m_current
));
1194 return StringParsedSuccessfully
;
1197 if (m_current
== 'u') {
// In string mode, reaching the closing quote at this point is reported as
// success. What gets recorded when building strings is on a line not visible
// in this view.
1200 if (escapeParseMode
== EscapeParseMode::String
&& m_current
== stringQuoteCharacter
) {
1201 if (shouldBuildStrings
)
1203 return StringParsedSuccessfully
;
1206 auto character
= parseUnicodeEscape();
1207 if (character
.isValid()) {
1208 if (shouldBuildStrings
)
1209 recordUnicodeCodePoint(character
.value());
1210 return StringParsedSuccessfully
;
// Invalid escape: an incomplete one is reported as unterminated, anything
// else as unparsable.
1213 m_lexErrorMessage
= ASCIILiteral("\\u can only be followed by a Unicode character sequence");
1214 return character
.isIncomplete() ? StringUnterminated
: StringCannotBeParsed
;
1218 if (isASCIIDigit(m_current
)) {
1219 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1220 int character1
= m_current
;
1222 if (character1
!= '0' || isASCIIDigit(m_current
)) {
1223 m_lexErrorMessage
= ASCIILiteral("The only valid numeric escape in strict mode is '\\0'");
1224 return StringCannotBeParsed
;
1226 if (shouldBuildStrings
)
1228 return StringParsedSuccessfully
;
1231 if (isASCIIOctalDigit(m_current
)) {
1232 // Octal character sequences
1233 T character1
= m_current
;
1235 if (isASCIIOctalDigit(m_current
)) {
1236 // Two octal characters
1237 T character2
= m_current
;
// A third octal digit is consumed only when the first digit is '0'..'3'.
1239 if (character1
>= '0' && character1
<= '3' && isASCIIOctalDigit(m_current
)) {
1240 if (shouldBuildStrings
)
1241 record16((character1
- '0') * 64 + (character2
- '0') * 8 + m_current
- '0');
1244 if (shouldBuildStrings
)
1245 record16((character1
- '0') * 8 + character2
- '0');
1248 if (shouldBuildStrings
)
1249 record16(character1
- '0');
1251 return StringParsedSuccessfully
;
// The current character is recorded as-is. (The condition guarding this path
// is on a line not visible in this view.)
1256 if (shouldBuildStrings
)
1257 record16(m_current
);
1259 return StringParsedSuccessfully
;
1262 m_lexErrorMessage
= ASCIILiteral("Unterminated string constant");
1263 return StringUnterminated
;
// General string-literal scanner; parseString falls back to it.
//
// Scans until m_current equals the quote character that was current on entry.
// Characters are collected through append16 when shouldBuildStrings is true.
// Escapes go through singleEscape and parseComplexEscape; a non-success
// result from parseComplexEscape is tested below (the statement that follows
// the test is not visible in this view).
//
// tokenData  - receives the resulting identifier in tokenData->ident.
// strictMode - forwarded to parseComplexEscape.
// Returns StringParsedSuccessfully, or StringUnterminated /
// StringCannotBeParsed when end of input or a line terminator is found inside
// the literal.
1266 template <typename T
>
1267 template <bool shouldBuildStrings
> auto Lexer
<T
>::parseStringSlowCase(JSTokenData
* tokenData
, bool strictMode
) -> StringParseResult
1269 T stringQuoteCharacter
= m_current
;
1272 const T
* stringStart
= currentSourcePtr();
1274 while (m_current
!= stringQuoteCharacter
) {
1275 if (UNLIKELY(m_current
== '\\')) {
// Flush the pending run of plain characters before handling the escape.
1276 if (stringStart
!= currentSourcePtr() && shouldBuildStrings
)
1277 append16(stringStart
, currentSourcePtr() - stringStart
);
1280 LChar escape
= singleEscape(m_current
);
1282 // Most common escape sequences first
1284 if (shouldBuildStrings
)
1287 } else if (UNLIKELY(isLineTerminator(m_current
)))
1288 shiftLineTerminator();
1290 StringParseResult result
= parseComplexEscape
<shouldBuildStrings
>(EscapeParseMode::String
, strictMode
, stringQuoteCharacter
);
1291 if (result
!= StringParsedSuccessfully
)
// Restart the plain-character run after the escape.
1295 stringStart
= currentSourcePtr();
1298 // Fast check for characters that require special handling.
1299 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1300 // as possible, and lets through all common ASCII characters.
1301 if (UNLIKELY(((static_cast<unsigned>(m_current
) - 0xE) & 0x2000))) {
1302 // New-line or end of input is not allowed
1303 if (atEnd() || isLineTerminator(m_current
)) {
1304 m_lexErrorMessage
= ASCIILiteral("Unexpected EOF");
1305 return atEnd() ? StringUnterminated
: StringCannotBeParsed
;
1307 // Anything else is just a normal character
// Flush the trailing run of plain characters.
1312 if (currentSourcePtr() != stringStart
&& shouldBuildStrings
)
1313 append16(stringStart
, currentSourcePtr() - stringStart
);
// The identifier is made from m_buffer16 when building strings; the
// assignment of 0 presumably belongs to the non-building branch (its else
// line is not visible). The buffer is emptied either way.
1314 if (shouldBuildStrings
)
1315 tokenData
->ident
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size());
1317 tokenData
->ident
= 0;
1319 m_buffer16
.shrink(0);
1320 return StringParsedSuccessfully
;
1323 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
1324 // While the lexer accepts <LF><CR> (not <CR><LF>) sequence
1325 // as one line terminator and increments one line number,
1326 // TemplateLiteral considers it as two line terminators <LF> and <CR>.
1328 // TemplateLiteral normalizes line terminators as follows.
1333 // <\u2028> => <\u2028>
1334 // <\u2029> => <\u2029>
1336 // So, <LF><CR> should be normalized to <LF><LF>.
1337 // However, the lexer should increment the line number only once for <LF><CR>.
1339 // To achieve this, LineNumberAdder holds the current status of line terminator sequence.
1340 // When the TemplateLiteral lexer encounters a line terminator, it notifies LineNumberAdder.
1341 // LineNumberAdder maintains the status and increments the line number when it's necessary.
1342 // For example, LineNumberAdder increments the line number only once for <LF><CR> and <CR><LF>.
// Helper used by parseTemplateLiteral to account for line terminators.
// NOTE(review): several members (access specifiers, clear(), the m_lineNumber
// declaration and the bodies' braces) are on lines not visible in this view.
1343 template<typename CharacterType
>
1344 class LineNumberAdder
{
// Initializes m_lineNumber from the int& argument.
1346 LineNumberAdder(int& lineNumber
)
1347 : m_lineNumber(lineNumber
)
// Called with a line terminator (asserted). Tests whether this character and
// the previously recorded one sum to '\n' + '\r', i.e. form an <LF>/<CR>
// pair in either order, then remembers the character in m_previous. The
// statements executed for each outcome of the test are not visible here.
1356 void add(CharacterType character
)
1358 ASSERT(Lexer
<CharacterType
>::isLineTerminator(character
));
1359 if ((character
+ m_previous
) == ('\n' + '\r'))
1363 m_previous
= character
;
// Last character passed to add(); starts as 0.
1369 CharacterType m_previous
{ 0 };
// Scans one chunk of a template literal, looping until m_current is '`'.
// Inside the loop a '$' followed by '{' is also tested; the statement taken
// for it is not visible in this view (presumably it ends the chunk - confirm).
//
// When shouldBuildStrings is true, cooked text is collected in m_buffer16
// and, if rawStringsBuildMode is RawStringsBuildMode::BuildRawStrings, raw
// text in m_bufferForRawTemplateString16. <CR> is normalized to '\n' in the
// raw buffer. Line terminators are reported to a LineNumberAdder bound to
// m_lineNumber.
//
// tokenData           - receives cooked, raw and isTail.
// rawStringsBuildMode - selects whether the raw string is built.
// Returns StringParsedSuccessfully, StringUnterminated or
// StringCannotBeParsed.
// Both buffers are emptied before the successful return.
1372 template <typename T
>
1373 template <bool shouldBuildStrings
> typename Lexer
<T
>::StringParseResult Lexer
<T
>::parseTemplateLiteral(JSTokenData
* tokenData
, RawStringsBuildMode rawStringsBuildMode
)
// Separate run starts for the cooked and the raw representation.
1375 const T
* stringStart
= currentSourcePtr();
1376 const T
* rawStringStart
= currentSourcePtr();
1378 LineNumberAdder
<T
> lineNumberAdder(m_lineNumber
);
1380 while (m_current
!= '`') {
1381 if (UNLIKELY(m_current
== '\\')) {
1382 lineNumberAdder
.clear();
// Flush the pending cooked run before handling the escape.
1383 if (stringStart
!= currentSourcePtr() && shouldBuildStrings
)
1384 append16(stringStart
, currentSourcePtr() - stringStart
);
1387 LChar escape
= singleEscape(m_current
);
1389 // Most common escape sequences first.
1391 if (shouldBuildStrings
)
1394 } else if (UNLIKELY(isLineTerminator(m_current
))) {
1395 if (m_current
== '\r') {
1396 lineNumberAdder
.add(m_current
);
1398 if (m_current
== '\n') {
1399 lineNumberAdder
.add(m_current
);
1403 lineNumberAdder
.add(m_current
);
// Escapes inside template literals are always parsed with strictMode = true.
1407 bool strictMode
= true;
1408 StringParseResult result
= parseComplexEscape
<shouldBuildStrings
>(EscapeParseMode::Template
, strictMode
, '`');
1409 if (result
!= StringParsedSuccessfully
)
// Restart the cooked run after the escape.
1413 stringStart
= currentSourcePtr();
1417 if (m_current
== '$' && peek(1) == '{')
1420 // Fast check for characters that require special handling.
1421 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1422 // as possible, and lets through all common ASCII characters.
1423 if (UNLIKELY(((static_cast<unsigned>(m_current
) - 0xE) & 0x2000))) {
1424 // End of input is not allowed.
1425 // Unlike String, line terminator is allowed.
1427 m_lexErrorMessage
= ASCIILiteral("Unexpected EOF");
1428 return atEnd() ? StringUnterminated
: StringCannotBeParsed
;
1431 if (isLineTerminator(m_current
)) {
1432 if (m_current
== '\r') {
1433 // Normalize <CR>, <CR><LF> to <LF>.
1434 if (shouldBuildStrings
) {
// Flush both pending runs up to (not including) the <CR>.
1435 if (stringStart
!= currentSourcePtr())
1436 append16(stringStart
, currentSourcePtr() - stringStart
);
1437 if (rawStringStart
!= currentSourcePtr() && rawStringsBuildMode
== RawStringsBuildMode::BuildRawStrings
)
1438 m_bufferForRawTemplateString16
.append(rawStringStart
, currentSourcePtr() - rawStringStart
);
// The raw buffer receives '\n' in place of the <CR>.
1441 if (rawStringsBuildMode
== RawStringsBuildMode::BuildRawStrings
)
1442 m_bufferForRawTemplateString16
.append('\n');
1444 lineNumberAdder
.add(m_current
);
1446 if (m_current
== '\n') {
1447 lineNumberAdder
.add(m_current
);
// Both runs restart after the normalized terminator.
1450 stringStart
= currentSourcePtr();
1451 rawStringStart
= currentSourcePtr();
1453 lineNumberAdder
.add(m_current
);
1458 // Anything else is just a normal character
1461 lineNumberAdder
.clear();
// True when the chunk ended at the closing back-quote.
1465 bool isTail
= m_current
== '`';
// Flush the trailing cooked and raw runs.
1467 if (shouldBuildStrings
) {
1468 if (currentSourcePtr() != stringStart
)
1469 append16(stringStart
, currentSourcePtr() - stringStart
);
1470 if (rawStringStart
!= currentSourcePtr() && rawStringsBuildMode
== RawStringsBuildMode::BuildRawStrings
)
1471 m_bufferForRawTemplateString16
.append(rawStringStart
, currentSourcePtr() - rawStringStart
);
// Publish the results. The makeEmptyIdentifier() assignments presumably
// belong to the branches where the corresponding string is not built (their
// else lines are not visible).
1474 if (shouldBuildStrings
) {
1475 tokenData
->cooked
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size());
1476 // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
1477 if (rawStringsBuildMode
== RawStringsBuildMode::BuildRawStrings
)
1478 tokenData
->raw
= makeIdentifier(m_bufferForRawTemplateString16
.data(), m_bufferForRawTemplateString16
.size());
1480 tokenData
->raw
= makeEmptyIdentifier();
1482 tokenData
->cooked
= makeEmptyIdentifier();
1483 tokenData
->raw
= makeEmptyIdentifier();
1485 tokenData
->isTail
= isTail
;
1487 m_buffer16
.shrink(0);
1488 m_bufferForRawTemplateString16
.shrink(0);
1499 return StringParsedSuccessfully
;
// Parses a run of hexadecimal digits starting at m_current and stores the
// numeric value in returnValue.
//
// Fast path: digits are accumulated in a uint32_t while the maximumDigits
// budget (initialised to 7) has not gone negative. Slow path: the accumulated
// value is re-emitted digit by digit into m_buffer8, remaining hex digits are
// handled by the trailing loop (its body is not visible in this view), and
// the value is computed by parseIntOverflow with radix 16.
//
// m_buffer8 is not emptied in the visible code; the call site in lex() does
// m_buffer8.shrink(0) after the call.
1503 template <typename T
>
1504 ALWAYS_INLINE
void Lexer
<T
>::parseHex(double& returnValue
)
1506 // Optimization: most hexadecimal values fit into 4 bytes.
1507 uint32_t hexValue
= 0;
1508 int maximumDigits
= 7;
1511 hexValue
= (hexValue
<< 4) + toASCIIHexValue(m_current
);
1514 } while (isASCIIHexDigit(m_current
) && maximumDigits
>= 0);
// Budget not exhausted: the 32-bit accumulator holds the whole value.
1516 if (maximumDigits
>= 0) {
1517 returnValue
= hexValue
;
1521 // No more place in the hexValue buffer.
1522 // The values are shifted out and placed into the m_buffer8 vector.
1523 for (int i
= 0; i
< 8; ++i
) {
// Top nibble of the accumulator, emitted as '0'-'9' or 'a'-'f'.
1524 int digit
= hexValue
>> 28;
1526 record8(digit
+ '0');
1528 record8(digit
- 10 + 'a');
1532 while (isASCIIHexDigit(m_current
)) {
1537 returnValue
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 16);
// Parses a run of binary digits starting at m_current and stores the numeric
// value in returnValue.
//
// Up to 32 digits are accumulated in a uint32_t and mirrored into a local
// digits[] array that is filled from the highest index downwards. If no
// decimal digit follows and the array was not exhausted, the accumulator is
// the result. Otherwise the saved digits are replayed (loop body not visible
// in this view), further binary digits are consumed, and the value is
// computed by parseIntOverflow with radix 2.
//
// Returns bool; the return statements are on lines not visible in this view.
// A decimal digit after the binary digits is tested separately near the end
// (presumably the failure case - confirm).
1540 template <typename T
>
1541 ALWAYS_INLINE
bool Lexer
<T
>::parseBinary(double& returnValue
)
1543 // Optimization: most binary values fit into 4 bytes.
1544 uint32_t binaryValue
= 0;
1545 const unsigned maximumDigits
= 32;
1546 int digit
= maximumDigits
- 1;
1547 // Temporary buffer for the digits. Makes easier
1548 // to reconstruct the input characters when needed.
1549 LChar digits
[maximumDigits
];
1552 binaryValue
= (binaryValue
<< 1) + (m_current
- '0');
1553 digits
[digit
] = m_current
;
1556 } while (isASCIIBinaryDigit(m_current
) && digit
>= 0);
// Fast path: the literal ended and fit in the accumulator.
1558 if (!isASCIIDigit(m_current
) && digit
>= 0) {
1559 returnValue
= binaryValue
;
// Walk the saved digits from the first one stored (highest index) down to the
// last one stored.
1563 for (int i
= maximumDigits
- 1; i
> digit
; --i
)
1566 while (isASCIIBinaryDigit(m_current
)) {
1571 if (isASCIIDigit(m_current
))
1574 returnValue
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 2);
// Parses a run of octal digits starting at m_current and stores the numeric
// value in returnValue.
//
// Up to 10 digits are accumulated in a uint32_t and mirrored into a local
// digits[] array that is filled from the highest index downwards. If no
// decimal digit follows and the array was not exhausted, the accumulator is
// the result. Otherwise the saved digits are replayed (loop body not visible
// in this view), further octal digits are consumed, and the value is
// computed by parseIntOverflow with radix 8.
//
// Returns bool; the return statements are on lines not visible in this view.
// A decimal digit after the octal digits is tested separately near the end
// (presumably the failure case - confirm). parseDecimal's own comment notes
// that m_buffer8 may still hold digits from this function.
1578 template <typename T
>
1579 ALWAYS_INLINE
bool Lexer
<T
>::parseOctal(double& returnValue
)
1581 // Optimization: most octal values fit into 4 bytes.
1582 uint32_t octalValue
= 0;
1583 const unsigned maximumDigits
= 10;
1584 int digit
= maximumDigits
- 1;
1585 // Temporary buffer for the digits. Makes easier
1586 // to reconstruct the input characters when needed.
1587 LChar digits
[maximumDigits
];
1590 octalValue
= octalValue
* 8 + (m_current
- '0');
1591 digits
[digit
] = m_current
;
1594 } while (isASCIIOctalDigit(m_current
) && digit
>= 0);
// Fast path: the literal ended and fit in the accumulator.
1596 if (!isASCIIDigit(m_current
) && digit
>= 0) {
1597 returnValue
= octalValue
;
// Walk the saved digits from the first one stored (highest index) down to the
// last one stored.
1601 for (int i
= maximumDigits
- 1; i
> digit
; --i
)
1604 while (isASCIIOctalDigit(m_current
)) {
1609 if (isASCIIDigit(m_current
))
1612 returnValue
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 8);
// Parses the integer part of a decimal literal starting at m_current.
//
// When m_buffer8 is empty, up to 10 digits are accumulated in a uint32_t and
// mirrored into a local digits[] array. If the array was not exhausted and
// the next character is neither '.' nor 'e'/'E', the accumulator is stored in
// returnValue. Otherwise the saved digits are replayed (loop body not visible
// in this view) and remaining decimal digits are consumed by the final loop.
//
// Returns bool; the return statements are on lines not visible in this view.
// The caller in lex() treats a false result as "continue with fraction and
// exponent parsing" (see the `if (!parseDecimal(...))` there).
1616 template <typename T
>
1617 ALWAYS_INLINE
bool Lexer
<T
>::parseDecimal(double& returnValue
)
1619 // Optimization: most decimal values fit into 4 bytes.
1620 uint32_t decimalValue
= 0;
1622 // Since parseOctal may be executed before parseDecimal,
1623 // the m_buffer8 may hold ascii digits.
1624 if (!m_buffer8
.size()) {
1625 const unsigned maximumDigits
= 10;
1626 int digit
= maximumDigits
- 1;
1627 // Temporary buffer for the digits. Makes easier
1628 // to reconstruct the input characters when needed.
1629 LChar digits
[maximumDigits
];
1632 decimalValue
= decimalValue
* 10 + (m_current
- '0');
1633 digits
[digit
] = m_current
;
1636 } while (isASCIIDigit(m_current
) && digit
>= 0);
// (m_current | 0x20) folds 'E' onto 'e', so both exponent markers are caught.
1638 if (digit
>= 0 && m_current
!= '.' && (m_current
| 0x20) != 'e') {
1639 returnValue
= decimalValue
;
// Walk the saved digits from the first one stored (highest index) down to the
// last one stored.
1643 for (int i
= maximumDigits
- 1; i
> digit
; --i
)
1647 while (isASCIIDigit(m_current
)) {
// Handles the digits after the decimal point of a numeric literal: loops
// while m_current is an ASCII digit. The statements before the loop and the
// loop body are on lines not visible in this view. lex() later converts
// m_buffer8 with parseDouble, so the digits are presumably recorded there -
// TODO confirm.
1655 template <typename T
>
1656 ALWAYS_INLINE
void Lexer
<T
>::parseNumberAfterDecimalPoint()
1659 while (isASCIIDigit(m_current
)) {
// Handles the exponent part of a numeric literal: an optional '+' or '-'
// sign, then a digit is required, then further digits are consumed by a
// do/while loop. Returns bool; the return statements and the loop body are on
// lines not visible in this view. lex() treats a false result as
// "Non-number found after exponent indicator".
1665 template <typename T
>
1666 ALWAYS_INLINE
bool Lexer
<T
>::parseNumberAfterExponentIndicator()
1670 if (m_current
== '+' || m_current
== '-') {
// At least one digit must follow the (optional) sign.
1675 if (!isASCIIDigit(m_current
))
1681 } while (isASCIIDigit(m_current
));
// Scans the body of a multi-line comment. Visible logic: on '*' it checks
// whether '/' follows (the closing sequence), and on a line terminator it
// calls shiftLineTerminator() and sets m_terminator. Returns bool; the
// return statements and the outer loop are on lines not visible in this
// view. lex() reports "Multiline comment was not closed properly" on the
// path taken when the `if (parseMultilineComment())` test there does not
// succeed.
1685 template <typename T
>
1686 ALWAYS_INLINE
bool Lexer
<T
>::parseMultilineComment()
1689 while (UNLIKELY(m_current
== '*')) {
1691 if (m_current
== '/') {
// A newline inside the comment still counts as a terminator for the lexer
// state kept in m_terminator.
1700 if (isLineTerminator(m_current
)) {
1701 shiftLineTerminator();
1702 m_terminator
= true;
// Looks ahead from m_code using a local pointer, passing over whitespace and
// line terminators (the loop body is not visible in this view; presumably it
// advances the pointer), and returns true when the next character before
// m_codeEnd is ':'.
1708 template <typename T
>
1709 bool Lexer
<T
>::nextTokenIsColon()
1711 const T
* code
= m_code
;
1712 while (code
< m_codeEnd
&& (isWhiteSpace(*code
) || isLineTerminator(*code
)))
// False when the input ends before a non-space character is found.
1715 return code
< m_codeEnd
&& *code
== ':';
1718 #if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
// Stamps tokenRecord->m_data with the lexer's current line number, offset
// and line-start offset, and asserts that the offset is not before the line
// start.
1719 template <typename T
>
1720 void Lexer
<T
>::setTokenPosition(JSToken
* tokenRecord
)
1722 JSTokenData
* tokenData
= &tokenRecord
->m_data
;
1723 tokenData
->line
= lineNumber();
1724 tokenData
->offset
= currentOffset();
1725 tokenData
->lineStartOffset
= currentLineStartOffset();
1726 ASSERT(tokenData
->offset
>= tokenData
->lineStartOffset
);
// Main tokenizer entry point: scans the next token starting at m_current and
// returns its JSTokenType.
//
// tokenRecord - output; m_data, m_location, m_startPosition and
//               m_endPosition are filled in.
// lexerFlags  - tested below against LexerFlagsDontBuildStrings (string and
//               template literals) and LexexFlagsDontBuildKeywords
//               (identifiers); also forwarded to parseIdentifier.
// strictMode  - forwarded to parseString/parseIdentifier and used to reject
//               decimal literals with a leading zero.
//
// The current character is classified into a CharacterType and dispatched
// through a switch (the switch statement itself and most single-token
// assignments are on lines not visible in this view). Error paths set
// m_lexErrorMessage and an *_ERRORTOK token type.
//
// NOTE(review): many original lines of this function (braces, shift() calls,
// break/goto statements, some labels) are missing from this view, so the
// exact control flow between the visible statements cannot be confirmed here.
1730 template <typename T
>
1731 JSTokenType Lexer
<T
>::lex(JSToken
* tokenRecord
, unsigned lexerFlags
, bool strictMode
)
1733 JSTokenData
* tokenData
= &tokenRecord
->m_data
;
1734 JSTokenLocation
* tokenLocation
= &tokenRecord
->m_location
;
// Keep a copy of the location currently stored in tokenRecord before it is
// overwritten for the new token.
1735 m_lastTockenLocation
= JSTokenLocation(tokenRecord
->m_location
);
// Both scratch buffers are expected to be empty between tokens.
1738 ASSERT(m_buffer8
.isEmpty());
1739 ASSERT(m_buffer16
.isEmpty());
1741 JSTokenType token
= ERRORTOK
;
1742 m_terminator
= false;
1745 while (isWhiteSpace(m_current
))
1751 tokenLocation
->startOffset
= currentOffset();
1752 ASSERT(currentOffset() >= currentLineStartOffset());
1753 tokenRecord
->m_startPosition
= currentPosition();
// Classify the current character: Latin-1 characters through the lookup
// table, others as identifier start, line terminator or invalid.
1756 if (LIKELY(isLatin1(m_current
)))
1757 type
= static_cast<CharacterType
>(typesOfLatin1Characters
[m_current
]);
1758 else if (isNonLatin1IdentStart(m_current
))
1759 type
= CharacterIdentifierStart
;
1760 else if (isLineTerminator(m_current
))
1761 type
= CharacterLineTerminator
;
1763 type
= CharacterInvalid
;
1766 case CharacterGreater
:
1768 if (m_current
== '>') {
1770 if (m_current
== '>') {
1772 if (m_current
== '=') {
1774 token
= URSHIFTEQUAL
;
1780 if (m_current
== '=') {
1782 token
= RSHIFTEQUAL
;
1788 if (m_current
== '=') {
1795 case CharacterEqual
: {
1796 #if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
// '=' followed by '>' is the arrow-function token; its position is recorded
// in tokenData.
1797 if (peek(1) == '>') {
1798 token
= ARROWFUNCTION
;
1799 tokenData
->line
= lineNumber();
1800 tokenData
->offset
= currentOffset();
1801 tokenData
->lineStartOffset
= currentLineStartOffset();
1802 ASSERT(tokenData
->offset
>= tokenData
->lineStartOffset
);
1809 if (m_current
== '=') {
1811 if (m_current
== '=') {
1824 if (m_current
== '!' && peek(1) == '-' && peek(2) == '-') {
1825 // <!-- marks the beginning of a line comment (for www usage)
1826 goto inSingleLineComment
;
1828 if (m_current
== '<') {
1830 if (m_current
== '=') {
1832 token
= LSHIFTEQUAL
;
1838 if (m_current
== '=') {
1845 case CharacterExclamationMark
:
1847 if (m_current
== '=') {
1849 if (m_current
== '=') {
1857 token
= EXCLAMATION
;
1861 if (m_current
== '+') {
// The AUTO variant is chosen when m_terminator is set, i.e. a line terminator
// was seen before this token.
1863 token
= (!m_terminator
) ? PLUSPLUS
: AUTOPLUSPLUS
;
1866 if (m_current
== '=') {
1875 if (m_current
== '-') {
// "--" followed by '>' at the start of a line is handled as a single-line
// comment.
1877 if (m_atLineStart
&& m_current
== '>') {
1879 goto inSingleLineComment
;
1881 token
= (!m_terminator
) ? MINUSMINUS
: AUTOMINUSMINUS
;
1884 if (m_current
== '=') {
1891 case CharacterMultiply
:
1893 if (m_current
== '=') {
1900 case CharacterSlash
:
1902 if (m_current
== '/') {
1904 goto inSingleLineComment
;
1906 if (m_current
== '*') {
1908 if (parseMultilineComment())
1910 m_lexErrorMessage
= ASCIILiteral("Multiline comment was not closed properly");
1911 token
= UNTERMINATED_MULTILINE_COMMENT_ERRORTOK
;
1914 if (m_current
== '=') {
1923 if (m_current
== '&') {
1928 if (m_current
== '=') {
1937 if (m_current
== '=') {
1944 case CharacterModulo
:
1946 if (m_current
== '=') {
1955 if (m_current
== '=') {
1960 if (m_current
== '|') {
1967 case CharacterOpenParen
:
1971 case CharacterCloseParen
:
1975 case CharacterOpenBracket
:
1976 token
= OPENBRACKET
;
1979 case CharacterCloseBracket
:
1980 token
= CLOSEBRACKET
;
1983 case CharacterComma
:
1987 case CharacterColon
:
1991 case CharacterQuestion
:
1995 case CharacterTilde
:
1999 case CharacterSemicolon
:
// Braces record their own position in tokenData.
2003 case CharacterOpenBrace
:
2004 tokenData
->line
= lineNumber();
2005 tokenData
->offset
= currentOffset();
2006 tokenData
->lineStartOffset
= currentLineStartOffset();
2007 ASSERT(tokenData
->offset
>= tokenData
->lineStartOffset
);
2011 case CharacterCloseBrace
:
2012 tokenData
->line
= lineNumber();
2013 tokenData
->offset
= currentOffset();
2014 tokenData
->lineStartOffset
= currentLineStartOffset();
2015 ASSERT(tokenData
->offset
>= tokenData
->lineStartOffset
);
2021 if (!isASCIIDigit(m_current
)) {
2022 if (UNLIKELY((m_current
== '.') && (peek(1) == '.'))) {
2031 goto inNumberAfterDecimalPoint
;
// Prefixed literals: (m_current | 0x20) folds upper-case onto lower-case, so
// both 'x' and 'X' (likewise 'b'/'B' and 'o'/'O') are accepted.
2034 if ((m_current
| 0x20) == 'x') {
2035 if (!isASCIIHexDigit(peek(1))) {
2036 m_lexErrorMessage
= ASCIILiteral("No hexadecimal digits after '0x'");
2037 token
= INVALID_HEX_NUMBER_ERRORTOK
;
2041 // Shift out the 'x' prefix.
2044 parseHex(tokenData
->doubleValue
);
2045 if (isIdentStart(m_current
)) {
2046 m_lexErrorMessage
= ASCIILiteral("No space between hexadecimal literal and identifier");
2047 token
= INVALID_HEX_NUMBER_ERRORTOK
;
2050 token
= tokenTypeForIntegerLikeToken(tokenData
->doubleValue
);
2051 m_buffer8
.shrink(0);
2054 if ((m_current
| 0x20) == 'b') {
2055 if (!isASCIIBinaryDigit(peek(1))) {
2056 m_lexErrorMessage
= ASCIILiteral("No binary digits after '0b'");
2057 token
= INVALID_BINARY_NUMBER_ERRORTOK
;
2061 // Shift out the 'b' prefix.
2064 parseBinary(tokenData
->doubleValue
);
2065 if (isIdentStart(m_current
)) {
2066 m_lexErrorMessage
= ASCIILiteral("No space between binary literal and identifier");
2067 token
= INVALID_BINARY_NUMBER_ERRORTOK
;
2070 token
= tokenTypeForIntegerLikeToken(tokenData
->doubleValue
);
2071 m_buffer8
.shrink(0);
2075 if ((m_current
| 0x20) == 'o') {
2076 if (!isASCIIOctalDigit(peek(1))) {
2077 m_lexErrorMessage
= ASCIILiteral("No octal digits after '0o'");
2078 token
= INVALID_OCTAL_NUMBER_ERRORTOK
;
2082 // Shift out the 'o' prefix.
2085 parseOctal(tokenData
->doubleValue
);
2086 if (isIdentStart(m_current
)) {
2087 m_lexErrorMessage
= ASCIILiteral("No space between octal literal and identifier");
2088 token
= INVALID_OCTAL_NUMBER_ERRORTOK
;
2091 token
= tokenTypeForIntegerLikeToken(tokenData
->doubleValue
);
2092 m_buffer8
.shrink(0);
2097 if (strictMode
&& isASCIIDigit(m_current
)) {
2098 m_lexErrorMessage
= ASCIILiteral("Decimal integer literals with a leading zero are forbidden in strict mode");
2099 token
= INVALID_OCTAL_NUMBER_ERRORTOK
;
2102 if (isASCIIOctalDigit(m_current
)) {
2103 if (parseOctal(tokenData
->doubleValue
)) {
2104 token
= tokenTypeForIntegerLikeToken(tokenData
->doubleValue
);
2108 case CharacterNumber
:
// Skipped when a preceding path has already produced INTEGER or DOUBLE.
2109 if (LIKELY(token
!= INTEGER
&& token
!= DOUBLE
)) {
2110 if (!parseDecimal(tokenData
->doubleValue
)) {
2112 if (m_current
== '.') {
2114 inNumberAfterDecimalPoint
:
2115 parseNumberAfterDecimalPoint();
2118 if ((m_current
| 0x20) == 'e') {
2119 if (!parseNumberAfterExponentIndicator()) {
2120 m_lexErrorMessage
= ASCIILiteral("Non-number found after exponent indicator");
2121 token
= atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK
: INVALID_NUMERIC_LITERAL_ERRORTOK
;
// The characters collected in m_buffer8 are converted as a whole.
2125 size_t parsedLength
;
2126 tokenData
->doubleValue
= parseDouble(m_buffer8
.data(), m_buffer8
.size(), parsedLength
);
2127 if (token
== INTEGER
)
2128 token
= tokenTypeForIntegerLikeToken(tokenData
->doubleValue
);
2130 token
= tokenTypeForIntegerLikeToken(tokenData
->doubleValue
);
2133 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
// NOTE(review): the message below talks about a missing digit after a
// decimal point, while this check is about an identifier start following the
// literal - the wording looks mismatched; confirm before changing it.
2134 if (UNLIKELY(isIdentStart(m_current
))) {
2135 m_lexErrorMessage
= ASCIILiteral("At least one digit must occur after a decimal point");
2136 token
= atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK
: INVALID_NUMERIC_LITERAL_ERRORTOK
;
2139 m_buffer8
.shrink(0);
2141 case CharacterQuote
: {
// The flag selects the parseString instantiation that does not build the
// string value.
2142 StringParseResult result
= StringCannotBeParsed
;
2143 if (lexerFlags
& LexerFlagsDontBuildStrings
)
2144 result
= parseString
<false>(tokenData
, strictMode
);
2146 result
= parseString
<true>(tokenData
, strictMode
);
2148 if (UNLIKELY(result
!= StringParsedSuccessfully
)) {
2149 token
= result
== StringUnterminated
? UNTERMINATED_STRING_LITERAL_ERRORTOK
: INVALID_STRING_LITERAL_ERRORTOK
;
2156 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
2157 case CharacterBackQuote
: {
2160 StringParseResult result
= StringCannotBeParsed
;
2161 if (lexerFlags
& LexerFlagsDontBuildStrings
)
2162 result
= parseTemplateLiteral
<false>(tokenData
, RawStringsBuildMode::BuildRawStrings
);
2164 result
= parseTemplateLiteral
<true>(tokenData
, RawStringsBuildMode::BuildRawStrings
);
2166 if (UNLIKELY(result
!= StringParsedSuccessfully
)) {
2167 token
= result
== StringUnterminated
? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK
: INVALID_TEMPLATE_LITERAL_ERRORTOK
;
2174 case CharacterIdentifierStart
:
2175 ASSERT(isIdentStart(m_current
));
2177 case CharacterBackSlash
:
2179 if (lexerFlags
& LexexFlagsDontBuildKeywords
)
2180 token
= parseIdentifier
<false>(tokenData
, lexerFlags
, strictMode
);
2182 token
= parseIdentifier
<true>(tokenData
, lexerFlags
, strictMode
);
// A line terminator updates the line-start bookkeeping.
2184 case CharacterLineTerminator
:
2185 ASSERT(isLineTerminator(m_current
));
2186 shiftLineTerminator();
2187 m_atLineStart
= true;
2188 m_terminator
= true;
2189 m_lineStart
= m_code
;
2191 case CharacterPrivateIdentifierStart
:
2192 if (m_parsingBuiltinFunction
)
2196 case CharacterInvalid
:
2197 m_lexErrorMessage
= invalidCharacterMessage();
2201 RELEASE_ASSERT_NOT_REACHED();
2202 m_lexErrorMessage
= ASCIILiteral("Internal Error");
2207 m_atLineStart
= false;
// Single-line comment: advance to the next line terminator (loop body not
// visible), then perform the same bookkeeping as for a line terminator.
2210 inSingleLineComment
:
2211 while (!isLineTerminator(m_current
)) {
2216 shiftLineTerminator();
2217 m_atLineStart
= true;
2218 m_terminator
= true;
2219 m_lineStart
= m_code
;
2220 if (!lastTokenWasRestrKeyword())
2224 // Fall through into returnToken.
// Normal exit: record the end location and remember the token type in
// m_lastToken.
2227 tokenLocation
->line
= m_lineNumber
;
2228 tokenLocation
->endOffset
= currentOffset();
2229 tokenLocation
->lineStartOffset
= currentLineStartOffset();
2230 ASSERT(tokenLocation
->endOffset
>= tokenLocation
->lineStartOffset
);
2231 tokenRecord
->m_endPosition
= currentPosition();
2232 m_lastToken
= token
;
// Second exit path: the same location bookkeeping, plus a release-mode check
// that the token carries ErrorTokenFlag.
2237 tokenLocation
->line
= m_lineNumber
;
2238 tokenLocation
->endOffset
= currentOffset();
2239 tokenLocation
->lineStartOffset
= currentLineStartOffset();
2240 ASSERT(tokenLocation
->endOffset
>= tokenLocation
->lineStartOffset
);
2241 tokenRecord
->m_endPosition
= currentPosition();
2242 RELEASE_ASSERT(token
& ErrorTokenFlag
);
// Helper used by scanRegExp to OR scanned characters into an accumulator.
// The primary template is only declared; two forms are defined below (their
// `template <>` lines are not visible in this view).
2246 template <typename T
>
2247 static inline void orCharacter(UChar
&, UChar
);
// LChar form: intentionally does nothing.
2250 inline void orCharacter
<LChar
>(UChar
&, UChar
) { }
// UChar form: ORs the character into the accumulator.
2253 inline void orCharacter
<UChar
>(UChar
& orAccumulator
, UChar character
)
2255 orAccumulator
|= character
;
// Scans a regular expression literal and produces identifiers for its
// pattern and flags.
//
// pattern       - output; built from m_buffer16 once the pattern ends.
// flags         - output; built from the identifier-part characters that
//                 follow the pattern.
// patternPrefix - when non-zero, recorded as the first pattern character; it
//                 must not be a line terminator, '/' or '[' (asserted).
//
// The pattern ends at a '/' that is neither escaped nor inside brackets. A
// line terminator or end of input inside the pattern empties m_buffer16 (the
// return statements are on lines not visible in this view; presumably this
// is the failure path - confirm). The OR of all characters seen is passed to
// makeRightSizedIdentifier for both outputs. m_buffer16 must be empty on
// entry and is emptied again after each identifier is built.
2258 template <typename T
>
2259 bool Lexer
<T
>::scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
)
2261 ASSERT(m_buffer16
.isEmpty());
2263 bool lastWasEscape
= false;
2264 bool inBrackets
= false;
2265 UChar charactersOredTogether
= 0;
2267 if (patternPrefix
) {
2268 ASSERT(!isLineTerminator(patternPrefix
));
2269 ASSERT(patternPrefix
!= '/');
2270 ASSERT(patternPrefix
!= '[');
2271 record16(patternPrefix
);
2275 if (isLineTerminator(m_current
) || atEnd()) {
2276 m_buffer16
.shrink(0);
// An unescaped '/' outside brackets ends the pattern.
2284 if (prev
== '/' && !lastWasEscape
&& !inBrackets
)
2288 orCharacter
<T
>(charactersOredTogether
, prev
);
// The character after an escape is taken literally; just clear the flag.
2290 if (lastWasEscape
) {
2291 lastWasEscape
= false;
2303 lastWasEscape
= true;
2308 pattern
= makeRightSizedIdentifier(m_buffer16
.data(), m_buffer16
.size(), charactersOredTogether
);
// Reuse the buffer and the accumulator for the flags.
2310 m_buffer16
.shrink(0);
2311 charactersOredTogether
= 0;
2313 while (isIdentPart(m_current
)) {
2314 record16(m_current
);
2315 orCharacter
<T
>(charactersOredTogether
, m_current
);
2319 flags
= makeRightSizedIdentifier(m_buffer16
.data(), m_buffer16
.size(), charactersOredTogether
);
2320 m_buffer16
.shrink(0);
// Skips over a regular expression literal without building anything. It uses
// the same escape/bracket tracking as scanRegExp: the pattern ends at a '/'
// that is neither escaped nor inside brackets, and a line terminator or end
// of input inside the pattern is tested for explicitly. Trailing
// identifier-part characters (the flags) are skipped by the final loop.
// Returns bool; the return statements are on lines not visible in this view.
2325 template <typename T
>
2326 bool Lexer
<T
>::skipRegExp()
2328 bool lastWasEscape
= false;
2329 bool inBrackets
= false;
2332 if (isLineTerminator(m_current
) || atEnd())
2339 if (prev
== '/' && !lastWasEscape
&& !inBrackets
)
// The character after an escape is taken literally; just clear the flag.
2342 if (lastWasEscape
) {
2343 lastWasEscape
= false;
2355 lastWasEscape
= true;
2360 while (isIdentPart(m_current
))
2366 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
// Re-scans the part of a template literal that follows a substitution's
// closing brace and returns the resulting token type.
//
// tokenRecord         - m_data receives the parsed template chunk;
//                       m_location and m_endPosition are updated to the
//                       position after the chunk.
// rawStringsBuildMode - forwarded to parseTemplateLiteral.
//
// On a parse failure the token becomes UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK
// or INVALID_TEMPLATE_LITERAL_ERRORTOK. The token assigned on success and the
// return statement are on lines not visible in this view.
2367 template <typename T
>
2368 JSTokenType Lexer
<T
>::scanTrailingTemplateString(JSToken
* tokenRecord
, RawStringsBuildMode rawStringsBuildMode
)
2370 JSTokenData
* tokenData
= &tokenRecord
->m_data
;
2371 JSTokenLocation
* tokenLocation
= &tokenRecord
->m_location
;
2373 ASSERT(m_buffer16
.isEmpty());
2375 // Leading closing brace } is already shifted in the previous token scan.
2376 // So in this re-scan phase, shift() is not needed here.
2377 StringParseResult result
= parseTemplateLiteral
<true>(tokenData
, rawStringsBuildMode
);
2378 JSTokenType token
= ERRORTOK
;
2379 if (UNLIKELY(result
!= StringParsedSuccessfully
)) {
2380 token
= result
== StringUnterminated
? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK
: INVALID_TEMPLATE_LITERAL_ERRORTOK
;
2384 m_lastToken
= token
;
2387 // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2388 m_atLineStart
= false;
2390 // Adjust current tokenLocation data for TemplateString.
2391 tokenLocation
->line
= m_lineNumber
;
2392 tokenLocation
->endOffset
= currentOffset();
2393 tokenLocation
->lineStartOffset
= currentLineStartOffset();
2394 ASSERT(tokenLocation
->endOffset
>= tokenLocation
->lineStartOffset
);
2395 tokenRecord
->m_endPosition
= currentPosition();
// Resets the lexer's scratch state: each of the three buffers is swapped
// with a freshly constructed empty Vector, and m_isReparsing is set to
// false. Swapping with a new Vector, as opposed to the shrink(0) used
// elsewhere in this file, presumably also releases the buffers' storage -
// confirm against Vector's documentation. Other statements of this function
// may be on lines not visible in this view.
2400 template <typename T
>
2401 void Lexer
<T
>::clear()
2405 Vector
<LChar
> newBuffer8
;
2406 m_buffer8
.swap(newBuffer8
);
2408 Vector
<UChar
> newBuffer16
;
2409 m_buffer16
.swap(newBuffer16
);
2411 Vector
<UChar
> newBufferForRawTemplateString16
;
2412 m_bufferForRawTemplateString16
.swap(newBufferForRawTemplateString16
);
2414 m_isReparsing
= false;
2417 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
// Explicit instantiation definitions for the LChar and UChar character types.
2418 template class Lexer
<LChar
>;
2419 template class Lexer
<UChar
>;