]> git.saurik.com Git - apple/javascriptcore.git/blob - parser/Lexer.cpp
JavaScriptCore-7601.1.46.3.tar.gz
[apple/javascriptcore.git] / parser / Lexer.cpp
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25 #include "config.h"
26 #include "Lexer.h"
27
28 #include "JSFunctionInlines.h"
29
30 #include "BuiltinNames.h"
31 #include "JSGlobalObjectFunctions.h"
32 #include "Identifier.h"
33 #include "Nodes.h"
34 #include "JSCInlines.h"
35 #include <wtf/dtoa.h>
36 #include <ctype.h>
37 #include <limits.h>
38 #include <string.h>
39 #include <wtf/Assertions.h>
40
41 #include "KeywordLookup.h"
42 #include "Lexer.lut.h"
43 #include "Parser.h"
44
45 namespace JSC {
46
47 Keywords::Keywords(VM& vm)
48 : m_vm(vm)
49 , m_keywordTable(JSC::mainTable)
50 {
51 }
52
// Classification of a character as stored in typesOfLatin1Characters.
// The enumerator order matters: isIdentPart() relies on every type that may appear
// inside an identifier comparing less than or equal to CharacterNumber.
53 enum CharacterType {
54 // Types for the main switch
55
56 // The first three types are fixed, and also used for identifying
57 // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
58 CharacterIdentifierStart,
59 CharacterZero,
60 CharacterNumber,
61
62 CharacterInvalid,
63 CharacterLineTerminator,
64 CharacterExclamationMark,
65 CharacterOpenParen,
66 CharacterCloseParen,
67 CharacterOpenBracket,
68 CharacterCloseBracket,
69 CharacterComma,
70 CharacterColon,
71 CharacterQuestion,
72 CharacterTilde,
73 CharacterQuote,
74 CharacterBackQuote,
75 CharacterDot,
76 CharacterSlash,
77 CharacterBackSlash,
78 CharacterSemicolon,
79 CharacterOpenBrace,
80 CharacterCloseBrace,
81
82 CharacterAdd,
83 CharacterSub,
84 CharacterMultiply,
85 CharacterModulo,
86 CharacterAnd,
87 CharacterXor,
88 CharacterOr,
89 CharacterLess,
90 CharacterGreater,
91 CharacterEqual,
92
93 // Other types
94 CharacterWhiteSpace,
95 CharacterPrivateIdentifierStart
96 };
97
98 // Maps each of the 256 Latin-1 codes to its CharacterType.
// The table is indexed directly by the character value; isIdentStart(LChar) and
// isIdentPart(LChar) read it to classify identifier characters.
99 static const unsigned short typesOfLatin1Characters[256] = {
100 /* 0 - Null */ CharacterInvalid,
101 /* 1 - Start of Heading */ CharacterInvalid,
102 /* 2 - Start of Text */ CharacterInvalid,
103 /* 3 - End of Text */ CharacterInvalid,
104 /* 4 - End of Transm. */ CharacterInvalid,
105 /* 5 - Enquiry */ CharacterInvalid,
106 /* 6 - Acknowledgment */ CharacterInvalid,
107 /* 7 - Bell */ CharacterInvalid,
108 /* 8 - Back Space */ CharacterInvalid,
109 /* 9 - Horizontal Tab */ CharacterWhiteSpace,
110 /* 10 - Line Feed */ CharacterLineTerminator,
111 /* 11 - Vertical Tab */ CharacterWhiteSpace,
112 /* 12 - Form Feed */ CharacterWhiteSpace,
113 /* 13 - Carriage Return */ CharacterLineTerminator,
114 /* 14 - Shift Out */ CharacterInvalid,
115 /* 15 - Shift In */ CharacterInvalid,
116 /* 16 - Data Line Escape */ CharacterInvalid,
117 /* 17 - Device Control 1 */ CharacterInvalid,
118 /* 18 - Device Control 2 */ CharacterInvalid,
119 /* 19 - Device Control 3 */ CharacterInvalid,
120 /* 20 - Device Control 4 */ CharacterInvalid,
121 /* 21 - Negative Ack. */ CharacterInvalid,
122 /* 22 - Synchronous Idle */ CharacterInvalid,
123 /* 23 - End of Transmit */ CharacterInvalid,
124 /* 24 - Cancel */ CharacterInvalid,
125 /* 25 - End of Medium */ CharacterInvalid,
126 /* 26 - Substitute */ CharacterInvalid,
127 /* 27 - Escape */ CharacterInvalid,
128 /* 28 - File Separator */ CharacterInvalid,
129 /* 29 - Group Separator */ CharacterInvalid,
130 /* 30 - Record Separator */ CharacterInvalid,
131 /* 31 - Unit Separator */ CharacterInvalid,
132 /* 32 - Space */ CharacterWhiteSpace,
133 /* 33 - ! */ CharacterExclamationMark,
134 /* 34 - " */ CharacterQuote,
135 /* 35 - # */ CharacterInvalid,
136 /* 36 - $ */ CharacterIdentifierStart,
137 /* 37 - % */ CharacterModulo,
138 /* 38 - & */ CharacterAnd,
139 /* 39 - ' */ CharacterQuote,
140 /* 40 - ( */ CharacterOpenParen,
141 /* 41 - ) */ CharacterCloseParen,
142 /* 42 - * */ CharacterMultiply,
143 /* 43 - + */ CharacterAdd,
144 /* 44 - , */ CharacterComma,
145 /* 45 - - */ CharacterSub,
146 /* 46 - . */ CharacterDot,
147 /* 47 - / */ CharacterSlash,
148 /* 48 - 0 */ CharacterZero,
149 /* 49 - 1 */ CharacterNumber,
150 /* 50 - 2 */ CharacterNumber,
151 /* 51 - 3 */ CharacterNumber,
152 /* 52 - 4 */ CharacterNumber,
153 /* 53 - 5 */ CharacterNumber,
154 /* 54 - 6 */ CharacterNumber,
155 /* 55 - 7 */ CharacterNumber,
156 /* 56 - 8 */ CharacterNumber,
157 /* 57 - 9 */ CharacterNumber,
158 /* 58 - : */ CharacterColon,
159 /* 59 - ; */ CharacterSemicolon,
160 /* 60 - < */ CharacterLess,
161 /* 61 - = */ CharacterEqual,
162 /* 62 - > */ CharacterGreater,
163 /* 63 - ? */ CharacterQuestion,
164 /* 64 - @ */ CharacterPrivateIdentifierStart,
165 /* 65 - A */ CharacterIdentifierStart,
166 /* 66 - B */ CharacterIdentifierStart,
167 /* 67 - C */ CharacterIdentifierStart,
168 /* 68 - D */ CharacterIdentifierStart,
169 /* 69 - E */ CharacterIdentifierStart,
170 /* 70 - F */ CharacterIdentifierStart,
171 /* 71 - G */ CharacterIdentifierStart,
172 /* 72 - H */ CharacterIdentifierStart,
173 /* 73 - I */ CharacterIdentifierStart,
174 /* 74 - J */ CharacterIdentifierStart,
175 /* 75 - K */ CharacterIdentifierStart,
176 /* 76 - L */ CharacterIdentifierStart,
177 /* 77 - M */ CharacterIdentifierStart,
178 /* 78 - N */ CharacterIdentifierStart,
179 /* 79 - O */ CharacterIdentifierStart,
180 /* 80 - P */ CharacterIdentifierStart,
181 /* 81 - Q */ CharacterIdentifierStart,
182 /* 82 - R */ CharacterIdentifierStart,
183 /* 83 - S */ CharacterIdentifierStart,
184 /* 84 - T */ CharacterIdentifierStart,
185 /* 85 - U */ CharacterIdentifierStart,
186 /* 86 - V */ CharacterIdentifierStart,
187 /* 87 - W */ CharacterIdentifierStart,
188 /* 88 - X */ CharacterIdentifierStart,
189 /* 89 - Y */ CharacterIdentifierStart,
190 /* 90 - Z */ CharacterIdentifierStart,
191 /* 91 - [ */ CharacterOpenBracket,
192 /* 92 - \ */ CharacterBackSlash,
193 /* 93 - ] */ CharacterCloseBracket,
194 /* 94 - ^ */ CharacterXor,
195 /* 95 - _ */ CharacterIdentifierStart,
196 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
197 /* 96 - ` */ CharacterBackQuote,
198 #else
199 /* 96 - ` */ CharacterInvalid,
200 #endif
201 /* 97 - a */ CharacterIdentifierStart,
202 /* 98 - b */ CharacterIdentifierStart,
203 /* 99 - c */ CharacterIdentifierStart,
204 /* 100 - d */ CharacterIdentifierStart,
205 /* 101 - e */ CharacterIdentifierStart,
206 /* 102 - f */ CharacterIdentifierStart,
207 /* 103 - g */ CharacterIdentifierStart,
208 /* 104 - h */ CharacterIdentifierStart,
209 /* 105 - i */ CharacterIdentifierStart,
210 /* 106 - j */ CharacterIdentifierStart,
211 /* 107 - k */ CharacterIdentifierStart,
212 /* 108 - l */ CharacterIdentifierStart,
213 /* 109 - m */ CharacterIdentifierStart,
214 /* 110 - n */ CharacterIdentifierStart,
215 /* 111 - o */ CharacterIdentifierStart,
216 /* 112 - p */ CharacterIdentifierStart,
217 /* 113 - q */ CharacterIdentifierStart,
218 /* 114 - r */ CharacterIdentifierStart,
219 /* 115 - s */ CharacterIdentifierStart,
220 /* 116 - t */ CharacterIdentifierStart,
221 /* 117 - u */ CharacterIdentifierStart,
222 /* 118 - v */ CharacterIdentifierStart,
223 /* 119 - w */ CharacterIdentifierStart,
224 /* 120 - x */ CharacterIdentifierStart,
225 /* 121 - y */ CharacterIdentifierStart,
226 /* 122 - z */ CharacterIdentifierStart,
227 /* 123 - { */ CharacterOpenBrace,
228 /* 124 - | */ CharacterOr,
229 /* 125 - } */ CharacterCloseBrace,
230 /* 126 - ~ */ CharacterTilde,
231 /* 127 - Delete */ CharacterInvalid,
232 /* 128 - Cc category */ CharacterInvalid,
233 /* 129 - Cc category */ CharacterInvalid,
234 /* 130 - Cc category */ CharacterInvalid,
235 /* 131 - Cc category */ CharacterInvalid,
236 /* 132 - Cc category */ CharacterInvalid,
237 /* 133 - Cc category */ CharacterInvalid,
238 /* 134 - Cc category */ CharacterInvalid,
239 /* 135 - Cc category */ CharacterInvalid,
240 /* 136 - Cc category */ CharacterInvalid,
241 /* 137 - Cc category */ CharacterInvalid,
242 /* 138 - Cc category */ CharacterInvalid,
243 /* 139 - Cc category */ CharacterInvalid,
244 /* 140 - Cc category */ CharacterInvalid,
245 /* 141 - Cc category */ CharacterInvalid,
246 /* 142 - Cc category */ CharacterInvalid,
247 /* 143 - Cc category */ CharacterInvalid,
248 /* 144 - Cc category */ CharacterInvalid,
249 /* 145 - Cc category */ CharacterInvalid,
250 /* 146 - Cc category */ CharacterInvalid,
251 /* 147 - Cc category */ CharacterInvalid,
252 /* 148 - Cc category */ CharacterInvalid,
253 /* 149 - Cc category */ CharacterInvalid,
254 /* 150 - Cc category */ CharacterInvalid,
255 /* 151 - Cc category */ CharacterInvalid,
256 /* 152 - Cc category */ CharacterInvalid,
257 /* 153 - Cc category */ CharacterInvalid,
258 /* 154 - Cc category */ CharacterInvalid,
259 /* 155 - Cc category */ CharacterInvalid,
260 /* 156 - Cc category */ CharacterInvalid,
261 /* 157 - Cc category */ CharacterInvalid,
262 /* 158 - Cc category */ CharacterInvalid,
263 /* 159 - Cc category */ CharacterInvalid,
264 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
265 /* 161 - Po category */ CharacterInvalid,
266 /* 162 - Sc category */ CharacterInvalid,
267 /* 163 - Sc category */ CharacterInvalid,
268 /* 164 - Sc category */ CharacterInvalid,
269 /* 165 - Sc category */ CharacterInvalid,
270 /* 166 - So category */ CharacterInvalid,
271 /* 167 - So category */ CharacterInvalid,
272 /* 168 - Sk category */ CharacterInvalid,
273 /* 169 - So category */ CharacterInvalid,
274 /* 170 - Ll category */ CharacterIdentifierStart,
275 /* 171 - Pi category */ CharacterInvalid,
276 /* 172 - Sm category */ CharacterInvalid,
277 /* 173 - Cf category */ CharacterInvalid,
278 /* 174 - So category */ CharacterInvalid,
279 /* 175 - Sk category */ CharacterInvalid,
280 /* 176 - So category */ CharacterInvalid,
281 /* 177 - Sm category */ CharacterInvalid,
282 /* 178 - No category */ CharacterInvalid,
283 /* 179 - No category */ CharacterInvalid,
284 /* 180 - Sk category */ CharacterInvalid,
285 /* 181 - Ll category */ CharacterIdentifierStart,
286 /* 182 - So category */ CharacterInvalid,
287 /* 183 - Po category */ CharacterInvalid,
288 /* 184 - Sk category */ CharacterInvalid,
289 /* 185 - No category */ CharacterInvalid,
290 /* 186 - Ll category */ CharacterIdentifierStart,
291 /* 187 - Pf category */ CharacterInvalid,
292 /* 188 - No category */ CharacterInvalid,
293 /* 189 - No category */ CharacterInvalid,
294 /* 190 - No category */ CharacterInvalid,
295 /* 191 - Po category */ CharacterInvalid,
296 /* 192 - Lu category */ CharacterIdentifierStart,
297 /* 193 - Lu category */ CharacterIdentifierStart,
298 /* 194 - Lu category */ CharacterIdentifierStart,
299 /* 195 - Lu category */ CharacterIdentifierStart,
300 /* 196 - Lu category */ CharacterIdentifierStart,
301 /* 197 - Lu category */ CharacterIdentifierStart,
302 /* 198 - Lu category */ CharacterIdentifierStart,
303 /* 199 - Lu category */ CharacterIdentifierStart,
304 /* 200 - Lu category */ CharacterIdentifierStart,
305 /* 201 - Lu category */ CharacterIdentifierStart,
306 /* 202 - Lu category */ CharacterIdentifierStart,
307 /* 203 - Lu category */ CharacterIdentifierStart,
308 /* 204 - Lu category */ CharacterIdentifierStart,
309 /* 205 - Lu category */ CharacterIdentifierStart,
310 /* 206 - Lu category */ CharacterIdentifierStart,
311 /* 207 - Lu category */ CharacterIdentifierStart,
312 /* 208 - Lu category */ CharacterIdentifierStart,
313 /* 209 - Lu category */ CharacterIdentifierStart,
314 /* 210 - Lu category */ CharacterIdentifierStart,
315 /* 211 - Lu category */ CharacterIdentifierStart,
316 /* 212 - Lu category */ CharacterIdentifierStart,
317 /* 213 - Lu category */ CharacterIdentifierStart,
318 /* 214 - Lu category */ CharacterIdentifierStart,
319 /* 215 - Sm category */ CharacterInvalid,
320 /* 216 - Lu category */ CharacterIdentifierStart,
321 /* 217 - Lu category */ CharacterIdentifierStart,
322 /* 218 - Lu category */ CharacterIdentifierStart,
323 /* 219 - Lu category */ CharacterIdentifierStart,
324 /* 220 - Lu category */ CharacterIdentifierStart,
325 /* 221 - Lu category */ CharacterIdentifierStart,
326 /* 222 - Lu category */ CharacterIdentifierStart,
327 /* 223 - Ll category */ CharacterIdentifierStart,
328 /* 224 - Ll category */ CharacterIdentifierStart,
329 /* 225 - Ll category */ CharacterIdentifierStart,
330 /* 226 - Ll category */ CharacterIdentifierStart,
331 /* 227 - Ll category */ CharacterIdentifierStart,
332 /* 228 - Ll category */ CharacterIdentifierStart,
333 /* 229 - Ll category */ CharacterIdentifierStart,
334 /* 230 - Ll category */ CharacterIdentifierStart,
335 /* 231 - Ll category */ CharacterIdentifierStart,
336 /* 232 - Ll category */ CharacterIdentifierStart,
337 /* 233 - Ll category */ CharacterIdentifierStart,
338 /* 234 - Ll category */ CharacterIdentifierStart,
339 /* 235 - Ll category */ CharacterIdentifierStart,
340 /* 236 - Ll category */ CharacterIdentifierStart,
341 /* 237 - Ll category */ CharacterIdentifierStart,
342 /* 238 - Ll category */ CharacterIdentifierStart,
343 /* 239 - Ll category */ CharacterIdentifierStart,
344 /* 240 - Ll category */ CharacterIdentifierStart,
345 /* 241 - Ll category */ CharacterIdentifierStart,
346 /* 242 - Ll category */ CharacterIdentifierStart,
347 /* 243 - Ll category */ CharacterIdentifierStart,
348 /* 244 - Ll category */ CharacterIdentifierStart,
349 /* 245 - Ll category */ CharacterIdentifierStart,
350 /* 246 - Ll category */ CharacterIdentifierStart,
351 /* 247 - Sm category */ CharacterInvalid,
352 /* 248 - Ll category */ CharacterIdentifierStart,
353 /* 249 - Ll category */ CharacterIdentifierStart,
354 /* 250 - Ll category */ CharacterIdentifierStart,
355 /* 251 - Ll category */ CharacterIdentifierStart,
356 /* 252 - Ll category */ CharacterIdentifierStart,
357 /* 253 - Ll category */ CharacterIdentifierStart,
358 /* 254 - Ll category */ CharacterIdentifierStart,
359 /* 255 - Ll category */ CharacterIdentifierStart
360 };
361
362 // This table provides the character that results from \X where X is the ASCII character
363 // used as the index into the table (0-127). A table value of 0 means that more processing needs to be done.
// singleEscape() is the accessor; it returns 0 for any value of 128 or above.
364 static const LChar singleCharacterEscapeValuesForASCII[128] = {
365 /* 0 - Null */ 0,
366 /* 1 - Start of Heading */ 0,
367 /* 2 - Start of Text */ 0,
368 /* 3 - End of Text */ 0,
369 /* 4 - End of Transm. */ 0,
370 /* 5 - Enquiry */ 0,
371 /* 6 - Acknowledgment */ 0,
372 /* 7 - Bell */ 0,
373 /* 8 - Back Space */ 0,
374 /* 9 - Horizontal Tab */ 0,
375 /* 10 - Line Feed */ 0,
376 /* 11 - Vertical Tab */ 0,
377 /* 12 - Form Feed */ 0,
378 /* 13 - Carriage Return */ 0,
379 /* 14 - Shift Out */ 0,
380 /* 15 - Shift In */ 0,
381 /* 16 - Data Line Escape */ 0,
382 /* 17 - Device Control 1 */ 0,
383 /* 18 - Device Control 2 */ 0,
384 /* 19 - Device Control 3 */ 0,
385 /* 20 - Device Control 4 */ 0,
386 /* 21 - Negative Ack. */ 0,
387 /* 22 - Synchronous Idle */ 0,
388 /* 23 - End of Transmit */ 0,
389 /* 24 - Cancel */ 0,
390 /* 25 - End of Medium */ 0,
391 /* 26 - Substitute */ 0,
392 /* 27 - Escape */ 0,
393 /* 28 - File Separator */ 0,
394 /* 29 - Group Separator */ 0,
395 /* 30 - Record Separator */ 0,
396 /* 31 - Unit Separator */ 0,
397 /* 32 - Space */ ' ',
398 /* 33 - ! */ '!',
399 /* 34 - " */ '"',
400 /* 35 - # */ '#',
401 /* 36 - $ */ '$',
402 /* 37 - % */ '%',
403 /* 38 - & */ '&',
404 /* 39 - ' */ '\'',
405 /* 40 - ( */ '(',
406 /* 41 - ) */ ')',
407 /* 42 - * */ '*',
408 /* 43 - + */ '+',
409 /* 44 - , */ ',',
410 /* 45 - - */ '-',
411 /* 46 - . */ '.',
412 /* 47 - / */ '/',
413 /* 48 - 0 */ 0,
414 /* 49 - 1 */ 0,
415 /* 50 - 2 */ 0,
416 /* 51 - 3 */ 0,
417 /* 52 - 4 */ 0,
418 /* 53 - 5 */ 0,
419 /* 54 - 6 */ 0,
420 /* 55 - 7 */ 0,
421 /* 56 - 8 */ 0,
422 /* 57 - 9 */ 0,
423 /* 58 - : */ ':',
424 /* 59 - ; */ ';',
425 /* 60 - < */ '<',
426 /* 61 - = */ '=',
427 /* 62 - > */ '>',
428 /* 63 - ? */ '?',
429 /* 64 - @ */ '@',
430 /* 65 - A */ 'A',
431 /* 66 - B */ 'B',
432 /* 67 - C */ 'C',
433 /* 68 - D */ 'D',
434 /* 69 - E */ 'E',
435 /* 70 - F */ 'F',
436 /* 71 - G */ 'G',
437 /* 72 - H */ 'H',
438 /* 73 - I */ 'I',
439 /* 74 - J */ 'J',
440 /* 75 - K */ 'K',
441 /* 76 - L */ 'L',
442 /* 77 - M */ 'M',
443 /* 78 - N */ 'N',
444 /* 79 - O */ 'O',
445 /* 80 - P */ 'P',
446 /* 81 - Q */ 'Q',
447 /* 82 - R */ 'R',
448 /* 83 - S */ 'S',
449 /* 84 - T */ 'T',
450 /* 85 - U */ 'U',
451 /* 86 - V */ 'V',
452 /* 87 - W */ 'W',
453 /* 88 - X */ 'X',
454 /* 89 - Y */ 'Y',
455 /* 90 - Z */ 'Z',
456 /* 91 - [ */ '[',
457 /* 92 - \ */ '\\',
458 /* 93 - ] */ ']',
459 /* 94 - ^ */ '^',
460 /* 95 - _ */ '_',
461 /* 96 - ` */ '`',
462 /* 97 - a */ 'a',
463 /* 98 - b */ 0x08,
464 /* 99 - c */ 'c',
465 /* 100 - d */ 'd',
466 /* 101 - e */ 'e',
467 /* 102 - f */ 0x0C,
468 /* 103 - g */ 'g',
469 /* 104 - h */ 'h',
470 /* 105 - i */ 'i',
471 /* 106 - j */ 'j',
472 /* 107 - k */ 'k',
473 /* 108 - l */ 'l',
474 /* 109 - m */ 'm',
475 /* 110 - n */ 0x0A,
476 /* 111 - o */ 'o',
477 /* 112 - p */ 'p',
478 /* 113 - q */ 'q',
479 /* 114 - r */ 0x0D,
480 /* 115 - s */ 's',
481 /* 116 - t */ 0x09,
482 /* 117 - u */ 0,
483 /* 118 - v */ 0x0B,
484 /* 119 - w */ 'w',
485 /* 120 - x */ 0,
486 /* 121 - y */ 'y',
487 /* 122 - z */ 'z',
488 /* 123 - { */ '{',
489 /* 124 - | */ '|',
490 /* 125 - } */ '}',
491 /* 126 - ~ */ '~',
492 /* 127 - Delete */ 0
493 };
494
495 template <typename T>
496 Lexer<T>::Lexer(VM* vm, JSParserBuiltinMode builtinMode)
497 : m_isReparsing(false)
498 , m_vm(vm)
499 , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
500 {
501 }
502
503 static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
504 {
505 if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
506 return INTEGER;
507 return DOUBLE;
508 }
509
510 template <typename T>
511 Lexer<T>::~Lexer()
512 {
513 }
514
515 template <typename T>
516 String Lexer<T>::invalidCharacterMessage() const
517 {
518 switch (m_current) {
519 case 0:
520 return ASCIILiteral("Invalid character: '\\0'");
521 case 10:
522 return ASCIILiteral("Invalid character: '\\n'");
523 case 11:
524 return ASCIILiteral("Invalid character: '\\v'");
525 case 13:
526 return ASCIILiteral("Invalid character: '\\r'");
527 case 35:
528 return ASCIILiteral("Invalid character: '#'");
529 case 64:
530 return ASCIILiteral("Invalid character: '@'");
531 case 96:
532 return ASCIILiteral("Invalid character: '`'");
533 default:
534 return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current));
535 }
536 }
537
538 template <typename T>
539 ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
540 {
541 ASSERT(m_code <= m_codeEnd);
542 return m_code;
543 }
544
545 template <typename T>
546 void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
547 {
548 m_arena = &arena->identifierArena();
549
550 m_lineNumber = source.firstLine();
551 m_lastToken = -1;
552
553 const String& sourceString = source.provider()->source();
554
555 if (!sourceString.isNull())
556 setCodeStart(sourceString.impl());
557 else
558 m_codeStart = 0;
559
560 m_source = &source;
561 m_sourceOffset = source.startOffset();
562 m_codeStartPlusOffset = m_codeStart + source.startOffset();
563 m_code = m_codeStartPlusOffset;
564 m_codeEnd = m_codeStart + source.endOffset();
565 m_error = false;
566 m_atLineStart = true;
567 m_lineStart = m_code;
568 m_lexErrorMessage = String();
569
570 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
571 m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
572 m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
573
574 if (LIKELY(m_code < m_codeEnd))
575 m_current = *m_code;
576 else
577 m_current = 0;
578 ASSERT(currentOffset() == source.startOffset());
579 }
580
581 template <typename T>
582 template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
583 {
584 m_code += shiftAmount;
585 ASSERT(currentOffset() >= currentLineStartOffset());
586 m_current = *m_code;
587 }
588
589 template <typename T>
590 ALWAYS_INLINE void Lexer<T>::shift()
591 {
592 // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
593 m_current = 0;
594 ++m_code;
595 if (LIKELY(m_code < m_codeEnd))
596 m_current = *m_code;
597 }
598
599 template <typename T>
600 ALWAYS_INLINE bool Lexer<T>::atEnd() const
601 {
602 ASSERT(!m_current || m_code < m_codeEnd);
603 return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
604 }
605
606 template <typename T>
607 ALWAYS_INLINE T Lexer<T>::peek(int offset) const
608 {
609 ASSERT(offset > 0 && offset < 5);
610 const T* code = m_code + offset;
611 return (code < m_codeEnd) ? *code : 0;
612 }
613
614 struct ParsedUnicodeEscapeValue {
615 ParsedUnicodeEscapeValue(UChar32 value)
616 : m_value(value)
617 {
618 ASSERT(isValid());
619 }
620
621 enum SpecialValueType { Incomplete = -2, Invalid = -1 };
622 ParsedUnicodeEscapeValue(SpecialValueType type)
623 : m_value(type)
624 {
625 }
626
627 bool isValid() const { return m_value >= 0; }
628 bool isIncomplete() const { return m_value == Incomplete; }
629
630 UChar32 value() const
631 {
632 ASSERT(isValid());
633 return m_value;
634 }
635
636 private:
637 UChar32 m_value;
638 };
639
640 template<typename CharacterType> ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
641 {
642 if (m_current == '{') {
643 shift();
644 UChar32 codePoint = 0;
645 do {
646 if (!isASCIIHexDigit(m_current))
647 return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
648 codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
649 if (codePoint > UCHAR_MAX_VALUE)
650 return ParsedUnicodeEscapeValue::Invalid;
651 shift();
652 } while (m_current != '}');
653 shift();
654 return codePoint;
655 }
656
657 auto character2 = peek(1);
658 auto character3 = peek(2);
659 auto character4 = peek(3);
660 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4)))
661 return (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
662 auto result = convertUnicode(m_current, character2, character3, character4);
663 shift();
664 shift();
665 shift();
666 shift();
667 return result;
668 }
669
670 template <typename T>
671 void Lexer<T>::shiftLineTerminator()
672 {
673 ASSERT(isLineTerminator(m_current));
674
675 m_positionBeforeLastNewline = currentPosition();
676 T prev = m_current;
677 shift();
678
679 // Allow both CRLF and LFCR.
680 if (prev + m_current == '\n' + '\r')
681 shift();
682
683 ++m_lineNumber;
684 }
685
686 template <typename T>
687 ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
688 {
689 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
690 }
691
692 static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
693 {
694 return U_GET_GC_MASK(c) & U_GC_L_MASK;
695 }
696
697 static ALWAYS_INLINE bool isLatin1(LChar)
698 {
699 return true;
700 }
701
702 static ALWAYS_INLINE bool isLatin1(UChar c)
703 {
704 return c < 256;
705 }
706
707 static ALWAYS_INLINE bool isLatin1(UChar32 c)
708 {
709 return !(c & ~0xFF);
710 }
711
712 static inline bool isIdentStart(LChar c)
713 {
714 return typesOfLatin1Characters[c] == CharacterIdentifierStart;
715 }
716
717 static inline bool isIdentStart(UChar32 c)
718 {
719 return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
720 }
721
722 static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
723 {
724 // FIXME: ES6 says this should be based on the Unicode property ID_Continue now instead.
725 return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D;
726 }
727
728 static ALWAYS_INLINE bool isIdentPart(LChar c)
729 {
730 // Character types are divided into two groups depending on whether they can be part of an
731 // identifier or not. Those whose type value is less or equal than CharacterNumber can be
732 // part of an identifier. (See the CharacterType definition for more details.)
733 return typesOfLatin1Characters[c] <= CharacterNumber;
734 }
735
736 static ALWAYS_INLINE bool isIdentPart(UChar32 c)
737 {
738 return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
739 }
740
741 static ALWAYS_INLINE bool isIdentPart(UChar c)
742 {
743 return isIdentPart(static_cast<UChar32>(c));
744 }
745
746 template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd)
747 {
748 if (isIdentPart(code[0]))
749 return true;
750
751 // Shortest sequence handled below is \u{0}, which is 5 characters.
752 if (!(code[0] == '\\' && codeEnd - code >= 5 && code[1] == 'u'))
753 return false;
754
755 if (code[2] == '{') {
756 UChar32 codePoint = 0;
757 const CharacterType* pointer;
758 for (pointer = &code[3]; pointer < codeEnd; ++pointer) {
759 auto digit = *pointer;
760 if (!isASCIIHexDigit(digit))
761 break;
762 codePoint = (codePoint << 4) | toASCIIHexValue(digit);
763 if (codePoint > UCHAR_MAX_VALUE)
764 return false;
765 }
766 return isIdentPart(codePoint) && pointer < codeEnd && *pointer == '}';
767 }
768
769 // Shortest sequence handled below is \uXXXX, which is 6 characters.
770 if (codeEnd - code < 6)
771 return false;
772
773 auto character1 = code[2];
774 auto character2 = code[3];
775 auto character3 = code[4];
776 auto character4 = code[5];
777 return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4)
778 && isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4));
779 }
780
781 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
782 {
783 return isIdentPartIncludingEscapeTemplate(code, codeEnd);
784 }
785
786 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
787 {
788 return isIdentPartIncludingEscapeTemplate(code, codeEnd);
789 }
790
791 static inline LChar singleEscape(int c)
792 {
793 if (c < 128) {
794 ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
795 return singleCharacterEscapeValuesForASCII[c];
796 }
797 return 0;
798 }
799
800 template <typename T>
801 inline void Lexer<T>::record8(int c)
802 {
803 ASSERT(c >= 0);
804 ASSERT(c <= 0xFF);
805 m_buffer8.append(static_cast<LChar>(c));
806 }
807
808 template <typename T>
809 inline void assertCharIsIn8BitRange(T c)
810 {
811 UNUSED_PARAM(c);
812 ASSERT(c >= 0);
813 ASSERT(c <= 0xFF);
814 }
815
816 template <>
817 inline void assertCharIsIn8BitRange(UChar c)
818 {
819 UNUSED_PARAM(c);
820 ASSERT(c <= 0xFF);
821 }
822
823 template <>
824 inline void assertCharIsIn8BitRange(LChar)
825 {
826 }
827
828 template <typename T>
829 inline void Lexer<T>::append8(const T* p, size_t length)
830 {
831 size_t currentSize = m_buffer8.size();
832 m_buffer8.grow(currentSize + length);
833 LChar* rawBuffer = m_buffer8.data() + currentSize;
834
835 for (size_t i = 0; i < length; i++) {
836 T c = p[i];
837 assertCharIsIn8BitRange(c);
838 rawBuffer[i] = c;
839 }
840 }
841
842 template <typename T>
843 inline void Lexer<T>::append16(const LChar* p, size_t length)
844 {
845 size_t currentSize = m_buffer16.size();
846 m_buffer16.grow(currentSize + length);
847 UChar* rawBuffer = m_buffer16.data() + currentSize;
848
849 for (size_t i = 0; i < length; i++)
850 rawBuffer[i] = p[i];
851 }
852
853 template <typename T>
854 inline void Lexer<T>::record16(T c)
855 {
856 m_buffer16.append(c);
857 }
858
859 template <typename T>
860 inline void Lexer<T>::record16(int c)
861 {
862 ASSERT(c >= 0);
863 ASSERT(c <= static_cast<int>(USHRT_MAX));
864 m_buffer16.append(static_cast<UChar>(c));
865 }
866
867 template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
868 {
869 ASSERT(codePoint >= 0);
870 ASSERT(codePoint <= UCHAR_MAX_VALUE);
871 if (U_IS_BMP(codePoint))
872 record16(codePoint);
873 else {
874 UChar codeUnits[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
875 append16(codeUnits, 2);
876 }
877 }
878
879 #if !ASSERT_DISABLED
880 bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
881 {
882 if (!ident)
883 return true;
884 /* Just block any use of suspicious identifiers. This is intended to
885 * be used as a safety net while implementing builtins.
886 */
887 // FIXME: How can a debug-only assertion be a safety net?
888 if (*ident == vm.propertyNames->builtinNames().callPublicName())
889 return false;
890 if (*ident == vm.propertyNames->builtinNames().applyPublicName())
891 return false;
892 if (*ident == vm.propertyNames->eval)
893 return false;
894 if (*ident == vm.propertyNames->Function)
895 return false;
896 return true;
897 }
898 #endif
899
900 template <>
901 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
902 {
903 const ptrdiff_t remaining = m_codeEnd - m_code;
904 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
905 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
906 if (keyword != IDENT) {
907 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
908 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
909 }
910 }
911
912 bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
913 if (isPrivateName)
914 shift();
915
916 const LChar* identifierStart = currentSourcePtr();
917 unsigned identifierLineStart = currentLineStartOffset();
918
919 while (isIdentPart(m_current))
920 shift();
921
922 if (UNLIKELY(m_current == '\\')) {
923 setOffsetFromSourcePtr(identifierStart, identifierLineStart);
924 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
925 }
926
927 const Identifier* ident = 0;
928
929 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
930 int identifierLength = currentSourcePtr() - identifierStart;
931 ident = makeIdentifier(identifierStart, identifierLength);
932 if (m_parsingBuiltinFunction) {
933 if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
934 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
935 return ERRORTOK;
936 }
937 if (isPrivateName)
938 ident = m_vm->propertyNames->getPrivateName(*ident);
939 else if (*ident == m_vm->propertyNames->undefinedKeyword)
940 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
941 if (!ident)
942 return INVALID_PRIVATE_NAME_ERRORTOK;
943 }
944 tokenData->ident = ident;
945 } else
946 tokenData->ident = 0;
947
948 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
949 ASSERT(shouldCreateIdentifier);
950 if (remaining < maxTokenLength) {
951 const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
952 ASSERT((remaining < maxTokenLength) || !entry);
953 if (!entry)
954 return IDENT;
955 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
956 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
957 }
958 return IDENT;
959 }
960
961 return IDENT;
962 }
963
964 template <>
965 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
966 {
967 const ptrdiff_t remaining = m_codeEnd - m_code;
968 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
969 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
970 if (keyword != IDENT) {
971 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
972 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
973 }
974 }
975
976 bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
977 if (isPrivateName)
978 shift();
979
980 const UChar* identifierStart = currentSourcePtr();
981 int identifierLineStart = currentLineStartOffset();
982
983 UChar orAllChars = 0;
984
985 while (isIdentPart(m_current)) {
986 orAllChars |= m_current;
987 shift();
988 }
989
990 if (UNLIKELY(m_current == '\\')) {
991 ASSERT(!isPrivateName);
992 setOffsetFromSourcePtr(identifierStart, identifierLineStart);
993 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
994 }
995
996 bool isAll8Bit = false;
997
998 if (!(orAllChars & ~0xff))
999 isAll8Bit = true;
1000
1001 const Identifier* ident = 0;
1002
1003 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
1004 int identifierLength = currentSourcePtr() - identifierStart;
1005 if (isAll8Bit)
1006 ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
1007 else
1008 ident = makeIdentifier(identifierStart, identifierLength);
1009 if (m_parsingBuiltinFunction) {
1010 if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
1011 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
1012 return ERRORTOK;
1013 }
1014 if (isPrivateName)
1015 ident = m_vm->propertyNames->getPrivateName(*ident);
1016 else if (*ident == m_vm->propertyNames->undefinedKeyword)
1017 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
1018 if (!ident)
1019 return INVALID_PRIVATE_NAME_ERRORTOK;
1020 }
1021 tokenData->ident = ident;
1022 } else
1023 tokenData->ident = 0;
1024
1025 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
1026 ASSERT(shouldCreateIdentifier);
1027 if (remaining < maxTokenLength) {
1028 const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
1029 ASSERT((remaining < maxTokenLength) || !entry);
1030 if (!entry)
1031 return IDENT;
1032 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1033 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1034 }
1035 return IDENT;
1036 }
1037
1038 return IDENT;
1039 }
1040
1041 template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
1042 {
1043 auto identifierStart = currentSourcePtr();
1044 bool bufferRequired = false;
1045
1046 while (true) {
1047 if (LIKELY(isIdentPart(m_current))) {
1048 shift();
1049 continue;
1050 }
1051 if (LIKELY(m_current != '\\'))
1052 break;
1053
1054 // \uXXXX unicode characters.
1055 bufferRequired = true;
1056 if (identifierStart != currentSourcePtr())
1057 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1058 shift();
1059 if (UNLIKELY(m_current != 'u'))
1060 return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
1061 shift();
1062 auto character = parseUnicodeEscape();
1063 if (UNLIKELY(!character.isValid()))
1064 return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1065 if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character.value()) : !isIdentStart(character.value())))
1066 return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1067 if (shouldCreateIdentifier)
1068 recordUnicodeCodePoint(character.value());
1069 identifierStart = currentSourcePtr();
1070 }
1071
1072 int identifierLength;
1073 const Identifier* ident = nullptr;
1074 if (shouldCreateIdentifier) {
1075 if (!bufferRequired) {
1076 identifierLength = currentSourcePtr() - identifierStart;
1077 ident = makeIdentifier(identifierStart, identifierLength);
1078 } else {
1079 if (identifierStart != currentSourcePtr())
1080 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1081 ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1082 }
1083
1084 tokenData->ident = ident;
1085 } else
1086 tokenData->ident = nullptr;
1087
1088 m_buffer16.shrink(0);
1089
1090 if (LIKELY(!(lexerFlags & LexerFlagsIgnoreReservedWords))) {
1091 ASSERT(shouldCreateIdentifier);
1092 const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
1093 if (!entry)
1094 return IDENT;
1095 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1096 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1097 }
1098
1099 return IDENT;
1100 }
1101
1102 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
1103 {
1104 return character < 0xE;
1105 }
1106
1107 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
1108 {
1109 return character < 0xE || character > 0xFF;
1110 }
1111
1112 template <typename T>
1113 template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1114 {
1115 int startingOffset = currentOffset();
1116 int startingLineStartOffset = currentLineStartOffset();
1117 int startingLineNumber = lineNumber();
1118 T stringQuoteCharacter = m_current;
1119 shift();
1120
1121 const T* stringStart = currentSourcePtr();
1122
1123 while (m_current != stringQuoteCharacter) {
1124 if (UNLIKELY(m_current == '\\')) {
1125 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1126 append8(stringStart, currentSourcePtr() - stringStart);
1127 shift();
1128
1129 LChar escape = singleEscape(m_current);
1130
1131 // Most common escape sequences first.
1132 if (escape) {
1133 if (shouldBuildStrings)
1134 record8(escape);
1135 shift();
1136 } else if (UNLIKELY(isLineTerminator(m_current)))
1137 shiftLineTerminator();
1138 else if (m_current == 'x') {
1139 shift();
1140 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1141 m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
1142 return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1143 }
1144 T prev = m_current;
1145 shift();
1146 if (shouldBuildStrings)
1147 record8(convertHex(prev, m_current));
1148 shift();
1149 } else {
1150 setOffset(startingOffset, startingLineStartOffset);
1151 setLineNumber(startingLineNumber);
1152 m_buffer8.shrink(0);
1153 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1154 }
1155 stringStart = currentSourcePtr();
1156 continue;
1157 }
1158
1159 if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1160 setOffset(startingOffset, startingLineStartOffset);
1161 setLineNumber(startingLineNumber);
1162 m_buffer8.shrink(0);
1163 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1164 }
1165
1166 shift();
1167 }
1168
1169 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1170 append8(stringStart, currentSourcePtr() - stringStart);
1171 if (shouldBuildStrings) {
1172 tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1173 m_buffer8.shrink(0);
1174 } else
1175 tokenData->ident = 0;
1176
1177 return StringParsedSuccessfully;
1178 }
1179
1180 template <typename T>
1181 template <bool shouldBuildStrings> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(EscapeParseMode escapeParseMode, bool strictMode, T stringQuoteCharacter) -> StringParseResult
1182 {
1183 if (m_current == 'x') {
1184 shift();
1185 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1186 m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
1187 return StringCannotBeParsed;
1188 }
1189 T prev = m_current;
1190 shift();
1191 if (shouldBuildStrings)
1192 record16(convertHex(prev, m_current));
1193 shift();
1194 return StringParsedSuccessfully;
1195 }
1196
1197 if (m_current == 'u') {
1198 shift();
1199
1200 if (escapeParseMode == EscapeParseMode::String && m_current == stringQuoteCharacter) {
1201 if (shouldBuildStrings)
1202 record16('u');
1203 return StringParsedSuccessfully;
1204 }
1205
1206 auto character = parseUnicodeEscape();
1207 if (character.isValid()) {
1208 if (shouldBuildStrings)
1209 recordUnicodeCodePoint(character.value());
1210 return StringParsedSuccessfully;
1211 }
1212
1213 m_lexErrorMessage = ASCIILiteral("\\u can only be followed by a Unicode character sequence");
1214 return character.isIncomplete() ? StringUnterminated : StringCannotBeParsed;
1215 }
1216
1217 if (strictMode) {
1218 if (isASCIIDigit(m_current)) {
1219 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1220 int character1 = m_current;
1221 shift();
1222 if (character1 != '0' || isASCIIDigit(m_current)) {
1223 m_lexErrorMessage = ASCIILiteral("The only valid numeric escape in strict mode is '\\0'");
1224 return StringCannotBeParsed;
1225 }
1226 if (shouldBuildStrings)
1227 record16(0);
1228 return StringParsedSuccessfully;
1229 }
1230 } else {
1231 if (isASCIIOctalDigit(m_current)) {
1232 // Octal character sequences
1233 T character1 = m_current;
1234 shift();
1235 if (isASCIIOctalDigit(m_current)) {
1236 // Two octal characters
1237 T character2 = m_current;
1238 shift();
1239 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1240 if (shouldBuildStrings)
1241 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1242 shift();
1243 } else {
1244 if (shouldBuildStrings)
1245 record16((character1 - '0') * 8 + character2 - '0');
1246 }
1247 } else {
1248 if (shouldBuildStrings)
1249 record16(character1 - '0');
1250 }
1251 return StringParsedSuccessfully;
1252 }
1253 }
1254
1255 if (!atEnd()) {
1256 if (shouldBuildStrings)
1257 record16(m_current);
1258 shift();
1259 return StringParsedSuccessfully;
1260 }
1261
1262 m_lexErrorMessage = ASCIILiteral("Unterminated string constant");
1263 return StringUnterminated;
1264 }
1265
1266 template <typename T>
1267 template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
1268 {
1269 T stringQuoteCharacter = m_current;
1270 shift();
1271
1272 const T* stringStart = currentSourcePtr();
1273
1274 while (m_current != stringQuoteCharacter) {
1275 if (UNLIKELY(m_current == '\\')) {
1276 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1277 append16(stringStart, currentSourcePtr() - stringStart);
1278 shift();
1279
1280 LChar escape = singleEscape(m_current);
1281
1282 // Most common escape sequences first
1283 if (escape) {
1284 if (shouldBuildStrings)
1285 record16(escape);
1286 shift();
1287 } else if (UNLIKELY(isLineTerminator(m_current)))
1288 shiftLineTerminator();
1289 else {
1290 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::String, strictMode, stringQuoteCharacter);
1291 if (result != StringParsedSuccessfully)
1292 return result;
1293 }
1294
1295 stringStart = currentSourcePtr();
1296 continue;
1297 }
1298 // Fast check for characters that require special handling.
1299 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1300 // as possible, and lets through all common ASCII characters.
1301 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1302 // New-line or end of input is not allowed
1303 if (atEnd() || isLineTerminator(m_current)) {
1304 m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
1305 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1306 }
1307 // Anything else is just a normal character
1308 }
1309 shift();
1310 }
1311
1312 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1313 append16(stringStart, currentSourcePtr() - stringStart);
1314 if (shouldBuildStrings)
1315 tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1316 else
1317 tokenData->ident = 0;
1318
1319 m_buffer16.shrink(0);
1320 return StringParsedSuccessfully;
1321 }
1322
1323 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
1324 // While the lexer accepts <LF><CR> (not <CR><LF>) sequence
1325 // as one line terminator and increments one line number,
1326 // TemplateLiteral considers it as two line terminators <LF> and <CR>.
1327 //
1328 // TemplateLiteral normalizes line terminators as follows.
1329 //
1330 // <LF> => <LF>
1331 // <CR> => <LF>
1332 // <CR><LF> => <LF>
1333 // <\u2028> => <\u2028>
1334 // <\u2029> => <\u2029>
1335 //
1336 // So, <LF><CR> should be normalized to <LF><LF>.
1337 // However, the lexer should increment the line number only once for <LF><CR>.
1338 //
// To achieve this, LineNumberAdder tracks the state of the current line terminator sequence.
// When the TemplateLiteral lexer encounters a line terminator, it notifies LineNumberAdder.
// LineNumberAdder updates that state and increments the line number only when necessary.
// For example, LineNumberAdder increments the line number only once for <LF><CR> and <CR><LF>.
1343 template<typename CharacterType>
1344 class LineNumberAdder {
1345 public:
1346 LineNumberAdder(int& lineNumber)
1347 : m_lineNumber(lineNumber)
1348 {
1349 }
1350
1351 void clear()
1352 {
1353 m_previous = 0;
1354 }
1355
1356 void add(CharacterType character)
1357 {
1358 ASSERT(Lexer<CharacterType>::isLineTerminator(character));
1359 if ((character + m_previous) == ('\n' + '\r'))
1360 m_previous = 0;
1361 else {
1362 ++m_lineNumber;
1363 m_previous = character;
1364 }
1365 }
1366
1367 private:
1368 int& m_lineNumber;
1369 CharacterType m_previous { 0 };
1370 };
1371
1372 template <typename T>
1373 template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
1374 {
1375 const T* stringStart = currentSourcePtr();
1376 const T* rawStringStart = currentSourcePtr();
1377
1378 LineNumberAdder<T> lineNumberAdder(m_lineNumber);
1379
1380 while (m_current != '`') {
1381 if (UNLIKELY(m_current == '\\')) {
1382 lineNumberAdder.clear();
1383 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1384 append16(stringStart, currentSourcePtr() - stringStart);
1385 shift();
1386
1387 LChar escape = singleEscape(m_current);
1388
1389 // Most common escape sequences first.
1390 if (escape) {
1391 if (shouldBuildStrings)
1392 record16(escape);
1393 shift();
1394 } else if (UNLIKELY(isLineTerminator(m_current))) {
1395 if (m_current == '\r') {
1396 lineNumberAdder.add(m_current);
1397 shift();
1398 if (m_current == '\n') {
1399 lineNumberAdder.add(m_current);
1400 shift();
1401 }
1402 } else {
1403 lineNumberAdder.add(m_current);
1404 shift();
1405 }
1406 } else {
1407 bool strictMode = true;
1408 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::Template, strictMode, '`');
1409 if (result != StringParsedSuccessfully)
1410 return result;
1411 }
1412
1413 stringStart = currentSourcePtr();
1414 continue;
1415 }
1416
1417 if (m_current == '$' && peek(1) == '{')
1418 break;
1419
1420 // Fast check for characters that require special handling.
1421 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1422 // as possible, and lets through all common ASCII characters.
1423 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1424 // End of input is not allowed.
1425 // Unlike String, line terminator is allowed.
1426 if (atEnd()) {
1427 m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
1428 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1429 }
1430
1431 if (isLineTerminator(m_current)) {
1432 if (m_current == '\r') {
1433 // Normalize <CR>, <CR><LF> to <LF>.
1434 if (shouldBuildStrings) {
1435 if (stringStart != currentSourcePtr())
1436 append16(stringStart, currentSourcePtr() - stringStart);
1437 if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1438 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1439
1440 record16('\n');
1441 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1442 m_bufferForRawTemplateString16.append('\n');
1443 }
1444 lineNumberAdder.add(m_current);
1445 shift();
1446 if (m_current == '\n') {
1447 lineNumberAdder.add(m_current);
1448 shift();
1449 }
1450 stringStart = currentSourcePtr();
1451 rawStringStart = currentSourcePtr();
1452 } else {
1453 lineNumberAdder.add(m_current);
1454 shift();
1455 }
1456 continue;
1457 }
1458 // Anything else is just a normal character
1459 }
1460
1461 lineNumberAdder.clear();
1462 shift();
1463 }
1464
1465 bool isTail = m_current == '`';
1466
1467 if (shouldBuildStrings) {
1468 if (currentSourcePtr() != stringStart)
1469 append16(stringStart, currentSourcePtr() - stringStart);
1470 if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1471 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1472 }
1473
1474 if (shouldBuildStrings) {
1475 tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1476 // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
1477 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1478 tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
1479 else
1480 tokenData->raw = makeEmptyIdentifier();
1481 } else {
1482 tokenData->cooked = makeEmptyIdentifier();
1483 tokenData->raw = makeEmptyIdentifier();
1484 }
1485 tokenData->isTail = isTail;
1486
1487 m_buffer16.shrink(0);
1488 m_bufferForRawTemplateString16.shrink(0);
1489
1490 if (isTail) {
1491 // Skip `
1492 shift();
1493 } else {
1494 // Skip $ and {
1495 shift();
1496 shift();
1497 }
1498
1499 return StringParsedSuccessfully;
1500 }
1501 #endif
1502
1503 template <typename T>
1504 ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
1505 {
1506 // Optimization: most hexadecimal values fit into 4 bytes.
1507 uint32_t hexValue = 0;
1508 int maximumDigits = 7;
1509
1510 do {
1511 hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1512 shift();
1513 --maximumDigits;
1514 } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1515
1516 if (maximumDigits >= 0) {
1517 returnValue = hexValue;
1518 return;
1519 }
1520
1521 // No more place in the hexValue buffer.
1522 // The values are shifted out and placed into the m_buffer8 vector.
1523 for (int i = 0; i < 8; ++i) {
1524 int digit = hexValue >> 28;
1525 if (digit < 10)
1526 record8(digit + '0');
1527 else
1528 record8(digit - 10 + 'a');
1529 hexValue <<= 4;
1530 }
1531
1532 while (isASCIIHexDigit(m_current)) {
1533 record8(m_current);
1534 shift();
1535 }
1536
1537 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1538 }
1539
1540 template <typename T>
1541 ALWAYS_INLINE bool Lexer<T>::parseBinary(double& returnValue)
1542 {
1543 // Optimization: most binary values fit into 4 bytes.
1544 uint32_t binaryValue = 0;
1545 const unsigned maximumDigits = 32;
1546 int digit = maximumDigits - 1;
1547 // Temporary buffer for the digits. Makes easier
1548 // to reconstruct the input characters when needed.
1549 LChar digits[maximumDigits];
1550
1551 do {
1552 binaryValue = (binaryValue << 1) + (m_current - '0');
1553 digits[digit] = m_current;
1554 shift();
1555 --digit;
1556 } while (isASCIIBinaryDigit(m_current) && digit >= 0);
1557
1558 if (!isASCIIDigit(m_current) && digit >= 0) {
1559 returnValue = binaryValue;
1560 return true;
1561 }
1562
1563 for (int i = maximumDigits - 1; i > digit; --i)
1564 record8(digits[i]);
1565
1566 while (isASCIIBinaryDigit(m_current)) {
1567 record8(m_current);
1568 shift();
1569 }
1570
1571 if (isASCIIDigit(m_current))
1572 return false;
1573
1574 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2);
1575 return true;
1576 }
1577
1578 template <typename T>
1579 ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
1580 {
1581 // Optimization: most octal values fit into 4 bytes.
1582 uint32_t octalValue = 0;
1583 const unsigned maximumDigits = 10;
1584 int digit = maximumDigits - 1;
1585 // Temporary buffer for the digits. Makes easier
1586 // to reconstruct the input characters when needed.
1587 LChar digits[maximumDigits];
1588
1589 do {
1590 octalValue = octalValue * 8 + (m_current - '0');
1591 digits[digit] = m_current;
1592 shift();
1593 --digit;
1594 } while (isASCIIOctalDigit(m_current) && digit >= 0);
1595
1596 if (!isASCIIDigit(m_current) && digit >= 0) {
1597 returnValue = octalValue;
1598 return true;
1599 }
1600
1601 for (int i = maximumDigits - 1; i > digit; --i)
1602 record8(digits[i]);
1603
1604 while (isASCIIOctalDigit(m_current)) {
1605 record8(m_current);
1606 shift();
1607 }
1608
1609 if (isASCIIDigit(m_current))
1610 return false;
1611
1612 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
1613 return true;
1614 }
1615
1616 template <typename T>
1617 ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
1618 {
1619 // Optimization: most decimal values fit into 4 bytes.
1620 uint32_t decimalValue = 0;
1621
1622 // Since parseOctal may be executed before parseDecimal,
1623 // the m_buffer8 may hold ascii digits.
1624 if (!m_buffer8.size()) {
1625 const unsigned maximumDigits = 10;
1626 int digit = maximumDigits - 1;
1627 // Temporary buffer for the digits. Makes easier
1628 // to reconstruct the input characters when needed.
1629 LChar digits[maximumDigits];
1630
1631 do {
1632 decimalValue = decimalValue * 10 + (m_current - '0');
1633 digits[digit] = m_current;
1634 shift();
1635 --digit;
1636 } while (isASCIIDigit(m_current) && digit >= 0);
1637
1638 if (digit >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
1639 returnValue = decimalValue;
1640 return true;
1641 }
1642
1643 for (int i = maximumDigits - 1; i > digit; --i)
1644 record8(digits[i]);
1645 }
1646
1647 while (isASCIIDigit(m_current)) {
1648 record8(m_current);
1649 shift();
1650 }
1651
1652 return false;
1653 }
1654
1655 template <typename T>
1656 ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
1657 {
1658 record8('.');
1659 while (isASCIIDigit(m_current)) {
1660 record8(m_current);
1661 shift();
1662 }
1663 }
1664
1665 template <typename T>
1666 ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1667 {
1668 record8('e');
1669 shift();
1670 if (m_current == '+' || m_current == '-') {
1671 record8(m_current);
1672 shift();
1673 }
1674
1675 if (!isASCIIDigit(m_current))
1676 return false;
1677
1678 do {
1679 record8(m_current);
1680 shift();
1681 } while (isASCIIDigit(m_current));
1682 return true;
1683 }
1684
1685 template <typename T>
1686 ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1687 {
1688 while (true) {
1689 while (UNLIKELY(m_current == '*')) {
1690 shift();
1691 if (m_current == '/') {
1692 shift();
1693 return true;
1694 }
1695 }
1696
1697 if (atEnd())
1698 return false;
1699
1700 if (isLineTerminator(m_current)) {
1701 shiftLineTerminator();
1702 m_terminator = true;
1703 } else
1704 shift();
1705 }
1706 }
1707
1708 template <typename T>
1709 bool Lexer<T>::nextTokenIsColon()
1710 {
1711 const T* code = m_code;
1712 while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1713 code++;
1714
1715 return code < m_codeEnd && *code == ':';
1716 }
1717
1718 #if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
1719 template <typename T>
1720 void Lexer<T>::setTokenPosition(JSToken* tokenRecord)
1721 {
1722 JSTokenData* tokenData = &tokenRecord->m_data;
1723 tokenData->line = lineNumber();
1724 tokenData->offset = currentOffset();
1725 tokenData->lineStartOffset = currentLineStartOffset();
1726 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1727 }
1728 #endif
1729
1730 template <typename T>
1731 JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
1732 {
1733 JSTokenData* tokenData = &tokenRecord->m_data;
1734 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1735 m_lastTockenLocation = JSTokenLocation(tokenRecord->m_location);
1736
1737 ASSERT(!m_error);
1738 ASSERT(m_buffer8.isEmpty());
1739 ASSERT(m_buffer16.isEmpty());
1740
1741 JSTokenType token = ERRORTOK;
1742 m_terminator = false;
1743
1744 start:
1745 while (isWhiteSpace(m_current))
1746 shift();
1747
1748 if (atEnd())
1749 return EOFTOK;
1750
1751 tokenLocation->startOffset = currentOffset();
1752 ASSERT(currentOffset() >= currentLineStartOffset());
1753 tokenRecord->m_startPosition = currentPosition();
1754
1755 CharacterType type;
1756 if (LIKELY(isLatin1(m_current)))
1757 type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1758 else if (isNonLatin1IdentStart(m_current))
1759 type = CharacterIdentifierStart;
1760 else if (isLineTerminator(m_current))
1761 type = CharacterLineTerminator;
1762 else
1763 type = CharacterInvalid;
1764
1765 switch (type) {
1766 case CharacterGreater:
1767 shift();
1768 if (m_current == '>') {
1769 shift();
1770 if (m_current == '>') {
1771 shift();
1772 if (m_current == '=') {
1773 shift();
1774 token = URSHIFTEQUAL;
1775 break;
1776 }
1777 token = URSHIFT;
1778 break;
1779 }
1780 if (m_current == '=') {
1781 shift();
1782 token = RSHIFTEQUAL;
1783 break;
1784 }
1785 token = RSHIFT;
1786 break;
1787 }
1788 if (m_current == '=') {
1789 shift();
1790 token = GE;
1791 break;
1792 }
1793 token = GT;
1794 break;
1795 case CharacterEqual: {
1796 #if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
1797 if (peek(1) == '>') {
1798 token = ARROWFUNCTION;
1799 tokenData->line = lineNumber();
1800 tokenData->offset = currentOffset();
1801 tokenData->lineStartOffset = currentLineStartOffset();
1802 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1803 shift();
1804 shift();
1805 break;
1806 }
1807 #endif
1808 shift();
1809 if (m_current == '=') {
1810 shift();
1811 if (m_current == '=') {
1812 shift();
1813 token = STREQ;
1814 break;
1815 }
1816 token = EQEQ;
1817 break;
1818 }
1819 token = EQUAL;
1820 break;
1821 }
1822 case CharacterLess:
1823 shift();
1824 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1825 // <!-- marks the beginning of a line comment (for www usage)
1826 goto inSingleLineComment;
1827 }
1828 if (m_current == '<') {
1829 shift();
1830 if (m_current == '=') {
1831 shift();
1832 token = LSHIFTEQUAL;
1833 break;
1834 }
1835 token = LSHIFT;
1836 break;
1837 }
1838 if (m_current == '=') {
1839 shift();
1840 token = LE;
1841 break;
1842 }
1843 token = LT;
1844 break;
1845 case CharacterExclamationMark:
1846 shift();
1847 if (m_current == '=') {
1848 shift();
1849 if (m_current == '=') {
1850 shift();
1851 token = STRNEQ;
1852 break;
1853 }
1854 token = NE;
1855 break;
1856 }
1857 token = EXCLAMATION;
1858 break;
1859 case CharacterAdd:
1860 shift();
1861 if (m_current == '+') {
1862 shift();
1863 token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1864 break;
1865 }
1866 if (m_current == '=') {
1867 shift();
1868 token = PLUSEQUAL;
1869 break;
1870 }
1871 token = PLUS;
1872 break;
1873 case CharacterSub:
1874 shift();
1875 if (m_current == '-') {
1876 shift();
1877 if (m_atLineStart && m_current == '>') {
1878 shift();
1879 goto inSingleLineComment;
1880 }
1881 token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
1882 break;
1883 }
1884 if (m_current == '=') {
1885 shift();
1886 token = MINUSEQUAL;
1887 break;
1888 }
1889 token = MINUS;
1890 break;
1891 case CharacterMultiply:
1892 shift();
1893 if (m_current == '=') {
1894 shift();
1895 token = MULTEQUAL;
1896 break;
1897 }
1898 token = TIMES;
1899 break;
1900 case CharacterSlash:
1901 shift();
1902 if (m_current == '/') {
1903 shift();
1904 goto inSingleLineComment;
1905 }
1906 if (m_current == '*') {
1907 shift();
1908 if (parseMultilineComment())
1909 goto start;
1910 m_lexErrorMessage = ASCIILiteral("Multiline comment was not closed properly");
1911 token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
1912 goto returnError;
1913 }
1914 if (m_current == '=') {
1915 shift();
1916 token = DIVEQUAL;
1917 break;
1918 }
1919 token = DIVIDE;
1920 break;
1921 case CharacterAnd:
1922 shift();
1923 if (m_current == '&') {
1924 shift();
1925 token = AND;
1926 break;
1927 }
1928 if (m_current == '=') {
1929 shift();
1930 token = ANDEQUAL;
1931 break;
1932 }
1933 token = BITAND;
1934 break;
1935 case CharacterXor:
1936 shift();
1937 if (m_current == '=') {
1938 shift();
1939 token = XOREQUAL;
1940 break;
1941 }
1942 token = BITXOR;
1943 break;
1944 case CharacterModulo:
1945 shift();
1946 if (m_current == '=') {
1947 shift();
1948 token = MODEQUAL;
1949 break;
1950 }
1951 token = MOD;
1952 break;
1953 case CharacterOr:
1954 shift();
1955 if (m_current == '=') {
1956 shift();
1957 token = OREQUAL;
1958 break;
1959 }
1960 if (m_current == '|') {
1961 shift();
1962 token = OR;
1963 break;
1964 }
1965 token = BITOR;
1966 break;
1967 case CharacterOpenParen:
1968 token = OPENPAREN;
1969 shift();
1970 break;
1971 case CharacterCloseParen:
1972 token = CLOSEPAREN;
1973 shift();
1974 break;
1975 case CharacterOpenBracket:
1976 token = OPENBRACKET;
1977 shift();
1978 break;
1979 case CharacterCloseBracket:
1980 token = CLOSEBRACKET;
1981 shift();
1982 break;
1983 case CharacterComma:
1984 token = COMMA;
1985 shift();
1986 break;
1987 case CharacterColon:
1988 token = COLON;
1989 shift();
1990 break;
1991 case CharacterQuestion:
1992 token = QUESTION;
1993 shift();
1994 break;
1995 case CharacterTilde:
1996 token = TILDE;
1997 shift();
1998 break;
1999 case CharacterSemicolon:
2000 shift();
2001 token = SEMICOLON;
2002 break;
2003 case CharacterOpenBrace:
2004 tokenData->line = lineNumber();
2005 tokenData->offset = currentOffset();
2006 tokenData->lineStartOffset = currentLineStartOffset();
2007 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2008 shift();
2009 token = OPENBRACE;
2010 break;
2011 case CharacterCloseBrace:
2012 tokenData->line = lineNumber();
2013 tokenData->offset = currentOffset();
2014 tokenData->lineStartOffset = currentLineStartOffset();
2015 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2016 shift();
2017 token = CLOSEBRACE;
2018 break;
2019 case CharacterDot:
2020 shift();
2021 if (!isASCIIDigit(m_current)) {
2022 if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
2023 shift();
2024 shift();
2025 token = DOTDOTDOT;
2026 break;
2027 }
2028 token = DOT;
2029 break;
2030 }
2031 goto inNumberAfterDecimalPoint;
2032 case CharacterZero:
2033 shift();
2034 if ((m_current | 0x20) == 'x') {
2035 if (!isASCIIHexDigit(peek(1))) {
2036 m_lexErrorMessage = ASCIILiteral("No hexadecimal digits after '0x'");
2037 token = INVALID_HEX_NUMBER_ERRORTOK;
2038 goto returnError;
2039 }
2040
2041 // Shift out the 'x' prefix.
2042 shift();
2043
2044 parseHex(tokenData->doubleValue);
2045 if (isIdentStart(m_current)) {
2046 m_lexErrorMessage = ASCIILiteral("No space between hexadecimal literal and identifier");
2047 token = INVALID_HEX_NUMBER_ERRORTOK;
2048 goto returnError;
2049 }
2050 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2051 m_buffer8.shrink(0);
2052 break;
2053 }
2054 if ((m_current | 0x20) == 'b') {
2055 if (!isASCIIBinaryDigit(peek(1))) {
2056 m_lexErrorMessage = ASCIILiteral("No binary digits after '0b'");
2057 token = INVALID_BINARY_NUMBER_ERRORTOK;
2058 goto returnError;
2059 }
2060
2061 // Shift out the 'b' prefix.
2062 shift();
2063
2064 parseBinary(tokenData->doubleValue);
2065 if (isIdentStart(m_current)) {
2066 m_lexErrorMessage = ASCIILiteral("No space between binary literal and identifier");
2067 token = INVALID_BINARY_NUMBER_ERRORTOK;
2068 goto returnError;
2069 }
2070 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2071 m_buffer8.shrink(0);
2072 break;
2073 }
2074
2075 if ((m_current | 0x20) == 'o') {
2076 if (!isASCIIOctalDigit(peek(1))) {
2077 m_lexErrorMessage = ASCIILiteral("No octal digits after '0o'");
2078 token = INVALID_OCTAL_NUMBER_ERRORTOK;
2079 goto returnError;
2080 }
2081
2082 // Shift out the 'o' prefix.
2083 shift();
2084
2085 parseOctal(tokenData->doubleValue);
2086 if (isIdentStart(m_current)) {
2087 m_lexErrorMessage = ASCIILiteral("No space between octal literal and identifier");
2088 token = INVALID_OCTAL_NUMBER_ERRORTOK;
2089 goto returnError;
2090 }
2091 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2092 m_buffer8.shrink(0);
2093 break;
2094 }
2095
2096 record8('0');
2097 if (strictMode && isASCIIDigit(m_current)) {
2098 m_lexErrorMessage = ASCIILiteral("Decimal integer literals with a leading zero are forbidden in strict mode");
2099 token = INVALID_OCTAL_NUMBER_ERRORTOK;
2100 goto returnError;
2101 }
2102 if (isASCIIOctalDigit(m_current)) {
2103 if (parseOctal(tokenData->doubleValue)) {
2104 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2105 }
2106 }
2107 FALLTHROUGH;
2108 case CharacterNumber:
2109 if (LIKELY(token != INTEGER && token != DOUBLE)) {
2110 if (!parseDecimal(tokenData->doubleValue)) {
2111 token = INTEGER;
2112 if (m_current == '.') {
2113 shift();
2114 inNumberAfterDecimalPoint:
2115 parseNumberAfterDecimalPoint();
2116 token = DOUBLE;
2117 }
2118 if ((m_current | 0x20) == 'e') {
2119 if (!parseNumberAfterExponentIndicator()) {
2120 m_lexErrorMessage = ASCIILiteral("Non-number found after exponent indicator");
2121 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2122 goto returnError;
2123 }
2124 }
2125 size_t parsedLength;
2126 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2127 if (token == INTEGER)
2128 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2129 } else
2130 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2131 }
2132
2133 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
2134 if (UNLIKELY(isIdentStart(m_current))) {
2135 m_lexErrorMessage = ASCIILiteral("At least one digit must occur after a decimal point");
2136 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2137 goto returnError;
2138 }
2139 m_buffer8.shrink(0);
2140 break;
2141 case CharacterQuote: {
2142 StringParseResult result = StringCannotBeParsed;
2143 if (lexerFlags & LexerFlagsDontBuildStrings)
2144 result = parseString<false>(tokenData, strictMode);
2145 else
2146 result = parseString<true>(tokenData, strictMode);
2147
2148 if (UNLIKELY(result != StringParsedSuccessfully)) {
2149 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
2150 goto returnError;
2151 }
2152 shift();
2153 token = STRING;
2154 break;
2155 }
2156 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
2157 case CharacterBackQuote: {
2158 // Skip backquote.
2159 shift();
2160 StringParseResult result = StringCannotBeParsed;
2161 if (lexerFlags & LexerFlagsDontBuildStrings)
2162 result = parseTemplateLiteral<false>(tokenData, RawStringsBuildMode::BuildRawStrings);
2163 else
2164 result = parseTemplateLiteral<true>(tokenData, RawStringsBuildMode::BuildRawStrings);
2165
2166 if (UNLIKELY(result != StringParsedSuccessfully)) {
2167 token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2168 goto returnError;
2169 }
2170 token = TEMPLATE;
2171 break;
2172 }
2173 #endif
2174 case CharacterIdentifierStart:
2175 ASSERT(isIdentStart(m_current));
2176 FALLTHROUGH;
2177 case CharacterBackSlash:
2178 parseIdent:
2179 if (lexerFlags & LexexFlagsDontBuildKeywords)
2180 token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
2181 else
2182 token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
2183 break;
2184 case CharacterLineTerminator:
2185 ASSERT(isLineTerminator(m_current));
2186 shiftLineTerminator();
2187 m_atLineStart = true;
2188 m_terminator = true;
2189 m_lineStart = m_code;
2190 goto start;
2191 case CharacterPrivateIdentifierStart:
2192 if (m_parsingBuiltinFunction)
2193 goto parseIdent;
2194
2195 FALLTHROUGH;
2196 case CharacterInvalid:
2197 m_lexErrorMessage = invalidCharacterMessage();
2198 token = ERRORTOK;
2199 goto returnError;
2200 default:
2201 RELEASE_ASSERT_NOT_REACHED();
2202 m_lexErrorMessage = ASCIILiteral("Internal Error");
2203 token = ERRORTOK;
2204 goto returnError;
2205 }
2206
2207 m_atLineStart = false;
2208 goto returnToken;
2209
2210 inSingleLineComment:
2211 while (!isLineTerminator(m_current)) {
2212 if (atEnd())
2213 return EOFTOK;
2214 shift();
2215 }
2216 shiftLineTerminator();
2217 m_atLineStart = true;
2218 m_terminator = true;
2219 m_lineStart = m_code;
2220 if (!lastTokenWasRestrKeyword())
2221 goto start;
2222
2223 token = SEMICOLON;
2224 // Fall through into returnToken.
2225
2226 returnToken:
2227 tokenLocation->line = m_lineNumber;
2228 tokenLocation->endOffset = currentOffset();
2229 tokenLocation->lineStartOffset = currentLineStartOffset();
2230 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
2231 tokenRecord->m_endPosition = currentPosition();
2232 m_lastToken = token;
2233 return token;
2234
2235 returnError:
2236 m_error = true;
2237 tokenLocation->line = m_lineNumber;
2238 tokenLocation->endOffset = currentOffset();
2239 tokenLocation->lineStartOffset = currentLineStartOffset();
2240 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
2241 tokenRecord->m_endPosition = currentPosition();
2242 RELEASE_ASSERT(token & ErrorTokenFlag);
2243 return token;
2244 }
2245
2246 template <typename T>
2247 static inline void orCharacter(UChar&, UChar);
2248
2249 template <>
2250 inline void orCharacter<LChar>(UChar&, UChar) { }
2251
2252 template <>
2253 inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
2254 {
2255 orAccumulator |= character;
2256 }
2257
2258 template <typename T>
2259 bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
2260 {
2261 ASSERT(m_buffer16.isEmpty());
2262
2263 bool lastWasEscape = false;
2264 bool inBrackets = false;
2265 UChar charactersOredTogether = 0;
2266
2267 if (patternPrefix) {
2268 ASSERT(!isLineTerminator(patternPrefix));
2269 ASSERT(patternPrefix != '/');
2270 ASSERT(patternPrefix != '[');
2271 record16(patternPrefix);
2272 }
2273
2274 while (true) {
2275 if (isLineTerminator(m_current) || atEnd()) {
2276 m_buffer16.shrink(0);
2277 return false;
2278 }
2279
2280 T prev = m_current;
2281
2282 shift();
2283
2284 if (prev == '/' && !lastWasEscape && !inBrackets)
2285 break;
2286
2287 record16(prev);
2288 orCharacter<T>(charactersOredTogether, prev);
2289
2290 if (lastWasEscape) {
2291 lastWasEscape = false;
2292 continue;
2293 }
2294
2295 switch (prev) {
2296 case '[':
2297 inBrackets = true;
2298 break;
2299 case ']':
2300 inBrackets = false;
2301 break;
2302 case '\\':
2303 lastWasEscape = true;
2304 break;
2305 }
2306 }
2307
2308 pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2309
2310 m_buffer16.shrink(0);
2311 charactersOredTogether = 0;
2312
2313 while (isIdentPart(m_current)) {
2314 record16(m_current);
2315 orCharacter<T>(charactersOredTogether, m_current);
2316 shift();
2317 }
2318
2319 flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2320 m_buffer16.shrink(0);
2321
2322 return true;
2323 }
2324
2325 template <typename T>
2326 bool Lexer<T>::skipRegExp()
2327 {
2328 bool lastWasEscape = false;
2329 bool inBrackets = false;
2330
2331 while (true) {
2332 if (isLineTerminator(m_current) || atEnd())
2333 return false;
2334
2335 T prev = m_current;
2336
2337 shift();
2338
2339 if (prev == '/' && !lastWasEscape && !inBrackets)
2340 break;
2341
2342 if (lastWasEscape) {
2343 lastWasEscape = false;
2344 continue;
2345 }
2346
2347 switch (prev) {
2348 case '[':
2349 inBrackets = true;
2350 break;
2351 case ']':
2352 inBrackets = false;
2353 break;
2354 case '\\':
2355 lastWasEscape = true;
2356 break;
2357 }
2358 }
2359
2360 while (isIdentPart(m_current))
2361 shift();
2362
2363 return true;
2364 }
2365
2366 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
2367 template <typename T>
2368 JSTokenType Lexer<T>::scanTrailingTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
2369 {
2370 JSTokenData* tokenData = &tokenRecord->m_data;
2371 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
2372 ASSERT(!m_error);
2373 ASSERT(m_buffer16.isEmpty());
2374
2375 // Leading closing brace } is already shifted in the previous token scan.
2376 // So in this re-scan phase, shift() is not needed here.
2377 StringParseResult result = parseTemplateLiteral<true>(tokenData, rawStringsBuildMode);
2378 JSTokenType token = ERRORTOK;
2379 if (UNLIKELY(result != StringParsedSuccessfully)) {
2380 token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2381 m_error = true;
2382 } else {
2383 token = TEMPLATE;
2384 m_lastToken = token;
2385 }
2386
2387 // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2388 m_atLineStart = false;
2389
2390 // Adjust current tokenLocation data for TemplateString.
2391 tokenLocation->line = m_lineNumber;
2392 tokenLocation->endOffset = currentOffset();
2393 tokenLocation->lineStartOffset = currentLineStartOffset();
2394 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
2395 tokenRecord->m_endPosition = currentPosition();
2396 return token;
2397 }
2398 #endif
2399
2400 template <typename T>
2401 void Lexer<T>::clear()
2402 {
2403 m_arena = 0;
2404
2405 Vector<LChar> newBuffer8;
2406 m_buffer8.swap(newBuffer8);
2407
2408 Vector<UChar> newBuffer16;
2409 m_buffer16.swap(newBuffer16);
2410
2411 Vector<UChar> newBufferForRawTemplateString16;
2412 m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
2413
2414 m_isReparsing = false;
2415 }
2416
2417 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
2418 template class Lexer<LChar>;
2419 template class Lexer<UChar>;
2420
2421 } // namespace JSC