]> git.saurik.com Git - apple/javascriptcore.git/blame - parser/Lexer.cpp
JavaScriptCore-7601.1.46.3.tar.gz
[apple/javascriptcore.git] / parser / Lexer.cpp
CommitLineData
9dae56ea
A
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
93a37866 3 * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
9dae56ea 4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
14957cd0 5 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6fe7ccc8 6 * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
9dae56ea
A
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25#include "config.h"
26#include "Lexer.h"
27
81345200 28#include "JSFunctionInlines.h"
14957cd0 29
81345200 30#include "BuiltinNames.h"
9dae56ea 31#include "JSGlobalObjectFunctions.h"
14957cd0 32#include "Identifier.h"
9dae56ea 33#include "Nodes.h"
81345200 34#include "JSCInlines.h"
6fe7ccc8 35#include <wtf/dtoa.h>
9dae56ea
A
36#include <ctype.h>
37#include <limits.h>
38#include <string.h>
9dae56ea 39#include <wtf/Assertions.h>
9dae56ea 40
14957cd0 41#include "KeywordLookup.h"
9dae56ea 42#include "Lexer.lut.h"
6fe7ccc8 43#include "Parser.h"
9dae56ea 44
9dae56ea
A
45namespace JSC {
46
81345200 47Keywords::Keywords(VM& vm)
93a37866 48 : m_vm(vm)
6fe7ccc8
A
49 , m_keywordTable(JSC::mainTable)
50{
51}
14957cd0
A
52
// Classification of a source character; this is the value stored in the
// typesOfLatin1Characters table. The numeric order of the enumerators is
// significant (see the note on the first three values below).
53enum CharacterType {
54 // Types for the main switch
55
56 // The first three types are fixed, and also used for identifying
57 // ASCII alpha and alphanumeric characters: isIdentStart tests for equality with
// CharacterIdentifierStart and isIdentPart tests "<= CharacterNumber", so no
// other enumerator may be inserted before CharacterNumber.
58 CharacterIdentifierStart,
59 CharacterZero,
60 CharacterNumber,
61
62 CharacterInvalid,
63 CharacterLineTerminator,
64 CharacterExclamationMark,
65 CharacterOpenParen,
66 CharacterCloseParen,
67 CharacterOpenBracket,
68 CharacterCloseBracket,
69 CharacterComma,
70 CharacterColon,
71 CharacterQuestion,
72 CharacterTilde,
73 CharacterQuote,
ed1e77d3 74 CharacterBackQuote,
14957cd0
A
75 CharacterDot,
76 CharacterSlash,
77 CharacterBackSlash,
78 CharacterSemicolon,
79 CharacterOpenBrace,
80 CharacterCloseBrace,
81
82 CharacterAdd,
83 CharacterSub,
84 CharacterMultiply,
85 CharacterModulo,
86 CharacterAnd,
87 CharacterXor,
88 CharacterOr,
89 CharacterLess,
90 CharacterGreater,
91 CharacterEqual,
92
93 // Other types (white space, and '@' which starts a private identifier)
94 CharacterWhiteSpace,
81345200 95 CharacterPrivateIdentifierStart
14957cd0
A
96};
97
6fe7ccc8
A
98// 256 Latin-1 codes
99static const unsigned short typesOfLatin1Characters[256] = {
14957cd0
A
100/* 0 - Null */ CharacterInvalid,
101/* 1 - Start of Heading */ CharacterInvalid,
102/* 2 - Start of Text */ CharacterInvalid,
103/* 3 - End of Text */ CharacterInvalid,
104/* 4 - End of Transm. */ CharacterInvalid,
105/* 5 - Enquiry */ CharacterInvalid,
106/* 6 - Acknowledgment */ CharacterInvalid,
107/* 7 - Bell */ CharacterInvalid,
108/* 8 - Back Space */ CharacterInvalid,
109/* 9 - Horizontal Tab */ CharacterWhiteSpace,
110/* 10 - Line Feed */ CharacterLineTerminator,
111/* 11 - Vertical Tab */ CharacterWhiteSpace,
112/* 12 - Form Feed */ CharacterWhiteSpace,
113/* 13 - Carriage Return */ CharacterLineTerminator,
114/* 14 - Shift Out */ CharacterInvalid,
115/* 15 - Shift In */ CharacterInvalid,
116/* 16 - Data Line Escape */ CharacterInvalid,
117/* 17 - Device Control 1 */ CharacterInvalid,
118/* 18 - Device Control 2 */ CharacterInvalid,
119/* 19 - Device Control 3 */ CharacterInvalid,
120/* 20 - Device Control 4 */ CharacterInvalid,
121/* 21 - Negative Ack. */ CharacterInvalid,
122/* 22 - Synchronous Idle */ CharacterInvalid,
123/* 23 - End of Transmit */ CharacterInvalid,
124/* 24 - Cancel */ CharacterInvalid,
125/* 25 - End of Medium */ CharacterInvalid,
126/* 26 - Substitute */ CharacterInvalid,
127/* 27 - Escape */ CharacterInvalid,
128/* 28 - File Separator */ CharacterInvalid,
129/* 29 - Group Separator */ CharacterInvalid,
130/* 30 - Record Separator */ CharacterInvalid,
131/* 31 - Unit Separator */ CharacterInvalid,
132/* 32 - Space */ CharacterWhiteSpace,
133/* 33 - ! */ CharacterExclamationMark,
134/* 34 - " */ CharacterQuote,
135/* 35 - # */ CharacterInvalid,
136/* 36 - $ */ CharacterIdentifierStart,
137/* 37 - % */ CharacterModulo,
138/* 38 - & */ CharacterAnd,
139/* 39 - ' */ CharacterQuote,
140/* 40 - ( */ CharacterOpenParen,
141/* 41 - ) */ CharacterCloseParen,
142/* 42 - * */ CharacterMultiply,
143/* 43 - + */ CharacterAdd,
144/* 44 - , */ CharacterComma,
145/* 45 - - */ CharacterSub,
146/* 46 - . */ CharacterDot,
147/* 47 - / */ CharacterSlash,
148/* 48 - 0 */ CharacterZero,
149/* 49 - 1 */ CharacterNumber,
150/* 50 - 2 */ CharacterNumber,
151/* 51 - 3 */ CharacterNumber,
152/* 52 - 4 */ CharacterNumber,
153/* 53 - 5 */ CharacterNumber,
154/* 54 - 6 */ CharacterNumber,
155/* 55 - 7 */ CharacterNumber,
156/* 56 - 8 */ CharacterNumber,
157/* 57 - 9 */ CharacterNumber,
158/* 58 - : */ CharacterColon,
159/* 59 - ; */ CharacterSemicolon,
160/* 60 - < */ CharacterLess,
161/* 61 - = */ CharacterEqual,
162/* 62 - > */ CharacterGreater,
163/* 63 - ? */ CharacterQuestion,
81345200 164/* 64 - @ */ CharacterPrivateIdentifierStart,
14957cd0
A
165/* 65 - A */ CharacterIdentifierStart,
166/* 66 - B */ CharacterIdentifierStart,
167/* 67 - C */ CharacterIdentifierStart,
168/* 68 - D */ CharacterIdentifierStart,
169/* 69 - E */ CharacterIdentifierStart,
170/* 70 - F */ CharacterIdentifierStart,
171/* 71 - G */ CharacterIdentifierStart,
172/* 72 - H */ CharacterIdentifierStart,
173/* 73 - I */ CharacterIdentifierStart,
174/* 74 - J */ CharacterIdentifierStart,
175/* 75 - K */ CharacterIdentifierStart,
176/* 76 - L */ CharacterIdentifierStart,
177/* 77 - M */ CharacterIdentifierStart,
178/* 78 - N */ CharacterIdentifierStart,
179/* 79 - O */ CharacterIdentifierStart,
180/* 80 - P */ CharacterIdentifierStart,
181/* 81 - Q */ CharacterIdentifierStart,
182/* 82 - R */ CharacterIdentifierStart,
183/* 83 - S */ CharacterIdentifierStart,
184/* 84 - T */ CharacterIdentifierStart,
185/* 85 - U */ CharacterIdentifierStart,
186/* 86 - V */ CharacterIdentifierStart,
187/* 87 - W */ CharacterIdentifierStart,
188/* 88 - X */ CharacterIdentifierStart,
189/* 89 - Y */ CharacterIdentifierStart,
190/* 90 - Z */ CharacterIdentifierStart,
191/* 91 - [ */ CharacterOpenBracket,
192/* 92 - \ */ CharacterBackSlash,
193/* 93 - ] */ CharacterCloseBracket,
194/* 94 - ^ */ CharacterXor,
195/* 95 - _ */ CharacterIdentifierStart,
ed1e77d3
A
196#if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
197/* 96 - ` */ CharacterBackQuote,
198#else
14957cd0 199/* 96 - ` */ CharacterInvalid,
ed1e77d3 200#endif
14957cd0
A
201/* 97 - a */ CharacterIdentifierStart,
202/* 98 - b */ CharacterIdentifierStart,
203/* 99 - c */ CharacterIdentifierStart,
204/* 100 - d */ CharacterIdentifierStart,
205/* 101 - e */ CharacterIdentifierStart,
206/* 102 - f */ CharacterIdentifierStart,
207/* 103 - g */ CharacterIdentifierStart,
208/* 104 - h */ CharacterIdentifierStart,
209/* 105 - i */ CharacterIdentifierStart,
210/* 106 - j */ CharacterIdentifierStart,
211/* 107 - k */ CharacterIdentifierStart,
212/* 108 - l */ CharacterIdentifierStart,
213/* 109 - m */ CharacterIdentifierStart,
214/* 110 - n */ CharacterIdentifierStart,
215/* 111 - o */ CharacterIdentifierStart,
216/* 112 - p */ CharacterIdentifierStart,
217/* 113 - q */ CharacterIdentifierStart,
218/* 114 - r */ CharacterIdentifierStart,
219/* 115 - s */ CharacterIdentifierStart,
220/* 116 - t */ CharacterIdentifierStart,
221/* 117 - u */ CharacterIdentifierStart,
222/* 118 - v */ CharacterIdentifierStart,
223/* 119 - w */ CharacterIdentifierStart,
224/* 120 - x */ CharacterIdentifierStart,
225/* 121 - y */ CharacterIdentifierStart,
226/* 122 - z */ CharacterIdentifierStart,
227/* 123 - { */ CharacterOpenBrace,
228/* 124 - | */ CharacterOr,
229/* 125 - } */ CharacterCloseBrace,
230/* 126 - ~ */ CharacterTilde,
231/* 127 - Delete */ CharacterInvalid,
6fe7ccc8
A
232/* 128 - Cc category */ CharacterInvalid,
233/* 129 - Cc category */ CharacterInvalid,
234/* 130 - Cc category */ CharacterInvalid,
235/* 131 - Cc category */ CharacterInvalid,
236/* 132 - Cc category */ CharacterInvalid,
237/* 133 - Cc category */ CharacterInvalid,
238/* 134 - Cc category */ CharacterInvalid,
239/* 135 - Cc category */ CharacterInvalid,
240/* 136 - Cc category */ CharacterInvalid,
241/* 137 - Cc category */ CharacterInvalid,
242/* 138 - Cc category */ CharacterInvalid,
243/* 139 - Cc category */ CharacterInvalid,
244/* 140 - Cc category */ CharacterInvalid,
245/* 141 - Cc category */ CharacterInvalid,
246/* 142 - Cc category */ CharacterInvalid,
247/* 143 - Cc category */ CharacterInvalid,
248/* 144 - Cc category */ CharacterInvalid,
249/* 145 - Cc category */ CharacterInvalid,
250/* 146 - Cc category */ CharacterInvalid,
251/* 147 - Cc category */ CharacterInvalid,
252/* 148 - Cc category */ CharacterInvalid,
253/* 149 - Cc category */ CharacterInvalid,
254/* 150 - Cc category */ CharacterInvalid,
255/* 151 - Cc category */ CharacterInvalid,
256/* 152 - Cc category */ CharacterInvalid,
257/* 153 - Cc category */ CharacterInvalid,
258/* 154 - Cc category */ CharacterInvalid,
259/* 155 - Cc category */ CharacterInvalid,
260/* 156 - Cc category */ CharacterInvalid,
261/* 157 - Cc category */ CharacterInvalid,
262/* 158 - Cc category */ CharacterInvalid,
263/* 159 - Cc category */ CharacterInvalid,
264/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
265/* 161 - Po category */ CharacterInvalid,
266/* 162 - Sc category */ CharacterInvalid,
267/* 163 - Sc category */ CharacterInvalid,
268/* 164 - Sc category */ CharacterInvalid,
269/* 165 - Sc category */ CharacterInvalid,
270/* 166 - So category */ CharacterInvalid,
271/* 167 - So category */ CharacterInvalid,
272/* 168 - Sk category */ CharacterInvalid,
273/* 169 - So category */ CharacterInvalid,
274/* 170 - Ll category */ CharacterIdentifierStart,
275/* 171 - Pi category */ CharacterInvalid,
276/* 172 - Sm category */ CharacterInvalid,
277/* 173 - Cf category */ CharacterInvalid,
278/* 174 - So category */ CharacterInvalid,
279/* 175 - Sk category */ CharacterInvalid,
280/* 176 - So category */ CharacterInvalid,
281/* 177 - Sm category */ CharacterInvalid,
282/* 178 - No category */ CharacterInvalid,
283/* 179 - No category */ CharacterInvalid,
284/* 180 - Sk category */ CharacterInvalid,
285/* 181 - Ll category */ CharacterIdentifierStart,
286/* 182 - So category */ CharacterInvalid,
287/* 183 - Po category */ CharacterInvalid,
288/* 184 - Sk category */ CharacterInvalid,
289/* 185 - No category */ CharacterInvalid,
290/* 186 - Ll category */ CharacterIdentifierStart,
291/* 187 - Pf category */ CharacterInvalid,
292/* 188 - No category */ CharacterInvalid,
293/* 189 - No category */ CharacterInvalid,
294/* 190 - No category */ CharacterInvalid,
295/* 191 - Po category */ CharacterInvalid,
296/* 192 - Lu category */ CharacterIdentifierStart,
297/* 193 - Lu category */ CharacterIdentifierStart,
298/* 194 - Lu category */ CharacterIdentifierStart,
299/* 195 - Lu category */ CharacterIdentifierStart,
300/* 196 - Lu category */ CharacterIdentifierStart,
301/* 197 - Lu category */ CharacterIdentifierStart,
302/* 198 - Lu category */ CharacterIdentifierStart,
303/* 199 - Lu category */ CharacterIdentifierStart,
304/* 200 - Lu category */ CharacterIdentifierStart,
305/* 201 - Lu category */ CharacterIdentifierStart,
306/* 202 - Lu category */ CharacterIdentifierStart,
307/* 203 - Lu category */ CharacterIdentifierStart,
308/* 204 - Lu category */ CharacterIdentifierStart,
309/* 205 - Lu category */ CharacterIdentifierStart,
310/* 206 - Lu category */ CharacterIdentifierStart,
311/* 207 - Lu category */ CharacterIdentifierStart,
312/* 208 - Lu category */ CharacterIdentifierStart,
313/* 209 - Lu category */ CharacterIdentifierStart,
314/* 210 - Lu category */ CharacterIdentifierStart,
315/* 211 - Lu category */ CharacterIdentifierStart,
316/* 212 - Lu category */ CharacterIdentifierStart,
317/* 213 - Lu category */ CharacterIdentifierStart,
318/* 214 - Lu category */ CharacterIdentifierStart,
319/* 215 - Sm category */ CharacterInvalid,
320/* 216 - Lu category */ CharacterIdentifierStart,
321/* 217 - Lu category */ CharacterIdentifierStart,
322/* 218 - Lu category */ CharacterIdentifierStart,
323/* 219 - Lu category */ CharacterIdentifierStart,
324/* 220 - Lu category */ CharacterIdentifierStart,
325/* 221 - Lu category */ CharacterIdentifierStart,
326/* 222 - Lu category */ CharacterIdentifierStart,
327/* 223 - Ll category */ CharacterIdentifierStart,
328/* 224 - Ll category */ CharacterIdentifierStart,
329/* 225 - Ll category */ CharacterIdentifierStart,
330/* 226 - Ll category */ CharacterIdentifierStart,
331/* 227 - Ll category */ CharacterIdentifierStart,
332/* 228 - Ll category */ CharacterIdentifierStart,
333/* 229 - Ll category */ CharacterIdentifierStart,
334/* 230 - Ll category */ CharacterIdentifierStart,
335/* 231 - Ll category */ CharacterIdentifierStart,
336/* 232 - Ll category */ CharacterIdentifierStart,
337/* 233 - Ll category */ CharacterIdentifierStart,
338/* 234 - Ll category */ CharacterIdentifierStart,
339/* 235 - Ll category */ CharacterIdentifierStart,
340/* 236 - Ll category */ CharacterIdentifierStart,
341/* 237 - Ll category */ CharacterIdentifierStart,
342/* 238 - Ll category */ CharacterIdentifierStart,
343/* 239 - Ll category */ CharacterIdentifierStart,
344/* 240 - Ll category */ CharacterIdentifierStart,
345/* 241 - Ll category */ CharacterIdentifierStart,
346/* 242 - Ll category */ CharacterIdentifierStart,
347/* 243 - Ll category */ CharacterIdentifierStart,
348/* 244 - Ll category */ CharacterIdentifierStart,
349/* 245 - Ll category */ CharacterIdentifierStart,
350/* 246 - Ll category */ CharacterIdentifierStart,
351/* 247 - Sm category */ CharacterInvalid,
352/* 248 - Ll category */ CharacterIdentifierStart,
353/* 249 - Ll category */ CharacterIdentifierStart,
354/* 250 - Ll category */ CharacterIdentifierStart,
355/* 251 - Ll category */ CharacterIdentifierStart,
356/* 252 - Ll category */ CharacterIdentifierStart,
357/* 253 - Ll category */ CharacterIdentifierStart,
358/* 254 - Ll category */ CharacterIdentifierStart,
359/* 255 - Ll category */ CharacterIdentifierStart
14957cd0 360};
9dae56ea 361
93a37866
A
// This table is indexed by the ASCII code of X (starting at 0, NUL) and provides the character
// that results from the escape sequence \X. A table value of 0 means that more processing needs to be done.
364static const LChar singleCharacterEscapeValuesForASCII[128] = {
365/* 0 - Null */ 0,
366/* 1 - Start of Heading */ 0,
367/* 2 - Start of Text */ 0,
368/* 3 - End of Text */ 0,
369/* 4 - End of Transm. */ 0,
370/* 5 - Enquiry */ 0,
371/* 6 - Acknowledgment */ 0,
372/* 7 - Bell */ 0,
373/* 8 - Back Space */ 0,
374/* 9 - Horizontal Tab */ 0,
375/* 10 - Line Feed */ 0,
376/* 11 - Vertical Tab */ 0,
377/* 12 - Form Feed */ 0,
378/* 13 - Carriage Return */ 0,
379/* 14 - Shift Out */ 0,
380/* 15 - Shift In */ 0,
381/* 16 - Data Line Escape */ 0,
382/* 17 - Device Control 1 */ 0,
383/* 18 - Device Control 2 */ 0,
384/* 19 - Device Control 3 */ 0,
385/* 20 - Device Control 4 */ 0,
386/* 21 - Negative Ack. */ 0,
387/* 22 - Synchronous Idle */ 0,
388/* 23 - End of Transmit */ 0,
389/* 24 - Cancel */ 0,
390/* 25 - End of Medium */ 0,
391/* 26 - Substitute */ 0,
392/* 27 - Escape */ 0,
393/* 28 - File Separator */ 0,
394/* 29 - Group Separator */ 0,
395/* 30 - Record Separator */ 0,
396/* 31 - Unit Separator */ 0,
397/* 32 - Space */ ' ',
398/* 33 - ! */ '!',
399/* 34 - " */ '"',
400/* 35 - # */ '#',
401/* 36 - $ */ '$',
402/* 37 - % */ '%',
403/* 38 - & */ '&',
404/* 39 - ' */ '\'',
405/* 40 - ( */ '(',
406/* 41 - ) */ ')',
407/* 42 - * */ '*',
408/* 43 - + */ '+',
409/* 44 - , */ ',',
410/* 45 - - */ '-',
411/* 46 - . */ '.',
412/* 47 - / */ '/',
413/* 48 - 0 */ 0,
414/* 49 - 1 */ 0,
415/* 50 - 2 */ 0,
416/* 51 - 3 */ 0,
417/* 52 - 4 */ 0,
418/* 53 - 5 */ 0,
419/* 54 - 6 */ 0,
420/* 55 - 7 */ 0,
421/* 56 - 8 */ 0,
422/* 57 - 9 */ 0,
423/* 58 - : */ ':',
424/* 59 - ; */ ';',
425/* 60 - < */ '<',
426/* 61 - = */ '=',
427/* 62 - > */ '>',
428/* 63 - ? */ '?',
429/* 64 - @ */ '@',
430/* 65 - A */ 'A',
431/* 66 - B */ 'B',
432/* 67 - C */ 'C',
433/* 68 - D */ 'D',
434/* 69 - E */ 'E',
435/* 70 - F */ 'F',
436/* 71 - G */ 'G',
437/* 72 - H */ 'H',
438/* 73 - I */ 'I',
439/* 74 - J */ 'J',
440/* 75 - K */ 'K',
441/* 76 - L */ 'L',
442/* 77 - M */ 'M',
443/* 78 - N */ 'N',
444/* 79 - O */ 'O',
445/* 80 - P */ 'P',
446/* 81 - Q */ 'Q',
447/* 82 - R */ 'R',
448/* 83 - S */ 'S',
449/* 84 - T */ 'T',
450/* 85 - U */ 'U',
451/* 86 - V */ 'V',
452/* 87 - W */ 'W',
453/* 88 - X */ 'X',
454/* 89 - Y */ 'Y',
455/* 90 - Z */ 'Z',
456/* 91 - [ */ '[',
457/* 92 - \ */ '\\',
458/* 93 - ] */ ']',
459/* 94 - ^ */ '^',
460/* 95 - _ */ '_',
461/* 96 - ` */ '`',
462/* 97 - a */ 'a',
463/* 98 - b */ 0x08,
464/* 99 - c */ 'c',
465/* 100 - d */ 'd',
466/* 101 - e */ 'e',
467/* 102 - f */ 0x0C,
468/* 103 - g */ 'g',
469/* 104 - h */ 'h',
470/* 105 - i */ 'i',
471/* 106 - j */ 'j',
472/* 107 - k */ 'k',
473/* 108 - l */ 'l',
474/* 109 - m */ 'm',
475/* 110 - n */ 0x0A,
476/* 111 - o */ 'o',
477/* 112 - p */ 'p',
478/* 113 - q */ 'q',
479/* 114 - r */ 0x0D,
480/* 115 - s */ 's',
481/* 116 - t */ 0x09,
482/* 117 - u */ 0,
483/* 118 - v */ 0x0B,
484/* 119 - w */ 'w',
485/* 120 - x */ 0,
486/* 121 - y */ 'y',
487/* 122 - z */ 'z',
488/* 123 - { */ '{',
489/* 124 - | */ '|',
490/* 125 - } */ '}',
491/* 126 - ~ */ '~',
492/* 127 - Delete */ 0
493};
494
6fe7ccc8 495template <typename T>
ed1e77d3 496Lexer<T>::Lexer(VM* vm, JSParserBuiltinMode builtinMode)
ba379fdc 497 : m_isReparsing(false)
93a37866 498 , m_vm(vm)
ed1e77d3 499 , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
9dae56ea 500{
9dae56ea
A
501}
502
ed1e77d3
A
503static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
504{
505 if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
506 return INTEGER;
507 return DOUBLE;
508}
509
6fe7ccc8
A
510template <typename T>
511Lexer<T>::~Lexer()
9dae56ea 512{
ba379fdc
A
513}
514
6fe7ccc8 515template <typename T>
93a37866 516String Lexer<T>::invalidCharacterMessage() const
ba379fdc 517{
6fe7ccc8
A
518 switch (m_current) {
519 case 0:
ed1e77d3 520 return ASCIILiteral("Invalid character: '\\0'");
6fe7ccc8 521 case 10:
ed1e77d3 522 return ASCIILiteral("Invalid character: '\\n'");
6fe7ccc8 523 case 11:
ed1e77d3 524 return ASCIILiteral("Invalid character: '\\v'");
6fe7ccc8 525 case 13:
ed1e77d3 526 return ASCIILiteral("Invalid character: '\\r'");
6fe7ccc8 527 case 35:
ed1e77d3 528 return ASCIILiteral("Invalid character: '#'");
6fe7ccc8 529 case 64:
ed1e77d3 530 return ASCIILiteral("Invalid character: '@'");
6fe7ccc8 531 case 96:
ed1e77d3 532 return ASCIILiteral("Invalid character: '`'");
6fe7ccc8 533 default:
ed1e77d3 534 return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current));
6fe7ccc8 535 }
ba379fdc
A
536}
537
6fe7ccc8 538template <typename T>
93a37866 539ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
ba379fdc 540{
6fe7ccc8
A
541 ASSERT(m_code <= m_codeEnd);
542 return m_code;
ba379fdc
A
543}
544
6fe7ccc8
A
545template <typename T>
546void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
9dae56ea 547{
6fe7ccc8
A
548 m_arena = &arena->identifierArena();
549
ba379fdc 550 m_lineNumber = source.firstLine();
9dae56ea 551 m_lastToken = -1;
6fe7ccc8 552
93a37866 553 const String& sourceString = source.provider()->source();
9dae56ea 554
93a37866
A
555 if (!sourceString.isNull())
556 setCodeStart(sourceString.impl());
6fe7ccc8
A
557 else
558 m_codeStart = 0;
ba379fdc 559
9dae56ea 560 m_source = &source;
93a37866
A
561 m_sourceOffset = source.startOffset();
562 m_codeStartPlusOffset = m_codeStart + source.startOffset();
563 m_code = m_codeStartPlusOffset;
6fe7ccc8 564 m_codeEnd = m_codeStart + source.endOffset();
9dae56ea
A
565 m_error = false;
566 m_atLineStart = true;
93a37866
A
567 m_lineStart = m_code;
568 m_lexErrorMessage = String();
6fe7ccc8 569
4e4e5a6f
A
570 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
571 m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
ed1e77d3 572 m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
6fe7ccc8 573
14957cd0
A
574 if (LIKELY(m_code < m_codeEnd))
575 m_current = *m_code;
576 else
6fe7ccc8 577 m_current = 0;
ba379fdc 578 ASSERT(currentOffset() == source.startOffset());
9dae56ea
A
579}
580
6fe7ccc8
A
581template <typename T>
582template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
9dae56ea 583{
6fe7ccc8 584 m_code += shiftAmount;
93a37866 585 ASSERT(currentOffset() >= currentLineStartOffset());
6fe7ccc8
A
586 m_current = *m_code;
587}
588
589template <typename T>
590ALWAYS_INLINE void Lexer<T>::shift()
591{
592 // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
593 m_current = 0;
594 ++m_code;
595 if (LIKELY(m_code < m_codeEnd))
14957cd0 596 m_current = *m_code;
14957cd0
A
597}
598
6fe7ccc8
A
599template <typename T>
600ALWAYS_INLINE bool Lexer<T>::atEnd() const
14957cd0 601{
6fe7ccc8
A
602 ASSERT(!m_current || m_code < m_codeEnd);
603 return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
14957cd0
A
604}
605
6fe7ccc8
A
606template <typename T>
607ALWAYS_INLINE T Lexer<T>::peek(int offset) const
14957cd0 608{
14957cd0 609 ASSERT(offset > 0 && offset < 5);
6fe7ccc8
A
610 const T* code = m_code + offset;
611 return (code < m_codeEnd) ? *code : 0;
14957cd0
A
612}
613
ed1e77d3
A
614struct ParsedUnicodeEscapeValue {
615 ParsedUnicodeEscapeValue(UChar32 value)
616 : m_value(value)
617 {
618 ASSERT(isValid());
619 }
620
621 enum SpecialValueType { Incomplete = -2, Invalid = -1 };
622 ParsedUnicodeEscapeValue(SpecialValueType type)
623 : m_value(type)
624 {
625 }
626
627 bool isValid() const { return m_value >= 0; }
628 bool isIncomplete() const { return m_value == Incomplete; }
14957cd0 629
ed1e77d3
A
630 UChar32 value() const
631 {
632 ASSERT(isValid());
633 return m_value;
634 }
635
636private:
637 UChar32 m_value;
638};
639
640template<typename CharacterType> ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
641{
642 if (m_current == '{') {
643 shift();
644 UChar32 codePoint = 0;
645 do {
646 if (!isASCIIHexDigit(m_current))
647 return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
648 codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
649 if (codePoint > UCHAR_MAX_VALUE)
650 return ParsedUnicodeEscapeValue::Invalid;
651 shift();
652 } while (m_current != '}');
653 shift();
654 return codePoint;
655 }
14957cd0 656
ed1e77d3
A
657 auto character2 = peek(1);
658 auto character3 = peek(2);
659 auto character4 = peek(3);
660 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4)))
661 return (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
662 auto result = convertUnicode(m_current, character2, character3, character4);
14957cd0
A
663 shift();
664 shift();
665 shift();
666 shift();
ed1e77d3 667 return result;
ba379fdc
A
668}
669
6fe7ccc8
A
670template <typename T>
671void Lexer<T>::shiftLineTerminator()
ba379fdc
A
672{
673 ASSERT(isLineTerminator(m_current));
674
81345200 675 m_positionBeforeLastNewline = currentPosition();
6fe7ccc8 676 T prev = m_current;
14957cd0
A
677 shift();
678
ba379fdc 679 // Allow both CRLF and LFCR.
6fe7ccc8 680 if (prev + m_current == '\n' + '\r')
14957cd0 681 shift();
ba379fdc
A
682
683 ++m_lineNumber;
684}
685
6fe7ccc8
A
686template <typename T>
687ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
ba379fdc
A
688{
689 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
690}
691
81345200 692static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
ba379fdc 693{
81345200 694 return U_GET_GC_MASK(c) & U_GC_L_MASK;
ba379fdc
A
695}
696
6fe7ccc8 697static ALWAYS_INLINE bool isLatin1(LChar)
ba379fdc 698{
6fe7ccc8 699 return true;
ba379fdc
A
700}
701
6fe7ccc8 702static ALWAYS_INLINE bool isLatin1(UChar c)
ba379fdc 703{
6fe7ccc8 704 return c < 256;
ba379fdc
A
705}
706
ed1e77d3
A
707static ALWAYS_INLINE bool isLatin1(UChar32 c)
708{
709 return !(c & ~0xFF);
710}
711
6fe7ccc8
A
712static inline bool isIdentStart(LChar c)
713{
714 return typesOfLatin1Characters[c] == CharacterIdentifierStart;
715}
716
ed1e77d3 717static inline bool isIdentStart(UChar32 c)
6fe7ccc8
A
718{
719 return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
720}
721
ed1e77d3 722static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
6fe7ccc8 723{
ed1e77d3 724 // FIXME: ES6 says this should be based on the Unicode property ID_Continue now instead.
81345200 725 return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D;
6fe7ccc8
A
726}
727
728static ALWAYS_INLINE bool isIdentPart(LChar c)
ba379fdc 729{
14957cd0
A
730 // Character types are divided into two groups depending on whether they can be part of an
731 // identifier or not. Those whose type value is less or equal than CharacterNumber can be
732 // part of an identifier. (See the CharacterType definition for more details.)
6fe7ccc8
A
733 return typesOfLatin1Characters[c] <= CharacterNumber;
734}
735
ed1e77d3 736static ALWAYS_INLINE bool isIdentPart(UChar32 c)
6fe7ccc8
A
737{
738 return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
ba379fdc
A
739}
740
ed1e77d3
A
741static ALWAYS_INLINE bool isIdentPart(UChar c)
742{
743 return isIdentPart(static_cast<UChar32>(c));
744}
745
746template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd)
747{
748 if (isIdentPart(code[0]))
749 return true;
750
751 // Shortest sequence handled below is \u{0}, which is 5 characters.
752 if (!(code[0] == '\\' && codeEnd - code >= 5 && code[1] == 'u'))
753 return false;
754
755 if (code[2] == '{') {
756 UChar32 codePoint = 0;
757 const CharacterType* pointer;
758 for (pointer = &code[3]; pointer < codeEnd; ++pointer) {
759 auto digit = *pointer;
760 if (!isASCIIHexDigit(digit))
761 break;
762 codePoint = (codePoint << 4) | toASCIIHexValue(digit);
763 if (codePoint > UCHAR_MAX_VALUE)
764 return false;
765 }
766 return isIdentPart(codePoint) && pointer < codeEnd && *pointer == '}';
767 }
768
769 // Shortest sequence handled below is \uXXXX, which is 6 characters.
770 if (codeEnd - code < 6)
771 return false;
772
773 auto character1 = code[2];
774 auto character2 = code[3];
775 auto character3 = code[4];
776 auto character4 = code[5];
777 return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4)
778 && isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4));
779}
780
781static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
782{
783 return isIdentPartIncludingEscapeTemplate(code, codeEnd);
784}
785
786static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
787{
788 return isIdentPartIncludingEscapeTemplate(code, codeEnd);
789}
790
93a37866 791static inline LChar singleEscape(int c)
ba379fdc 792{
93a37866
A
793 if (c < 128) {
794 ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
795 return singleCharacterEscapeValuesForASCII[c];
9dae56ea 796 }
93a37866 797 return 0;
9dae56ea
A
798}
799
6fe7ccc8
A
800template <typename T>
801inline void Lexer<T>::record8(int c)
9dae56ea 802{
ba379fdc
A
803 ASSERT(c >= 0);
804 ASSERT(c <= 0xFF);
6fe7ccc8
A
805 m_buffer8.append(static_cast<LChar>(c));
806}
807
808template <typename T>
809inline void assertCharIsIn8BitRange(T c)
810{
811 UNUSED_PARAM(c);
812 ASSERT(c >= 0);
813 ASSERT(c <= 0xFF);
814}
815
816template <>
817inline void assertCharIsIn8BitRange(UChar c)
818{
819 UNUSED_PARAM(c);
820 ASSERT(c <= 0xFF);
821}
822
823template <>
824inline void assertCharIsIn8BitRange(LChar)
825{
826}
827
828template <typename T>
829inline void Lexer<T>::append8(const T* p, size_t length)
830{
831 size_t currentSize = m_buffer8.size();
832 m_buffer8.grow(currentSize + length);
833 LChar* rawBuffer = m_buffer8.data() + currentSize;
834
835 for (size_t i = 0; i < length; i++) {
836 T c = p[i];
837 assertCharIsIn8BitRange(c);
838 rawBuffer[i] = c;
839 }
9dae56ea
A
840}
841
6fe7ccc8
A
842template <typename T>
843inline void Lexer<T>::append16(const LChar* p, size_t length)
844{
845 size_t currentSize = m_buffer16.size();
846 m_buffer16.grow(currentSize + length);
847 UChar* rawBuffer = m_buffer16.data() + currentSize;
848
849 for (size_t i = 0; i < length; i++)
850 rawBuffer[i] = p[i];
851}
852
853template <typename T>
854inline void Lexer<T>::record16(T c)
9dae56ea 855{
ba379fdc
A
856 m_buffer16.append(c);
857}
858
6fe7ccc8
A
859template <typename T>
860inline void Lexer<T>::record16(int c)
ba379fdc
A
861{
862 ASSERT(c >= 0);
6fe7ccc8
A
863 ASSERT(c <= static_cast<int>(USHRT_MAX));
864 m_buffer16.append(static_cast<UChar>(c));
9dae56ea 865}
81345200 866
ed1e77d3
A
867template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
868{
869 ASSERT(codePoint >= 0);
870 ASSERT(codePoint <= UCHAR_MAX_VALUE);
871 if (U_IS_BMP(codePoint))
872 record16(codePoint);
873 else {
874 UChar codeUnits[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
875 append16(codeUnits, 2);
876 }
877}
878
81345200
A
879#if !ASSERT_DISABLED
880bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
881{
882 if (!ident)
883 return true;
884 /* Just block any use of suspicious identifiers. This is intended to
885 * be used as a safety net while implementing builtins.
886 */
ed1e77d3 887 // FIXME: How can a debug-only assertion be a safety net?
81345200
A
888 if (*ident == vm.propertyNames->builtinNames().callPublicName())
889 return false;
890 if (*ident == vm.propertyNames->builtinNames().applyPublicName())
891 return false;
892 if (*ident == vm.propertyNames->eval)
893 return false;
894 if (*ident == vm.propertyNames->Function)
895 return false;
896 return true;
897}
898#endif
899
6fe7ccc8
A
900template <>
901template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
14957cd0
A
902{
903 const ptrdiff_t remaining = m_codeEnd - m_code;
6fe7ccc8 904 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
14957cd0
A
905 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
906 if (keyword != IDENT) {
907 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
6fe7ccc8 908 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
14957cd0
A
909 }
910 }
81345200
A
911
912 bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
913 if (isPrivateName)
914 shift();
915
93a37866
A
916 const LChar* identifierStart = currentSourcePtr();
917 unsigned identifierLineStart = currentLineStartOffset();
6fe7ccc8
A
918
919 while (isIdentPart(m_current))
920 shift();
921
922 if (UNLIKELY(m_current == '\\')) {
93a37866 923 setOffsetFromSourcePtr(identifierStart, identifierLineStart);
6fe7ccc8
A
924 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
925 }
926
927 const Identifier* ident = 0;
928
81345200 929 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
93a37866 930 int identifierLength = currentSourcePtr() - identifierStart;
6fe7ccc8 931 ident = makeIdentifier(identifierStart, identifierLength);
81345200
A
932 if (m_parsingBuiltinFunction) {
933 if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
934 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
935 return ERRORTOK;
936 }
937 if (isPrivateName)
938 ident = m_vm->propertyNames->getPrivateName(*ident);
939 else if (*ident == m_vm->propertyNames->undefinedKeyword)
940 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
941 if (!ident)
942 return INVALID_PRIVATE_NAME_ERRORTOK;
943 }
6fe7ccc8
A
944 tokenData->ident = ident;
945 } else
946 tokenData->ident = 0;
947
81345200 948 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
6fe7ccc8
A
949 ASSERT(shouldCreateIdentifier);
950 if (remaining < maxTokenLength) {
81345200 951 const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
6fe7ccc8
A
952 ASSERT((remaining < maxTokenLength) || !entry);
953 if (!entry)
954 return IDENT;
955 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
956 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
957 }
958 return IDENT;
959 }
960
961 return IDENT;
962}
963
964template <>
965template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
966{
967 const ptrdiff_t remaining = m_codeEnd - m_code;
968 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
969 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
970 if (keyword != IDENT) {
971 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
972 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
973 }
974 }
81345200
A
975
976 bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
977 if (isPrivateName)
978 shift();
6fe7ccc8 979
93a37866
A
980 const UChar* identifierStart = currentSourcePtr();
981 int identifierLineStart = currentLineStartOffset();
6fe7ccc8
A
982
983 UChar orAllChars = 0;
984
985 while (isIdentPart(m_current)) {
986 orAllChars |= m_current;
987 shift();
988 }
989
990 if (UNLIKELY(m_current == '\\')) {
81345200 991 ASSERT(!isPrivateName);
93a37866 992 setOffsetFromSourcePtr(identifierStart, identifierLineStart);
6fe7ccc8
A
993 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
994 }
995
996 bool isAll8Bit = false;
997
998 if (!(orAllChars & ~0xff))
999 isAll8Bit = true;
1000
1001 const Identifier* ident = 0;
1002
81345200 1003 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
93a37866 1004 int identifierLength = currentSourcePtr() - identifierStart;
6fe7ccc8
A
1005 if (isAll8Bit)
1006 ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
1007 else
1008 ident = makeIdentifier(identifierStart, identifierLength);
81345200
A
1009 if (m_parsingBuiltinFunction) {
1010 if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
1011 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
1012 return ERRORTOK;
1013 }
1014 if (isPrivateName)
1015 ident = m_vm->propertyNames->getPrivateName(*ident);
1016 else if (*ident == m_vm->propertyNames->undefinedKeyword)
1017 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
1018 if (!ident)
1019 return INVALID_PRIVATE_NAME_ERRORTOK;
1020 }
6fe7ccc8
A
1021 tokenData->ident = ident;
1022 } else
1023 tokenData->ident = 0;
1024
81345200 1025 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
6fe7ccc8
A
1026 ASSERT(shouldCreateIdentifier);
1027 if (remaining < maxTokenLength) {
81345200 1028 const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
6fe7ccc8
A
1029 ASSERT((remaining < maxTokenLength) || !entry);
1030 if (!entry)
1031 return IDENT;
1032 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1033 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1034 }
1035 return IDENT;
1036 }
1037
1038 return IDENT;
1039}
1040
ed1e77d3 1041template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
6fe7ccc8 1042{
ed1e77d3 1043 auto identifierStart = currentSourcePtr();
14957cd0
A
1044 bool bufferRequired = false;
1045
1046 while (true) {
1047 if (LIKELY(isIdentPart(m_current))) {
1048 shift();
1049 continue;
1050 }
1051 if (LIKELY(m_current != '\\'))
1052 break;
1053
1054 // \uXXXX unicode characters.
1055 bufferRequired = true;
93a37866
A
1056 if (identifierStart != currentSourcePtr())
1057 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
14957cd0
A
1058 shift();
1059 if (UNLIKELY(m_current != 'u'))
93a37866 1060 return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
14957cd0 1061 shift();
ed1e77d3 1062 auto character = parseUnicodeEscape();
93a37866 1063 if (UNLIKELY(!character.isValid()))
ed1e77d3
A
1064 return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1065 if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character.value()) : !isIdentStart(character.value())))
93a37866 1066 return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
6fe7ccc8 1067 if (shouldCreateIdentifier)
ed1e77d3 1068 recordUnicodeCodePoint(character.value());
93a37866 1069 identifierStart = currentSourcePtr();
14957cd0 1070 }
6fe7ccc8 1071
14957cd0 1072 int identifierLength;
ed1e77d3 1073 const Identifier* ident = nullptr;
14957cd0 1074 if (shouldCreateIdentifier) {
6fe7ccc8 1075 if (!bufferRequired) {
93a37866 1076 identifierLength = currentSourcePtr() - identifierStart;
6fe7ccc8
A
1077 ident = makeIdentifier(identifierStart, identifierLength);
1078 } else {
93a37866
A
1079 if (identifierStart != currentSourcePtr())
1080 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
6fe7ccc8 1081 ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
14957cd0
A
1082 }
1083
14957cd0
A
1084 tokenData->ident = ident;
1085 } else
ed1e77d3 1086 tokenData->ident = nullptr;
14957cd0 1087
ed1e77d3
A
1088 m_buffer16.shrink(0);
1089
1090 if (LIKELY(!(lexerFlags & LexerFlagsIgnoreReservedWords))) {
14957cd0 1091 ASSERT(shouldCreateIdentifier);
ed1e77d3
A
1092 const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
1093 if (!entry)
1094 return IDENT;
1095 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1096 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
14957cd0
A
1097 }
1098
14957cd0
A
1099 return IDENT;
1100}
1101
6fe7ccc8 1102static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
14957cd0 1103{
6fe7ccc8 1104 return character < 0xE;
14957cd0
A
1105}
1106
6fe7ccc8 1107static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
14957cd0 1108{
6fe7ccc8
A
1109 return character < 0xE || character > 0xFF;
1110}
1111
1112template <typename T>
93a37866 1113template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
6fe7ccc8
A
1114{
1115 int startingOffset = currentOffset();
93a37866 1116 int startingLineStartOffset = currentLineStartOffset();
6fe7ccc8
A
1117 int startingLineNumber = lineNumber();
1118 T stringQuoteCharacter = m_current;
14957cd0
A
1119 shift();
1120
93a37866 1121 const T* stringStart = currentSourcePtr();
14957cd0
A
1122
1123 while (m_current != stringQuoteCharacter) {
1124 if (UNLIKELY(m_current == '\\')) {
93a37866
A
1125 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1126 append8(stringStart, currentSourcePtr() - stringStart);
14957cd0
A
1127 shift();
1128
93a37866 1129 LChar escape = singleEscape(m_current);
14957cd0 1130
ed1e77d3 1131 // Most common escape sequences first.
14957cd0 1132 if (escape) {
6fe7ccc8
A
1133 if (shouldBuildStrings)
1134 record8(escape);
14957cd0
A
1135 shift();
1136 } else if (UNLIKELY(isLineTerminator(m_current)))
1137 shiftLineTerminator();
1138 else if (m_current == 'x') {
1139 shift();
6fe7ccc8 1140 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
ed1e77d3 1141 m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
93a37866 1142 return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
6fe7ccc8
A
1143 }
1144 T prev = m_current;
1145 shift();
1146 if (shouldBuildStrings)
1147 record8(convertHex(prev, m_current));
1148 shift();
1149 } else {
93a37866 1150 setOffset(startingOffset, startingLineStartOffset);
6fe7ccc8 1151 setLineNumber(startingLineNumber);
ed1e77d3 1152 m_buffer8.shrink(0);
6fe7ccc8
A
1153 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1154 }
93a37866 1155 stringStart = currentSourcePtr();
6fe7ccc8
A
1156 continue;
1157 }
1158
1159 if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
93a37866 1160 setOffset(startingOffset, startingLineStartOffset);
6fe7ccc8 1161 setLineNumber(startingLineNumber);
ed1e77d3 1162 m_buffer8.shrink(0);
6fe7ccc8
A
1163 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1164 }
1165
1166 shift();
1167 }
1168
93a37866
A
1169 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1170 append8(stringStart, currentSourcePtr() - stringStart);
6fe7ccc8
A
1171 if (shouldBuildStrings) {
1172 tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
ed1e77d3 1173 m_buffer8.shrink(0);
6fe7ccc8
A
1174 } else
1175 tokenData->ident = 0;
1176
93a37866 1177 return StringParsedSuccessfully;
6fe7ccc8
A
1178}
1179
1180template <typename T>
ed1e77d3
A
1181template <bool shouldBuildStrings> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(EscapeParseMode escapeParseMode, bool strictMode, T stringQuoteCharacter) -> StringParseResult
1182{
1183 if (m_current == 'x') {
1184 shift();
1185 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1186 m_lexErrorMessage = ASCIILiteral("\\x can only be followed by a hex character sequence");
1187 return StringCannotBeParsed;
1188 }
1189 T prev = m_current;
1190 shift();
1191 if (shouldBuildStrings)
1192 record16(convertHex(prev, m_current));
1193 shift();
1194 return StringParsedSuccessfully;
1195 }
1196
1197 if (m_current == 'u') {
1198 shift();
1199
1200 if (escapeParseMode == EscapeParseMode::String && m_current == stringQuoteCharacter) {
1201 if (shouldBuildStrings)
1202 record16('u');
1203 return StringParsedSuccessfully;
1204 }
1205
1206 auto character = parseUnicodeEscape();
1207 if (character.isValid()) {
1208 if (shouldBuildStrings)
1209 recordUnicodeCodePoint(character.value());
1210 return StringParsedSuccessfully;
1211 }
1212
1213 m_lexErrorMessage = ASCIILiteral("\\u can only be followed by a Unicode character sequence");
1214 return character.isIncomplete() ? StringUnterminated : StringCannotBeParsed;
1215 }
1216
1217 if (strictMode) {
1218 if (isASCIIDigit(m_current)) {
1219 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1220 int character1 = m_current;
1221 shift();
1222 if (character1 != '0' || isASCIIDigit(m_current)) {
1223 m_lexErrorMessage = ASCIILiteral("The only valid numeric escape in strict mode is '\\0'");
1224 return StringCannotBeParsed;
1225 }
1226 if (shouldBuildStrings)
1227 record16(0);
1228 return StringParsedSuccessfully;
1229 }
1230 } else {
1231 if (isASCIIOctalDigit(m_current)) {
1232 // Octal character sequences
1233 T character1 = m_current;
1234 shift();
1235 if (isASCIIOctalDigit(m_current)) {
1236 // Two octal characters
1237 T character2 = m_current;
1238 shift();
1239 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1240 if (shouldBuildStrings)
1241 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1242 shift();
1243 } else {
1244 if (shouldBuildStrings)
1245 record16((character1 - '0') * 8 + character2 - '0');
1246 }
1247 } else {
1248 if (shouldBuildStrings)
1249 record16(character1 - '0');
1250 }
1251 return StringParsedSuccessfully;
1252 }
1253 }
1254
1255 if (!atEnd()) {
1256 if (shouldBuildStrings)
1257 record16(m_current);
1258 shift();
1259 return StringParsedSuccessfully;
1260 }
1261
1262 m_lexErrorMessage = ASCIILiteral("Unterminated string constant");
1263 return StringUnterminated;
1264}
1265
1266template <typename T>
1267template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
6fe7ccc8
A
1268{
1269 T stringQuoteCharacter = m_current;
1270 shift();
1271
93a37866 1272 const T* stringStart = currentSourcePtr();
6fe7ccc8
A
1273
1274 while (m_current != stringQuoteCharacter) {
1275 if (UNLIKELY(m_current == '\\')) {
93a37866
A
1276 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1277 append16(stringStart, currentSourcePtr() - stringStart);
6fe7ccc8
A
1278 shift();
1279
93a37866 1280 LChar escape = singleEscape(m_current);
6fe7ccc8
A
1281
1282 // Most common escape sequences first
1283 if (escape) {
1284 if (shouldBuildStrings)
1285 record16(escape);
1286 shift();
1287 } else if (UNLIKELY(isLineTerminator(m_current)))
1288 shiftLineTerminator();
ed1e77d3
A
1289 else {
1290 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::String, strictMode, stringQuoteCharacter);
1291 if (result != StringParsedSuccessfully)
1292 return result;
6fe7ccc8 1293 }
14957cd0 1294
93a37866 1295 stringStart = currentSourcePtr();
14957cd0
A
1296 continue;
1297 }
1298 // Fast check for characters that require special handling.
6fe7ccc8 1299 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
14957cd0
A
1300 // as possible, and lets through all common ASCII characters.
1301 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1302 // New-line or end of input is not allowed
6fe7ccc8 1303 if (atEnd() || isLineTerminator(m_current)) {
ed1e77d3 1304 m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
93a37866 1305 return atEnd() ? StringUnterminated : StringCannotBeParsed;
6fe7ccc8 1306 }
14957cd0
A
1307 // Anything else is just a normal character
1308 }
1309 shift();
1310 }
1311
93a37866
A
1312 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1313 append16(stringStart, currentSourcePtr() - stringStart);
14957cd0
A
1314 if (shouldBuildStrings)
1315 tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1316 else
1317 tokenData->ident = 0;
1318
ed1e77d3 1319 m_buffer16.shrink(0);
93a37866 1320 return StringParsedSuccessfully;
14957cd0
A
1321}
1322
ed1e77d3
A
1323#if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
1324// While the lexer accepts <LF><CR> (not <CR><LF>) sequence
1325// as one line terminator and increments one line number,
1326// TemplateLiteral considers it as two line terminators <LF> and <CR>.
1327//
1328// TemplateLiteral normalizes line terminators as follows.
1329//
1330// <LF> => <LF>
1331// <CR> => <LF>
1332// <CR><LF> => <LF>
1333// <\u2028> => <\u2028>
1334// <\u2029> => <\u2029>
1335//
1336// So, <LF><CR> should be normalized to <LF><LF>.
1337// However, the lexer should increment the line number only once for <LF><CR>.
1338//
1339// To achieve this, LineNumberAdder holds the current status of line terminator sequence.
1340// When TemplateLiteral lexer encounters a line terminator, it notifies to LineNumberAdder.
1341// LineNumberAdder maintains the status and increments the line number when it's necessary.
1342// For example, LineNumberAdder increments the line number only once for <LF><CR> and <CR><LF>.
1343template<typename CharacterType>
1344class LineNumberAdder {
1345public:
1346 LineNumberAdder(int& lineNumber)
1347 : m_lineNumber(lineNumber)
1348 {
1349 }
1350
1351 void clear()
1352 {
1353 m_previous = 0;
1354 }
1355
1356 void add(CharacterType character)
1357 {
1358 ASSERT(Lexer<CharacterType>::isLineTerminator(character));
1359 if ((character + m_previous) == ('\n' + '\r'))
1360 m_previous = 0;
1361 else {
1362 ++m_lineNumber;
1363 m_previous = character;
1364 }
1365 }
1366
1367private:
1368 int& m_lineNumber;
1369 CharacterType m_previous { 0 };
1370};
1371
1372template <typename T>
1373template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
1374{
1375 const T* stringStart = currentSourcePtr();
1376 const T* rawStringStart = currentSourcePtr();
1377
1378 LineNumberAdder<T> lineNumberAdder(m_lineNumber);
1379
1380 while (m_current != '`') {
1381 if (UNLIKELY(m_current == '\\')) {
1382 lineNumberAdder.clear();
1383 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1384 append16(stringStart, currentSourcePtr() - stringStart);
1385 shift();
1386
1387 LChar escape = singleEscape(m_current);
1388
1389 // Most common escape sequences first.
1390 if (escape) {
1391 if (shouldBuildStrings)
1392 record16(escape);
1393 shift();
1394 } else if (UNLIKELY(isLineTerminator(m_current))) {
1395 if (m_current == '\r') {
1396 lineNumberAdder.add(m_current);
1397 shift();
1398 if (m_current == '\n') {
1399 lineNumberAdder.add(m_current);
1400 shift();
1401 }
1402 } else {
1403 lineNumberAdder.add(m_current);
1404 shift();
1405 }
1406 } else {
1407 bool strictMode = true;
1408 StringParseResult result = parseComplexEscape<shouldBuildStrings>(EscapeParseMode::Template, strictMode, '`');
1409 if (result != StringParsedSuccessfully)
1410 return result;
1411 }
1412
1413 stringStart = currentSourcePtr();
1414 continue;
1415 }
1416
1417 if (m_current == '$' && peek(1) == '{')
1418 break;
1419
1420 // Fast check for characters that require special handling.
1421 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1422 // as possible, and lets through all common ASCII characters.
1423 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1424 // End of input is not allowed.
1425 // Unlike String, line terminator is allowed.
1426 if (atEnd()) {
1427 m_lexErrorMessage = ASCIILiteral("Unexpected EOF");
1428 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1429 }
1430
1431 if (isLineTerminator(m_current)) {
1432 if (m_current == '\r') {
1433 // Normalize <CR>, <CR><LF> to <LF>.
1434 if (shouldBuildStrings) {
1435 if (stringStart != currentSourcePtr())
1436 append16(stringStart, currentSourcePtr() - stringStart);
1437 if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1438 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1439
1440 record16('\n');
1441 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1442 m_bufferForRawTemplateString16.append('\n');
1443 }
1444 lineNumberAdder.add(m_current);
1445 shift();
1446 if (m_current == '\n') {
1447 lineNumberAdder.add(m_current);
1448 shift();
1449 }
1450 stringStart = currentSourcePtr();
1451 rawStringStart = currentSourcePtr();
1452 } else {
1453 lineNumberAdder.add(m_current);
1454 shift();
1455 }
1456 continue;
1457 }
1458 // Anything else is just a normal character
1459 }
1460
1461 lineNumberAdder.clear();
1462 shift();
1463 }
1464
1465 bool isTail = m_current == '`';
1466
1467 if (shouldBuildStrings) {
1468 if (currentSourcePtr() != stringStart)
1469 append16(stringStart, currentSourcePtr() - stringStart);
1470 if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1471 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1472 }
1473
1474 if (shouldBuildStrings) {
1475 tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1476 // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
1477 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1478 tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
1479 else
1480 tokenData->raw = makeEmptyIdentifier();
1481 } else {
1482 tokenData->cooked = makeEmptyIdentifier();
1483 tokenData->raw = makeEmptyIdentifier();
1484 }
1485 tokenData->isTail = isTail;
1486
1487 m_buffer16.shrink(0);
1488 m_bufferForRawTemplateString16.shrink(0);
1489
1490 if (isTail) {
1491 // Skip `
1492 shift();
1493 } else {
1494 // Skip $ and {
1495 shift();
1496 shift();
1497 }
1498
1499 return StringParsedSuccessfully;
1500}
1501#endif
1502
6fe7ccc8
A
1503template <typename T>
1504ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
14957cd0
A
1505{
1506 // Optimization: most hexadecimal values fit into 4 bytes.
1507 uint32_t hexValue = 0;
1508 int maximumDigits = 7;
1509
14957cd0
A
1510 do {
1511 hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1512 shift();
1513 --maximumDigits;
1514 } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1515
1516 if (maximumDigits >= 0) {
1517 returnValue = hexValue;
1518 return;
1519 }
1520
1521 // No more place in the hexValue buffer.
1522 // The values are shifted out and placed into the m_buffer8 vector.
1523 for (int i = 0; i < 8; ++i) {
1524 int digit = hexValue >> 28;
1525 if (digit < 10)
1526 record8(digit + '0');
1527 else
1528 record8(digit - 10 + 'a');
1529 hexValue <<= 4;
1530 }
1531
1532 while (isASCIIHexDigit(m_current)) {
1533 record8(m_current);
1534 shift();
1535 }
1536
1537 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1538}
1539
ed1e77d3
A
1540template <typename T>
1541ALWAYS_INLINE bool Lexer<T>::parseBinary(double& returnValue)
1542{
1543 // Optimization: most binary values fit into 4 bytes.
1544 uint32_t binaryValue = 0;
1545 const unsigned maximumDigits = 32;
1546 int digit = maximumDigits - 1;
1547 // Temporary buffer for the digits. Makes easier
1548 // to reconstruct the input characters when needed.
1549 LChar digits[maximumDigits];
1550
1551 do {
1552 binaryValue = (binaryValue << 1) + (m_current - '0');
1553 digits[digit] = m_current;
1554 shift();
1555 --digit;
1556 } while (isASCIIBinaryDigit(m_current) && digit >= 0);
1557
1558 if (!isASCIIDigit(m_current) && digit >= 0) {
1559 returnValue = binaryValue;
1560 return true;
1561 }
1562
1563 for (int i = maximumDigits - 1; i > digit; --i)
1564 record8(digits[i]);
1565
1566 while (isASCIIBinaryDigit(m_current)) {
1567 record8(m_current);
1568 shift();
1569 }
1570
1571 if (isASCIIDigit(m_current))
1572 return false;
1573
1574 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2);
1575 return true;
1576}
1577
6fe7ccc8
A
1578template <typename T>
1579ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
14957cd0
A
1580{
1581 // Optimization: most octal values fit into 4 bytes.
1582 uint32_t octalValue = 0;
ed1e77d3
A
1583 const unsigned maximumDigits = 10;
1584 int digit = maximumDigits - 1;
14957cd0
A
1585 // Temporary buffer for the digits. Makes easier
1586 // to reconstruct the input characters when needed.
ed1e77d3 1587 LChar digits[maximumDigits];
14957cd0
A
1588
1589 do {
1590 octalValue = octalValue * 8 + (m_current - '0');
ed1e77d3 1591 digits[digit] = m_current;
14957cd0 1592 shift();
ed1e77d3
A
1593 --digit;
1594 } while (isASCIIOctalDigit(m_current) && digit >= 0);
14957cd0 1595
ed1e77d3 1596 if (!isASCIIDigit(m_current) && digit >= 0) {
14957cd0
A
1597 returnValue = octalValue;
1598 return true;
1599 }
1600
ed1e77d3 1601 for (int i = maximumDigits - 1; i > digit; --i)
14957cd0
A
1602 record8(digits[i]);
1603
1604 while (isASCIIOctalDigit(m_current)) {
1605 record8(m_current);
1606 shift();
1607 }
1608
1609 if (isASCIIDigit(m_current))
1610 return false;
1611
1612 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
1613 return true;
1614}
1615
6fe7ccc8
A
1616template <typename T>
1617ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
14957cd0
A
1618{
1619 // Optimization: most decimal values fit into 4 bytes.
1620 uint32_t decimalValue = 0;
1621
1622 // Since parseOctal may be executed before parseDecimal,
1623 // the m_buffer8 may hold ascii digits.
1624 if (!m_buffer8.size()) {
ed1e77d3
A
1625 const unsigned maximumDigits = 10;
1626 int digit = maximumDigits - 1;
14957cd0
A
1627 // Temporary buffer for the digits. Makes easier
1628 // to reconstruct the input characters when needed.
ed1e77d3 1629 LChar digits[maximumDigits];
14957cd0
A
1630
1631 do {
1632 decimalValue = decimalValue * 10 + (m_current - '0');
ed1e77d3 1633 digits[digit] = m_current;
14957cd0 1634 shift();
ed1e77d3
A
1635 --digit;
1636 } while (isASCIIDigit(m_current) && digit >= 0);
14957cd0 1637
ed1e77d3 1638 if (digit >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
14957cd0
A
1639 returnValue = decimalValue;
1640 return true;
1641 }
1642
ed1e77d3 1643 for (int i = maximumDigits - 1; i > digit; --i)
14957cd0
A
1644 record8(digits[i]);
1645 }
1646
1647 while (isASCIIDigit(m_current)) {
1648 record8(m_current);
1649 shift();
1650 }
1651
1652 return false;
1653}
1654
6fe7ccc8
A
1655template <typename T>
1656ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
14957cd0
A
1657{
1658 record8('.');
1659 while (isASCIIDigit(m_current)) {
1660 record8(m_current);
1661 shift();
1662 }
1663}
1664
6fe7ccc8
A
1665template <typename T>
1666ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
14957cd0
A
1667{
1668 record8('e');
1669 shift();
1670 if (m_current == '+' || m_current == '-') {
1671 record8(m_current);
1672 shift();
1673 }
1674
1675 if (!isASCIIDigit(m_current))
1676 return false;
1677
1678 do {
1679 record8(m_current);
1680 shift();
1681 } while (isASCIIDigit(m_current));
1682 return true;
1683}
1684
6fe7ccc8
A
1685template <typename T>
1686ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
14957cd0
A
1687{
1688 while (true) {
1689 while (UNLIKELY(m_current == '*')) {
1690 shift();
1691 if (m_current == '/') {
1692 shift();
1693 return true;
1694 }
1695 }
1696
6fe7ccc8 1697 if (atEnd())
14957cd0
A
1698 return false;
1699
6fe7ccc8 1700 if (isLineTerminator(m_current)) {
14957cd0 1701 shiftLineTerminator();
6fe7ccc8
A
1702 m_terminator = true;
1703 } else
14957cd0
A
1704 shift();
1705 }
1706}
1707
6fe7ccc8
A
1708template <typename T>
1709bool Lexer<T>::nextTokenIsColon()
14957cd0 1710{
6fe7ccc8 1711 const T* code = m_code;
14957cd0
A
1712 while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1713 code++;
6fe7ccc8 1714
14957cd0
A
1715 return code < m_codeEnd && *code == ':';
1716}
1717
ed1e77d3
A
1718#if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
1719template <typename T>
1720void Lexer<T>::setTokenPosition(JSToken* tokenRecord)
1721{
1722 JSTokenData* tokenData = &tokenRecord->m_data;
1723 tokenData->line = lineNumber();
1724 tokenData->offset = currentOffset();
1725 tokenData->lineStartOffset = currentLineStartOffset();
1726 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1727}
1728#endif
1729
6fe7ccc8 1730template <typename T>
81345200 1731JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
9dae56ea 1732{
81345200
A
1733 JSTokenData* tokenData = &tokenRecord->m_data;
1734 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
ed1e77d3
A
1735 m_lastTockenLocation = JSTokenLocation(tokenRecord->m_location);
1736
ba379fdc
A
1737 ASSERT(!m_error);
1738 ASSERT(m_buffer8.isEmpty());
1739 ASSERT(m_buffer16.isEmpty());
1740
14957cd0 1741 JSTokenType token = ERRORTOK;
9dae56ea 1742 m_terminator = false;
ba379fdc
A
1743
1744start:
1745 while (isWhiteSpace(m_current))
14957cd0 1746 shift();
ba379fdc 1747
6fe7ccc8 1748 if (atEnd())
14957cd0 1749 return EOFTOK;
6fe7ccc8 1750
93a37866
A
1751 tokenLocation->startOffset = currentOffset();
1752 ASSERT(currentOffset() >= currentLineStartOffset());
81345200 1753 tokenRecord->m_startPosition = currentPosition();
14957cd0
A
1754
1755 CharacterType type;
6fe7ccc8
A
1756 if (LIKELY(isLatin1(m_current)))
1757 type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1758 else if (isNonLatin1IdentStart(m_current))
14957cd0
A
1759 type = CharacterIdentifierStart;
1760 else if (isLineTerminator(m_current))
1761 type = CharacterLineTerminator;
1762 else
1763 type = CharacterInvalid;
1764
1765 switch (type) {
1766 case CharacterGreater:
1767 shift();
1768 if (m_current == '>') {
1769 shift();
1770 if (m_current == '>') {
1771 shift();
1772 if (m_current == '=') {
1773 shift();
ba379fdc
A
1774 token = URSHIFTEQUAL;
1775 break;
9dae56ea 1776 }
ba379fdc 1777 token = URSHIFT;
9dae56ea 1778 break;
ba379fdc 1779 }
14957cd0
A
1780 if (m_current == '=') {
1781 shift();
1782 token = RSHIFTEQUAL;
ba379fdc
A
1783 break;
1784 }
14957cd0 1785 token = RSHIFT;
ba379fdc 1786 break;
14957cd0
A
1787 }
1788 if (m_current == '=') {
1789 shift();
1790 token = GE;
ba379fdc 1791 break;
14957cd0
A
1792 }
1793 token = GT;
1794 break;
ed1e77d3
A
1795 case CharacterEqual: {
1796#if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
1797 if (peek(1) == '>') {
1798 token = ARROWFUNCTION;
1799 tokenData->line = lineNumber();
1800 tokenData->offset = currentOffset();
1801 tokenData->lineStartOffset = currentLineStartOffset();
1802 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1803 shift();
1804 shift();
1805 break;
1806 }
1807#endif
14957cd0
A
1808 shift();
1809 if (m_current == '=') {
1810 shift();
1811 if (m_current == '=') {
1812 shift();
1813 token = STREQ;
9dae56ea 1814 break;
ba379fdc 1815 }
14957cd0 1816 token = EQEQ;
ba379fdc 1817 break;
14957cd0
A
1818 }
1819 token = EQUAL;
1820 break;
ed1e77d3 1821 }
14957cd0
A
1822 case CharacterLess:
1823 shift();
1824 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1825 // <!-- marks the beginning of a line comment (for www usage)
1826 goto inSingleLineComment;
1827 }
1828 if (m_current == '<') {
1829 shift();
1830 if (m_current == '=') {
1831 shift();
1832 token = LSHIFTEQUAL;
9dae56ea 1833 break;
ba379fdc 1834 }
14957cd0 1835 token = LSHIFT;
ba379fdc 1836 break;
14957cd0
A
1837 }
1838 if (m_current == '=') {
1839 shift();
1840 token = LE;
ba379fdc 1841 break;
14957cd0
A
1842 }
1843 token = LT;
1844 break;
1845 case CharacterExclamationMark:
1846 shift();
1847 if (m_current == '=') {
1848 shift();
1849 if (m_current == '=') {
1850 shift();
1851 token = STRNEQ;
9dae56ea 1852 break;
ba379fdc 1853 }
14957cd0 1854 token = NE;
ba379fdc 1855 break;
14957cd0
A
1856 }
1857 token = EXCLAMATION;
1858 break;
1859 case CharacterAdd:
1860 shift();
1861 if (m_current == '+') {
1862 shift();
1863 token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1864 break;
1865 }
1866 if (m_current == '=') {
1867 shift();
1868 token = PLUSEQUAL;
ba379fdc 1869 break;
14957cd0
A
1870 }
1871 token = PLUS;
1872 break;
1873 case CharacterSub:
1874 shift();
1875 if (m_current == '-') {
1876 shift();
1877 if (m_atLineStart && m_current == '>') {
1878 shift();
ba379fdc
A
1879 goto inSingleLineComment;
1880 }
14957cd0 1881 token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
ba379fdc 1882 break;
14957cd0
A
1883 }
1884 if (m_current == '=') {
1885 shift();
1886 token = MINUSEQUAL;
ba379fdc 1887 break;
14957cd0
A
1888 }
1889 token = MINUS;
1890 break;
1891 case CharacterMultiply:
1892 shift();
1893 if (m_current == '=') {
1894 shift();
1895 token = MULTEQUAL;
ba379fdc 1896 break;
14957cd0
A
1897 }
1898 token = TIMES;
1899 break;
1900 case CharacterSlash:
1901 shift();
1902 if (m_current == '/') {
1903 shift();
1904 goto inSingleLineComment;
1905 }
1906 if (m_current == '*') {
1907 shift();
1908 if (parseMultilineComment())
1909 goto start;
ed1e77d3 1910 m_lexErrorMessage = ASCIILiteral("Multiline comment was not closed properly");
93a37866 1911 token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
14957cd0
A
1912 goto returnError;
1913 }
1914 if (m_current == '=') {
1915 shift();
1916 token = DIVEQUAL;
ba379fdc 1917 break;
14957cd0
A
1918 }
1919 token = DIVIDE;
1920 break;
1921 case CharacterAnd:
1922 shift();
1923 if (m_current == '&') {
1924 shift();
1925 token = AND;
ba379fdc 1926 break;
14957cd0
A
1927 }
1928 if (m_current == '=') {
1929 shift();
1930 token = ANDEQUAL;
ba379fdc 1931 break;
14957cd0
A
1932 }
1933 token = BITAND;
1934 break;
1935 case CharacterXor:
1936 shift();
1937 if (m_current == '=') {
1938 shift();
1939 token = XOREQUAL;
ba379fdc 1940 break;
14957cd0
A
1941 }
1942 token = BITXOR;
1943 break;
1944 case CharacterModulo:
1945 shift();
1946 if (m_current == '=') {
1947 shift();
1948 token = MODEQUAL;
ba379fdc 1949 break;
14957cd0
A
1950 }
1951 token = MOD;
1952 break;
1953 case CharacterOr:
1954 shift();
1955 if (m_current == '=') {
1956 shift();
1957 token = OREQUAL;
ba379fdc 1958 break;
14957cd0
A
1959 }
1960 if (m_current == '|') {
1961 shift();
1962 token = OR;
ba379fdc 1963 break;
14957cd0
A
1964 }
1965 token = BITOR;
1966 break;
1967 case CharacterOpenParen:
1968 token = OPENPAREN;
1969 shift();
1970 break;
1971 case CharacterCloseParen:
1972 token = CLOSEPAREN;
1973 shift();
1974 break;
1975 case CharacterOpenBracket:
1976 token = OPENBRACKET;
1977 shift();
1978 break;
1979 case CharacterCloseBracket:
1980 token = CLOSEBRACKET;
1981 shift();
1982 break;
1983 case CharacterComma:
1984 token = COMMA;
1985 shift();
1986 break;
1987 case CharacterColon:
1988 token = COLON;
1989 shift();
1990 break;
1991 case CharacterQuestion:
1992 token = QUESTION;
1993 shift();
1994 break;
1995 case CharacterTilde:
1996 token = TILDE;
1997 shift();
1998 break;
1999 case CharacterSemicolon:
14957cd0
A
2000 shift();
2001 token = SEMICOLON;
2002 break;
2003 case CharacterOpenBrace:
93a37866
A
2004 tokenData->line = lineNumber();
2005 tokenData->offset = currentOffset();
2006 tokenData->lineStartOffset = currentLineStartOffset();
2007 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
14957cd0
A
2008 shift();
2009 token = OPENBRACE;
2010 break;
2011 case CharacterCloseBrace:
93a37866
A
2012 tokenData->line = lineNumber();
2013 tokenData->offset = currentOffset();
2014 tokenData->lineStartOffset = currentLineStartOffset();
2015 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
14957cd0
A
2016 shift();
2017 token = CLOSEBRACE;
2018 break;
2019 case CharacterDot:
2020 shift();
2021 if (!isASCIIDigit(m_current)) {
81345200
A
2022 if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
2023 shift();
2024 shift();
2025 token = DOTDOTDOT;
2026 break;
2027 }
14957cd0
A
2028 token = DOT;
2029 break;
2030 }
2031 goto inNumberAfterDecimalPoint;
2032 case CharacterZero:
2033 shift();
81345200
A
2034 if ((m_current | 0x20) == 'x') {
2035 if (!isASCIIHexDigit(peek(1))) {
ed1e77d3 2036 m_lexErrorMessage = ASCIILiteral("No hexadecimal digits after '0x'");
81345200
A
2037 token = INVALID_HEX_NUMBER_ERRORTOK;
2038 goto returnError;
2039 }
ed1e77d3
A
2040
2041 // Shift out the 'x' prefix.
2042 shift();
2043
14957cd0 2044 parseHex(tokenData->doubleValue);
81345200 2045 if (isIdentStart(m_current)) {
ed1e77d3 2046 m_lexErrorMessage = ASCIILiteral("No space between hexadecimal literal and identifier");
81345200
A
2047 token = INVALID_HEX_NUMBER_ERRORTOK;
2048 goto returnError;
2049 }
ed1e77d3
A
2050 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2051 m_buffer8.shrink(0);
2052 break;
2053 }
2054 if ((m_current | 0x20) == 'b') {
2055 if (!isASCIIBinaryDigit(peek(1))) {
2056 m_lexErrorMessage = ASCIILiteral("No binary digits after '0b'");
2057 token = INVALID_BINARY_NUMBER_ERRORTOK;
2058 goto returnError;
2059 }
2060
2061 // Shift out the 'b' prefix.
2062 shift();
2063
2064 parseBinary(tokenData->doubleValue);
2065 if (isIdentStart(m_current)) {
2066 m_lexErrorMessage = ASCIILiteral("No space between binary literal and identifier");
2067 token = INVALID_BINARY_NUMBER_ERRORTOK;
2068 goto returnError;
2069 }
2070 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2071 m_buffer8.shrink(0);
2072 break;
2073 }
2074
2075 if ((m_current | 0x20) == 'o') {
2076 if (!isASCIIOctalDigit(peek(1))) {
2077 m_lexErrorMessage = ASCIILiteral("No octal digits after '0o'");
2078 token = INVALID_OCTAL_NUMBER_ERRORTOK;
2079 goto returnError;
2080 }
2081
2082 // Shift out the 'o' prefix.
2083 shift();
2084
2085 parseOctal(tokenData->doubleValue);
2086 if (isIdentStart(m_current)) {
2087 m_lexErrorMessage = ASCIILiteral("No space between octal literal and identifier");
2088 token = INVALID_OCTAL_NUMBER_ERRORTOK;
2089 goto returnError;
2090 }
2091 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2092 m_buffer8.shrink(0);
81345200
A
2093 break;
2094 }
2095
2096 record8('0');
ed1e77d3
A
2097 if (strictMode && isASCIIDigit(m_current)) {
2098 m_lexErrorMessage = ASCIILiteral("Decimal integer literals with a leading zero are forbidden in strict mode");
2099 token = INVALID_OCTAL_NUMBER_ERRORTOK;
2100 goto returnError;
2101 }
81345200
A
2102 if (isASCIIOctalDigit(m_current)) {
2103 if (parseOctal(tokenData->doubleValue)) {
ed1e77d3 2104 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
ba379fdc 2105 }
ba379fdc 2106 }
81345200 2107 FALLTHROUGH;
14957cd0 2108 case CharacterNumber:
ed1e77d3 2109 if (LIKELY(token != INTEGER && token != DOUBLE)) {
14957cd0 2110 if (!parseDecimal(tokenData->doubleValue)) {
ed1e77d3 2111 token = INTEGER;
14957cd0
A
2112 if (m_current == '.') {
2113 shift();
2114inNumberAfterDecimalPoint:
2115 parseNumberAfterDecimalPoint();
ed1e77d3 2116 token = DOUBLE;
14957cd0 2117 }
6fe7ccc8
A
2118 if ((m_current | 0x20) == 'e') {
2119 if (!parseNumberAfterExponentIndicator()) {
ed1e77d3 2120 m_lexErrorMessage = ASCIILiteral("Non-number found after exponent indicator");
93a37866 2121 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
14957cd0 2122 goto returnError;
6fe7ccc8
A
2123 }
2124 }
2125 size_t parsedLength;
2126 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
ed1e77d3
A
2127 if (token == INTEGER)
2128 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2129 } else
2130 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
14957cd0 2131 }
ba379fdc 2132
14957cd0 2133 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
6fe7ccc8 2134 if (UNLIKELY(isIdentStart(m_current))) {
ed1e77d3 2135 m_lexErrorMessage = ASCIILiteral("At least one digit must occur after a decimal point");
93a37866 2136 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
ba379fdc 2137 goto returnError;
6fe7ccc8 2138 }
ed1e77d3 2139 m_buffer8.shrink(0);
14957cd0 2140 break;
ed1e77d3
A
2141 case CharacterQuote: {
2142 StringParseResult result = StringCannotBeParsed;
2143 if (lexerFlags & LexerFlagsDontBuildStrings)
2144 result = parseString<false>(tokenData, strictMode);
2145 else
2146 result = parseString<true>(tokenData, strictMode);
2147
2148 if (UNLIKELY(result != StringParsedSuccessfully)) {
2149 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
2150 goto returnError;
ba379fdc 2151 }
14957cd0 2152 shift();
14957cd0
A
2153 token = STRING;
2154 break;
ed1e77d3
A
2155 }
2156#if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
2157 case CharacterBackQuote: {
2158 // Skip backquote.
2159 shift();
2160 StringParseResult result = StringCannotBeParsed;
2161 if (lexerFlags & LexerFlagsDontBuildStrings)
2162 result = parseTemplateLiteral<false>(tokenData, RawStringsBuildMode::BuildRawStrings);
2163 else
2164 result = parseTemplateLiteral<true>(tokenData, RawStringsBuildMode::BuildRawStrings);
2165
2166 if (UNLIKELY(result != StringParsedSuccessfully)) {
2167 token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2168 goto returnError;
2169 }
2170 token = TEMPLATE;
2171 break;
2172 }
2173#endif
14957cd0
A
2174 case CharacterIdentifierStart:
2175 ASSERT(isIdentStart(m_current));
81345200 2176 FALLTHROUGH;
14957cd0 2177 case CharacterBackSlash:
81345200 2178 parseIdent:
6fe7ccc8
A
2179 if (lexerFlags & LexexFlagsDontBuildKeywords)
2180 token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
14957cd0 2181 else
6fe7ccc8 2182 token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
14957cd0
A
2183 break;
2184 case CharacterLineTerminator:
2185 ASSERT(isLineTerminator(m_current));
ba379fdc 2186 shiftLineTerminator();
14957cd0
A
2187 m_atLineStart = true;
2188 m_terminator = true;
93a37866 2189 m_lineStart = m_code;
14957cd0 2190 goto start;
81345200
A
2191 case CharacterPrivateIdentifierStart:
2192 if (m_parsingBuiltinFunction)
2193 goto parseIdent;
2194
2195 FALLTHROUGH;
14957cd0 2196 case CharacterInvalid:
6fe7ccc8 2197 m_lexErrorMessage = invalidCharacterMessage();
93a37866 2198 token = ERRORTOK;
ba379fdc 2199 goto returnError;
14957cd0 2200 default:
93a37866 2201 RELEASE_ASSERT_NOT_REACHED();
ed1e77d3 2202 m_lexErrorMessage = ASCIILiteral("Internal Error");
93a37866 2203 token = ERRORTOK;
ba379fdc 2204 goto returnError;
ba379fdc 2205 }
9dae56ea 2206
14957cd0
A
2207 m_atLineStart = false;
2208 goto returnToken;
9dae56ea 2209
ba379fdc
A
2210inSingleLineComment:
2211 while (!isLineTerminator(m_current)) {
6fe7ccc8 2212 if (atEnd())
14957cd0
A
2213 return EOFTOK;
2214 shift();
9dae56ea 2215 }
ba379fdc
A
2216 shiftLineTerminator();
2217 m_atLineStart = true;
2218 m_terminator = true;
93a37866 2219 m_lineStart = m_code;
14957cd0
A
2220 if (!lastTokenWasRestrKeyword())
2221 goto start;
ba379fdc 2222
14957cd0 2223 token = SEMICOLON;
ba379fdc
A
2224 // Fall through into returnToken.
2225
14957cd0 2226returnToken:
93a37866
A
2227 tokenLocation->line = m_lineNumber;
2228 tokenLocation->endOffset = currentOffset();
2229 tokenLocation->lineStartOffset = currentLineStartOffset();
2230 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
81345200 2231 tokenRecord->m_endPosition = currentPosition();
ba379fdc
A
2232 m_lastToken = token;
2233 return token;
9dae56ea 2234
ba379fdc
A
2235returnError:
2236 m_error = true;
93a37866
A
2237 tokenLocation->line = m_lineNumber;
2238 tokenLocation->endOffset = currentOffset();
2239 tokenLocation->lineStartOffset = currentLineStartOffset();
2240 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
81345200 2241 tokenRecord->m_endPosition = currentPosition();
93a37866
A
2242 RELEASE_ASSERT(token & ErrorTokenFlag);
2243 return token;
2244}
2245
2246template <typename T>
2247static inline void orCharacter(UChar&, UChar);
2248
2249template <>
2250inline void orCharacter<LChar>(UChar&, UChar) { }
2251
2252template <>
2253inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
2254{
2255 orAccumulator |= character;
9dae56ea
A
2256}
2257
6fe7ccc8
A
2258template <typename T>
2259bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
9dae56ea 2260{
ba379fdc
A
2261 ASSERT(m_buffer16.isEmpty());
2262
9dae56ea
A
2263 bool lastWasEscape = false;
2264 bool inBrackets = false;
93a37866 2265 UChar charactersOredTogether = 0;
9dae56ea 2266
f9bf01c6
A
2267 if (patternPrefix) {
2268 ASSERT(!isLineTerminator(patternPrefix));
2269 ASSERT(patternPrefix != '/');
2270 ASSERT(patternPrefix != '[');
2271 record16(patternPrefix);
2272 }
2273
ba379fdc 2274 while (true) {
6fe7ccc8 2275 if (isLineTerminator(m_current) || atEnd()) {
ed1e77d3 2276 m_buffer16.shrink(0);
f9bf01c6 2277 return false;
9dae56ea 2278 }
f9bf01c6 2279
6fe7ccc8
A
2280 T prev = m_current;
2281
14957cd0 2282 shift();
f9bf01c6 2283
6fe7ccc8 2284 if (prev == '/' && !lastWasEscape && !inBrackets)
f9bf01c6
A
2285 break;
2286
6fe7ccc8 2287 record16(prev);
93a37866 2288 orCharacter<T>(charactersOredTogether, prev);
f9bf01c6
A
2289
2290 if (lastWasEscape) {
2291 lastWasEscape = false;
2292 continue;
2293 }
2294
6fe7ccc8 2295 switch (prev) {
f9bf01c6
A
2296 case '[':
2297 inBrackets = true;
2298 break;
2299 case ']':
2300 inBrackets = false;
2301 break;
2302 case '\\':
2303 lastWasEscape = true;
2304 break;
2305 }
9dae56ea
A
2306 }
2307
93a37866
A
2308 pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2309
ed1e77d3 2310 m_buffer16.shrink(0);
93a37866 2311 charactersOredTogether = 0;
f9bf01c6 2312
9dae56ea
A
2313 while (isIdentPart(m_current)) {
2314 record16(m_current);
93a37866 2315 orCharacter<T>(charactersOredTogether, m_current);
14957cd0 2316 shift();
9dae56ea 2317 }
f9bf01c6 2318
93a37866 2319 flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
ed1e77d3 2320 m_buffer16.shrink(0);
9dae56ea
A
2321
2322 return true;
2323}
2324
6fe7ccc8
A
2325template <typename T>
2326bool Lexer<T>::skipRegExp()
f9bf01c6
A
2327{
2328 bool lastWasEscape = false;
2329 bool inBrackets = false;
2330
2331 while (true) {
6fe7ccc8 2332 if (isLineTerminator(m_current) || atEnd())
f9bf01c6
A
2333 return false;
2334
6fe7ccc8
A
2335 T prev = m_current;
2336
14957cd0 2337 shift();
f9bf01c6 2338
6fe7ccc8 2339 if (prev == '/' && !lastWasEscape && !inBrackets)
f9bf01c6
A
2340 break;
2341
2342 if (lastWasEscape) {
2343 lastWasEscape = false;
2344 continue;
2345 }
2346
6fe7ccc8 2347 switch (prev) {
f9bf01c6
A
2348 case '[':
2349 inBrackets = true;
2350 break;
2351 case ']':
2352 inBrackets = false;
2353 break;
2354 case '\\':
2355 lastWasEscape = true;
2356 break;
2357 }
2358 }
2359
2360 while (isIdentPart(m_current))
14957cd0 2361 shift();
f9bf01c6
A
2362
2363 return true;
2364}
2365
ed1e77d3
A
2366#if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
2367template <typename T>
2368JSTokenType Lexer<T>::scanTrailingTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
2369{
2370 JSTokenData* tokenData = &tokenRecord->m_data;
2371 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
2372 ASSERT(!m_error);
2373 ASSERT(m_buffer16.isEmpty());
2374
2375 // Leading closing brace } is already shifted in the previous token scan.
2376 // So in this re-scan phase, shift() is not needed here.
2377 StringParseResult result = parseTemplateLiteral<true>(tokenData, rawStringsBuildMode);
2378 JSTokenType token = ERRORTOK;
2379 if (UNLIKELY(result != StringParsedSuccessfully)) {
2380 token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2381 m_error = true;
2382 } else {
2383 token = TEMPLATE;
2384 m_lastToken = token;
2385 }
2386
2387 // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2388 m_atLineStart = false;
2389
2390 // Adjust current tokenLocation data for TemplateString.
2391 tokenLocation->line = m_lineNumber;
2392 tokenLocation->endOffset = currentOffset();
2393 tokenLocation->lineStartOffset = currentLineStartOffset();
2394 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
2395 tokenRecord->m_endPosition = currentPosition();
2396 return token;
2397}
2398#endif
2399
6fe7ccc8
A
2400template <typename T>
2401void Lexer<T>::clear()
9dae56ea 2402{
f9bf01c6 2403 m_arena = 0;
9dae56ea 2404
6fe7ccc8 2405 Vector<LChar> newBuffer8;
9dae56ea
A
2406 m_buffer8.swap(newBuffer8);
2407
2408 Vector<UChar> newBuffer16;
9dae56ea
A
2409 m_buffer16.swap(newBuffer16);
2410
ed1e77d3
A
2411 Vector<UChar> newBufferForRawTemplateString16;
2412 m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
2413
9dae56ea 2414 m_isReparsing = false;
ba379fdc
A
2415}
2416
6fe7ccc8
A
2417// Explicitly instantiate the two flavors of Lexer we need (LChar and UChar) here, so that the member definitions above can live in this file instead of putting most of it in Lexer.h.
2418template class Lexer<LChar>;
2419template class Lexer<UChar>;
2420
9dae56ea 2421} // namespace JSC