]> git.saurik.com Git - apple/javascriptcore.git/blame - parser/Lexer.cpp
JavaScriptCore-7600.1.4.16.1.tar.gz
[apple/javascriptcore.git] / parser / Lexer.cpp
CommitLineData
9dae56ea
A
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
93a37866 3 * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved.
9dae56ea 4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
14957cd0 5 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6fe7ccc8 6 * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
9dae56ea
A
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25#include "config.h"
26#include "Lexer.h"
27
81345200 28#include "JSFunctionInlines.h"
14957cd0 29
81345200 30#include "BuiltinNames.h"
9dae56ea 31#include "JSGlobalObjectFunctions.h"
14957cd0 32#include "Identifier.h"
9dae56ea
A
33#include "NodeInfo.h"
34#include "Nodes.h"
81345200 35#include "JSCInlines.h"
6fe7ccc8 36#include <wtf/dtoa.h>
9dae56ea
A
37#include <ctype.h>
38#include <limits.h>
39#include <string.h>
9dae56ea 40#include <wtf/Assertions.h>
9dae56ea 41
14957cd0 42#include "KeywordLookup.h"
9dae56ea 43#include "Lexer.lut.h"
6fe7ccc8 44#include "Parser.h"
9dae56ea 45
9dae56ea
A
46namespace JSC {
47
81345200 48Keywords::Keywords(VM& vm)
93a37866 49 : m_vm(vm)
6fe7ccc8
A
50 , m_keywordTable(JSC::mainTable)
51{
52}
14957cd0
A
53
// Classification of a Latin-1 character, used as the value type of
// typesOfLatin1Characters and as the selector of the lexer's main switch.
enum CharacterType {
    // Types for the main switch

    // The first three types are fixed (their values are pinned explicitly), and are
    // also used for identifying ASCII alpha and alphanumeric characters: isIdentStart
    // compares against CharacterIdentifierStart and isIdentPart tests
    // "<= CharacterNumber", so nothing may be inserted before or between them.
    CharacterIdentifierStart = 0,
    CharacterZero = 1,
    CharacterNumber = 2,

    CharacterInvalid,
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Other types
    CharacterWhiteSpace,
    CharacterPrivateIdentifierStart
};
97
6fe7ccc8
A
98// 256 Latin-1 codes
99static const unsigned short typesOfLatin1Characters[256] = {
14957cd0
A
100/* 0 - Null */ CharacterInvalid,
101/* 1 - Start of Heading */ CharacterInvalid,
102/* 2 - Start of Text */ CharacterInvalid,
103/* 3 - End of Text */ CharacterInvalid,
104/* 4 - End of Transm. */ CharacterInvalid,
105/* 5 - Enquiry */ CharacterInvalid,
106/* 6 - Acknowledgment */ CharacterInvalid,
107/* 7 - Bell */ CharacterInvalid,
108/* 8 - Back Space */ CharacterInvalid,
109/* 9 - Horizontal Tab */ CharacterWhiteSpace,
110/* 10 - Line Feed */ CharacterLineTerminator,
111/* 11 - Vertical Tab */ CharacterWhiteSpace,
112/* 12 - Form Feed */ CharacterWhiteSpace,
113/* 13 - Carriage Return */ CharacterLineTerminator,
114/* 14 - Shift Out */ CharacterInvalid,
115/* 15 - Shift In */ CharacterInvalid,
116/* 16 - Data Line Escape */ CharacterInvalid,
117/* 17 - Device Control 1 */ CharacterInvalid,
118/* 18 - Device Control 2 */ CharacterInvalid,
119/* 19 - Device Control 3 */ CharacterInvalid,
120/* 20 - Device Control 4 */ CharacterInvalid,
121/* 21 - Negative Ack. */ CharacterInvalid,
122/* 22 - Synchronous Idle */ CharacterInvalid,
123/* 23 - End of Transmit */ CharacterInvalid,
124/* 24 - Cancel */ CharacterInvalid,
125/* 25 - End of Medium */ CharacterInvalid,
126/* 26 - Substitute */ CharacterInvalid,
127/* 27 - Escape */ CharacterInvalid,
128/* 28 - File Separator */ CharacterInvalid,
129/* 29 - Group Separator */ CharacterInvalid,
130/* 30 - Record Separator */ CharacterInvalid,
131/* 31 - Unit Separator */ CharacterInvalid,
132/* 32 - Space */ CharacterWhiteSpace,
133/* 33 - ! */ CharacterExclamationMark,
134/* 34 - " */ CharacterQuote,
135/* 35 - # */ CharacterInvalid,
136/* 36 - $ */ CharacterIdentifierStart,
137/* 37 - % */ CharacterModulo,
138/* 38 - & */ CharacterAnd,
139/* 39 - ' */ CharacterQuote,
140/* 40 - ( */ CharacterOpenParen,
141/* 41 - ) */ CharacterCloseParen,
142/* 42 - * */ CharacterMultiply,
143/* 43 - + */ CharacterAdd,
144/* 44 - , */ CharacterComma,
145/* 45 - - */ CharacterSub,
146/* 46 - . */ CharacterDot,
147/* 47 - / */ CharacterSlash,
148/* 48 - 0 */ CharacterZero,
149/* 49 - 1 */ CharacterNumber,
150/* 50 - 2 */ CharacterNumber,
151/* 51 - 3 */ CharacterNumber,
152/* 52 - 4 */ CharacterNumber,
153/* 53 - 5 */ CharacterNumber,
154/* 54 - 6 */ CharacterNumber,
155/* 55 - 7 */ CharacterNumber,
156/* 56 - 8 */ CharacterNumber,
157/* 57 - 9 */ CharacterNumber,
158/* 58 - : */ CharacterColon,
159/* 59 - ; */ CharacterSemicolon,
160/* 60 - < */ CharacterLess,
161/* 61 - = */ CharacterEqual,
162/* 62 - > */ CharacterGreater,
163/* 63 - ? */ CharacterQuestion,
81345200 164/* 64 - @ */ CharacterPrivateIdentifierStart,
14957cd0
A
165/* 65 - A */ CharacterIdentifierStart,
166/* 66 - B */ CharacterIdentifierStart,
167/* 67 - C */ CharacterIdentifierStart,
168/* 68 - D */ CharacterIdentifierStart,
169/* 69 - E */ CharacterIdentifierStart,
170/* 70 - F */ CharacterIdentifierStart,
171/* 71 - G */ CharacterIdentifierStart,
172/* 72 - H */ CharacterIdentifierStart,
173/* 73 - I */ CharacterIdentifierStart,
174/* 74 - J */ CharacterIdentifierStart,
175/* 75 - K */ CharacterIdentifierStart,
176/* 76 - L */ CharacterIdentifierStart,
177/* 77 - M */ CharacterIdentifierStart,
178/* 78 - N */ CharacterIdentifierStart,
179/* 79 - O */ CharacterIdentifierStart,
180/* 80 - P */ CharacterIdentifierStart,
181/* 81 - Q */ CharacterIdentifierStart,
182/* 82 - R */ CharacterIdentifierStart,
183/* 83 - S */ CharacterIdentifierStart,
184/* 84 - T */ CharacterIdentifierStart,
185/* 85 - U */ CharacterIdentifierStart,
186/* 86 - V */ CharacterIdentifierStart,
187/* 87 - W */ CharacterIdentifierStart,
188/* 88 - X */ CharacterIdentifierStart,
189/* 89 - Y */ CharacterIdentifierStart,
190/* 90 - Z */ CharacterIdentifierStart,
191/* 91 - [ */ CharacterOpenBracket,
192/* 92 - \ */ CharacterBackSlash,
193/* 93 - ] */ CharacterCloseBracket,
194/* 94 - ^ */ CharacterXor,
195/* 95 - _ */ CharacterIdentifierStart,
196/* 96 - ` */ CharacterInvalid,
197/* 97 - a */ CharacterIdentifierStart,
198/* 98 - b */ CharacterIdentifierStart,
199/* 99 - c */ CharacterIdentifierStart,
200/* 100 - d */ CharacterIdentifierStart,
201/* 101 - e */ CharacterIdentifierStart,
202/* 102 - f */ CharacterIdentifierStart,
203/* 103 - g */ CharacterIdentifierStart,
204/* 104 - h */ CharacterIdentifierStart,
205/* 105 - i */ CharacterIdentifierStart,
206/* 106 - j */ CharacterIdentifierStart,
207/* 107 - k */ CharacterIdentifierStart,
208/* 108 - l */ CharacterIdentifierStart,
209/* 109 - m */ CharacterIdentifierStart,
210/* 110 - n */ CharacterIdentifierStart,
211/* 111 - o */ CharacterIdentifierStart,
212/* 112 - p */ CharacterIdentifierStart,
213/* 113 - q */ CharacterIdentifierStart,
214/* 114 - r */ CharacterIdentifierStart,
215/* 115 - s */ CharacterIdentifierStart,
216/* 116 - t */ CharacterIdentifierStart,
217/* 117 - u */ CharacterIdentifierStart,
218/* 118 - v */ CharacterIdentifierStart,
219/* 119 - w */ CharacterIdentifierStart,
220/* 120 - x */ CharacterIdentifierStart,
221/* 121 - y */ CharacterIdentifierStart,
222/* 122 - z */ CharacterIdentifierStart,
223/* 123 - { */ CharacterOpenBrace,
224/* 124 - | */ CharacterOr,
225/* 125 - } */ CharacterCloseBrace,
226/* 126 - ~ */ CharacterTilde,
227/* 127 - Delete */ CharacterInvalid,
6fe7ccc8
A
228/* 128 - Cc category */ CharacterInvalid,
229/* 129 - Cc category */ CharacterInvalid,
230/* 130 - Cc category */ CharacterInvalid,
231/* 131 - Cc category */ CharacterInvalid,
232/* 132 - Cc category */ CharacterInvalid,
233/* 133 - Cc category */ CharacterInvalid,
234/* 134 - Cc category */ CharacterInvalid,
235/* 135 - Cc category */ CharacterInvalid,
236/* 136 - Cc category */ CharacterInvalid,
237/* 137 - Cc category */ CharacterInvalid,
238/* 138 - Cc category */ CharacterInvalid,
239/* 139 - Cc category */ CharacterInvalid,
240/* 140 - Cc category */ CharacterInvalid,
241/* 141 - Cc category */ CharacterInvalid,
242/* 142 - Cc category */ CharacterInvalid,
243/* 143 - Cc category */ CharacterInvalid,
244/* 144 - Cc category */ CharacterInvalid,
245/* 145 - Cc category */ CharacterInvalid,
246/* 146 - Cc category */ CharacterInvalid,
247/* 147 - Cc category */ CharacterInvalid,
248/* 148 - Cc category */ CharacterInvalid,
249/* 149 - Cc category */ CharacterInvalid,
250/* 150 - Cc category */ CharacterInvalid,
251/* 151 - Cc category */ CharacterInvalid,
252/* 152 - Cc category */ CharacterInvalid,
253/* 153 - Cc category */ CharacterInvalid,
254/* 154 - Cc category */ CharacterInvalid,
255/* 155 - Cc category */ CharacterInvalid,
256/* 156 - Cc category */ CharacterInvalid,
257/* 157 - Cc category */ CharacterInvalid,
258/* 158 - Cc category */ CharacterInvalid,
259/* 159 - Cc category */ CharacterInvalid,
260/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
261/* 161 - Po category */ CharacterInvalid,
262/* 162 - Sc category */ CharacterInvalid,
263/* 163 - Sc category */ CharacterInvalid,
264/* 164 - Sc category */ CharacterInvalid,
265/* 165 - Sc category */ CharacterInvalid,
266/* 166 - So category */ CharacterInvalid,
267/* 167 - So category */ CharacterInvalid,
268/* 168 - Sk category */ CharacterInvalid,
269/* 169 - So category */ CharacterInvalid,
270/* 170 - Ll category */ CharacterIdentifierStart,
271/* 171 - Pi category */ CharacterInvalid,
272/* 172 - Sm category */ CharacterInvalid,
273/* 173 - Cf category */ CharacterInvalid,
274/* 174 - So category */ CharacterInvalid,
275/* 175 - Sk category */ CharacterInvalid,
276/* 176 - So category */ CharacterInvalid,
277/* 177 - Sm category */ CharacterInvalid,
278/* 178 - No category */ CharacterInvalid,
279/* 179 - No category */ CharacterInvalid,
280/* 180 - Sk category */ CharacterInvalid,
281/* 181 - Ll category */ CharacterIdentifierStart,
282/* 182 - So category */ CharacterInvalid,
283/* 183 - Po category */ CharacterInvalid,
284/* 184 - Sk category */ CharacterInvalid,
285/* 185 - No category */ CharacterInvalid,
286/* 186 - Ll category */ CharacterIdentifierStart,
287/* 187 - Pf category */ CharacterInvalid,
288/* 188 - No category */ CharacterInvalid,
289/* 189 - No category */ CharacterInvalid,
290/* 190 - No category */ CharacterInvalid,
291/* 191 - Po category */ CharacterInvalid,
292/* 192 - Lu category */ CharacterIdentifierStart,
293/* 193 - Lu category */ CharacterIdentifierStart,
294/* 194 - Lu category */ CharacterIdentifierStart,
295/* 195 - Lu category */ CharacterIdentifierStart,
296/* 196 - Lu category */ CharacterIdentifierStart,
297/* 197 - Lu category */ CharacterIdentifierStart,
298/* 198 - Lu category */ CharacterIdentifierStart,
299/* 199 - Lu category */ CharacterIdentifierStart,
300/* 200 - Lu category */ CharacterIdentifierStart,
301/* 201 - Lu category */ CharacterIdentifierStart,
302/* 202 - Lu category */ CharacterIdentifierStart,
303/* 203 - Lu category */ CharacterIdentifierStart,
304/* 204 - Lu category */ CharacterIdentifierStart,
305/* 205 - Lu category */ CharacterIdentifierStart,
306/* 206 - Lu category */ CharacterIdentifierStart,
307/* 207 - Lu category */ CharacterIdentifierStart,
308/* 208 - Lu category */ CharacterIdentifierStart,
309/* 209 - Lu category */ CharacterIdentifierStart,
310/* 210 - Lu category */ CharacterIdentifierStart,
311/* 211 - Lu category */ CharacterIdentifierStart,
312/* 212 - Lu category */ CharacterIdentifierStart,
313/* 213 - Lu category */ CharacterIdentifierStart,
314/* 214 - Lu category */ CharacterIdentifierStart,
315/* 215 - Sm category */ CharacterInvalid,
316/* 216 - Lu category */ CharacterIdentifierStart,
317/* 217 - Lu category */ CharacterIdentifierStart,
318/* 218 - Lu category */ CharacterIdentifierStart,
319/* 219 - Lu category */ CharacterIdentifierStart,
320/* 220 - Lu category */ CharacterIdentifierStart,
321/* 221 - Lu category */ CharacterIdentifierStart,
322/* 222 - Lu category */ CharacterIdentifierStart,
323/* 223 - Ll category */ CharacterIdentifierStart,
324/* 224 - Ll category */ CharacterIdentifierStart,
325/* 225 - Ll category */ CharacterIdentifierStart,
326/* 226 - Ll category */ CharacterIdentifierStart,
327/* 227 - Ll category */ CharacterIdentifierStart,
328/* 228 - Ll category */ CharacterIdentifierStart,
329/* 229 - Ll category */ CharacterIdentifierStart,
330/* 230 - Ll category */ CharacterIdentifierStart,
331/* 231 - Ll category */ CharacterIdentifierStart,
332/* 232 - Ll category */ CharacterIdentifierStart,
333/* 233 - Ll category */ CharacterIdentifierStart,
334/* 234 - Ll category */ CharacterIdentifierStart,
335/* 235 - Ll category */ CharacterIdentifierStart,
336/* 236 - Ll category */ CharacterIdentifierStart,
337/* 237 - Ll category */ CharacterIdentifierStart,
338/* 238 - Ll category */ CharacterIdentifierStart,
339/* 239 - Ll category */ CharacterIdentifierStart,
340/* 240 - Ll category */ CharacterIdentifierStart,
341/* 241 - Ll category */ CharacterIdentifierStart,
342/* 242 - Ll category */ CharacterIdentifierStart,
343/* 243 - Ll category */ CharacterIdentifierStart,
344/* 244 - Ll category */ CharacterIdentifierStart,
345/* 245 - Ll category */ CharacterIdentifierStart,
346/* 246 - Ll category */ CharacterIdentifierStart,
347/* 247 - Sm category */ CharacterInvalid,
348/* 248 - Ll category */ CharacterIdentifierStart,
349/* 249 - Ll category */ CharacterIdentifierStart,
350/* 250 - Ll category */ CharacterIdentifierStart,
351/* 251 - Ll category */ CharacterIdentifierStart,
352/* 252 - Ll category */ CharacterIdentifierStart,
353/* 253 - Ll category */ CharacterIdentifierStart,
354/* 254 - Ll category */ CharacterIdentifierStart,
355/* 255 - Ll category */ CharacterIdentifierStart
14957cd0 356};
9dae56ea 357
93a37866
A
358// This table provides the character that results from \X where X is the index in the table beginning
359// with SPACE. A table value of 0 means that more processing needs to be done.
360static const LChar singleCharacterEscapeValuesForASCII[128] = {
361/* 0 - Null */ 0,
362/* 1 - Start of Heading */ 0,
363/* 2 - Start of Text */ 0,
364/* 3 - End of Text */ 0,
365/* 4 - End of Transm. */ 0,
366/* 5 - Enquiry */ 0,
367/* 6 - Acknowledgment */ 0,
368/* 7 - Bell */ 0,
369/* 8 - Back Space */ 0,
370/* 9 - Horizontal Tab */ 0,
371/* 10 - Line Feed */ 0,
372/* 11 - Vertical Tab */ 0,
373/* 12 - Form Feed */ 0,
374/* 13 - Carriage Return */ 0,
375/* 14 - Shift Out */ 0,
376/* 15 - Shift In */ 0,
377/* 16 - Data Line Escape */ 0,
378/* 17 - Device Control 1 */ 0,
379/* 18 - Device Control 2 */ 0,
380/* 19 - Device Control 3 */ 0,
381/* 20 - Device Control 4 */ 0,
382/* 21 - Negative Ack. */ 0,
383/* 22 - Synchronous Idle */ 0,
384/* 23 - End of Transmit */ 0,
385/* 24 - Cancel */ 0,
386/* 25 - End of Medium */ 0,
387/* 26 - Substitute */ 0,
388/* 27 - Escape */ 0,
389/* 28 - File Separator */ 0,
390/* 29 - Group Separator */ 0,
391/* 30 - Record Separator */ 0,
392/* 31 - Unit Separator */ 0,
393/* 32 - Space */ ' ',
394/* 33 - ! */ '!',
395/* 34 - " */ '"',
396/* 35 - # */ '#',
397/* 36 - $ */ '$',
398/* 37 - % */ '%',
399/* 38 - & */ '&',
400/* 39 - ' */ '\'',
401/* 40 - ( */ '(',
402/* 41 - ) */ ')',
403/* 42 - * */ '*',
404/* 43 - + */ '+',
405/* 44 - , */ ',',
406/* 45 - - */ '-',
407/* 46 - . */ '.',
408/* 47 - / */ '/',
409/* 48 - 0 */ 0,
410/* 49 - 1 */ 0,
411/* 50 - 2 */ 0,
412/* 51 - 3 */ 0,
413/* 52 - 4 */ 0,
414/* 53 - 5 */ 0,
415/* 54 - 6 */ 0,
416/* 55 - 7 */ 0,
417/* 56 - 8 */ 0,
418/* 57 - 9 */ 0,
419/* 58 - : */ ':',
420/* 59 - ; */ ';',
421/* 60 - < */ '<',
422/* 61 - = */ '=',
423/* 62 - > */ '>',
424/* 63 - ? */ '?',
425/* 64 - @ */ '@',
426/* 65 - A */ 'A',
427/* 66 - B */ 'B',
428/* 67 - C */ 'C',
429/* 68 - D */ 'D',
430/* 69 - E */ 'E',
431/* 70 - F */ 'F',
432/* 71 - G */ 'G',
433/* 72 - H */ 'H',
434/* 73 - I */ 'I',
435/* 74 - J */ 'J',
436/* 75 - K */ 'K',
437/* 76 - L */ 'L',
438/* 77 - M */ 'M',
439/* 78 - N */ 'N',
440/* 79 - O */ 'O',
441/* 80 - P */ 'P',
442/* 81 - Q */ 'Q',
443/* 82 - R */ 'R',
444/* 83 - S */ 'S',
445/* 84 - T */ 'T',
446/* 85 - U */ 'U',
447/* 86 - V */ 'V',
448/* 87 - W */ 'W',
449/* 88 - X */ 'X',
450/* 89 - Y */ 'Y',
451/* 90 - Z */ 'Z',
452/* 91 - [ */ '[',
453/* 92 - \ */ '\\',
454/* 93 - ] */ ']',
455/* 94 - ^ */ '^',
456/* 95 - _ */ '_',
457/* 96 - ` */ '`',
458/* 97 - a */ 'a',
459/* 98 - b */ 0x08,
460/* 99 - c */ 'c',
461/* 100 - d */ 'd',
462/* 101 - e */ 'e',
463/* 102 - f */ 0x0C,
464/* 103 - g */ 'g',
465/* 104 - h */ 'h',
466/* 105 - i */ 'i',
467/* 106 - j */ 'j',
468/* 107 - k */ 'k',
469/* 108 - l */ 'l',
470/* 109 - m */ 'm',
471/* 110 - n */ 0x0A,
472/* 111 - o */ 'o',
473/* 112 - p */ 'p',
474/* 113 - q */ 'q',
475/* 114 - r */ 0x0D,
476/* 115 - s */ 's',
477/* 116 - t */ 0x09,
478/* 117 - u */ 0,
479/* 118 - v */ 0x0B,
480/* 119 - w */ 'w',
481/* 120 - x */ 0,
482/* 121 - y */ 'y',
483/* 122 - z */ 'z',
484/* 123 - { */ '{',
485/* 124 - | */ '|',
486/* 125 - } */ '}',
487/* 126 - ~ */ '~',
488/* 127 - Delete */ 0
489};
490
6fe7ccc8 491template <typename T>
81345200 492Lexer<T>::Lexer(VM* vm, JSParserStrictness strictness)
ba379fdc 493 : m_isReparsing(false)
93a37866 494 , m_vm(vm)
81345200 495 , m_parsingBuiltinFunction(strictness == JSParseBuiltin)
9dae56ea 496{
9dae56ea
A
497}
498
6fe7ccc8
A
499template <typename T>
500Lexer<T>::~Lexer()
9dae56ea 501{
ba379fdc
A
502}
503
6fe7ccc8 504template <typename T>
93a37866 505String Lexer<T>::invalidCharacterMessage() const
ba379fdc 506{
6fe7ccc8
A
507 switch (m_current) {
508 case 0:
509 return "Invalid character: '\\0'";
510 case 10:
511 return "Invalid character: '\\n'";
512 case 11:
513 return "Invalid character: '\\v'";
514 case 13:
515 return "Invalid character: '\\r'";
516 case 35:
517 return "Invalid character: '#'";
518 case 64:
519 return "Invalid character: '@'";
520 case 96:
521 return "Invalid character: '`'";
522 default:
523 return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current)).impl();
524 }
ba379fdc
A
525}
526
6fe7ccc8 527template <typename T>
93a37866 528ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
ba379fdc 529{
6fe7ccc8
A
530 ASSERT(m_code <= m_codeEnd);
531 return m_code;
ba379fdc
A
532}
533
6fe7ccc8
A
534template <typename T>
535void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
9dae56ea 536{
6fe7ccc8
A
537 m_arena = &arena->identifierArena();
538
ba379fdc 539 m_lineNumber = source.firstLine();
9dae56ea 540 m_lastToken = -1;
6fe7ccc8 541
93a37866 542 const String& sourceString = source.provider()->source();
9dae56ea 543
93a37866
A
544 if (!sourceString.isNull())
545 setCodeStart(sourceString.impl());
6fe7ccc8
A
546 else
547 m_codeStart = 0;
ba379fdc 548
9dae56ea 549 m_source = &source;
93a37866
A
550 m_sourceOffset = source.startOffset();
551 m_codeStartPlusOffset = m_codeStart + source.startOffset();
552 m_code = m_codeStartPlusOffset;
6fe7ccc8 553 m_codeEnd = m_codeStart + source.endOffset();
9dae56ea
A
554 m_error = false;
555 m_atLineStart = true;
93a37866
A
556 m_lineStart = m_code;
557 m_lexErrorMessage = String();
6fe7ccc8 558
4e4e5a6f
A
559 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
560 m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
6fe7ccc8 561
14957cd0
A
562 if (LIKELY(m_code < m_codeEnd))
563 m_current = *m_code;
564 else
6fe7ccc8 565 m_current = 0;
ba379fdc 566 ASSERT(currentOffset() == source.startOffset());
9dae56ea
A
567}
568
6fe7ccc8
A
569template <typename T>
570template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
9dae56ea 571{
6fe7ccc8 572 m_code += shiftAmount;
93a37866 573 ASSERT(currentOffset() >= currentLineStartOffset());
6fe7ccc8
A
574 m_current = *m_code;
575}
576
577template <typename T>
578ALWAYS_INLINE void Lexer<T>::shift()
579{
580 // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
581 m_current = 0;
582 ++m_code;
583 if (LIKELY(m_code < m_codeEnd))
14957cd0 584 m_current = *m_code;
14957cd0
A
585}
586
6fe7ccc8
A
587template <typename T>
588ALWAYS_INLINE bool Lexer<T>::atEnd() const
14957cd0 589{
6fe7ccc8
A
590 ASSERT(!m_current || m_code < m_codeEnd);
591 return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
14957cd0
A
592}
593
6fe7ccc8
A
594template <typename T>
595ALWAYS_INLINE T Lexer<T>::peek(int offset) const
14957cd0 596{
14957cd0 597 ASSERT(offset > 0 && offset < 5);
6fe7ccc8
A
598 const T* code = m_code + offset;
599 return (code < m_codeEnd) ? *code : 0;
14957cd0
A
600}
601
6fe7ccc8 602template <typename T>
93a37866 603typename Lexer<T>::UnicodeHexValue Lexer<T>::parseFourDigitUnicodeHex()
14957cd0 604{
6fe7ccc8
A
605 T char1 = peek(1);
606 T char2 = peek(2);
607 T char3 = peek(3);
14957cd0
A
608
609 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
93a37866 610 return UnicodeHexValue((m_code + 4) >= m_codeEnd ? UnicodeHexValue::IncompleteHex : UnicodeHexValue::InvalidHex);
14957cd0
A
611
612 int result = convertUnicode(m_current, char1, char2, char3);
613 shift();
614 shift();
615 shift();
616 shift();
93a37866 617 return UnicodeHexValue(result);
ba379fdc
A
618}
619
6fe7ccc8
A
620template <typename T>
621void Lexer<T>::shiftLineTerminator()
ba379fdc
A
622{
623 ASSERT(isLineTerminator(m_current));
624
81345200 625 m_positionBeforeLastNewline = currentPosition();
6fe7ccc8 626 T prev = m_current;
14957cd0
A
627 shift();
628
ba379fdc 629 // Allow both CRLF and LFCR.
6fe7ccc8 630 if (prev + m_current == '\n' + '\r')
14957cd0 631 shift();
ba379fdc
A
632
633 ++m_lineNumber;
634}
635
6fe7ccc8
A
636template <typename T>
637ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
ba379fdc
A
638{
639 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
640}
641
81345200 642static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
ba379fdc 643{
81345200 644 return U_GET_GC_MASK(c) & U_GC_L_MASK;
ba379fdc
A
645}
646
6fe7ccc8 647static ALWAYS_INLINE bool isLatin1(LChar)
ba379fdc 648{
6fe7ccc8 649 return true;
ba379fdc
A
650}
651
6fe7ccc8 652static ALWAYS_INLINE bool isLatin1(UChar c)
ba379fdc 653{
6fe7ccc8 654 return c < 256;
ba379fdc
A
655}
656
6fe7ccc8
A
657static inline bool isIdentStart(LChar c)
658{
659 return typesOfLatin1Characters[c] == CharacterIdentifierStart;
660}
661
662static inline bool isIdentStart(UChar c)
663{
664 return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
665}
666
667static NEVER_INLINE bool isNonLatin1IdentPart(int c)
668{
81345200 669 return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D;
6fe7ccc8
A
670}
671
672static ALWAYS_INLINE bool isIdentPart(LChar c)
ba379fdc 673{
14957cd0
A
674 // Character types are divided into two groups depending on whether they can be part of an
675 // identifier or not. Those whose type value is less or equal than CharacterNumber can be
676 // part of an identifier. (See the CharacterType definition for more details.)
6fe7ccc8
A
677 return typesOfLatin1Characters[c] <= CharacterNumber;
678}
679
680static ALWAYS_INLINE bool isIdentPart(UChar c)
681{
682 return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
ba379fdc
A
683}
684
93a37866 685static inline LChar singleEscape(int c)
ba379fdc 686{
93a37866
A
687 if (c < 128) {
688 ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
689 return singleCharacterEscapeValuesForASCII[c];
9dae56ea 690 }
93a37866 691 return 0;
9dae56ea
A
692}
693
6fe7ccc8
A
694template <typename T>
695inline void Lexer<T>::record8(int c)
9dae56ea 696{
ba379fdc
A
697 ASSERT(c >= 0);
698 ASSERT(c <= 0xFF);
6fe7ccc8
A
699 m_buffer8.append(static_cast<LChar>(c));
700}
701
702template <typename T>
703inline void assertCharIsIn8BitRange(T c)
704{
705 UNUSED_PARAM(c);
706 ASSERT(c >= 0);
707 ASSERT(c <= 0xFF);
708}
709
710template <>
711inline void assertCharIsIn8BitRange(UChar c)
712{
713 UNUSED_PARAM(c);
714 ASSERT(c <= 0xFF);
715}
716
717template <>
718inline void assertCharIsIn8BitRange(LChar)
719{
720}
721
722template <typename T>
723inline void Lexer<T>::append8(const T* p, size_t length)
724{
725 size_t currentSize = m_buffer8.size();
726 m_buffer8.grow(currentSize + length);
727 LChar* rawBuffer = m_buffer8.data() + currentSize;
728
729 for (size_t i = 0; i < length; i++) {
730 T c = p[i];
731 assertCharIsIn8BitRange(c);
732 rawBuffer[i] = c;
733 }
9dae56ea
A
734}
735
6fe7ccc8
A
736template <typename T>
737inline void Lexer<T>::append16(const LChar* p, size_t length)
738{
739 size_t currentSize = m_buffer16.size();
740 m_buffer16.grow(currentSize + length);
741 UChar* rawBuffer = m_buffer16.data() + currentSize;
742
743 for (size_t i = 0; i < length; i++)
744 rawBuffer[i] = p[i];
745}
746
747template <typename T>
748inline void Lexer<T>::record16(T c)
9dae56ea 749{
ba379fdc
A
750 m_buffer16.append(c);
751}
752
6fe7ccc8
A
753template <typename T>
754inline void Lexer<T>::record16(int c)
ba379fdc
A
755{
756 ASSERT(c >= 0);
6fe7ccc8
A
757 ASSERT(c <= static_cast<int>(USHRT_MAX));
758 m_buffer16.append(static_cast<UChar>(c));
9dae56ea 759}
81345200
A
760
761#if !ASSERT_DISABLED
762bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
763{
764 if (!ident)
765 return true;
766 /* Just block any use of suspicious identifiers. This is intended to
767 * be used as a safety net while implementing builtins.
768 */
769 if (*ident == vm.propertyNames->builtinNames().callPublicName())
770 return false;
771 if (*ident == vm.propertyNames->builtinNames().applyPublicName())
772 return false;
773 if (*ident == vm.propertyNames->eval)
774 return false;
775 if (*ident == vm.propertyNames->Function)
776 return false;
777 return true;
778}
779#endif
780
6fe7ccc8
A
781template <>
782template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
14957cd0
A
783{
784 const ptrdiff_t remaining = m_codeEnd - m_code;
6fe7ccc8 785 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
14957cd0
A
786 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
787 if (keyword != IDENT) {
788 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
6fe7ccc8 789 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
14957cd0
A
790 }
791 }
81345200
A
792
793 bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
794 if (isPrivateName)
795 shift();
796
93a37866
A
797 const LChar* identifierStart = currentSourcePtr();
798 unsigned identifierLineStart = currentLineStartOffset();
6fe7ccc8
A
799
800 while (isIdentPart(m_current))
801 shift();
802
803 if (UNLIKELY(m_current == '\\')) {
93a37866 804 setOffsetFromSourcePtr(identifierStart, identifierLineStart);
6fe7ccc8
A
805 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
806 }
807
808 const Identifier* ident = 0;
809
81345200 810 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
93a37866 811 int identifierLength = currentSourcePtr() - identifierStart;
6fe7ccc8 812 ident = makeIdentifier(identifierStart, identifierLength);
81345200
A
813 if (m_parsingBuiltinFunction) {
814 if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
815 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
816 return ERRORTOK;
817 }
818 if (isPrivateName)
819 ident = m_vm->propertyNames->getPrivateName(*ident);
820 else if (*ident == m_vm->propertyNames->undefinedKeyword)
821 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
822 if (!ident)
823 return INVALID_PRIVATE_NAME_ERRORTOK;
824 }
6fe7ccc8
A
825 tokenData->ident = ident;
826 } else
827 tokenData->ident = 0;
828
81345200 829 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
6fe7ccc8
A
830 ASSERT(shouldCreateIdentifier);
831 if (remaining < maxTokenLength) {
81345200 832 const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
6fe7ccc8
A
833 ASSERT((remaining < maxTokenLength) || !entry);
834 if (!entry)
835 return IDENT;
836 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
837 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
838 }
839 return IDENT;
840 }
841
842 return IDENT;
843}
844
845template <>
846template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
847{
848 const ptrdiff_t remaining = m_codeEnd - m_code;
849 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
850 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
851 if (keyword != IDENT) {
852 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
853 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
854 }
855 }
81345200
A
856
857 bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
858 if (isPrivateName)
859 shift();
6fe7ccc8 860
93a37866
A
861 const UChar* identifierStart = currentSourcePtr();
862 int identifierLineStart = currentLineStartOffset();
6fe7ccc8
A
863
864 UChar orAllChars = 0;
865
866 while (isIdentPart(m_current)) {
867 orAllChars |= m_current;
868 shift();
869 }
870
871 if (UNLIKELY(m_current == '\\')) {
81345200 872 ASSERT(!isPrivateName);
93a37866 873 setOffsetFromSourcePtr(identifierStart, identifierLineStart);
6fe7ccc8
A
874 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
875 }
876
877 bool isAll8Bit = false;
878
879 if (!(orAllChars & ~0xff))
880 isAll8Bit = true;
881
882 const Identifier* ident = 0;
883
81345200 884 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
93a37866 885 int identifierLength = currentSourcePtr() - identifierStart;
6fe7ccc8
A
886 if (isAll8Bit)
887 ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
888 else
889 ident = makeIdentifier(identifierStart, identifierLength);
81345200
A
890 if (m_parsingBuiltinFunction) {
891 if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
892 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
893 return ERRORTOK;
894 }
895 if (isPrivateName)
896 ident = m_vm->propertyNames->getPrivateName(*ident);
897 else if (*ident == m_vm->propertyNames->undefinedKeyword)
898 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
899 if (!ident)
900 return INVALID_PRIVATE_NAME_ERRORTOK;
901 }
6fe7ccc8
A
902 tokenData->ident = ident;
903 } else
904 tokenData->ident = 0;
905
81345200 906 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
6fe7ccc8
A
907 ASSERT(shouldCreateIdentifier);
908 if (remaining < maxTokenLength) {
81345200 909 const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
6fe7ccc8
A
910 ASSERT((remaining < maxTokenLength) || !entry);
911 if (!entry)
912 return IDENT;
913 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
914 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
915 }
916 return IDENT;
917 }
918
919 return IDENT;
920}
921
922template <typename T>
923template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
924{
925 const ptrdiff_t remaining = m_codeEnd - m_code;
93a37866 926 const T* identifierStart = currentSourcePtr();
14957cd0
A
927 bool bufferRequired = false;
928
929 while (true) {
930 if (LIKELY(isIdentPart(m_current))) {
931 shift();
932 continue;
933 }
934 if (LIKELY(m_current != '\\'))
935 break;
936
937 // \uXXXX unicode characters.
938 bufferRequired = true;
93a37866
A
939 if (identifierStart != currentSourcePtr())
940 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
14957cd0
A
941 shift();
942 if (UNLIKELY(m_current != 'u'))
93a37866 943 return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
14957cd0 944 shift();
93a37866
A
945 UnicodeHexValue character = parseFourDigitUnicodeHex();
946 if (UNLIKELY(!character.isValid()))
947 return character.valueType() == UnicodeHexValue::IncompleteHex ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
948 UChar ucharacter = static_cast<UChar>(character.value());
6fe7ccc8 949 if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
93a37866 950 return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
6fe7ccc8
A
951 if (shouldCreateIdentifier)
952 record16(ucharacter);
93a37866 953 identifierStart = currentSourcePtr();
14957cd0 954 }
6fe7ccc8 955
14957cd0
A
956 int identifierLength;
957 const Identifier* ident = 0;
958 if (shouldCreateIdentifier) {
6fe7ccc8 959 if (!bufferRequired) {
93a37866 960 identifierLength = currentSourcePtr() - identifierStart;
6fe7ccc8
A
961 ident = makeIdentifier(identifierStart, identifierLength);
962 } else {
93a37866
A
963 if (identifierStart != currentSourcePtr())
964 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
6fe7ccc8 965 ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
14957cd0
A
966 }
967
14957cd0
A
968 tokenData->ident = ident;
969 } else
970 tokenData->ident = 0;
971
6fe7ccc8 972 if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
14957cd0
A
973 ASSERT(shouldCreateIdentifier);
974 // Keywords must not be recognized if there was an \uXXXX in the identifier.
975 if (remaining < maxTokenLength) {
81345200 976 const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
14957cd0 977 ASSERT((remaining < maxTokenLength) || !entry);
6fe7ccc8
A
978 if (!entry)
979 return IDENT;
980 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
981 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
14957cd0
A
982 }
983 return IDENT;
984 }
985
986 m_buffer16.resize(0);
987 return IDENT;
988}
989
6fe7ccc8 990static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
14957cd0 991{
6fe7ccc8 992 return character < 0xE;
14957cd0
A
993}
994
6fe7ccc8 995static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
14957cd0 996{
6fe7ccc8
A
997 return character < 0xE || character > 0xFF;
998}
999
1000template <typename T>
93a37866 1001template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
6fe7ccc8
A
1002{
1003 int startingOffset = currentOffset();
93a37866 1004 int startingLineStartOffset = currentLineStartOffset();
6fe7ccc8
A
1005 int startingLineNumber = lineNumber();
1006 T stringQuoteCharacter = m_current;
14957cd0
A
1007 shift();
1008
93a37866 1009 const T* stringStart = currentSourcePtr();
14957cd0
A
1010
1011 while (m_current != stringQuoteCharacter) {
1012 if (UNLIKELY(m_current == '\\')) {
93a37866
A
1013 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1014 append8(stringStart, currentSourcePtr() - stringStart);
14957cd0
A
1015 shift();
1016
93a37866 1017 LChar escape = singleEscape(m_current);
14957cd0
A
1018
1019 // Most common escape sequences first
1020 if (escape) {
6fe7ccc8
A
1021 if (shouldBuildStrings)
1022 record8(escape);
14957cd0
A
1023 shift();
1024 } else if (UNLIKELY(isLineTerminator(m_current)))
1025 shiftLineTerminator();
1026 else if (m_current == 'x') {
1027 shift();
6fe7ccc8
A
1028 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1029 m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
93a37866 1030 return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
6fe7ccc8
A
1031 }
1032 T prev = m_current;
1033 shift();
1034 if (shouldBuildStrings)
1035 record8(convertHex(prev, m_current));
1036 shift();
1037 } else {
93a37866 1038 setOffset(startingOffset, startingLineStartOffset);
6fe7ccc8
A
1039 setLineNumber(startingLineNumber);
1040 m_buffer8.resize(0);
1041 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1042 }
93a37866 1043 stringStart = currentSourcePtr();
6fe7ccc8
A
1044 continue;
1045 }
1046
1047 if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
93a37866 1048 setOffset(startingOffset, startingLineStartOffset);
6fe7ccc8
A
1049 setLineNumber(startingLineNumber);
1050 m_buffer8.resize(0);
1051 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1052 }
1053
1054 shift();
1055 }
1056
93a37866
A
1057 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1058 append8(stringStart, currentSourcePtr() - stringStart);
6fe7ccc8
A
1059 if (shouldBuildStrings) {
1060 tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1061 m_buffer8.resize(0);
1062 } else
1063 tokenData->ident = 0;
1064
93a37866 1065 return StringParsedSuccessfully;
6fe7ccc8
A
1066}
1067
1068template <typename T>
93a37866 1069template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
6fe7ccc8
A
1070{
1071 T stringQuoteCharacter = m_current;
1072 shift();
1073
93a37866 1074 const T* stringStart = currentSourcePtr();
6fe7ccc8
A
1075
1076 while (m_current != stringQuoteCharacter) {
1077 if (UNLIKELY(m_current == '\\')) {
93a37866
A
1078 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1079 append16(stringStart, currentSourcePtr() - stringStart);
6fe7ccc8
A
1080 shift();
1081
93a37866 1082 LChar escape = singleEscape(m_current);
6fe7ccc8
A
1083
1084 // Most common escape sequences first
1085 if (escape) {
1086 if (shouldBuildStrings)
1087 record16(escape);
1088 shift();
1089 } else if (UNLIKELY(isLineTerminator(m_current)))
1090 shiftLineTerminator();
1091 else if (m_current == 'x') {
1092 shift();
1093 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1094 m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
93a37866 1095 return StringCannotBeParsed;
6fe7ccc8
A
1096 }
1097 T prev = m_current;
1098 shift();
1099 if (shouldBuildStrings)
1100 record16(convertHex(prev, m_current));
1101 shift();
14957cd0
A
1102 } else if (m_current == 'u') {
1103 shift();
93a37866
A
1104 UnicodeHexValue character = parseFourDigitUnicodeHex();
1105 if (character.isValid()) {
14957cd0 1106 if (shouldBuildStrings)
93a37866 1107 record16(character.value());
14957cd0
A
1108 } else if (m_current == stringQuoteCharacter) {
1109 if (shouldBuildStrings)
1110 record16('u');
6fe7ccc8
A
1111 } else {
1112 m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
93a37866 1113 return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed;
6fe7ccc8 1114 }
14957cd0
A
1115 } else if (strictMode && isASCIIDigit(m_current)) {
1116 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1117 int character1 = m_current;
1118 shift();
6fe7ccc8
A
1119 if (character1 != '0' || isASCIIDigit(m_current)) {
1120 m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
93a37866 1121 return StringCannotBeParsed;
6fe7ccc8 1122 }
14957cd0
A
1123 if (shouldBuildStrings)
1124 record16(0);
1125 } else if (!strictMode && isASCIIOctalDigit(m_current)) {
1126 // Octal character sequences
6fe7ccc8 1127 T character1 = m_current;
14957cd0
A
1128 shift();
1129 if (isASCIIOctalDigit(m_current)) {
1130 // Two octal characters
6fe7ccc8 1131 T character2 = m_current;
14957cd0
A
1132 shift();
1133 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1134 if (shouldBuildStrings)
1135 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1136 shift();
1137 } else {
1138 if (shouldBuildStrings)
1139 record16((character1 - '0') * 8 + character2 - '0');
1140 }
1141 } else {
1142 if (shouldBuildStrings)
1143 record16(character1 - '0');
1144 }
6fe7ccc8 1145 } else if (!atEnd()) {
14957cd0
A
1146 if (shouldBuildStrings)
1147 record16(m_current);
1148 shift();
6fe7ccc8
A
1149 } else {
1150 m_lexErrorMessage = "Unterminated string constant";
93a37866 1151 return StringUnterminated;
6fe7ccc8 1152 }
14957cd0 1153
93a37866 1154 stringStart = currentSourcePtr();
14957cd0
A
1155 continue;
1156 }
1157 // Fast check for characters that require special handling.
6fe7ccc8 1158 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
14957cd0
A
1159 // as possible, and lets through all common ASCII characters.
1160 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1161 // New-line or end of input is not allowed
6fe7ccc8
A
1162 if (atEnd() || isLineTerminator(m_current)) {
1163 m_lexErrorMessage = "Unexpected EOF";
93a37866 1164 return atEnd() ? StringUnterminated : StringCannotBeParsed;
6fe7ccc8 1165 }
14957cd0
A
1166 // Anything else is just a normal character
1167 }
1168 shift();
1169 }
1170
93a37866
A
1171 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1172 append16(stringStart, currentSourcePtr() - stringStart);
14957cd0
A
1173 if (shouldBuildStrings)
1174 tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1175 else
1176 tokenData->ident = 0;
1177
1178 m_buffer16.resize(0);
93a37866 1179 return StringParsedSuccessfully;
14957cd0
A
1180}
1181
6fe7ccc8
A
1182template <typename T>
1183ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
14957cd0
A
1184{
1185 // Optimization: most hexadecimal values fit into 4 bytes.
1186 uint32_t hexValue = 0;
1187 int maximumDigits = 7;
1188
1189 // Shift out the 'x' prefix.
1190 shift();
1191
1192 do {
1193 hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1194 shift();
1195 --maximumDigits;
1196 } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1197
1198 if (maximumDigits >= 0) {
1199 returnValue = hexValue;
1200 return;
1201 }
1202
1203 // No more place in the hexValue buffer.
1204 // The values are shifted out and placed into the m_buffer8 vector.
1205 for (int i = 0; i < 8; ++i) {
1206 int digit = hexValue >> 28;
1207 if (digit < 10)
1208 record8(digit + '0');
1209 else
1210 record8(digit - 10 + 'a');
1211 hexValue <<= 4;
1212 }
1213
1214 while (isASCIIHexDigit(m_current)) {
1215 record8(m_current);
1216 shift();
1217 }
1218
1219 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1220}
1221
6fe7ccc8
A
1222template <typename T>
1223ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
14957cd0
A
1224{
1225 // Optimization: most octal values fit into 4 bytes.
1226 uint32_t octalValue = 0;
1227 int maximumDigits = 9;
1228 // Temporary buffer for the digits. Makes easier
1229 // to reconstruct the input characters when needed.
6fe7ccc8 1230 LChar digits[10];
14957cd0
A
1231
1232 do {
1233 octalValue = octalValue * 8 + (m_current - '0');
1234 digits[maximumDigits] = m_current;
1235 shift();
1236 --maximumDigits;
1237 } while (isASCIIOctalDigit(m_current) && maximumDigits >= 0);
1238
1239 if (!isASCIIDigit(m_current) && maximumDigits >= 0) {
1240 returnValue = octalValue;
1241 return true;
1242 }
1243
1244 for (int i = 9; i > maximumDigits; --i)
1245 record8(digits[i]);
1246
1247 while (isASCIIOctalDigit(m_current)) {
1248 record8(m_current);
1249 shift();
1250 }
1251
1252 if (isASCIIDigit(m_current))
1253 return false;
1254
1255 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
1256 return true;
1257}
1258
6fe7ccc8
A
1259template <typename T>
1260ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
14957cd0
A
1261{
1262 // Optimization: most decimal values fit into 4 bytes.
1263 uint32_t decimalValue = 0;
1264
1265 // Since parseOctal may be executed before parseDecimal,
1266 // the m_buffer8 may hold ascii digits.
1267 if (!m_buffer8.size()) {
1268 int maximumDigits = 9;
1269 // Temporary buffer for the digits. Makes easier
1270 // to reconstruct the input characters when needed.
6fe7ccc8 1271 LChar digits[10];
14957cd0
A
1272
1273 do {
1274 decimalValue = decimalValue * 10 + (m_current - '0');
1275 digits[maximumDigits] = m_current;
1276 shift();
1277 --maximumDigits;
1278 } while (isASCIIDigit(m_current) && maximumDigits >= 0);
1279
1280 if (maximumDigits >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
1281 returnValue = decimalValue;
1282 return true;
1283 }
1284
1285 for (int i = 9; i > maximumDigits; --i)
1286 record8(digits[i]);
1287 }
1288
1289 while (isASCIIDigit(m_current)) {
1290 record8(m_current);
1291 shift();
1292 }
1293
1294 return false;
1295}
1296
6fe7ccc8
A
1297template <typename T>
1298ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
14957cd0
A
1299{
1300 record8('.');
1301 while (isASCIIDigit(m_current)) {
1302 record8(m_current);
1303 shift();
1304 }
1305}
1306
6fe7ccc8
A
1307template <typename T>
1308ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
14957cd0
A
1309{
1310 record8('e');
1311 shift();
1312 if (m_current == '+' || m_current == '-') {
1313 record8(m_current);
1314 shift();
1315 }
1316
1317 if (!isASCIIDigit(m_current))
1318 return false;
1319
1320 do {
1321 record8(m_current);
1322 shift();
1323 } while (isASCIIDigit(m_current));
1324 return true;
1325}
1326
6fe7ccc8
A
1327template <typename T>
1328ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
14957cd0
A
1329{
1330 while (true) {
1331 while (UNLIKELY(m_current == '*')) {
1332 shift();
1333 if (m_current == '/') {
1334 shift();
1335 return true;
1336 }
1337 }
1338
6fe7ccc8 1339 if (atEnd())
14957cd0
A
1340 return false;
1341
6fe7ccc8 1342 if (isLineTerminator(m_current)) {
14957cd0 1343 shiftLineTerminator();
6fe7ccc8
A
1344 m_terminator = true;
1345 } else
14957cd0
A
1346 shift();
1347 }
1348}
1349
6fe7ccc8
A
1350template <typename T>
1351bool Lexer<T>::nextTokenIsColon()
14957cd0 1352{
6fe7ccc8 1353 const T* code = m_code;
14957cd0
A
1354 while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1355 code++;
6fe7ccc8 1356
14957cd0
A
1357 return code < m_codeEnd && *code == ':';
1358}
1359
6fe7ccc8 1360template <typename T>
81345200 1361JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
9dae56ea 1362{
81345200
A
1363 JSTokenData* tokenData = &tokenRecord->m_data;
1364 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
ba379fdc
A
1365 ASSERT(!m_error);
1366 ASSERT(m_buffer8.isEmpty());
1367 ASSERT(m_buffer16.isEmpty());
1368
14957cd0 1369 JSTokenType token = ERRORTOK;
9dae56ea 1370 m_terminator = false;
ba379fdc
A
1371
1372start:
1373 while (isWhiteSpace(m_current))
14957cd0 1374 shift();
ba379fdc 1375
6fe7ccc8 1376 if (atEnd())
14957cd0 1377 return EOFTOK;
6fe7ccc8 1378
93a37866
A
1379 tokenLocation->startOffset = currentOffset();
1380 ASSERT(currentOffset() >= currentLineStartOffset());
81345200 1381 tokenRecord->m_startPosition = currentPosition();
14957cd0
A
1382
1383 CharacterType type;
6fe7ccc8
A
1384 if (LIKELY(isLatin1(m_current)))
1385 type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1386 else if (isNonLatin1IdentStart(m_current))
14957cd0
A
1387 type = CharacterIdentifierStart;
1388 else if (isLineTerminator(m_current))
1389 type = CharacterLineTerminator;
1390 else
1391 type = CharacterInvalid;
1392
1393 switch (type) {
1394 case CharacterGreater:
1395 shift();
1396 if (m_current == '>') {
1397 shift();
1398 if (m_current == '>') {
1399 shift();
1400 if (m_current == '=') {
1401 shift();
ba379fdc
A
1402 token = URSHIFTEQUAL;
1403 break;
9dae56ea 1404 }
ba379fdc 1405 token = URSHIFT;
9dae56ea 1406 break;
ba379fdc 1407 }
14957cd0
A
1408 if (m_current == '=') {
1409 shift();
1410 token = RSHIFTEQUAL;
ba379fdc
A
1411 break;
1412 }
14957cd0 1413 token = RSHIFT;
ba379fdc 1414 break;
14957cd0
A
1415 }
1416 if (m_current == '=') {
1417 shift();
1418 token = GE;
ba379fdc 1419 break;
14957cd0
A
1420 }
1421 token = GT;
1422 break;
1423 case CharacterEqual:
1424 shift();
1425 if (m_current == '=') {
1426 shift();
1427 if (m_current == '=') {
1428 shift();
1429 token = STREQ;
9dae56ea 1430 break;
ba379fdc 1431 }
14957cd0 1432 token = EQEQ;
ba379fdc 1433 break;
14957cd0
A
1434 }
1435 token = EQUAL;
1436 break;
1437 case CharacterLess:
1438 shift();
1439 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1440 // <!-- marks the beginning of a line comment (for www usage)
1441 goto inSingleLineComment;
1442 }
1443 if (m_current == '<') {
1444 shift();
1445 if (m_current == '=') {
1446 shift();
1447 token = LSHIFTEQUAL;
9dae56ea 1448 break;
ba379fdc 1449 }
14957cd0 1450 token = LSHIFT;
ba379fdc 1451 break;
14957cd0
A
1452 }
1453 if (m_current == '=') {
1454 shift();
1455 token = LE;
ba379fdc 1456 break;
14957cd0
A
1457 }
1458 token = LT;
1459 break;
1460 case CharacterExclamationMark:
1461 shift();
1462 if (m_current == '=') {
1463 shift();
1464 if (m_current == '=') {
1465 shift();
1466 token = STRNEQ;
9dae56ea 1467 break;
ba379fdc 1468 }
14957cd0 1469 token = NE;
ba379fdc 1470 break;
14957cd0
A
1471 }
1472 token = EXCLAMATION;
1473 break;
1474 case CharacterAdd:
1475 shift();
1476 if (m_current == '+') {
1477 shift();
1478 token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1479 break;
1480 }
1481 if (m_current == '=') {
1482 shift();
1483 token = PLUSEQUAL;
ba379fdc 1484 break;
14957cd0
A
1485 }
1486 token = PLUS;
1487 break;
1488 case CharacterSub:
1489 shift();
1490 if (m_current == '-') {
1491 shift();
1492 if (m_atLineStart && m_current == '>') {
1493 shift();
ba379fdc
A
1494 goto inSingleLineComment;
1495 }
14957cd0 1496 token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
ba379fdc 1497 break;
14957cd0
A
1498 }
1499 if (m_current == '=') {
1500 shift();
1501 token = MINUSEQUAL;
ba379fdc 1502 break;
14957cd0
A
1503 }
1504 token = MINUS;
1505 break;
1506 case CharacterMultiply:
1507 shift();
1508 if (m_current == '=') {
1509 shift();
1510 token = MULTEQUAL;
ba379fdc 1511 break;
14957cd0
A
1512 }
1513 token = TIMES;
1514 break;
1515 case CharacterSlash:
1516 shift();
1517 if (m_current == '/') {
1518 shift();
1519 goto inSingleLineComment;
1520 }
1521 if (m_current == '*') {
1522 shift();
1523 if (parseMultilineComment())
1524 goto start;
6fe7ccc8 1525 m_lexErrorMessage = "Multiline comment was not closed properly";
93a37866 1526 token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
14957cd0
A
1527 goto returnError;
1528 }
1529 if (m_current == '=') {
1530 shift();
1531 token = DIVEQUAL;
ba379fdc 1532 break;
14957cd0
A
1533 }
1534 token = DIVIDE;
1535 break;
1536 case CharacterAnd:
1537 shift();
1538 if (m_current == '&') {
1539 shift();
1540 token = AND;
ba379fdc 1541 break;
14957cd0
A
1542 }
1543 if (m_current == '=') {
1544 shift();
1545 token = ANDEQUAL;
ba379fdc 1546 break;
14957cd0
A
1547 }
1548 token = BITAND;
1549 break;
1550 case CharacterXor:
1551 shift();
1552 if (m_current == '=') {
1553 shift();
1554 token = XOREQUAL;
ba379fdc 1555 break;
14957cd0
A
1556 }
1557 token = BITXOR;
1558 break;
1559 case CharacterModulo:
1560 shift();
1561 if (m_current == '=') {
1562 shift();
1563 token = MODEQUAL;
ba379fdc 1564 break;
14957cd0
A
1565 }
1566 token = MOD;
1567 break;
1568 case CharacterOr:
1569 shift();
1570 if (m_current == '=') {
1571 shift();
1572 token = OREQUAL;
ba379fdc 1573 break;
14957cd0
A
1574 }
1575 if (m_current == '|') {
1576 shift();
1577 token = OR;
ba379fdc 1578 break;
14957cd0
A
1579 }
1580 token = BITOR;
1581 break;
1582 case CharacterOpenParen:
1583 token = OPENPAREN;
1584 shift();
1585 break;
1586 case CharacterCloseParen:
1587 token = CLOSEPAREN;
1588 shift();
1589 break;
1590 case CharacterOpenBracket:
1591 token = OPENBRACKET;
1592 shift();
1593 break;
1594 case CharacterCloseBracket:
1595 token = CLOSEBRACKET;
1596 shift();
1597 break;
1598 case CharacterComma:
1599 token = COMMA;
1600 shift();
1601 break;
1602 case CharacterColon:
1603 token = COLON;
1604 shift();
1605 break;
1606 case CharacterQuestion:
1607 token = QUESTION;
1608 shift();
1609 break;
1610 case CharacterTilde:
1611 token = TILDE;
1612 shift();
1613 break;
1614 case CharacterSemicolon:
14957cd0
A
1615 shift();
1616 token = SEMICOLON;
1617 break;
1618 case CharacterOpenBrace:
93a37866
A
1619 tokenData->line = lineNumber();
1620 tokenData->offset = currentOffset();
1621 tokenData->lineStartOffset = currentLineStartOffset();
1622 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
14957cd0
A
1623 shift();
1624 token = OPENBRACE;
1625 break;
1626 case CharacterCloseBrace:
93a37866
A
1627 tokenData->line = lineNumber();
1628 tokenData->offset = currentOffset();
1629 tokenData->lineStartOffset = currentLineStartOffset();
1630 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
14957cd0
A
1631 shift();
1632 token = CLOSEBRACE;
1633 break;
1634 case CharacterDot:
1635 shift();
1636 if (!isASCIIDigit(m_current)) {
81345200
A
1637 if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
1638 shift();
1639 shift();
1640 token = DOTDOTDOT;
1641 break;
1642 }
14957cd0
A
1643 token = DOT;
1644 break;
1645 }
1646 goto inNumberAfterDecimalPoint;
1647 case CharacterZero:
1648 shift();
81345200
A
1649 if ((m_current | 0x20) == 'x') {
1650 if (!isASCIIHexDigit(peek(1))) {
1651 m_lexErrorMessage = "No hexadecimal digits after '0x'";
1652 token = INVALID_HEX_NUMBER_ERRORTOK;
1653 goto returnError;
1654 }
14957cd0 1655 parseHex(tokenData->doubleValue);
81345200
A
1656 if (isIdentStart(m_current)) {
1657 m_lexErrorMessage = "No space between hexadecimal literal and identifier";
1658 token = INVALID_HEX_NUMBER_ERRORTOK;
1659 goto returnError;
1660 }
14957cd0 1661 token = NUMBER;
81345200
A
1662 m_buffer8.resize(0);
1663 break;
1664 }
1665
1666 record8('0');
1667 if (isASCIIOctalDigit(m_current)) {
1668 if (parseOctal(tokenData->doubleValue)) {
1669 if (strictMode) {
1670 m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
1671 token = INVALID_OCTAL_NUMBER_ERRORTOK;
1672 goto returnError;
9dae56ea 1673 }
81345200 1674 token = NUMBER;
ba379fdc 1675 }
ba379fdc 1676 }
81345200 1677 FALLTHROUGH;
14957cd0
A
1678 case CharacterNumber:
1679 if (LIKELY(token != NUMBER)) {
1680 if (!parseDecimal(tokenData->doubleValue)) {
1681 if (m_current == '.') {
1682 shift();
1683inNumberAfterDecimalPoint:
1684 parseNumberAfterDecimalPoint();
1685 }
6fe7ccc8
A
1686 if ((m_current | 0x20) == 'e') {
1687 if (!parseNumberAfterExponentIndicator()) {
1688 m_lexErrorMessage = "Non-number found after exponent indicator";
93a37866 1689 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
14957cd0 1690 goto returnError;
6fe7ccc8
A
1691 }
1692 }
1693 size_t parsedLength;
1694 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
14957cd0
A
1695 }
1696 token = NUMBER;
1697 }
ba379fdc 1698
14957cd0 1699 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
6fe7ccc8
A
1700 if (UNLIKELY(isIdentStart(m_current))) {
1701 m_lexErrorMessage = "At least one digit must occur after a decimal point";
93a37866 1702 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
ba379fdc 1703 goto returnError;
6fe7ccc8 1704 }
14957cd0 1705 m_buffer8.resize(0);
14957cd0
A
1706 break;
1707 case CharacterQuote:
6fe7ccc8 1708 if (lexerFlags & LexerFlagsDontBuildStrings) {
93a37866
A
1709 StringParseResult result = parseString<false>(tokenData, strictMode);
1710 if (UNLIKELY(result != StringParsedSuccessfully)) {
1711 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
14957cd0 1712 goto returnError;
93a37866 1713 }
14957cd0 1714 } else {
93a37866
A
1715 StringParseResult result = parseString<true>(tokenData, strictMode);
1716 if (UNLIKELY(result != StringParsedSuccessfully)) {
1717 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
14957cd0 1718 goto returnError;
93a37866 1719 }
ba379fdc 1720 }
14957cd0 1721 shift();
14957cd0
A
1722 token = STRING;
1723 break;
1724 case CharacterIdentifierStart:
1725 ASSERT(isIdentStart(m_current));
81345200 1726 FALLTHROUGH;
14957cd0 1727 case CharacterBackSlash:
81345200 1728 parseIdent:
6fe7ccc8
A
1729 if (lexerFlags & LexexFlagsDontBuildKeywords)
1730 token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
14957cd0 1731 else
6fe7ccc8 1732 token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
14957cd0
A
1733 break;
1734 case CharacterLineTerminator:
1735 ASSERT(isLineTerminator(m_current));
ba379fdc 1736 shiftLineTerminator();
14957cd0
A
1737 m_atLineStart = true;
1738 m_terminator = true;
93a37866 1739 m_lineStart = m_code;
14957cd0 1740 goto start;
81345200
A
1741 case CharacterPrivateIdentifierStart:
1742 if (m_parsingBuiltinFunction)
1743 goto parseIdent;
1744
1745 FALLTHROUGH;
14957cd0 1746 case CharacterInvalid:
6fe7ccc8 1747 m_lexErrorMessage = invalidCharacterMessage();
93a37866 1748 token = ERRORTOK;
ba379fdc 1749 goto returnError;
14957cd0 1750 default:
93a37866 1751 RELEASE_ASSERT_NOT_REACHED();
6fe7ccc8 1752 m_lexErrorMessage = "Internal Error";
93a37866 1753 token = ERRORTOK;
ba379fdc 1754 goto returnError;
ba379fdc 1755 }
9dae56ea 1756
14957cd0
A
1757 m_atLineStart = false;
1758 goto returnToken;
9dae56ea 1759
ba379fdc
A
1760inSingleLineComment:
1761 while (!isLineTerminator(m_current)) {
6fe7ccc8 1762 if (atEnd())
14957cd0
A
1763 return EOFTOK;
1764 shift();
9dae56ea 1765 }
ba379fdc
A
1766 shiftLineTerminator();
1767 m_atLineStart = true;
1768 m_terminator = true;
93a37866 1769 m_lineStart = m_code;
14957cd0
A
1770 if (!lastTokenWasRestrKeyword())
1771 goto start;
ba379fdc 1772
14957cd0 1773 token = SEMICOLON;
ba379fdc
A
1774 // Fall through into returnToken.
1775
14957cd0 1776returnToken:
93a37866
A
1777 tokenLocation->line = m_lineNumber;
1778 tokenLocation->endOffset = currentOffset();
1779 tokenLocation->lineStartOffset = currentLineStartOffset();
1780 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
81345200 1781 tokenRecord->m_endPosition = currentPosition();
ba379fdc
A
1782 m_lastToken = token;
1783 return token;
9dae56ea 1784
ba379fdc
A
1785returnError:
1786 m_error = true;
93a37866
A
1787 tokenLocation->line = m_lineNumber;
1788 tokenLocation->endOffset = currentOffset();
1789 tokenLocation->lineStartOffset = currentLineStartOffset();
1790 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
81345200 1791 tokenRecord->m_endPosition = currentPosition();
93a37866
A
1792 RELEASE_ASSERT(token & ErrorTokenFlag);
1793 return token;
1794}
1795
1796template <typename T>
1797static inline void orCharacter(UChar&, UChar);
1798
1799template <>
1800inline void orCharacter<LChar>(UChar&, UChar) { }
1801
1802template <>
1803inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
1804{
1805 orAccumulator |= character;
9dae56ea
A
1806}
1807
6fe7ccc8
A
1808template <typename T>
1809bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
9dae56ea 1810{
ba379fdc
A
1811 ASSERT(m_buffer16.isEmpty());
1812
9dae56ea
A
1813 bool lastWasEscape = false;
1814 bool inBrackets = false;
93a37866 1815 UChar charactersOredTogether = 0;
9dae56ea 1816
f9bf01c6
A
1817 if (patternPrefix) {
1818 ASSERT(!isLineTerminator(patternPrefix));
1819 ASSERT(patternPrefix != '/');
1820 ASSERT(patternPrefix != '[');
1821 record16(patternPrefix);
1822 }
1823
ba379fdc 1824 while (true) {
6fe7ccc8 1825 if (isLineTerminator(m_current) || atEnd()) {
ba379fdc 1826 m_buffer16.resize(0);
f9bf01c6 1827 return false;
9dae56ea 1828 }
f9bf01c6 1829
6fe7ccc8
A
1830 T prev = m_current;
1831
14957cd0 1832 shift();
f9bf01c6 1833
6fe7ccc8 1834 if (prev == '/' && !lastWasEscape && !inBrackets)
f9bf01c6
A
1835 break;
1836
6fe7ccc8 1837 record16(prev);
93a37866 1838 orCharacter<T>(charactersOredTogether, prev);
f9bf01c6
A
1839
1840 if (lastWasEscape) {
1841 lastWasEscape = false;
1842 continue;
1843 }
1844
6fe7ccc8 1845 switch (prev) {
f9bf01c6
A
1846 case '[':
1847 inBrackets = true;
1848 break;
1849 case ']':
1850 inBrackets = false;
1851 break;
1852 case '\\':
1853 lastWasEscape = true;
1854 break;
1855 }
9dae56ea
A
1856 }
1857
93a37866
A
1858 pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
1859
f9bf01c6 1860 m_buffer16.resize(0);
93a37866 1861 charactersOredTogether = 0;
f9bf01c6 1862
9dae56ea
A
1863 while (isIdentPart(m_current)) {
1864 record16(m_current);
93a37866 1865 orCharacter<T>(charactersOredTogether, m_current);
14957cd0 1866 shift();
9dae56ea 1867 }
f9bf01c6 1868
93a37866 1869 flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
ba379fdc 1870 m_buffer16.resize(0);
9dae56ea
A
1871
1872 return true;
1873}
1874
6fe7ccc8
A
1875template <typename T>
1876bool Lexer<T>::skipRegExp()
f9bf01c6
A
1877{
1878 bool lastWasEscape = false;
1879 bool inBrackets = false;
1880
1881 while (true) {
6fe7ccc8 1882 if (isLineTerminator(m_current) || atEnd())
f9bf01c6
A
1883 return false;
1884
6fe7ccc8
A
1885 T prev = m_current;
1886
14957cd0 1887 shift();
f9bf01c6 1888
6fe7ccc8 1889 if (prev == '/' && !lastWasEscape && !inBrackets)
f9bf01c6
A
1890 break;
1891
1892 if (lastWasEscape) {
1893 lastWasEscape = false;
1894 continue;
1895 }
1896
6fe7ccc8 1897 switch (prev) {
f9bf01c6
A
1898 case '[':
1899 inBrackets = true;
1900 break;
1901 case ']':
1902 inBrackets = false;
1903 break;
1904 case '\\':
1905 lastWasEscape = true;
1906 break;
1907 }
1908 }
1909
1910 while (isIdentPart(m_current))
14957cd0 1911 shift();
f9bf01c6
A
1912
1913 return true;
1914}
1915
6fe7ccc8
A
1916template <typename T>
1917void Lexer<T>::clear()
9dae56ea 1918{
f9bf01c6 1919 m_arena = 0;
9dae56ea 1920
6fe7ccc8 1921 Vector<LChar> newBuffer8;
9dae56ea
A
1922 m_buffer8.swap(newBuffer8);
1923
1924 Vector<UChar> newBuffer16;
9dae56ea
A
1925 m_buffer16.swap(newBuffer16);
1926
1927 m_isReparsing = false;
ba379fdc
A
1928}
1929
6fe7ccc8
A
1930// Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
1931template class Lexer<LChar>;
1932template class Lexer<UChar>;
1933
9dae56ea 1934} // namespace JSC