]> git.saurik.com Git - apple/javascriptcore.git/blob - parser/Lexer.cpp
JavaScriptCore-1097.13.tar.gz
[apple/javascriptcore.git] / parser / Lexer.cpp
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25 #include "config.h"
26 #include "Lexer.h"
27
28 #include "JSFunction.h"
29
30 #include "JSGlobalObjectFunctions.h"
31 #include "Identifier.h"
32 #include "NodeInfo.h"
33 #include "Nodes.h"
34 #include <wtf/dtoa.h>
35 #include <ctype.h>
36 #include <limits.h>
37 #include <string.h>
38 #include <wtf/Assertions.h>
39
40 using namespace WTF;
41 using namespace Unicode;
42
43 #include "KeywordLookup.h"
44 #include "Lexer.lut.h"
45 #include "Parser.h"
46
47 namespace JSC {
48
49 Keywords::Keywords(JSGlobalData* globalData)
50 : m_globalData(globalData)
51 , m_keywordTable(JSC::mainTable)
52 {
53 }
54
// Classification assigned to each Latin-1 character by typesOfLatin1Characters.
enum CharacterType {
    // --- Types for the main switch ---

    // Identifier-capable classes. Their numeric order is load-bearing:
    // isIdentStart() tests for equality with CharacterIdentifierStart and
    // isIdentPart() tests "<= CharacterNumber", so these three must remain
    // the first three enumerators, in this order.
    CharacterIdentifierStart = 0,
    CharacterZero = 1,
    CharacterNumber = 2,

    // Characters with no valid meaning at the start of a token.
    CharacterInvalid,

    // Line terminators, punctuators and delimiters.
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // Operator lead characters.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // --- Other types (only one so far) ---
    CharacterWhiteSpace,
};
97
98 // 256 Latin-1 codes
99 static const unsigned short typesOfLatin1Characters[256] = {
100 /* 0 - Null */ CharacterInvalid,
101 /* 1 - Start of Heading */ CharacterInvalid,
102 /* 2 - Start of Text */ CharacterInvalid,
103 /* 3 - End of Text */ CharacterInvalid,
104 /* 4 - End of Transm. */ CharacterInvalid,
105 /* 5 - Enquiry */ CharacterInvalid,
106 /* 6 - Acknowledgment */ CharacterInvalid,
107 /* 7 - Bell */ CharacterInvalid,
108 /* 8 - Back Space */ CharacterInvalid,
109 /* 9 - Horizontal Tab */ CharacterWhiteSpace,
110 /* 10 - Line Feed */ CharacterLineTerminator,
111 /* 11 - Vertical Tab */ CharacterWhiteSpace,
112 /* 12 - Form Feed */ CharacterWhiteSpace,
113 /* 13 - Carriage Return */ CharacterLineTerminator,
114 /* 14 - Shift Out */ CharacterInvalid,
115 /* 15 - Shift In */ CharacterInvalid,
116 /* 16 - Data Line Escape */ CharacterInvalid,
117 /* 17 - Device Control 1 */ CharacterInvalid,
118 /* 18 - Device Control 2 */ CharacterInvalid,
119 /* 19 - Device Control 3 */ CharacterInvalid,
120 /* 20 - Device Control 4 */ CharacterInvalid,
121 /* 21 - Negative Ack. */ CharacterInvalid,
122 /* 22 - Synchronous Idle */ CharacterInvalid,
123 /* 23 - End of Transmit */ CharacterInvalid,
124 /* 24 - Cancel */ CharacterInvalid,
125 /* 25 - End of Medium */ CharacterInvalid,
126 /* 26 - Substitute */ CharacterInvalid,
127 /* 27 - Escape */ CharacterInvalid,
128 /* 28 - File Separator */ CharacterInvalid,
129 /* 29 - Group Separator */ CharacterInvalid,
130 /* 30 - Record Separator */ CharacterInvalid,
131 /* 31 - Unit Separator */ CharacterInvalid,
132 /* 32 - Space */ CharacterWhiteSpace,
133 /* 33 - ! */ CharacterExclamationMark,
134 /* 34 - " */ CharacterQuote,
135 /* 35 - # */ CharacterInvalid,
136 /* 36 - $ */ CharacterIdentifierStart,
137 /* 37 - % */ CharacterModulo,
138 /* 38 - & */ CharacterAnd,
139 /* 39 - ' */ CharacterQuote,
140 /* 40 - ( */ CharacterOpenParen,
141 /* 41 - ) */ CharacterCloseParen,
142 /* 42 - * */ CharacterMultiply,
143 /* 43 - + */ CharacterAdd,
144 /* 44 - , */ CharacterComma,
145 /* 45 - - */ CharacterSub,
146 /* 46 - . */ CharacterDot,
147 /* 47 - / */ CharacterSlash,
148 /* 48 - 0 */ CharacterZero,
149 /* 49 - 1 */ CharacterNumber,
150 /* 50 - 2 */ CharacterNumber,
151 /* 51 - 3 */ CharacterNumber,
152 /* 52 - 4 */ CharacterNumber,
153 /* 53 - 5 */ CharacterNumber,
154 /* 54 - 6 */ CharacterNumber,
155 /* 55 - 7 */ CharacterNumber,
156 /* 56 - 8 */ CharacterNumber,
157 /* 57 - 9 */ CharacterNumber,
158 /* 58 - : */ CharacterColon,
159 /* 59 - ; */ CharacterSemicolon,
160 /* 60 - < */ CharacterLess,
161 /* 61 - = */ CharacterEqual,
162 /* 62 - > */ CharacterGreater,
163 /* 63 - ? */ CharacterQuestion,
164 /* 64 - @ */ CharacterInvalid,
165 /* 65 - A */ CharacterIdentifierStart,
166 /* 66 - B */ CharacterIdentifierStart,
167 /* 67 - C */ CharacterIdentifierStart,
168 /* 68 - D */ CharacterIdentifierStart,
169 /* 69 - E */ CharacterIdentifierStart,
170 /* 70 - F */ CharacterIdentifierStart,
171 /* 71 - G */ CharacterIdentifierStart,
172 /* 72 - H */ CharacterIdentifierStart,
173 /* 73 - I */ CharacterIdentifierStart,
174 /* 74 - J */ CharacterIdentifierStart,
175 /* 75 - K */ CharacterIdentifierStart,
176 /* 76 - L */ CharacterIdentifierStart,
177 /* 77 - M */ CharacterIdentifierStart,
178 /* 78 - N */ CharacterIdentifierStart,
179 /* 79 - O */ CharacterIdentifierStart,
180 /* 80 - P */ CharacterIdentifierStart,
181 /* 81 - Q */ CharacterIdentifierStart,
182 /* 82 - R */ CharacterIdentifierStart,
183 /* 83 - S */ CharacterIdentifierStart,
184 /* 84 - T */ CharacterIdentifierStart,
185 /* 85 - U */ CharacterIdentifierStart,
186 /* 86 - V */ CharacterIdentifierStart,
187 /* 87 - W */ CharacterIdentifierStart,
188 /* 88 - X */ CharacterIdentifierStart,
189 /* 89 - Y */ CharacterIdentifierStart,
190 /* 90 - Z */ CharacterIdentifierStart,
191 /* 91 - [ */ CharacterOpenBracket,
192 /* 92 - \ */ CharacterBackSlash,
193 /* 93 - ] */ CharacterCloseBracket,
194 /* 94 - ^ */ CharacterXor,
195 /* 95 - _ */ CharacterIdentifierStart,
196 /* 96 - ` */ CharacterInvalid,
197 /* 97 - a */ CharacterIdentifierStart,
198 /* 98 - b */ CharacterIdentifierStart,
199 /* 99 - c */ CharacterIdentifierStart,
200 /* 100 - d */ CharacterIdentifierStart,
201 /* 101 - e */ CharacterIdentifierStart,
202 /* 102 - f */ CharacterIdentifierStart,
203 /* 103 - g */ CharacterIdentifierStart,
204 /* 104 - h */ CharacterIdentifierStart,
205 /* 105 - i */ CharacterIdentifierStart,
206 /* 106 - j */ CharacterIdentifierStart,
207 /* 107 - k */ CharacterIdentifierStart,
208 /* 108 - l */ CharacterIdentifierStart,
209 /* 109 - m */ CharacterIdentifierStart,
210 /* 110 - n */ CharacterIdentifierStart,
211 /* 111 - o */ CharacterIdentifierStart,
212 /* 112 - p */ CharacterIdentifierStart,
213 /* 113 - q */ CharacterIdentifierStart,
214 /* 114 - r */ CharacterIdentifierStart,
215 /* 115 - s */ CharacterIdentifierStart,
216 /* 116 - t */ CharacterIdentifierStart,
217 /* 117 - u */ CharacterIdentifierStart,
218 /* 118 - v */ CharacterIdentifierStart,
219 /* 119 - w */ CharacterIdentifierStart,
220 /* 120 - x */ CharacterIdentifierStart,
221 /* 121 - y */ CharacterIdentifierStart,
222 /* 122 - z */ CharacterIdentifierStart,
223 /* 123 - { */ CharacterOpenBrace,
224 /* 124 - | */ CharacterOr,
225 /* 125 - } */ CharacterCloseBrace,
226 /* 126 - ~ */ CharacterTilde,
227 /* 127 - Delete */ CharacterInvalid,
228 /* 128 - Cc category */ CharacterInvalid,
229 /* 129 - Cc category */ CharacterInvalid,
230 /* 130 - Cc category */ CharacterInvalid,
231 /* 131 - Cc category */ CharacterInvalid,
232 /* 132 - Cc category */ CharacterInvalid,
233 /* 133 - Cc category */ CharacterInvalid,
234 /* 134 - Cc category */ CharacterInvalid,
235 /* 135 - Cc category */ CharacterInvalid,
236 /* 136 - Cc category */ CharacterInvalid,
237 /* 137 - Cc category */ CharacterInvalid,
238 /* 138 - Cc category */ CharacterInvalid,
239 /* 139 - Cc category */ CharacterInvalid,
240 /* 140 - Cc category */ CharacterInvalid,
241 /* 141 - Cc category */ CharacterInvalid,
242 /* 142 - Cc category */ CharacterInvalid,
243 /* 143 - Cc category */ CharacterInvalid,
244 /* 144 - Cc category */ CharacterInvalid,
245 /* 145 - Cc category */ CharacterInvalid,
246 /* 146 - Cc category */ CharacterInvalid,
247 /* 147 - Cc category */ CharacterInvalid,
248 /* 148 - Cc category */ CharacterInvalid,
249 /* 149 - Cc category */ CharacterInvalid,
250 /* 150 - Cc category */ CharacterInvalid,
251 /* 151 - Cc category */ CharacterInvalid,
252 /* 152 - Cc category */ CharacterInvalid,
253 /* 153 - Cc category */ CharacterInvalid,
254 /* 154 - Cc category */ CharacterInvalid,
255 /* 155 - Cc category */ CharacterInvalid,
256 /* 156 - Cc category */ CharacterInvalid,
257 /* 157 - Cc category */ CharacterInvalid,
258 /* 158 - Cc category */ CharacterInvalid,
259 /* 159 - Cc category */ CharacterInvalid,
260 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
261 /* 161 - Po category */ CharacterInvalid,
262 /* 162 - Sc category */ CharacterInvalid,
263 /* 163 - Sc category */ CharacterInvalid,
264 /* 164 - Sc category */ CharacterInvalid,
265 /* 165 - Sc category */ CharacterInvalid,
266 /* 166 - So category */ CharacterInvalid,
267 /* 167 - So category */ CharacterInvalid,
268 /* 168 - Sk category */ CharacterInvalid,
269 /* 169 - So category */ CharacterInvalid,
270 /* 170 - Ll category */ CharacterIdentifierStart,
271 /* 171 - Pi category */ CharacterInvalid,
272 /* 172 - Sm category */ CharacterInvalid,
273 /* 173 - Cf category */ CharacterInvalid,
274 /* 174 - So category */ CharacterInvalid,
275 /* 175 - Sk category */ CharacterInvalid,
276 /* 176 - So category */ CharacterInvalid,
277 /* 177 - Sm category */ CharacterInvalid,
278 /* 178 - No category */ CharacterInvalid,
279 /* 179 - No category */ CharacterInvalid,
280 /* 180 - Sk category */ CharacterInvalid,
281 /* 181 - Ll category */ CharacterIdentifierStart,
282 /* 182 - So category */ CharacterInvalid,
283 /* 183 - Po category */ CharacterInvalid,
284 /* 184 - Sk category */ CharacterInvalid,
285 /* 185 - No category */ CharacterInvalid,
286 /* 186 - Ll category */ CharacterIdentifierStart,
287 /* 187 - Pf category */ CharacterInvalid,
288 /* 188 - No category */ CharacterInvalid,
289 /* 189 - No category */ CharacterInvalid,
290 /* 190 - No category */ CharacterInvalid,
291 /* 191 - Po category */ CharacterInvalid,
292 /* 192 - Lu category */ CharacterIdentifierStart,
293 /* 193 - Lu category */ CharacterIdentifierStart,
294 /* 194 - Lu category */ CharacterIdentifierStart,
295 /* 195 - Lu category */ CharacterIdentifierStart,
296 /* 196 - Lu category */ CharacterIdentifierStart,
297 /* 197 - Lu category */ CharacterIdentifierStart,
298 /* 198 - Lu category */ CharacterIdentifierStart,
299 /* 199 - Lu category */ CharacterIdentifierStart,
300 /* 200 - Lu category */ CharacterIdentifierStart,
301 /* 201 - Lu category */ CharacterIdentifierStart,
302 /* 202 - Lu category */ CharacterIdentifierStart,
303 /* 203 - Lu category */ CharacterIdentifierStart,
304 /* 204 - Lu category */ CharacterIdentifierStart,
305 /* 205 - Lu category */ CharacterIdentifierStart,
306 /* 206 - Lu category */ CharacterIdentifierStart,
307 /* 207 - Lu category */ CharacterIdentifierStart,
308 /* 208 - Lu category */ CharacterIdentifierStart,
309 /* 209 - Lu category */ CharacterIdentifierStart,
310 /* 210 - Lu category */ CharacterIdentifierStart,
311 /* 211 - Lu category */ CharacterIdentifierStart,
312 /* 212 - Lu category */ CharacterIdentifierStart,
313 /* 213 - Lu category */ CharacterIdentifierStart,
314 /* 214 - Lu category */ CharacterIdentifierStart,
315 /* 215 - Sm category */ CharacterInvalid,
316 /* 216 - Lu category */ CharacterIdentifierStart,
317 /* 217 - Lu category */ CharacterIdentifierStart,
318 /* 218 - Lu category */ CharacterIdentifierStart,
319 /* 219 - Lu category */ CharacterIdentifierStart,
320 /* 220 - Lu category */ CharacterIdentifierStart,
321 /* 221 - Lu category */ CharacterIdentifierStart,
322 /* 222 - Lu category */ CharacterIdentifierStart,
323 /* 223 - Ll category */ CharacterIdentifierStart,
324 /* 224 - Ll category */ CharacterIdentifierStart,
325 /* 225 - Ll category */ CharacterIdentifierStart,
326 /* 226 - Ll category */ CharacterIdentifierStart,
327 /* 227 - Ll category */ CharacterIdentifierStart,
328 /* 228 - Ll category */ CharacterIdentifierStart,
329 /* 229 - Ll category */ CharacterIdentifierStart,
330 /* 230 - Ll category */ CharacterIdentifierStart,
331 /* 231 - Ll category */ CharacterIdentifierStart,
332 /* 232 - Ll category */ CharacterIdentifierStart,
333 /* 233 - Ll category */ CharacterIdentifierStart,
334 /* 234 - Ll category */ CharacterIdentifierStart,
335 /* 235 - Ll category */ CharacterIdentifierStart,
336 /* 236 - Ll category */ CharacterIdentifierStart,
337 /* 237 - Ll category */ CharacterIdentifierStart,
338 /* 238 - Ll category */ CharacterIdentifierStart,
339 /* 239 - Ll category */ CharacterIdentifierStart,
340 /* 240 - Ll category */ CharacterIdentifierStart,
341 /* 241 - Ll category */ CharacterIdentifierStart,
342 /* 242 - Ll category */ CharacterIdentifierStart,
343 /* 243 - Ll category */ CharacterIdentifierStart,
344 /* 244 - Ll category */ CharacterIdentifierStart,
345 /* 245 - Ll category */ CharacterIdentifierStart,
346 /* 246 - Ll category */ CharacterIdentifierStart,
347 /* 247 - Sm category */ CharacterInvalid,
348 /* 248 - Ll category */ CharacterIdentifierStart,
349 /* 249 - Ll category */ CharacterIdentifierStart,
350 /* 250 - Ll category */ CharacterIdentifierStart,
351 /* 251 - Ll category */ CharacterIdentifierStart,
352 /* 252 - Ll category */ CharacterIdentifierStart,
353 /* 253 - Ll category */ CharacterIdentifierStart,
354 /* 254 - Ll category */ CharacterIdentifierStart,
355 /* 255 - Ll category */ CharacterIdentifierStart
356 };
357
358 template <typename T>
359 Lexer<T>::Lexer(JSGlobalData* globalData)
360 : m_isReparsing(false)
361 , m_globalData(globalData)
362 {
363 }
364
365 template <typename T>
366 Lexer<T>::~Lexer()
367 {
368 }
369
370 template <typename T>
371 UString Lexer<T>::invalidCharacterMessage() const
372 {
373 switch (m_current) {
374 case 0:
375 return "Invalid character: '\\0'";
376 case 10:
377 return "Invalid character: '\\n'";
378 case 11:
379 return "Invalid character: '\\v'";
380 case 13:
381 return "Invalid character: '\\r'";
382 case 35:
383 return "Invalid character: '#'";
384 case 64:
385 return "Invalid character: '@'";
386 case 96:
387 return "Invalid character: '`'";
388 default:
389 return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current)).impl();
390 }
391 }
392
393 template <typename T>
394 ALWAYS_INLINE const T* Lexer<T>::currentCharacter() const
395 {
396 ASSERT(m_code <= m_codeEnd);
397 return m_code;
398 }
399
400 template <typename T>
401 void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
402 {
403 m_arena = &arena->identifierArena();
404
405 m_lineNumber = source.firstLine();
406 m_lastToken = -1;
407
408 const StringImpl* sourceString = source.provider()->data();
409
410 if (sourceString)
411 setCodeStart(sourceString);
412 else
413 m_codeStart = 0;
414
415 m_source = &source;
416 m_code = m_codeStart + source.startOffset();
417 m_codeEnd = m_codeStart + source.endOffset();
418 m_error = false;
419 m_atLineStart = true;
420 m_lexErrorMessage = UString();
421
422 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
423 m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
424
425 if (LIKELY(m_code < m_codeEnd))
426 m_current = *m_code;
427 else
428 m_current = 0;
429 ASSERT(currentOffset() == source.startOffset());
430 }
431
432 template <typename T>
433 template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
434 {
435 m_code += shiftAmount;
436 m_current = *m_code;
437 }
438
439 template <typename T>
440 ALWAYS_INLINE void Lexer<T>::shift()
441 {
442 // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
443 m_current = 0;
444 ++m_code;
445 if (LIKELY(m_code < m_codeEnd))
446 m_current = *m_code;
447 }
448
449 template <typename T>
450 ALWAYS_INLINE bool Lexer<T>::atEnd() const
451 {
452 ASSERT(!m_current || m_code < m_codeEnd);
453 return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
454 }
455
456 template <typename T>
457 ALWAYS_INLINE T Lexer<T>::peek(int offset) const
458 {
459 ASSERT(offset > 0 && offset < 5);
460 const T* code = m_code + offset;
461 return (code < m_codeEnd) ? *code : 0;
462 }
463
464 template <typename T>
465 int Lexer<T>::parseFourDigitUnicodeHex()
466 {
467 T char1 = peek(1);
468 T char2 = peek(2);
469 T char3 = peek(3);
470
471 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
472 return -1;
473
474 int result = convertUnicode(m_current, char1, char2, char3);
475 shift();
476 shift();
477 shift();
478 shift();
479 return result;
480 }
481
482 template <typename T>
483 void Lexer<T>::shiftLineTerminator()
484 {
485 ASSERT(isLineTerminator(m_current));
486
487 T prev = m_current;
488 shift();
489
490 // Allow both CRLF and LFCR.
491 if (prev + m_current == '\n' + '\r')
492 shift();
493
494 ++m_lineNumber;
495 }
496
497 template <typename T>
498 ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
499 {
500 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
501 }
502
503 static NEVER_INLINE bool isNonLatin1IdentStart(int c)
504 {
505 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
506 }
507
508 static ALWAYS_INLINE bool isLatin1(LChar)
509 {
510 return true;
511 }
512
513 static ALWAYS_INLINE bool isLatin1(UChar c)
514 {
515 return c < 256;
516 }
517
518 static inline bool isIdentStart(LChar c)
519 {
520 return typesOfLatin1Characters[c] == CharacterIdentifierStart;
521 }
522
523 static inline bool isIdentStart(UChar c)
524 {
525 return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
526 }
527
528 static NEVER_INLINE bool isNonLatin1IdentPart(int c)
529 {
530 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
531 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)) || c == 0x200C || c == 0x200D;
532 }
533
534 static ALWAYS_INLINE bool isIdentPart(LChar c)
535 {
536 // Character types are divided into two groups depending on whether they can be part of an
537 // identifier or not. Those whose type value is less or equal than CharacterNumber can be
538 // part of an identifier. (See the CharacterType definition for more details.)
539 return typesOfLatin1Characters[c] <= CharacterNumber;
540 }
541
542 static ALWAYS_INLINE bool isIdentPart(UChar c)
543 {
544 return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
545 }
546
// Maps the character following a backslash to the character it denotes for
// the single-character escapes (\b \t \n \v \f \r \\ \' \").
// Returns 0 when |c| is not one of those escape letters.
static inline int singleEscape(int c)
{
    // Quote and backslash escapes stand for themselves.
    if (c == '\\' || c == '\'' || c == '"')
        return c;

    // Control-character escapes.
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;

    return 0;
}
572
573 template <typename T>
574 inline void Lexer<T>::record8(int c)
575 {
576 ASSERT(c >= 0);
577 ASSERT(c <= 0xFF);
578 m_buffer8.append(static_cast<LChar>(c));
579 }
580
581 template <typename T>
582 inline void assertCharIsIn8BitRange(T c)
583 {
584 UNUSED_PARAM(c);
585 ASSERT(c >= 0);
586 ASSERT(c <= 0xFF);
587 }
588
589 template <>
590 inline void assertCharIsIn8BitRange(UChar c)
591 {
592 UNUSED_PARAM(c);
593 ASSERT(c <= 0xFF);
594 }
595
596 template <>
597 inline void assertCharIsIn8BitRange(LChar)
598 {
599 }
600
601 template <typename T>
602 inline void Lexer<T>::append8(const T* p, size_t length)
603 {
604 size_t currentSize = m_buffer8.size();
605 m_buffer8.grow(currentSize + length);
606 LChar* rawBuffer = m_buffer8.data() + currentSize;
607
608 for (size_t i = 0; i < length; i++) {
609 T c = p[i];
610 assertCharIsIn8BitRange(c);
611 rawBuffer[i] = c;
612 }
613 }
614
615 template <typename T>
616 inline void Lexer<T>::append16(const LChar* p, size_t length)
617 {
618 size_t currentSize = m_buffer16.size();
619 m_buffer16.grow(currentSize + length);
620 UChar* rawBuffer = m_buffer16.data() + currentSize;
621
622 for (size_t i = 0; i < length; i++)
623 rawBuffer[i] = p[i];
624 }
625
626 template <typename T>
627 inline void Lexer<T>::record16(T c)
628 {
629 m_buffer16.append(c);
630 }
631
632 template <typename T>
633 inline void Lexer<T>::record16(int c)
634 {
635 ASSERT(c >= 0);
636 ASSERT(c <= static_cast<int>(USHRT_MAX));
637 m_buffer16.append(static_cast<UChar>(c));
638 }
639
640 template <>
641 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
642 {
643 const ptrdiff_t remaining = m_codeEnd - m_code;
644 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
645 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
646 if (keyword != IDENT) {
647 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
648 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
649 }
650 }
651
652 const LChar* identifierStart = currentCharacter();
653
654 while (isIdentPart(m_current))
655 shift();
656
657 if (UNLIKELY(m_current == '\\')) {
658 setOffsetFromCharOffset(identifierStart);
659 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
660 }
661
662 const Identifier* ident = 0;
663
664 if (shouldCreateIdentifier) {
665 int identifierLength = currentCharacter() - identifierStart;
666 ident = makeIdentifier(identifierStart, identifierLength);
667
668 tokenData->ident = ident;
669 } else
670 tokenData->ident = 0;
671
672 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
673 ASSERT(shouldCreateIdentifier);
674 if (remaining < maxTokenLength) {
675 const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
676 ASSERT((remaining < maxTokenLength) || !entry);
677 if (!entry)
678 return IDENT;
679 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
680 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
681 }
682 return IDENT;
683 }
684
685 return IDENT;
686 }
687
688 template <>
689 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
690 {
691 const ptrdiff_t remaining = m_codeEnd - m_code;
692 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
693 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
694 if (keyword != IDENT) {
695 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
696 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
697 }
698 }
699
700 const UChar* identifierStart = currentCharacter();
701
702 UChar orAllChars = 0;
703
704 while (isIdentPart(m_current)) {
705 orAllChars |= m_current;
706 shift();
707 }
708
709 if (UNLIKELY(m_current == '\\')) {
710 setOffsetFromCharOffset(identifierStart);
711 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
712 }
713
714 bool isAll8Bit = false;
715
716 if (!(orAllChars & ~0xff))
717 isAll8Bit = true;
718
719 const Identifier* ident = 0;
720
721 if (shouldCreateIdentifier) {
722 int identifierLength = currentCharacter() - identifierStart;
723 if (isAll8Bit)
724 ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
725 else
726 ident = makeIdentifier(identifierStart, identifierLength);
727
728 tokenData->ident = ident;
729 } else
730 tokenData->ident = 0;
731
732 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
733 ASSERT(shouldCreateIdentifier);
734 if (remaining < maxTokenLength) {
735 const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
736 ASSERT((remaining < maxTokenLength) || !entry);
737 if (!entry)
738 return IDENT;
739 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
740 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
741 }
742 return IDENT;
743 }
744
745 return IDENT;
746 }
747
748 template <typename T>
749 template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
750 {
751 const ptrdiff_t remaining = m_codeEnd - m_code;
752 const T* identifierStart = currentCharacter();
753 bool bufferRequired = false;
754
755 while (true) {
756 if (LIKELY(isIdentPart(m_current))) {
757 shift();
758 continue;
759 }
760 if (LIKELY(m_current != '\\'))
761 break;
762
763 // \uXXXX unicode characters.
764 bufferRequired = true;
765 if (identifierStart != currentCharacter())
766 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
767 shift();
768 if (UNLIKELY(m_current != 'u'))
769 return ERRORTOK;
770 shift();
771 int character = parseFourDigitUnicodeHex();
772 if (UNLIKELY(character == -1))
773 return ERRORTOK;
774 UChar ucharacter = static_cast<UChar>(character);
775 if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
776 return ERRORTOK;
777 if (shouldCreateIdentifier)
778 record16(ucharacter);
779 identifierStart = currentCharacter();
780 }
781
782 int identifierLength;
783 const Identifier* ident = 0;
784 if (shouldCreateIdentifier) {
785 if (!bufferRequired) {
786 identifierLength = currentCharacter() - identifierStart;
787 ident = makeIdentifier(identifierStart, identifierLength);
788 } else {
789 if (identifierStart != currentCharacter())
790 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
791 ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
792 }
793
794 tokenData->ident = ident;
795 } else
796 tokenData->ident = 0;
797
798 if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
799 ASSERT(shouldCreateIdentifier);
800 // Keywords must not be recognized if there was an \uXXXX in the identifier.
801 if (remaining < maxTokenLength) {
802 const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
803 ASSERT((remaining < maxTokenLength) || !entry);
804 if (!entry)
805 return IDENT;
806 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
807 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
808 }
809 return IDENT;
810 }
811
812 m_buffer16.resize(0);
813 return IDENT;
814 }
815
816 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
817 {
818 return character < 0xE;
819 }
820
821 static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
822 {
823 return character < 0xE || character > 0xFF;
824 }
825
826 template <typename T>
827 template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
828 {
829 int startingOffset = currentOffset();
830 int startingLineNumber = lineNumber();
831 T stringQuoteCharacter = m_current;
832 shift();
833
834 const T* stringStart = currentCharacter();
835
836 while (m_current != stringQuoteCharacter) {
837 if (UNLIKELY(m_current == '\\')) {
838 if (stringStart != currentCharacter() && shouldBuildStrings)
839 append8(stringStart, currentCharacter() - stringStart);
840 shift();
841
842 int escape = singleEscape(m_current);
843
844 // Most common escape sequences first
845 if (escape) {
846 if (shouldBuildStrings)
847 record8(escape);
848 shift();
849 } else if (UNLIKELY(isLineTerminator(m_current)))
850 shiftLineTerminator();
851 else if (m_current == 'x') {
852 shift();
853 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
854 m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
855 return false;
856 }
857 T prev = m_current;
858 shift();
859 if (shouldBuildStrings)
860 record8(convertHex(prev, m_current));
861 shift();
862 } else {
863 setOffset(startingOffset);
864 setLineNumber(startingLineNumber);
865 m_buffer8.resize(0);
866 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
867 }
868 stringStart = currentCharacter();
869 continue;
870 }
871
872 if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
873 setOffset(startingOffset);
874 setLineNumber(startingLineNumber);
875 m_buffer8.resize(0);
876 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
877 }
878
879 shift();
880 }
881
882 if (currentCharacter() != stringStart && shouldBuildStrings)
883 append8(stringStart, currentCharacter() - stringStart);
884 if (shouldBuildStrings) {
885 tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
886 m_buffer8.resize(0);
887 } else
888 tokenData->ident = 0;
889
890 return true;
891 }
892
893 template <typename T>
894 template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
895 {
896 T stringQuoteCharacter = m_current;
897 shift();
898
899 const T* stringStart = currentCharacter();
900
901 while (m_current != stringQuoteCharacter) {
902 if (UNLIKELY(m_current == '\\')) {
903 if (stringStart != currentCharacter() && shouldBuildStrings)
904 append16(stringStart, currentCharacter() - stringStart);
905 shift();
906
907 int escape = singleEscape(m_current);
908
909 // Most common escape sequences first
910 if (escape) {
911 if (shouldBuildStrings)
912 record16(escape);
913 shift();
914 } else if (UNLIKELY(isLineTerminator(m_current)))
915 shiftLineTerminator();
916 else if (m_current == 'x') {
917 shift();
918 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
919 m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
920 return false;
921 }
922 T prev = m_current;
923 shift();
924 if (shouldBuildStrings)
925 record16(convertHex(prev, m_current));
926 shift();
927 } else if (m_current == 'u') {
928 shift();
929 int character = parseFourDigitUnicodeHex();
930 if (character != -1) {
931 if (shouldBuildStrings)
932 record16(character);
933 } else if (m_current == stringQuoteCharacter) {
934 if (shouldBuildStrings)
935 record16('u');
936 } else {
937 m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
938 return false;
939 }
940 } else if (strictMode && isASCIIDigit(m_current)) {
941 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
942 int character1 = m_current;
943 shift();
944 if (character1 != '0' || isASCIIDigit(m_current)) {
945 m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
946 return false;
947 }
948 if (shouldBuildStrings)
949 record16(0);
950 } else if (!strictMode && isASCIIOctalDigit(m_current)) {
951 // Octal character sequences
952 T character1 = m_current;
953 shift();
954 if (isASCIIOctalDigit(m_current)) {
955 // Two octal characters
956 T character2 = m_current;
957 shift();
958 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
959 if (shouldBuildStrings)
960 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
961 shift();
962 } else {
963 if (shouldBuildStrings)
964 record16((character1 - '0') * 8 + character2 - '0');
965 }
966 } else {
967 if (shouldBuildStrings)
968 record16(character1 - '0');
969 }
970 } else if (!atEnd()) {
971 if (shouldBuildStrings)
972 record16(m_current);
973 shift();
974 } else {
975 m_lexErrorMessage = "Unterminated string constant";
976 return false;
977 }
978
979 stringStart = currentCharacter();
980 continue;
981 }
982 // Fast check for characters that require special handling.
983 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
984 // as possible, and lets through all common ASCII characters.
985 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
986 // New-line or end of input is not allowed
987 if (atEnd() || isLineTerminator(m_current)) {
988 m_lexErrorMessage = "Unexpected EOF";
989 return false;
990 }
991 // Anything else is just a normal character
992 }
993 shift();
994 }
995
996 if (currentCharacter() != stringStart && shouldBuildStrings)
997 append16(stringStart, currentCharacter() - stringStart);
998 if (shouldBuildStrings)
999 tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1000 else
1001 tokenData->ident = 0;
1002
1003 m_buffer16.resize(0);
1004 return true;
1005 }
1006
1007 template <typename T>
1008 ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
1009 {
1010 // Optimization: most hexadecimal values fit into 4 bytes.
1011 uint32_t hexValue = 0;
1012 int maximumDigits = 7;
1013
1014 // Shift out the 'x' prefix.
1015 shift();
1016
1017 do {
1018 hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1019 shift();
1020 --maximumDigits;
1021 } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
1022
1023 if (maximumDigits >= 0) {
1024 returnValue = hexValue;
1025 return;
1026 }
1027
1028 // No more place in the hexValue buffer.
1029 // The values are shifted out and placed into the m_buffer8 vector.
1030 for (int i = 0; i < 8; ++i) {
1031 int digit = hexValue >> 28;
1032 if (digit < 10)
1033 record8(digit + '0');
1034 else
1035 record8(digit - 10 + 'a');
1036 hexValue <<= 4;
1037 }
1038
1039 while (isASCIIHexDigit(m_current)) {
1040 record8(m_current);
1041 shift();
1042 }
1043
1044 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
1045 }
1046
1047 template <typename T>
1048 ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
1049 {
1050 // Optimization: most octal values fit into 4 bytes.
1051 uint32_t octalValue = 0;
1052 int maximumDigits = 9;
1053 // Temporary buffer for the digits. Makes easier
1054 // to reconstruct the input characters when needed.
1055 LChar digits[10];
1056
1057 do {
1058 octalValue = octalValue * 8 + (m_current - '0');
1059 digits[maximumDigits] = m_current;
1060 shift();
1061 --maximumDigits;
1062 } while (isASCIIOctalDigit(m_current) && maximumDigits >= 0);
1063
1064 if (!isASCIIDigit(m_current) && maximumDigits >= 0) {
1065 returnValue = octalValue;
1066 return true;
1067 }
1068
1069 for (int i = 9; i > maximumDigits; --i)
1070 record8(digits[i]);
1071
1072 while (isASCIIOctalDigit(m_current)) {
1073 record8(m_current);
1074 shift();
1075 }
1076
1077 if (isASCIIDigit(m_current))
1078 return false;
1079
1080 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
1081 return true;
1082 }
1083
1084 template <typename T>
1085 ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
1086 {
1087 // Optimization: most decimal values fit into 4 bytes.
1088 uint32_t decimalValue = 0;
1089
1090 // Since parseOctal may be executed before parseDecimal,
1091 // the m_buffer8 may hold ascii digits.
1092 if (!m_buffer8.size()) {
1093 int maximumDigits = 9;
1094 // Temporary buffer for the digits. Makes easier
1095 // to reconstruct the input characters when needed.
1096 LChar digits[10];
1097
1098 do {
1099 decimalValue = decimalValue * 10 + (m_current - '0');
1100 digits[maximumDigits] = m_current;
1101 shift();
1102 --maximumDigits;
1103 } while (isASCIIDigit(m_current) && maximumDigits >= 0);
1104
1105 if (maximumDigits >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
1106 returnValue = decimalValue;
1107 return true;
1108 }
1109
1110 for (int i = 9; i > maximumDigits; --i)
1111 record8(digits[i]);
1112 }
1113
1114 while (isASCIIDigit(m_current)) {
1115 record8(m_current);
1116 shift();
1117 }
1118
1119 return false;
1120 }
1121
1122 template <typename T>
1123 ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
1124 {
1125 record8('.');
1126 while (isASCIIDigit(m_current)) {
1127 record8(m_current);
1128 shift();
1129 }
1130 }
1131
1132 template <typename T>
1133 ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1134 {
1135 record8('e');
1136 shift();
1137 if (m_current == '+' || m_current == '-') {
1138 record8(m_current);
1139 shift();
1140 }
1141
1142 if (!isASCIIDigit(m_current))
1143 return false;
1144
1145 do {
1146 record8(m_current);
1147 shift();
1148 } while (isASCIIDigit(m_current));
1149 return true;
1150 }
1151
1152 template <typename T>
1153 ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1154 {
1155 while (true) {
1156 while (UNLIKELY(m_current == '*')) {
1157 shift();
1158 if (m_current == '/') {
1159 shift();
1160 return true;
1161 }
1162 }
1163
1164 if (atEnd())
1165 return false;
1166
1167 if (isLineTerminator(m_current)) {
1168 shiftLineTerminator();
1169 m_terminator = true;
1170 } else
1171 shift();
1172 }
1173 }
1174
1175 template <typename T>
1176 bool Lexer<T>::nextTokenIsColon()
1177 {
1178 const T* code = m_code;
1179 while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1180 code++;
1181
1182 return code < m_codeEnd && *code == ':';
1183 }
1184
1185 template <typename T>
1186 JSTokenType Lexer<T>::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexerFlags, bool strictMode)
1187 {
1188 ASSERT(!m_error);
1189 ASSERT(m_buffer8.isEmpty());
1190 ASSERT(m_buffer16.isEmpty());
1191
1192 JSTokenType token = ERRORTOK;
1193 m_terminator = false;
1194
1195 start:
1196 while (isWhiteSpace(m_current))
1197 shift();
1198
1199 if (atEnd())
1200 return EOFTOK;
1201
1202 tokenInfo->startOffset = currentOffset();
1203
1204 CharacterType type;
1205 if (LIKELY(isLatin1(m_current)))
1206 type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1207 else if (isNonLatin1IdentStart(m_current))
1208 type = CharacterIdentifierStart;
1209 else if (isLineTerminator(m_current))
1210 type = CharacterLineTerminator;
1211 else
1212 type = CharacterInvalid;
1213
1214 switch (type) {
1215 case CharacterGreater:
1216 shift();
1217 if (m_current == '>') {
1218 shift();
1219 if (m_current == '>') {
1220 shift();
1221 if (m_current == '=') {
1222 shift();
1223 token = URSHIFTEQUAL;
1224 break;
1225 }
1226 token = URSHIFT;
1227 break;
1228 }
1229 if (m_current == '=') {
1230 shift();
1231 token = RSHIFTEQUAL;
1232 break;
1233 }
1234 token = RSHIFT;
1235 break;
1236 }
1237 if (m_current == '=') {
1238 shift();
1239 token = GE;
1240 break;
1241 }
1242 token = GT;
1243 break;
1244 case CharacterEqual:
1245 shift();
1246 if (m_current == '=') {
1247 shift();
1248 if (m_current == '=') {
1249 shift();
1250 token = STREQ;
1251 break;
1252 }
1253 token = EQEQ;
1254 break;
1255 }
1256 token = EQUAL;
1257 break;
1258 case CharacterLess:
1259 shift();
1260 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1261 // <!-- marks the beginning of a line comment (for www usage)
1262 goto inSingleLineComment;
1263 }
1264 if (m_current == '<') {
1265 shift();
1266 if (m_current == '=') {
1267 shift();
1268 token = LSHIFTEQUAL;
1269 break;
1270 }
1271 token = LSHIFT;
1272 break;
1273 }
1274 if (m_current == '=') {
1275 shift();
1276 token = LE;
1277 break;
1278 }
1279 token = LT;
1280 break;
1281 case CharacterExclamationMark:
1282 shift();
1283 if (m_current == '=') {
1284 shift();
1285 if (m_current == '=') {
1286 shift();
1287 token = STRNEQ;
1288 break;
1289 }
1290 token = NE;
1291 break;
1292 }
1293 token = EXCLAMATION;
1294 break;
1295 case CharacterAdd:
1296 shift();
1297 if (m_current == '+') {
1298 shift();
1299 token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
1300 break;
1301 }
1302 if (m_current == '=') {
1303 shift();
1304 token = PLUSEQUAL;
1305 break;
1306 }
1307 token = PLUS;
1308 break;
1309 case CharacterSub:
1310 shift();
1311 if (m_current == '-') {
1312 shift();
1313 if (m_atLineStart && m_current == '>') {
1314 shift();
1315 goto inSingleLineComment;
1316 }
1317 token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
1318 break;
1319 }
1320 if (m_current == '=') {
1321 shift();
1322 token = MINUSEQUAL;
1323 break;
1324 }
1325 token = MINUS;
1326 break;
1327 case CharacterMultiply:
1328 shift();
1329 if (m_current == '=') {
1330 shift();
1331 token = MULTEQUAL;
1332 break;
1333 }
1334 token = TIMES;
1335 break;
1336 case CharacterSlash:
1337 shift();
1338 if (m_current == '/') {
1339 shift();
1340 goto inSingleLineComment;
1341 }
1342 if (m_current == '*') {
1343 shift();
1344 if (parseMultilineComment())
1345 goto start;
1346 m_lexErrorMessage = "Multiline comment was not closed properly";
1347 goto returnError;
1348 }
1349 if (m_current == '=') {
1350 shift();
1351 token = DIVEQUAL;
1352 break;
1353 }
1354 token = DIVIDE;
1355 break;
1356 case CharacterAnd:
1357 shift();
1358 if (m_current == '&') {
1359 shift();
1360 token = AND;
1361 break;
1362 }
1363 if (m_current == '=') {
1364 shift();
1365 token = ANDEQUAL;
1366 break;
1367 }
1368 token = BITAND;
1369 break;
1370 case CharacterXor:
1371 shift();
1372 if (m_current == '=') {
1373 shift();
1374 token = XOREQUAL;
1375 break;
1376 }
1377 token = BITXOR;
1378 break;
1379 case CharacterModulo:
1380 shift();
1381 if (m_current == '=') {
1382 shift();
1383 token = MODEQUAL;
1384 break;
1385 }
1386 token = MOD;
1387 break;
1388 case CharacterOr:
1389 shift();
1390 if (m_current == '=') {
1391 shift();
1392 token = OREQUAL;
1393 break;
1394 }
1395 if (m_current == '|') {
1396 shift();
1397 token = OR;
1398 break;
1399 }
1400 token = BITOR;
1401 break;
1402 case CharacterOpenParen:
1403 token = OPENPAREN;
1404 shift();
1405 break;
1406 case CharacterCloseParen:
1407 token = CLOSEPAREN;
1408 shift();
1409 break;
1410 case CharacterOpenBracket:
1411 token = OPENBRACKET;
1412 shift();
1413 break;
1414 case CharacterCloseBracket:
1415 token = CLOSEBRACKET;
1416 shift();
1417 break;
1418 case CharacterComma:
1419 token = COMMA;
1420 shift();
1421 break;
1422 case CharacterColon:
1423 token = COLON;
1424 shift();
1425 break;
1426 case CharacterQuestion:
1427 token = QUESTION;
1428 shift();
1429 break;
1430 case CharacterTilde:
1431 token = TILDE;
1432 shift();
1433 break;
1434 case CharacterSemicolon:
1435 shift();
1436 token = SEMICOLON;
1437 break;
1438 case CharacterOpenBrace:
1439 tokenData->intValue = currentOffset();
1440 shift();
1441 token = OPENBRACE;
1442 break;
1443 case CharacterCloseBrace:
1444 tokenData->intValue = currentOffset();
1445 shift();
1446 token = CLOSEBRACE;
1447 break;
1448 case CharacterDot:
1449 shift();
1450 if (!isASCIIDigit(m_current)) {
1451 token = DOT;
1452 break;
1453 }
1454 goto inNumberAfterDecimalPoint;
1455 case CharacterZero:
1456 shift();
1457 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
1458 parseHex(tokenData->doubleValue);
1459 token = NUMBER;
1460 } else {
1461 record8('0');
1462 if (isASCIIOctalDigit(m_current)) {
1463 if (parseOctal(tokenData->doubleValue)) {
1464 if (strictMode) {
1465 m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
1466 goto returnError;
1467 }
1468 token = NUMBER;
1469 }
1470 }
1471 }
1472 // Fall through into CharacterNumber
1473 case CharacterNumber:
1474 if (LIKELY(token != NUMBER)) {
1475 if (!parseDecimal(tokenData->doubleValue)) {
1476 if (m_current == '.') {
1477 shift();
1478 inNumberAfterDecimalPoint:
1479 parseNumberAfterDecimalPoint();
1480 }
1481 if ((m_current | 0x20) == 'e') {
1482 if (!parseNumberAfterExponentIndicator()) {
1483 m_lexErrorMessage = "Non-number found after exponent indicator";
1484 goto returnError;
1485 }
1486 }
1487 size_t parsedLength;
1488 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
1489 }
1490 token = NUMBER;
1491 }
1492
1493 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
1494 if (UNLIKELY(isIdentStart(m_current))) {
1495 m_lexErrorMessage = "At least one digit must occur after a decimal point";
1496 goto returnError;
1497 }
1498 m_buffer8.resize(0);
1499 break;
1500 case CharacterQuote:
1501 if (lexerFlags & LexerFlagsDontBuildStrings) {
1502 if (UNLIKELY(!parseString<false>(tokenData, strictMode)))
1503 goto returnError;
1504 } else {
1505 if (UNLIKELY(!parseString<true>(tokenData, strictMode)))
1506 goto returnError;
1507 }
1508 shift();
1509 token = STRING;
1510 break;
1511 case CharacterIdentifierStart:
1512 ASSERT(isIdentStart(m_current));
1513 // Fall through into CharacterBackSlash.
1514 case CharacterBackSlash:
1515 if (lexerFlags & LexexFlagsDontBuildKeywords)
1516 token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
1517 else
1518 token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
1519 break;
1520 case CharacterLineTerminator:
1521 ASSERT(isLineTerminator(m_current));
1522 shiftLineTerminator();
1523 m_atLineStart = true;
1524 m_terminator = true;
1525 goto start;
1526 case CharacterInvalid:
1527 m_lexErrorMessage = invalidCharacterMessage();
1528 goto returnError;
1529 default:
1530 ASSERT_NOT_REACHED();
1531 m_lexErrorMessage = "Internal Error";
1532 goto returnError;
1533 }
1534
1535 m_atLineStart = false;
1536 goto returnToken;
1537
1538 inSingleLineComment:
1539 while (!isLineTerminator(m_current)) {
1540 if (atEnd())
1541 return EOFTOK;
1542 shift();
1543 }
1544 shiftLineTerminator();
1545 m_atLineStart = true;
1546 m_terminator = true;
1547 if (!lastTokenWasRestrKeyword())
1548 goto start;
1549
1550 token = SEMICOLON;
1551 // Fall through into returnToken.
1552
1553 returnToken:
1554 tokenInfo->line = m_lineNumber;
1555 tokenInfo->endOffset = currentOffset();
1556 m_lastToken = token;
1557 return token;
1558
1559 returnError:
1560 m_error = true;
1561 tokenInfo->line = m_lineNumber;
1562 tokenInfo->endOffset = currentOffset();
1563 return ERRORTOK;
1564 }
1565
1566 template <typename T>
1567 bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
1568 {
1569 ASSERT(m_buffer16.isEmpty());
1570
1571 bool lastWasEscape = false;
1572 bool inBrackets = false;
1573
1574 if (patternPrefix) {
1575 ASSERT(!isLineTerminator(patternPrefix));
1576 ASSERT(patternPrefix != '/');
1577 ASSERT(patternPrefix != '[');
1578 record16(patternPrefix);
1579 }
1580
1581 while (true) {
1582 if (isLineTerminator(m_current) || atEnd()) {
1583 m_buffer16.resize(0);
1584 return false;
1585 }
1586
1587 T prev = m_current;
1588
1589 shift();
1590
1591 if (prev == '/' && !lastWasEscape && !inBrackets)
1592 break;
1593
1594 record16(prev);
1595
1596 if (lastWasEscape) {
1597 lastWasEscape = false;
1598 continue;
1599 }
1600
1601 switch (prev) {
1602 case '[':
1603 inBrackets = true;
1604 break;
1605 case ']':
1606 inBrackets = false;
1607 break;
1608 case '\\':
1609 lastWasEscape = true;
1610 break;
1611 }
1612 }
1613
1614 pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1615 m_buffer16.resize(0);
1616
1617 while (isIdentPart(m_current)) {
1618 record16(m_current);
1619 shift();
1620 }
1621
1622 flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1623 m_buffer16.resize(0);
1624
1625 return true;
1626 }
1627
1628 template <typename T>
1629 bool Lexer<T>::skipRegExp()
1630 {
1631 bool lastWasEscape = false;
1632 bool inBrackets = false;
1633
1634 while (true) {
1635 if (isLineTerminator(m_current) || atEnd())
1636 return false;
1637
1638 T prev = m_current;
1639
1640 shift();
1641
1642 if (prev == '/' && !lastWasEscape && !inBrackets)
1643 break;
1644
1645 if (lastWasEscape) {
1646 lastWasEscape = false;
1647 continue;
1648 }
1649
1650 switch (prev) {
1651 case '[':
1652 inBrackets = true;
1653 break;
1654 case ']':
1655 inBrackets = false;
1656 break;
1657 case '\\':
1658 lastWasEscape = true;
1659 break;
1660 }
1661 }
1662
1663 while (isIdentPart(m_current))
1664 shift();
1665
1666 return true;
1667 }
1668
1669 template <typename T>
1670 void Lexer<T>::clear()
1671 {
1672 m_arena = 0;
1673
1674 Vector<LChar> newBuffer8;
1675 m_buffer8.swap(newBuffer8);
1676
1677 Vector<UChar> newBuffer16;
1678 m_buffer16.swap(newBuffer16);
1679
1680 m_isReparsing = false;
1681 }
1682
1683 template <typename T>
1684 SourceCode Lexer<T>::sourceCode(int openBrace, int closeBrace, int firstLine)
1685 {
1686 ASSERT((*m_source->provider()->data())[openBrace] == '{');
1687 ASSERT((*m_source->provider()->data())[closeBrace] == '}');
1688 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1689 }
1690
1691 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
1692 template class Lexer<LChar>;
1693 template class Lexer<UChar>;
1694
1695 } // namespace JSC