]> git.saurik.com Git - apple/javascriptcore.git/blame - parser/Lexer.cpp
JavaScriptCore-554.1.tar.gz
[apple/javascriptcore.git] / parser / Lexer.cpp
CommitLineData
9dae56ea
A
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "Lexer.h"
25
26#include "JSFunction.h"
27#include "JSGlobalObjectFunctions.h"
28#include "NodeInfo.h"
29#include "Nodes.h"
30#include "dtoa.h"
31#include <ctype.h>
32#include <limits.h>
33#include <string.h>
9dae56ea 34#include <wtf/Assertions.h>
9dae56ea
A
35
36using namespace WTF;
37using namespace Unicode;
38
ba379fdc 39// We can't specify the namespace in yacc's C output, so do it here instead.
9dae56ea
A
40using namespace JSC;
41
42#ifndef KDE_USE_FINAL
43#include "Grammar.h"
44#endif
45
46#include "Lookup.h"
47#include "Lexer.lut.h"
48
ba379fdc 49// A bridge for yacc from the C world to the C++ world.
9dae56ea
A
50int jscyylex(void* lvalp, void* llocp, void* globalData)
51{
52 return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
53}
54
55namespace JSC {
56
// Code unit U+FEFF (byte order mark). The lexer strips these from the text it scans.
ba379fdc 57static const UChar byteOrderMark = 0xFEFF;
9dae56ea
A
58
59Lexer::Lexer(JSGlobalData* globalData)
ba379fdc 60 : m_isReparsing(false)
9dae56ea 61 , m_globalData(globalData)
ba379fdc 62 , m_keywordTable(JSC::mainTable)
9dae56ea
A
63{
64 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
65 m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
66}
67
68Lexer::~Lexer()
69{
ba379fdc
A
70 m_keywordTable.deleteTable();
71}
72
73inline const UChar* Lexer::currentCharacter() const
74{
75 return m_code - 4;
76}
77
78inline int Lexer::currentOffset() const
79{
80 return currentCharacter() - m_codeStart;
81}
82
83ALWAYS_INLINE void Lexer::shift1()
84{
85 m_current = m_next1;
86 m_next1 = m_next2;
87 m_next2 = m_next3;
88 if (LIKELY(m_code < m_codeEnd))
89 m_next3 = m_code[0];
90 else
91 m_next3 = -1;
92
93 ++m_code;
94}
95
96ALWAYS_INLINE void Lexer::shift2()
97{
98 m_current = m_next2;
99 m_next1 = m_next3;
100 if (LIKELY(m_code + 1 < m_codeEnd)) {
101 m_next2 = m_code[0];
102 m_next3 = m_code[1];
103 } else {
104 m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
105 m_next3 = -1;
106 }
107
108 m_code += 2;
109}
110
111ALWAYS_INLINE void Lexer::shift3()
112{
113 m_current = m_next3;
114 if (LIKELY(m_code + 2 < m_codeEnd)) {
115 m_next1 = m_code[0];
116 m_next2 = m_code[1];
117 m_next3 = m_code[2];
118 } else {
119 m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
120 m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
121 m_next3 = -1;
122 }
123
124 m_code += 3;
125}
126
127ALWAYS_INLINE void Lexer::shift4()
128{
129 if (LIKELY(m_code + 3 < m_codeEnd)) {
130 m_current = m_code[0];
131 m_next1 = m_code[1];
132 m_next2 = m_code[2];
133 m_next3 = m_code[3];
134 } else {
135 m_current = m_code < m_codeEnd ? m_code[0] : -1;
136 m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
137 m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
138 m_next3 = -1;
139 }
140
141 m_code += 4;
9dae56ea
A
142}
143
144void Lexer::setCode(const SourceCode& source)
145{
ba379fdc 146 m_lineNumber = source.firstLine();
9dae56ea 147 m_delimited = false;
9dae56ea
A
148 m_lastToken = -1;
149
ba379fdc
A
150 const UChar* data = source.provider()->data();
151
9dae56ea 152 m_source = &source;
ba379fdc
A
153 m_codeStart = data;
154 m_code = data + source.startOffset();
155 m_codeEnd = data + source.endOffset();
9dae56ea
A
156 m_error = false;
157 m_atLineStart = true;
158
ba379fdc
A
159 // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
160 // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
161 if (source.provider()->hasBOMs()) {
162 for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
163 if (UNLIKELY(*p == byteOrderMark)) {
164 copyCodeWithoutBOMs();
165 break;
166 }
167 }
168 }
169
170 // Read the first characters into the 4-character buffer.
171 shift4();
172 ASSERT(currentOffset() == source.startOffset());
9dae56ea
A
173}
174
ba379fdc 175void Lexer::copyCodeWithoutBOMs()
9dae56ea 176{
ba379fdc
A
177 // Note: In this case, the character offset data for debugging will be incorrect.
178 // If it's important to correctly debug code with extraneous BOMs, then the caller
179 // should strip the BOMs when creating the SourceProvider object and do its own
180 // mapping of offsets within the stripped text to original text offset.
181
182 m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
183 for (const UChar* p = m_code; p < m_codeEnd; ++p) {
184 UChar c = *p;
185 if (c != byteOrderMark)
186 m_codeWithoutBOMs.append(c);
187 }
188 ptrdiff_t startDelta = m_codeStart - m_code;
189 m_code = m_codeWithoutBOMs.data();
190 m_codeStart = m_code + startDelta;
191 m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
192}
193
194void Lexer::shiftLineTerminator()
195{
196 ASSERT(isLineTerminator(m_current));
197
198 // Allow both CRLF and LFCR.
199 if (m_current + m_next1 == '\n' + '\r')
200 shift2();
201 else
202 shift1();
203
204 ++m_lineNumber;
205}
206
207ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
208{
209 m_identifiers.append(Identifier(m_globalData, characters, length));
210 return &m_identifiers.last();
211}
212
213inline bool Lexer::lastTokenWasRestrKeyword() const
214{
215 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
216}
217
218static NEVER_INLINE bool isNonASCIIIdentStart(int c)
219{
220 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
221}
222
223static inline bool isIdentStart(int c)
224{
225 return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
226}
227
228static NEVER_INLINE bool isNonASCIIIdentPart(int c)
229{
230 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
231 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
232}
233
234static inline bool isIdentPart(int c)
235{
236 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
237}
238
// Maps the character following a backslash to the character it denotes for
// the single-character escapes b, t, n, v, f and r. Any other value is
// returned unchanged.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;
    return c;
}
258
ba379fdc 259inline void Lexer::record8(int c)
9dae56ea 260{
ba379fdc
A
261 ASSERT(c >= 0);
262 ASSERT(c <= 0xFF);
263 m_buffer8.append(static_cast<char>(c));
9dae56ea
A
264}
265
ba379fdc 266inline void Lexer::record16(UChar c)
9dae56ea 267{
ba379fdc
A
268 m_buffer16.append(c);
269}
270
271inline void Lexer::record16(int c)
272{
273 ASSERT(c >= 0);
274 ASSERT(c <= USHRT_MAX);
275 record16(UChar(static_cast<unsigned short>(c)));
9dae56ea
A
276}
277
278int Lexer::lex(void* p1, void* p2)
279{
ba379fdc
A
280 ASSERT(!m_error);
281 ASSERT(m_buffer8.isEmpty());
282 ASSERT(m_buffer16.isEmpty());
283
9dae56ea
A
284 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
285 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
286 int token = 0;
9dae56ea 287 m_terminator = false;
ba379fdc
A
288
289start:
290 while (isWhiteSpace(m_current))
291 shift1();
292
293 int startOffset = currentOffset();
294
295 if (m_current == -1) {
296 if (!m_terminator && !m_delimited && !m_isReparsing) {
297 // automatic semicolon insertion if program incomplete
298 token = ';';
299 goto doneSemicolon;
9dae56ea 300 }
ba379fdc
A
301 return 0;
302 }
303
304 m_delimited = false;
305 switch (m_current) {
306 case '>':
307 if (m_next1 == '>' && m_next2 == '>') {
308 if (m_next3 == '=') {
309 shift4();
310 token = URSHIFTEQUAL;
311 break;
9dae56ea 312 }
ba379fdc
A
313 shift3();
314 token = URSHIFT;
9dae56ea 315 break;
ba379fdc
A
316 }
317 if (m_next1 == '>') {
318 if (m_next2 == '=') {
319 shift3();
320 token = RSHIFTEQUAL;
321 break;
322 }
323 shift2();
324 token = RSHIFT;
9dae56ea 325 break;
ba379fdc
A
326 }
327 if (m_next1 == '=') {
328 shift2();
329 token = GE;
330 break;
331 }
332 shift1();
333 token = '>';
334 break;
335 case '=':
336 if (m_next1 == '=') {
337 if (m_next2 == '=') {
338 shift3();
339 token = STREQ;
340 break;
9dae56ea 341 }
ba379fdc
A
342 shift2();
343 token = EQEQ;
9dae56ea 344 break;
ba379fdc
A
345 }
346 shift1();
347 token = '=';
348 break;
349 case '!':
350 if (m_next1 == '=') {
351 if (m_next2 == '=') {
352 shift3();
353 token = STRNEQ;
354 break;
9dae56ea 355 }
ba379fdc
A
356 shift2();
357 token = NE;
9dae56ea 358 break;
ba379fdc
A
359 }
360 shift1();
361 token = '!';
362 break;
363 case '<':
364 if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
365 // <!-- marks the beginning of a line comment (for www usage)
366 shift4();
367 goto inSingleLineComment;
368 }
369 if (m_next1 == '<') {
370 if (m_next2 == '=') {
371 shift3();
372 token = LSHIFTEQUAL;
373 break;
374 }
375 shift2();
376 token = LSHIFT;
9dae56ea 377 break;
ba379fdc
A
378 }
379 if (m_next1 == '=') {
380 shift2();
381 token = LE;
9dae56ea 382 break;
ba379fdc
A
383 }
384 shift1();
385 token = '<';
386 break;
387 case '+':
388 if (m_next1 == '+') {
389 shift2();
390 if (m_terminator) {
391 token = AUTOPLUSPLUS;
392 break;
9dae56ea 393 }
ba379fdc 394 token = PLUSPLUS;
9dae56ea 395 break;
ba379fdc
A
396 }
397 if (m_next1 == '=') {
398 shift2();
399 token = PLUSEQUAL;
9dae56ea 400 break;
ba379fdc
A
401 }
402 shift1();
403 token = '+';
404 break;
405 case '-':
406 if (m_next1 == '-') {
407 if (m_atLineStart && m_next2 == '>') {
408 shift3();
409 goto inSingleLineComment;
410 }
411 shift2();
412 if (m_terminator) {
413 token = AUTOMINUSMINUS;
414 break;
415 }
416 token = MINUSMINUS;
9dae56ea 417 break;
ba379fdc
A
418 }
419 if (m_next1 == '=') {
420 shift2();
421 token = MINUSEQUAL;
9dae56ea 422 break;
ba379fdc
A
423 }
424 shift1();
425 token = '-';
426 break;
427 case '*':
428 if (m_next1 == '=') {
429 shift2();
430 token = MULTEQUAL;
9dae56ea 431 break;
ba379fdc
A
432 }
433 shift1();
434 token = '*';
435 break;
436 case '/':
437 if (m_next1 == '/') {
438 shift2();
439 goto inSingleLineComment;
440 }
441 if (m_next1 == '*')
442 goto inMultiLineComment;
443 if (m_next1 == '=') {
444 shift2();
445 token = DIVEQUAL;
9dae56ea 446 break;
ba379fdc
A
447 }
448 shift1();
449 token = '/';
450 break;
451 case '&':
452 if (m_next1 == '&') {
453 shift2();
454 token = AND;
9dae56ea 455 break;
ba379fdc
A
456 }
457 if (m_next1 == '=') {
458 shift2();
459 token = ANDEQUAL;
9dae56ea 460 break;
ba379fdc
A
461 }
462 shift1();
463 token = '&';
464 break;
465 case '^':
466 if (m_next1 == '=') {
467 shift2();
468 token = XOREQUAL;
9dae56ea 469 break;
ba379fdc
A
470 }
471 shift1();
472 token = '^';
473 break;
474 case '%':
475 if (m_next1 == '=') {
476 shift2();
477 token = MODEQUAL;
9dae56ea 478 break;
ba379fdc
A
479 }
480 shift1();
481 token = '%';
482 break;
483 case '|':
484 if (m_next1 == '=') {
485 shift2();
486 token = OREQUAL;
9dae56ea 487 break;
ba379fdc
A
488 }
489 if (m_next1 == '|') {
490 shift2();
491 token = OR;
9dae56ea 492 break;
ba379fdc
A
493 }
494 shift1();
495 token = '|';
496 break;
497 case '.':
498 if (isASCIIDigit(m_next1)) {
499 record8('.');
500 shift1();
501 goto inNumberAfterDecimalPoint;
502 }
503 token = '.';
504 shift1();
505 break;
506 case ',':
507 case '~':
508 case '?':
509 case ':':
510 case '(':
511 case ')':
512 case '[':
513 case ']':
514 token = m_current;
515 shift1();
516 break;
517 case ';':
518 shift1();
519 m_delimited = true;
520 token = ';';
521 break;
522 case '{':
523 lvalp->intValue = currentOffset();
524 shift1();
525 token = OPENBRACE;
526 break;
527 case '}':
528 lvalp->intValue = currentOffset();
529 shift1();
530 m_delimited = true;
531 token = CLOSEBRACE;
532 break;
533 case '\\':
534 goto startIdentifierWithBackslash;
535 case '0':
536 goto startNumberWithZeroDigit;
537 case '1':
538 case '2':
539 case '3':
540 case '4':
541 case '5':
542 case '6':
543 case '7':
544 case '8':
545 case '9':
546 goto startNumber;
547 case '"':
548 case '\'':
549 goto startString;
550 default:
551 if (isIdentStart(m_current))
552 goto startIdentifierOrKeyword;
553 if (isLineTerminator(m_current)) {
554 shiftLineTerminator();
555 m_atLineStart = true;
556 m_terminator = true;
557 if (lastTokenWasRestrKeyword()) {
558 token = ';';
559 goto doneSemicolon;
9dae56ea 560 }
ba379fdc
A
561 goto start;
562 }
563 goto returnError;
9dae56ea
A
564 }
565
ba379fdc
A
566 m_atLineStart = false;
567 goto returnToken;
9dae56ea 568
ba379fdc
A
569startString: {
570 int stringQuoteCharacter = m_current;
571 shift1();
9dae56ea 572
ba379fdc
A
573 const UChar* stringStart = currentCharacter();
574 while (m_current != stringQuoteCharacter) {
575 // Fast check for characters that require special handling.
576 // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
577 // as possible, and lets through all common ASCII characters.
578 if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
579 m_buffer16.append(stringStart, currentCharacter() - stringStart);
580 goto inString;
581 }
582 shift1();
583 }
584 lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
585 shift1();
586 m_atLineStart = false;
587 m_delimited = false;
588 token = STRING;
589 goto returnToken;
590
591inString:
592 while (m_current != stringQuoteCharacter) {
593 if (m_current == '\\')
594 goto inStringEscapeSequence;
595 if (UNLIKELY(isLineTerminator(m_current)))
596 goto returnError;
597 if (UNLIKELY(m_current == -1))
598 goto returnError;
599 record16(m_current);
600 shift1();
601 }
602 goto doneString;
603
604inStringEscapeSequence:
605 shift1();
606 if (m_current == 'x') {
607 shift1();
608 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
609 record16(convertHex(m_current, m_next1));
610 shift2();
611 goto inString;
9dae56ea 612 }
ba379fdc
A
613 record16('x');
614 if (m_current == stringQuoteCharacter)
615 goto doneString;
616 goto inString;
617 }
618 if (m_current == 'u') {
619 shift1();
620 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
621 record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
622 shift4();
623 goto inString;
624 }
625 if (m_current == stringQuoteCharacter) {
626 record16('u');
627 goto doneString;
628 }
629 goto returnError;
630 }
631 if (isASCIIOctalDigit(m_current)) {
632 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
633 record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
634 shift3();
635 goto inString;
636 }
637 if (isASCIIOctalDigit(m_next1)) {
638 record16((m_current - '0') * 8 + m_next1 - '0');
639 shift2();
640 goto inString;
641 }
642 record16(m_current - '0');
643 shift1();
644 goto inString;
645 }
646 if (isLineTerminator(m_current)) {
647 shiftLineTerminator();
648 goto inString;
649 }
650 record16(singleEscape(m_current));
651 shift1();
652 goto inString;
653}
9dae56ea 654
ba379fdc
A
655startIdentifierWithBackslash:
656 shift1();
657 if (UNLIKELY(m_current != 'u'))
658 goto returnError;
659 shift1();
660 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
661 goto returnError;
662 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
663 if (UNLIKELY(!isIdentStart(token)))
664 goto returnError;
665 goto inIdentifierAfterCharacterCheck;
666
667startIdentifierOrKeyword: {
668 const UChar* identifierStart = currentCharacter();
669 shift1();
670 while (isIdentPart(m_current))
671 shift1();
672 if (LIKELY(m_current != '\\')) {
673 lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
674 goto doneIdentifierOrKeyword;
675 }
676 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
677}
9dae56ea 678
ba379fdc
A
679 do {
680 shift1();
681 if (UNLIKELY(m_current != 'u'))
682 goto returnError;
683 shift1();
684 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
685 goto returnError;
686 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
687 if (UNLIKELY(!isIdentPart(token)))
688 goto returnError;
689inIdentifierAfterCharacterCheck:
690 record16(token);
691 shift4();
692
693 while (isIdentPart(m_current)) {
694 record16(m_current);
695 shift1();
9dae56ea 696 }
ba379fdc
A
697 } while (UNLIKELY(m_current == '\\'));
698 goto doneIdentifier;
9dae56ea 699
ba379fdc
A
700inSingleLineComment:
701 while (!isLineTerminator(m_current)) {
702 if (UNLIKELY(m_current == -1))
703 return 0;
704 shift1();
9dae56ea 705 }
ba379fdc
A
706 shiftLineTerminator();
707 m_atLineStart = true;
708 m_terminator = true;
709 if (lastTokenWasRestrKeyword())
710 goto doneSemicolon;
711 goto start;
712
713inMultiLineComment:
714 shift2();
715 while (m_current != '*' || m_next1 != '/') {
716 if (isLineTerminator(m_current))
717 shiftLineTerminator();
718 else {
719 shift1();
720 if (UNLIKELY(m_current == -1))
721 goto returnError;
722 }
9dae56ea 723 }
ba379fdc
A
724 shift2();
725 m_atLineStart = false;
726 goto start;
727
728startNumberWithZeroDigit:
729 shift1();
730 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
731 shift1();
732 goto inHex;
733 }
734 if (m_current == '.') {
735 record8('0');
736 record8('.');
737 shift1();
738 goto inNumberAfterDecimalPoint;
739 }
740 if ((m_current | 0x20) == 'e') {
741 record8('0');
742 record8('e');
743 shift1();
744 goto inExponentIndicator;
745 }
746 if (isASCIIOctalDigit(m_current))
747 goto inOctal;
748 if (isASCIIDigit(m_current))
749 goto startNumber;
750 lvalp->doubleValue = 0;
751 goto doneNumeric;
752
753inNumberAfterDecimalPoint:
754 while (isASCIIDigit(m_current)) {
755 record8(m_current);
756 shift1();
757 }
758 if ((m_current | 0x20) == 'e') {
759 record8('e');
760 shift1();
761 goto inExponentIndicator;
762 }
763 goto doneNumber;
764
765inExponentIndicator:
766 if (m_current == '+' || m_current == '-') {
767 record8(m_current);
768 shift1();
769 }
770 if (!isASCIIDigit(m_current))
771 goto returnError;
772 do {
773 record8(m_current);
774 shift1();
775 } while (isASCIIDigit(m_current));
776 goto doneNumber;
777
778inOctal: {
779 do {
780 record8(m_current);
781 shift1();
782 } while (isASCIIOctalDigit(m_current));
783 if (isASCIIDigit(m_current))
784 goto startNumber;
9dae56ea 785
ba379fdc 786 double dval = 0;
9dae56ea 787
ba379fdc
A
788 const char* end = m_buffer8.end();
789 for (const char* p = m_buffer8.data(); p < end; ++p) {
790 dval *= 8;
791 dval += *p - '0';
9dae56ea 792 }
ba379fdc
A
793 if (dval >= mantissaOverflowLowerBound)
794 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
9dae56ea 795
ba379fdc 796 m_buffer8.resize(0);
9dae56ea 797
ba379fdc
A
798 lvalp->doubleValue = dval;
799 goto doneNumeric;
9dae56ea
A
800}
801
ba379fdc
A
802inHex: {
803 do {
804 record8(m_current);
805 shift1();
806 } while (isASCIIHexDigit(m_current));
9dae56ea 807
ba379fdc 808 double dval = 0;
9dae56ea 809
ba379fdc
A
810 const char* end = m_buffer8.end();
811 for (const char* p = m_buffer8.data(); p < end; ++p) {
812 dval *= 16;
813 dval += toASCIIHexValue(*p);
814 }
815 if (dval >= mantissaOverflowLowerBound)
816 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
9dae56ea 817
ba379fdc 818 m_buffer8.resize(0);
9dae56ea 819
ba379fdc
A
820 lvalp->doubleValue = dval;
821 goto doneNumeric;
9dae56ea
A
822}
823
ba379fdc
A
824startNumber:
825 record8(m_current);
826 shift1();
827 while (isASCIIDigit(m_current)) {
828 record8(m_current);
829 shift1();
9dae56ea 830 }
ba379fdc
A
831 if (m_current == '.') {
832 record8('.');
833 shift1();
834 goto inNumberAfterDecimalPoint;
9dae56ea 835 }
ba379fdc
A
836 if ((m_current | 0x20) == 'e') {
837 record8('e');
838 shift1();
839 goto inExponentIndicator;
9dae56ea
A
840 }
841
ba379fdc 842 // Fall through into doneNumber.
9dae56ea 843
ba379fdc
A
844doneNumber:
845 // Null-terminate string for strtod.
846 m_buffer8.append('\0');
847 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
848 m_buffer8.resize(0);
9dae56ea 849
ba379fdc 850 // Fall through into doneNumeric.
9dae56ea 851
ba379fdc
A
852doneNumeric:
853 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
854 if (UNLIKELY(isIdentStart(m_current)))
855 goto returnError;
9dae56ea 856
ba379fdc
A
857 m_atLineStart = false;
858 m_delimited = false;
859 token = NUMBER;
860 goto returnToken;
9dae56ea 861
ba379fdc
A
862doneSemicolon:
863 token = ';';
864 m_delimited = true;
865 goto returnToken;
9dae56ea 866
ba379fdc
A
867doneIdentifier:
868 m_atLineStart = false;
869 m_delimited = false;
870 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
871 m_buffer16.resize(0);
872 token = IDENT;
873 goto returnToken;
874
875doneIdentifierOrKeyword: {
876 m_atLineStart = false;
877 m_delimited = false;
878 m_buffer16.resize(0);
879 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
880 token = entry ? entry->lexerValue() : IDENT;
881 goto returnToken;
9dae56ea
A
882}
883
ba379fdc
A
884doneString:
885 // Atomize constant strings in case they're later used in property lookup.
886 shift1();
887 m_atLineStart = false;
888 m_delimited = false;
889 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
890 m_buffer16.resize(0);
891 token = STRING;
892
893 // Fall through into returnToken.
894
895returnToken: {
896 int lineNumber = m_lineNumber;
897 llocp->first_line = lineNumber;
898 llocp->last_line = lineNumber;
899 llocp->first_column = startOffset;
900 llocp->last_column = currentOffset();
901
902 m_lastToken = token;
903 return token;
9dae56ea
A
904}
905
ba379fdc
A
906returnError:
907 m_error = true;
908 return -1;
9dae56ea
A
909}
910
911bool Lexer::scanRegExp()
912{
ba379fdc
A
913 ASSERT(m_buffer16.isEmpty());
914
9dae56ea
A
915 bool lastWasEscape = false;
916 bool inBrackets = false;
917
ba379fdc
A
918 while (true) {
919 if (isLineTerminator(m_current) || m_current == -1)
9dae56ea 920 return false;
ba379fdc 921 if (m_current != '/' || lastWasEscape || inBrackets) {
9dae56ea
A
922 // keep track of '[' and ']'
923 if (!lastWasEscape) {
ba379fdc 924 if (m_current == '[' && !inBrackets)
9dae56ea 925 inBrackets = true;
ba379fdc 926 if (m_current == ']' && inBrackets)
9dae56ea
A
927 inBrackets = false;
928 }
929 record16(m_current);
ba379fdc 930 lastWasEscape = !lastWasEscape && m_current == '\\';
9dae56ea
A
931 } else { // end of regexp
932 m_pattern = UString(m_buffer16);
ba379fdc
A
933 m_buffer16.resize(0);
934 shift1();
9dae56ea
A
935 break;
936 }
ba379fdc 937 shift1();
9dae56ea
A
938 }
939
940 while (isIdentPart(m_current)) {
941 record16(m_current);
ba379fdc 942 shift1();
9dae56ea
A
943 }
944 m_flags = UString(m_buffer16);
ba379fdc 945 m_buffer16.resize(0);
9dae56ea
A
946
947 return true;
948}
949
950void Lexer::clear()
951{
952 m_identifiers.clear();
ba379fdc 953 m_codeWithoutBOMs.clear();
9dae56ea
A
954
955 Vector<char> newBuffer8;
956 newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
957 m_buffer8.swap(newBuffer8);
958
959 Vector<UChar> newBuffer16;
960 newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
961 m_buffer16.swap(newBuffer16);
962
963 m_isReparsing = false;
964
ba379fdc
A
965 m_pattern = UString();
966 m_flags = UString();
967}
968
969SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
970{
971 if (m_codeWithoutBOMs.isEmpty())
972 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
973
974 const UChar* data = m_source->provider()->data();
975
976 ASSERT(openBrace < closeBrace);
977
978 int numBOMsBeforeOpenBrace = 0;
979 int numBOMsBetweenBraces = 0;
980
981 int i;
982 for (i = m_source->startOffset(); i < openBrace; ++i)
983 numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
984 for (; i < closeBrace; ++i)
985 numBOMsBetweenBraces += data[i] == byteOrderMark;
986
987 return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
988 closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
9dae56ea
A
989}
990
991} // namespace JSC