]> git.saurik.com Git - apple/javascriptcore.git/blob - parser/Lexer.cpp
bdeb04936c2b1d4af970ea7482b7e55d9c093e22
[apple/javascriptcore.git] / parser / Lexer.cpp
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #include "config.h"
24 #include "Lexer.h"
25
26 #include "JSFunction.h"
27 #include "JSGlobalObjectFunctions.h"
28 #include "NodeInfo.h"
29 #include "Nodes.h"
30 #include "dtoa.h"
31 #include <ctype.h>
32 #include <limits.h>
33 #include <string.h>
34 #include <wtf/Assertions.h>
35
36 using namespace WTF;
37 using namespace Unicode;
38
39 // We can't specify the namespace in yacc's C output, so do it here instead.
40 using namespace JSC;
41
42 #include "Grammar.h"
43 #include "Lookup.h"
44 #include "Lexer.lut.h"
45
46 namespace JSC {
47
// U+FEFF (byte order mark). This is the only character the lexer strips from
// the source text before scanning; see setCode() and copyCodeWithoutBOMs().
48 static const UChar byteOrderMark = 0xFEFF;
49
50 Lexer::Lexer(JSGlobalData* globalData)
51 : m_isReparsing(false)
52 , m_globalData(globalData)
53 , m_keywordTable(JSC::mainTable)
54 {
55 }
56
57 Lexer::~Lexer()
58 {
59 m_keywordTable.deleteTable();
60 }
61
62 inline const UChar* Lexer::currentCharacter() const
63 {
64 return m_code - 4;
65 }
66
67 inline int Lexer::currentOffset() const
68 {
69 return currentCharacter() - m_codeStart;
70 }
71
72 ALWAYS_INLINE void Lexer::shift1()
73 {
74 m_current = m_next1;
75 m_next1 = m_next2;
76 m_next2 = m_next3;
77 if (LIKELY(m_code < m_codeEnd))
78 m_next3 = m_code[0];
79 else
80 m_next3 = -1;
81
82 ++m_code;
83 }
84
85 ALWAYS_INLINE void Lexer::shift2()
86 {
87 m_current = m_next2;
88 m_next1 = m_next3;
89 if (LIKELY(m_code + 1 < m_codeEnd)) {
90 m_next2 = m_code[0];
91 m_next3 = m_code[1];
92 } else {
93 m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
94 m_next3 = -1;
95 }
96
97 m_code += 2;
98 }
99
100 ALWAYS_INLINE void Lexer::shift3()
101 {
102 m_current = m_next3;
103 if (LIKELY(m_code + 2 < m_codeEnd)) {
104 m_next1 = m_code[0];
105 m_next2 = m_code[1];
106 m_next3 = m_code[2];
107 } else {
108 m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
109 m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
110 m_next3 = -1;
111 }
112
113 m_code += 3;
114 }
115
116 ALWAYS_INLINE void Lexer::shift4()
117 {
118 if (LIKELY(m_code + 3 < m_codeEnd)) {
119 m_current = m_code[0];
120 m_next1 = m_code[1];
121 m_next2 = m_code[2];
122 m_next3 = m_code[3];
123 } else {
124 m_current = m_code < m_codeEnd ? m_code[0] : -1;
125 m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
126 m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
127 m_next3 = -1;
128 }
129
130 m_code += 4;
131 }
132
133 void Lexer::setCode(const SourceCode& source, ParserArena& arena)
134 {
135 m_arena = &arena.identifierArena();
136
137 m_lineNumber = source.firstLine();
138 m_delimited = false;
139 m_lastToken = -1;
140
141 const UChar* data = source.provider()->data();
142
143 m_source = &source;
144 m_codeStart = data;
145 m_code = data + source.startOffset();
146 m_codeEnd = data + source.endOffset();
147 m_error = false;
148 m_atLineStart = true;
149
150 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
151 m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
152
153 // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
154 // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
155 if (source.provider()->hasBOMs()) {
156 for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
157 if (UNLIKELY(*p == byteOrderMark)) {
158 copyCodeWithoutBOMs();
159 break;
160 }
161 }
162 }
163
164 // Read the first characters into the 4-character buffer.
165 shift4();
166 ASSERT(currentOffset() == source.startOffset());
167 }
168
169 void Lexer::copyCodeWithoutBOMs()
170 {
171 // Note: In this case, the character offset data for debugging will be incorrect.
172 // If it's important to correctly debug code with extraneous BOMs, then the caller
173 // should strip the BOMs when creating the SourceProvider object and do its own
174 // mapping of offsets within the stripped text to original text offset.
175
176 m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
177 for (const UChar* p = m_code; p < m_codeEnd; ++p) {
178 UChar c = *p;
179 if (c != byteOrderMark)
180 m_codeWithoutBOMs.append(c);
181 }
182 ptrdiff_t startDelta = m_codeStart - m_code;
183 m_code = m_codeWithoutBOMs.data();
184 m_codeStart = m_code + startDelta;
185 m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
186 }
187
188 void Lexer::shiftLineTerminator()
189 {
190 ASSERT(isLineTerminator(m_current));
191
192 // Allow both CRLF and LFCR.
193 if (m_current + m_next1 == '\n' + '\r')
194 shift2();
195 else
196 shift1();
197
198 ++m_lineNumber;
199 }
200
201 ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
202 {
203 return &m_arena->makeIdentifier(m_globalData, characters, length);
204 }
205
206 inline bool Lexer::lastTokenWasRestrKeyword() const
207 {
208 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
209 }
210
211 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
212 {
213 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
214 }
215
216 static inline bool isIdentStart(int c)
217 {
218 return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
219 }
220
221 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
222 {
223 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
224 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
225 }
226
227 static inline bool isIdentPart(int c)
228 {
229 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
230 }
231
// Maps the character following a backslash in a string literal to the character
// it denotes. Characters without a special meaning map to themselves.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08; // backspace
    if (c == 't')
        return 0x09; // horizontal tab
    if (c == 'n')
        return 0x0A; // line feed
    if (c == 'v')
        return 0x0B; // vertical tab
    if (c == 'f')
        return 0x0C; // form feed
    if (c == 'r')
        return 0x0D; // carriage return
    return c;
}
251
252 inline void Lexer::record8(int c)
253 {
254 ASSERT(c >= 0);
255 ASSERT(c <= 0xFF);
256 m_buffer8.append(static_cast<char>(c));
257 }
258
259 inline void Lexer::record16(UChar c)
260 {
261 m_buffer16.append(c);
262 }
263
264 inline void Lexer::record16(int c)
265 {
266 ASSERT(c >= 0);
267 ASSERT(c <= USHRT_MAX);
268 record16(UChar(static_cast<unsigned short>(c)));
269 }
270
// Scans and returns the next token of the source installed by setCode().
//
// p1 is cast to YYSTYPE* and receives the token's value where one exists: the
// identifier for IDENT/keyword/STRING tokens, the double for NUMBER, and the
// source offset for OPENBRACE/CLOSEBRACE.
// p2 is cast to YYLTYPE* and receives the token's line number and the start/end
// source offsets (stored in the column fields).
//
// Returns the token code, 0 when the end of input is reached, or -1 after
// setting m_error when the input cannot be tokenized.
//
// The function is a goto-driven state machine: the switch below dispatches on
// the current character, and the labelled sections handle multi-character tokens.
271 int Lexer::lex(void* p1, void* p2)
272 {
273 ASSERT(!m_error);
274 ASSERT(m_buffer8.isEmpty());
275 ASSERT(m_buffer16.isEmpty());
276
277 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
278 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
279 int token = 0;
// m_terminator records whether a line terminator has been skipped during this call.
280 m_terminator = false;
281
// Entry point for (re)starting a scan; comments and line terminators jump back here.
282 start:
283 while (isWhiteSpace(m_current))
284 shift1();
285
286 int startOffset = currentOffset();
287
// -1 is the end-of-input marker fed into the lookahead window by the shift functions.
288 if (m_current == -1) {
289 if (!m_terminator && !m_delimited && !m_isReparsing) {
290 // automatic semicolon insertion if program incomplete
291 token = ';';
292 goto doneSemicolon;
293 }
294 return 0;
295 }
296
297 m_delimited = false;
// Punctuators are matched longest-first using the lookahead characters
// m_next1..m_next3; shiftN() then consumes exactly the matched characters.
298 switch (m_current) {
299 case '>':
300 if (m_next1 == '>' && m_next2 == '>') {
301 if (m_next3 == '=') {
302 shift4();
303 token = URSHIFTEQUAL;
304 break;
305 }
306 shift3();
307 token = URSHIFT;
308 break;
309 }
310 if (m_next1 == '>') {
311 if (m_next2 == '=') {
312 shift3();
313 token = RSHIFTEQUAL;
314 break;
315 }
316 shift2();
317 token = RSHIFT;
318 break;
319 }
320 if (m_next1 == '=') {
321 shift2();
322 token = GE;
323 break;
324 }
325 shift1();
326 token = '>';
327 break;
328 case '=':
329 if (m_next1 == '=') {
330 if (m_next2 == '=') {
331 shift3();
332 token = STREQ;
333 break;
334 }
335 shift2();
336 token = EQEQ;
337 break;
338 }
339 shift1();
340 token = '=';
341 break;
342 case '!':
343 if (m_next1 == '=') {
344 if (m_next2 == '=') {
345 shift3();
346 token = STRNEQ;
347 break;
348 }
349 shift2();
350 token = NE;
351 break;
352 }
353 shift1();
354 token = '!';
355 break;
356 case '<':
357 if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
358 // <!-- marks the beginning of a line comment (for www usage)
359 shift4();
360 goto inSingleLineComment;
361 }
362 if (m_next1 == '<') {
363 if (m_next2 == '=') {
364 shift3();
365 token = LSHIFTEQUAL;
366 break;
367 }
368 shift2();
369 token = LSHIFT;
370 break;
371 }
372 if (m_next1 == '=') {
373 shift2();
374 token = LE;
375 break;
376 }
377 shift1();
378 token = '<';
379 break;
380 case '+':
381 if (m_next1 == '+') {
382 shift2();
// A line terminator was skipped earlier in this call, so report the AUTO variant.
383 if (m_terminator) {
384 token = AUTOPLUSPLUS;
385 break;
386 }
387 token = PLUSPLUS;
388 break;
389 }
390 if (m_next1 == '=') {
391 shift2();
392 token = PLUSEQUAL;
393 break;
394 }
395 shift1();
396 token = '+';
397 break;
398 case '-':
399 if (m_next1 == '-') {
// "-->" is treated as a single-line comment, but only while m_atLineStart is set.
400 if (m_atLineStart && m_next2 == '>') {
401 shift3();
402 goto inSingleLineComment;
403 }
404 shift2();
// Same AUTO handling as for "++" above.
405 if (m_terminator) {
406 token = AUTOMINUSMINUS;
407 break;
408 }
409 token = MINUSMINUS;
410 break;
411 }
412 if (m_next1 == '=') {
413 shift2();
414 token = MINUSEQUAL;
415 break;
416 }
417 shift1();
418 token = '-';
419 break;
420 case '*':
421 if (m_next1 == '=') {
422 shift2();
423 token = MULTEQUAL;
424 break;
425 }
426 shift1();
427 token = '*';
428 break;
429 case '/':
430 if (m_next1 == '/') {
431 shift2();
432 goto inSingleLineComment;
433 }
// The "/*" itself is consumed by the shift2() at the inMultiLineComment label.
434 if (m_next1 == '*')
435 goto inMultiLineComment;
436 if (m_next1 == '=') {
437 shift2();
438 token = DIVEQUAL;
439 break;
440 }
441 shift1();
442 token = '/';
443 break;
444 case '&':
445 if (m_next1 == '&') {
446 shift2();
447 token = AND;
448 break;
449 }
450 if (m_next1 == '=') {
451 shift2();
452 token = ANDEQUAL;
453 break;
454 }
455 shift1();
456 token = '&';
457 break;
458 case '^':
459 if (m_next1 == '=') {
460 shift2();
461 token = XOREQUAL;
462 break;
463 }
464 shift1();
465 token = '^';
466 break;
467 case '%':
468 if (m_next1 == '=') {
469 shift2();
470 token = MODEQUAL;
471 break;
472 }
473 shift1();
474 token = '%';
475 break;
476 case '|':
477 if (m_next1 == '=') {
478 shift2();
479 token = OREQUAL;
480 break;
481 }
482 if (m_next1 == '|') {
483 shift2();
484 token = OR;
485 break;
486 }
487 shift1();
488 token = '|';
489 break;
490 case '.':
// A '.' immediately followed by a digit begins a numeric literal such as ".5".
491 if (isASCIIDigit(m_next1)) {
492 record8('.');
493 shift1();
494 goto inNumberAfterDecimalPoint;
495 }
496 token = '.';
497 shift1();
498 break;
// Single-character punctuators: the character itself is the token code.
499 case ',':
500 case '~':
501 case '?':
502 case ':':
503 case '(':
504 case ')':
505 case '[':
506 case ']':
507 token = m_current;
508 shift1();
509 break;
510 case ';':
511 shift1();
512 m_delimited = true;
513 token = ';';
514 break;
// For braces, the brace's source offset is handed to the parser as the token value.
515 case '{':
516 lvalp->intValue = currentOffset();
517 shift1();
518 token = OPENBRACE;
519 break;
520 case '}':
521 lvalp->intValue = currentOffset();
522 shift1();
523 m_delimited = true;
524 token = CLOSEBRACE;
525 break;
526 case '\\':
527 goto startIdentifierWithBackslash;
528 case '0':
529 goto startNumberWithZeroDigit;
530 case '1':
531 case '2':
532 case '3':
533 case '4':
534 case '5':
535 case '6':
536 case '7':
537 case '8':
538 case '9':
539 goto startNumber;
540 case '"':
541 case '\'':
542 goto startString;
543 default:
544 if (isIdentStart(m_current))
545 goto startIdentifierOrKeyword;
546 if (isLineTerminator(m_current)) {
547 shiftLineTerminator();
548 m_atLineStart = true;
549 m_terminator = true;
// A line terminator directly after continue/break/return/throw yields an automatic ';'.
550 if (lastTokenWasRestrKeyword()) {
551 token = ';';
552 goto doneSemicolon;
553 }
554 goto start;
555 }
// Any other character cannot start a token.
556 goto returnError;
557 }
558
// Reached by every 'break' in the switch above: a punctuator token was recognized.
559 m_atLineStart = false;
560 goto returnToken;
561
// String literal. Fast path: as long as no backslash or other special character
// is seen, the literal's contents are taken directly from the source buffer.
562 startString: {
563 int stringQuoteCharacter = m_current;
564 shift1();
565
566 const UChar* stringStart = currentCharacter();
567 while (m_current != stringQuoteCharacter) {
568 // Fast check for characters that require special handling.
569 // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
570 // as possible, and lets through all common ASCII characters.
571 if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
// Switch to the slow path, carrying over what has been scanned so far.
572 m_buffer16.append(stringStart, currentCharacter() - stringStart);
573 goto inString;
574 }
575 shift1();
576 }
577 lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
// Consume the closing quote.
578 shift1();
579 m_atLineStart = false;
580 m_delimited = false;
581 token = STRING;
582 goto returnToken;
583
// Slow path: characters are accumulated one at a time in m_buffer16. An unescaped
// line terminator or the end of input inside the literal is an error.
584 inString:
585 while (m_current != stringQuoteCharacter) {
586 if (m_current == '\\')
587 goto inStringEscapeSequence;
588 if (UNLIKELY(isLineTerminator(m_current)))
589 goto returnError;
590 if (UNLIKELY(m_current == -1))
591 goto returnError;
592 record16(m_current);
593 shift1();
594 }
595 goto doneString;
596
597 inStringEscapeSequence:
// Skip the backslash itself.
598 shift1();
// \xHH: two hex digits. Without two hex digits, a literal 'x' is recorded instead.
599 if (m_current == 'x') {
600 shift1();
601 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
602 record16(convertHex(m_current, m_next1));
603 shift2();
604 goto inString;
605 }
606 record16('x');
607 if (m_current == stringQuoteCharacter)
608 goto doneString;
609 goto inString;
610 }
// \uHHHH: four hex digits. A 'u' directly before the closing quote is kept as a
// literal 'u'; any other malformed \u escape is an error.
611 if (m_current == 'u') {
612 shift1();
613 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
614 record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
615 shift4();
616 goto inString;
617 }
618 if (m_current == stringQuoteCharacter) {
619 record16('u');
620 goto doneString;
621 }
622 goto returnError;
623 }
// Octal escapes: three digits when the first is 0-3, otherwise two or one.
624 if (isASCIIOctalDigit(m_current)) {
625 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
626 record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
627 shift3();
628 goto inString;
629 }
630 if (isASCIIOctalDigit(m_next1)) {
631 record16((m_current - '0') * 8 + m_next1 - '0');
632 shift2();
633 goto inString;
634 }
635 record16(m_current - '0');
636 shift1();
637 goto inString;
638 }
// Backslash followed by a line terminator: the terminator is consumed and
// nothing is recorded.
639 if (isLineTerminator(m_current)) {
640 shiftLineTerminator();
641 goto inString;
642 }
643 if (m_current == -1)
644 goto returnError;
// Any other escaped character goes through singleEscape().
645 record16(singleEscape(m_current));
646 shift1();
647 goto inString;
648 }
649
// Identifier that begins with a \uXXXX escape. The decoded character must be a
// valid identifier start; scanning then continues inside the loop below.
650 startIdentifierWithBackslash:
651 shift1();
652 if (UNLIKELY(m_current != 'u'))
653 goto returnError;
654 shift1();
655 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
656 goto returnError;
// 'token' is used here as scratch storage for the decoded character.
657 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
658 if (UNLIKELY(!isIdentStart(token)))
659 goto returnError;
660 goto inIdentifierAfterCharacterCheck;
661
// Identifier or keyword. Fast path: without any backslash the identifier is made
// directly from the source buffer and then looked up in the keyword table.
662 startIdentifierOrKeyword: {
663 const UChar* identifierStart = currentCharacter();
664 shift1();
665 while (isIdentPart(m_current))
666 shift1();
667 if (LIKELY(m_current != '\\')) {
668 lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
669 goto doneIdentifierOrKeyword;
670 }
// A backslash follows: copy what was scanned so far and fall into the slow path.
671 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
672 }
673
// Slow path for identifiers containing \uXXXX escapes. Each iteration decodes one
// escape and then copies the identifier characters that follow it. Identifiers
// built here end at doneIdentifier and are always reported as IDENT.
674 do {
675 shift1();
676 if (UNLIKELY(m_current != 'u'))
677 goto returnError;
678 shift1();
679 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
680 goto returnError;
681 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
682 if (UNLIKELY(!isIdentPart(token)))
683 goto returnError;
684 inIdentifierAfterCharacterCheck:
685 record16(token);
// Consume the four hex digits of the escape.
686 shift4();
687
688 while (isIdentPart(m_current)) {
689 record16(m_current);
690 shift1();
691 }
692 } while (UNLIKELY(m_current == '\\'));
693 goto doneIdentifier;
694
// Skips to the end of the line. Reaching the end of input inside the comment
// ends lexing with a return value of 0.
695 inSingleLineComment:
696 while (!isLineTerminator(m_current)) {
697 if (UNLIKELY(m_current == -1))
698 return 0;
699 shift1();
700 }
701 shiftLineTerminator();
702 m_atLineStart = true;
703 m_terminator = true;
704 if (lastTokenWasRestrKeyword())
705 goto doneSemicolon;
706 goto start;
707
// Skips a "/* ... */" comment, counting the line terminators inside it.
// Reaching the end of input before the closing "*/" is an error.
708 inMultiLineComment:
709 shift2();
710 while (m_current != '*' || m_next1 != '/') {
711 if (isLineTerminator(m_current))
712 shiftLineTerminator();
713 else {
714 shift1();
715 if (UNLIKELY(m_current == -1))
716 goto returnError;
717 }
718 }
// Consume the closing "*/".
719 shift2();
720 m_atLineStart = false;
721 goto start;
722
// Numeric literal starting with '0': hex ("0x"), a fraction ("0."), an exponent
// ("0e"), an octal literal, a decimal literal with a leading zero, or plain zero.
723 startNumberWithZeroDigit:
724 shift1();
// (c | 0x20) folds an ASCII letter to lower case, so this matches 'x' and 'X'.
725 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
726 shift1();
727 goto inHex;
728 }
729 if (m_current == '.') {
730 record8('0');
731 record8('.');
732 shift1();
733 goto inNumberAfterDecimalPoint;
734 }
735 if ((m_current | 0x20) == 'e') {
736 record8('0');
737 record8('e');
738 shift1();
739 goto inExponentIndicator;
740 }
741 if (isASCIIOctalDigit(m_current))
742 goto inOctal;
743 if (isASCIIDigit(m_current))
744 goto startNumber;
745 lvalp->doubleValue = 0;
746 goto doneNumeric;
747
// Fraction digits, optionally followed by an exponent. The literal's text is
// accumulated in m_buffer8 and converted at doneNumber.
748 inNumberAfterDecimalPoint:
749 while (isASCIIDigit(m_current)) {
750 record8(m_current);
751 shift1();
752 }
753 if ((m_current | 0x20) == 'e') {
754 record8('e');
755 shift1();
756 goto inExponentIndicator;
757 }
758 goto doneNumber;
759
// Exponent: an optional sign followed by at least one digit.
760 inExponentIndicator:
761 if (m_current == '+' || m_current == '-') {
762 record8(m_current);
763 shift1();
764 }
765 if (!isASCIIDigit(m_current))
766 goto returnError;
767 do {
768 record8(m_current);
769 shift1();
770 } while (isASCIIDigit(m_current));
771 goto doneNumber;
772
// Octal literal; the leading '0' has been consumed but not recorded. If a
// non-octal digit follows, scanning continues at startNumber as a decimal literal.
773 inOctal: {
774 do {
775 record8(m_current);
776 shift1();
777 } while (isASCIIOctalDigit(m_current));
778 if (isASCIIDigit(m_current))
779 goto startNumber;
780
781 double dval = 0;
782
783 const char* end = m_buffer8.end();
784 for (const char* p = m_buffer8.data(); p < end; ++p) {
785 dval *= 8;
786 dval += *p - '0';
787 }
// At or above mantissaOverflowLowerBound the value is recomputed by parseIntOverflow().
788 if (dval >= mantissaOverflowLowerBound)
789 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
790
791 m_buffer8.resize(0);
792
793 lvalp->doubleValue = dval;
794 goto doneNumeric;
795 }
796
// Hex literal; the "0x" prefix has been consumed and at least one hex digit follows.
797 inHex: {
798 do {
799 record8(m_current);
800 shift1();
801 } while (isASCIIHexDigit(m_current));
802
803 double dval = 0;
804
805 const char* end = m_buffer8.end();
806 for (const char* p = m_buffer8.data(); p < end; ++p) {
807 dval *= 16;
808 dval += toASCIIHexValue(*p);
809 }
// Same overflow handling as for octal literals, with radix 16.
810 if (dval >= mantissaOverflowLowerBound)
811 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
812
813 m_buffer8.resize(0);
814
815 lvalp->doubleValue = dval;
816 goto doneNumeric;
817 }
818
// Decimal literal: integer digits, then an optional fraction and/or exponent.
819 startNumber:
820 record8(m_current);
821 shift1();
822 while (isASCIIDigit(m_current)) {
823 record8(m_current);
824 shift1();
825 }
826 if (m_current == '.') {
827 record8('.');
828 shift1();
829 goto inNumberAfterDecimalPoint;
830 }
831 if ((m_current | 0x20) == 'e') {
832 record8('e');
833 shift1();
834 goto inExponentIndicator;
835 }
836
837 // Fall through into doneNumber.
838
// Converts the text accumulated in m_buffer8 to a double.
839 doneNumber:
840 // Null-terminate string for strtod.
841 m_buffer8.append('\0');
842 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
843 m_buffer8.resize(0);
844
845 // Fall through into doneNumeric.
846
847 doneNumeric:
848 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
849 if (UNLIKELY(isIdentStart(m_current)))
850 goto returnError;
851
852 m_atLineStart = false;
853 m_delimited = false;
854 token = NUMBER;
855 goto returnToken;
856
// Returns a ';' token: used for explicit jumps from the automatic semicolon
// insertion paths above.
857 doneSemicolon:
858 token = ';';
859 m_delimited = true;
860 goto returnToken;
861
// Identifier built in m_buffer16 (it contained at least one \uXXXX escape).
862 doneIdentifier:
863 m_atLineStart = false;
864 m_delimited = false;
865 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
866 m_buffer16.resize(0);
867 token = IDENT;
868 goto returnToken;
869
// Identifier taken from the fast path: look it up in the keyword table and
// return the keyword's token if found, otherwise IDENT.
870 doneIdentifierOrKeyword: {
871 m_atLineStart = false;
872 m_delimited = false;
873 m_buffer16.resize(0);
874 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
875 token = entry ? entry->lexerValue() : IDENT;
876 goto returnToken;
877 }
878
// String literal built in m_buffer16 by the slow path; the shift1() below
// consumes the closing quote.
879 doneString:
880 // Atomize constant strings in case they're later used in property lookup.
881 shift1();
882 m_atLineStart = false;
883 m_delimited = false;
884 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
885 m_buffer16.resize(0);
886 token = STRING;
887
888 // Fall through into returnToken.
889
// Common exit for every successfully scanned token: fills in the location and
// remembers the token for lastTokenWasRestrKeyword(). Both line fields receive
// the current line number; the column fields receive source offsets.
890 returnToken: {
891 int lineNumber = m_lineNumber;
892 llocp->first_line = lineNumber;
893 llocp->last_line = lineNumber;
894 llocp->first_column = startOffset;
895 llocp->last_column = currentOffset();
896
897 m_lastToken = token;
898 return token;
899 }
900
// Common exit for lexical errors.
901 returnError:
902 m_error = true;
903 return -1;
904 }
905
906 bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
907 {
908 ASSERT(m_buffer16.isEmpty());
909
910 bool lastWasEscape = false;
911 bool inBrackets = false;
912
913 if (patternPrefix) {
914 ASSERT(!isLineTerminator(patternPrefix));
915 ASSERT(patternPrefix != '/');
916 ASSERT(patternPrefix != '[');
917 record16(patternPrefix);
918 }
919
920 while (true) {
921 int current = m_current;
922
923 if (isLineTerminator(current) || current == -1) {
924 m_buffer16.resize(0);
925 return false;
926 }
927
928 shift1();
929
930 if (current == '/' && !lastWasEscape && !inBrackets)
931 break;
932
933 record16(current);
934
935 if (lastWasEscape) {
936 lastWasEscape = false;
937 continue;
938 }
939
940 switch (current) {
941 case '[':
942 inBrackets = true;
943 break;
944 case ']':
945 inBrackets = false;
946 break;
947 case '\\':
948 lastWasEscape = true;
949 break;
950 }
951 }
952
953 pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
954 m_buffer16.resize(0);
955
956 while (isIdentPart(m_current)) {
957 record16(m_current);
958 shift1();
959 }
960
961 flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
962 m_buffer16.resize(0);
963
964 return true;
965 }
966
967 bool Lexer::skipRegExp()
968 {
969 bool lastWasEscape = false;
970 bool inBrackets = false;
971
972 while (true) {
973 int current = m_current;
974
975 if (isLineTerminator(current) || current == -1)
976 return false;
977
978 shift1();
979
980 if (current == '/' && !lastWasEscape && !inBrackets)
981 break;
982
983 if (lastWasEscape) {
984 lastWasEscape = false;
985 continue;
986 }
987
988 switch (current) {
989 case '[':
990 inBrackets = true;
991 break;
992 case ']':
993 inBrackets = false;
994 break;
995 case '\\':
996 lastWasEscape = true;
997 break;
998 }
999 }
1000
1001 while (isIdentPart(m_current))
1002 shift1();
1003
1004 return true;
1005 }
1006
1007 void Lexer::clear()
1008 {
1009 m_arena = 0;
1010 m_codeWithoutBOMs.clear();
1011
1012 Vector<char> newBuffer8;
1013 m_buffer8.swap(newBuffer8);
1014
1015 Vector<UChar> newBuffer16;
1016 m_buffer16.swap(newBuffer16);
1017
1018 m_isReparsing = false;
1019 }
1020
1021 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1022 {
1023 if (m_codeWithoutBOMs.isEmpty())
1024 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1025
1026 const UChar* data = m_source->provider()->data();
1027
1028 ASSERT(openBrace < closeBrace);
1029 int i;
1030 for (i = m_source->startOffset(); i < openBrace; ++i) {
1031 if (data[i] == byteOrderMark) {
1032 openBrace++;
1033 closeBrace++;
1034 }
1035 }
1036 for (; i < closeBrace; ++i) {
1037 if (data[i] == byteOrderMark)
1038 closeBrace++;
1039 }
1040
1041 ASSERT(openBrace < closeBrace);
1042
1043 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1044 }
1045
1046 } // namespace JSC