2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
27 #include "JSFunction.h"
29 #include "JSGlobalObjectFunctions.h"
30 #include "Identifier.h"
37 #include <wtf/Assertions.h>
40 using namespace Unicode
;
43 #include "KeywordLookup.h"
45 #include "Lexer.lut.h"
51 // Types for the main switch
53 // The first three types are fixed, and also used for identifying
54 // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
55 CharacterIdentifierStart
,
60 CharacterLineTerminator
,
61 CharacterExclamationMark
,
65 CharacterCloseBracket
,
89 // Other types (only one so far)
94 static const unsigned short typesOfASCIICharacters
[128] = {
95 /* 0 - Null */ CharacterInvalid
,
96 /* 1 - Start of Heading */ CharacterInvalid
,
97 /* 2 - Start of Text */ CharacterInvalid
,
98 /* 3 - End of Text */ CharacterInvalid
,
99 /* 4 - End of Transm. */ CharacterInvalid
,
100 /* 5 - Enquiry */ CharacterInvalid
,
101 /* 6 - Acknowledgment */ CharacterInvalid
,
102 /* 7 - Bell */ CharacterInvalid
,
103 /* 8 - Back Space */ CharacterInvalid
,
104 /* 9 - Horizontal Tab */ CharacterWhiteSpace
,
105 /* 10 - Line Feed */ CharacterLineTerminator
,
106 /* 11 - Vertical Tab */ CharacterWhiteSpace
,
107 /* 12 - Form Feed */ CharacterWhiteSpace
,
108 /* 13 - Carriage Return */ CharacterLineTerminator
,
109 /* 14 - Shift Out */ CharacterInvalid
,
110 /* 15 - Shift In */ CharacterInvalid
,
111 /* 16 - Data Line Escape */ CharacterInvalid
,
112 /* 17 - Device Control 1 */ CharacterInvalid
,
113 /* 18 - Device Control 2 */ CharacterInvalid
,
114 /* 19 - Device Control 3 */ CharacterInvalid
,
115 /* 20 - Device Control 4 */ CharacterInvalid
,
116 /* 21 - Negative Ack. */ CharacterInvalid
,
117 /* 22 - Synchronous Idle */ CharacterInvalid
,
118 /* 23 - End of Transmit */ CharacterInvalid
,
119 /* 24 - Cancel */ CharacterInvalid
,
120 /* 25 - End of Medium */ CharacterInvalid
,
121 /* 26 - Substitute */ CharacterInvalid
,
122 /* 27 - Escape */ CharacterInvalid
,
123 /* 28 - File Separator */ CharacterInvalid
,
124 /* 29 - Group Separator */ CharacterInvalid
,
125 /* 30 - Record Separator */ CharacterInvalid
,
126 /* 31 - Unit Separator */ CharacterInvalid
,
127 /* 32 - Space */ CharacterWhiteSpace
,
128 /* 33 - ! */ CharacterExclamationMark
,
129 /* 34 - " */ CharacterQuote
,
130 /* 35 - # */ CharacterInvalid
,
131 /* 36 - $ */ CharacterIdentifierStart
,
132 /* 37 - % */ CharacterModulo
,
133 /* 38 - & */ CharacterAnd
,
134 /* 39 - ' */ CharacterQuote
,
135 /* 40 - ( */ CharacterOpenParen
,
136 /* 41 - ) */ CharacterCloseParen
,
137 /* 42 - * */ CharacterMultiply
,
138 /* 43 - + */ CharacterAdd
,
139 /* 44 - , */ CharacterComma
,
140 /* 45 - - */ CharacterSub
,
141 /* 46 - . */ CharacterDot
,
142 /* 47 - / */ CharacterSlash
,
143 /* 48 - 0 */ CharacterZero
,
144 /* 49 - 1 */ CharacterNumber
,
145 /* 50 - 2 */ CharacterNumber
,
146 /* 51 - 3 */ CharacterNumber
,
147 /* 52 - 4 */ CharacterNumber
,
148 /* 53 - 5 */ CharacterNumber
,
149 /* 54 - 6 */ CharacterNumber
,
150 /* 55 - 7 */ CharacterNumber
,
151 /* 56 - 8 */ CharacterNumber
,
152 /* 57 - 9 */ CharacterNumber
,
153 /* 58 - : */ CharacterColon
,
154 /* 59 - ; */ CharacterSemicolon
,
155 /* 60 - < */ CharacterLess
,
156 /* 61 - = */ CharacterEqual
,
157 /* 62 - > */ CharacterGreater
,
158 /* 63 - ? */ CharacterQuestion
,
159 /* 64 - @ */ CharacterInvalid
,
160 /* 65 - A */ CharacterIdentifierStart
,
161 /* 66 - B */ CharacterIdentifierStart
,
162 /* 67 - C */ CharacterIdentifierStart
,
163 /* 68 - D */ CharacterIdentifierStart
,
164 /* 69 - E */ CharacterIdentifierStart
,
165 /* 70 - F */ CharacterIdentifierStart
,
166 /* 71 - G */ CharacterIdentifierStart
,
167 /* 72 - H */ CharacterIdentifierStart
,
168 /* 73 - I */ CharacterIdentifierStart
,
169 /* 74 - J */ CharacterIdentifierStart
,
170 /* 75 - K */ CharacterIdentifierStart
,
171 /* 76 - L */ CharacterIdentifierStart
,
172 /* 77 - M */ CharacterIdentifierStart
,
173 /* 78 - N */ CharacterIdentifierStart
,
174 /* 79 - O */ CharacterIdentifierStart
,
175 /* 80 - P */ CharacterIdentifierStart
,
176 /* 81 - Q */ CharacterIdentifierStart
,
177 /* 82 - R */ CharacterIdentifierStart
,
178 /* 83 - S */ CharacterIdentifierStart
,
179 /* 84 - T */ CharacterIdentifierStart
,
180 /* 85 - U */ CharacterIdentifierStart
,
181 /* 86 - V */ CharacterIdentifierStart
,
182 /* 87 - W */ CharacterIdentifierStart
,
183 /* 88 - X */ CharacterIdentifierStart
,
184 /* 89 - Y */ CharacterIdentifierStart
,
185 /* 90 - Z */ CharacterIdentifierStart
,
186 /* 91 - [ */ CharacterOpenBracket
,
187 /* 92 - \ */ CharacterBackSlash
,
188 /* 93 - ] */ CharacterCloseBracket
,
189 /* 94 - ^ */ CharacterXor
,
190 /* 95 - _ */ CharacterIdentifierStart
,
191 /* 96 - ` */ CharacterInvalid
,
192 /* 97 - a */ CharacterIdentifierStart
,
193 /* 98 - b */ CharacterIdentifierStart
,
194 /* 99 - c */ CharacterIdentifierStart
,
195 /* 100 - d */ CharacterIdentifierStart
,
196 /* 101 - e */ CharacterIdentifierStart
,
197 /* 102 - f */ CharacterIdentifierStart
,
198 /* 103 - g */ CharacterIdentifierStart
,
199 /* 104 - h */ CharacterIdentifierStart
,
200 /* 105 - i */ CharacterIdentifierStart
,
201 /* 106 - j */ CharacterIdentifierStart
,
202 /* 107 - k */ CharacterIdentifierStart
,
203 /* 108 - l */ CharacterIdentifierStart
,
204 /* 109 - m */ CharacterIdentifierStart
,
205 /* 110 - n */ CharacterIdentifierStart
,
206 /* 111 - o */ CharacterIdentifierStart
,
207 /* 112 - p */ CharacterIdentifierStart
,
208 /* 113 - q */ CharacterIdentifierStart
,
209 /* 114 - r */ CharacterIdentifierStart
,
210 /* 115 - s */ CharacterIdentifierStart
,
211 /* 116 - t */ CharacterIdentifierStart
,
212 /* 117 - u */ CharacterIdentifierStart
,
213 /* 118 - v */ CharacterIdentifierStart
,
214 /* 119 - w */ CharacterIdentifierStart
,
215 /* 120 - x */ CharacterIdentifierStart
,
216 /* 121 - y */ CharacterIdentifierStart
,
217 /* 122 - z */ CharacterIdentifierStart
,
218 /* 123 - { */ CharacterOpenBrace
,
219 /* 124 - | */ CharacterOr
,
220 /* 125 - } */ CharacterCloseBrace
,
221 /* 126 - ~ */ CharacterTilde
,
222 /* 127 - Delete */ CharacterInvalid
,
225 Lexer::Lexer(JSGlobalData
* globalData
)
226 : m_isReparsing(false)
227 , m_globalData(globalData
)
228 , m_keywordTable(JSC::mainTable
)
234 m_keywordTable
.deleteTable();
237 ALWAYS_INLINE
const UChar
* Lexer::currentCharacter() const
239 ASSERT(m_code
<= m_codeEnd
);
243 ALWAYS_INLINE
int Lexer::currentOffset() const
245 return currentCharacter() - m_codeStart
;
248 void Lexer::setCode(const SourceCode
& source
, ParserArena
& arena
)
250 m_arena
= &arena
.identifierArena();
252 m_lineNumber
= source
.firstLine();
256 const UChar
* data
= source
.provider()->data();
260 m_code
= data
+ source
.startOffset();
261 m_codeEnd
= data
+ source
.endOffset();
263 m_atLineStart
= true;
265 m_buffer8
.reserveInitialCapacity(initialReadBufferCapacity
);
266 m_buffer16
.reserveInitialCapacity((m_codeEnd
- m_code
) / 2);
268 if (LIKELY(m_code
< m_codeEnd
))
272 ASSERT(currentOffset() == source
.startOffset());
275 template <int shiftAmount
, Lexer::ShiftType shouldBoundsCheck
> ALWAYS_INLINE
void Lexer::internalShift()
277 if (shouldBoundsCheck
== DoBoundsCheck
) {
278 // Faster than an if-else sequence
279 ASSERT(m_current
!= -1);
281 m_code
+= shiftAmount
;
282 if (LIKELY(m_code
< m_codeEnd
))
285 m_code
+= shiftAmount
;
290 ALWAYS_INLINE
void Lexer::shift()
292 internalShift
<1, DoBoundsCheck
>();
295 ALWAYS_INLINE
int Lexer::peek(int offset
)
297 // Only use if necessary
298 ASSERT(offset
> 0 && offset
< 5);
299 const UChar
* code
= m_code
+ offset
;
300 return (code
< m_codeEnd
) ? *code
: -1;
303 int Lexer::getUnicodeCharacter()
309 if (UNLIKELY(!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(char1
) || !isASCIIHexDigit(char2
) || !isASCIIHexDigit(char3
)))
312 int result
= convertUnicode(m_current
, char1
, char2
, char3
);
320 void Lexer::shiftLineTerminator()
322 ASSERT(isLineTerminator(m_current
));
324 int m_prev
= m_current
;
327 // Allow both CRLF and LFCR.
328 if (m_prev
+ m_current
== '\n' + '\r')
334 ALWAYS_INLINE
bool Lexer::lastTokenWasRestrKeyword() const
336 return m_lastToken
== CONTINUE
|| m_lastToken
== BREAK
|| m_lastToken
== RETURN
|| m_lastToken
== THROW
;
339 static NEVER_INLINE
bool isNonASCIIIdentStart(int c
)
341 return category(c
) & (Letter_Uppercase
| Letter_Lowercase
| Letter_Titlecase
| Letter_Modifier
| Letter_Other
);
344 static inline bool isIdentStart(int c
)
346 return isASCII(c
) ? typesOfASCIICharacters
[c
] == CharacterIdentifierStart
: isNonASCIIIdentStart(c
);
349 static NEVER_INLINE
bool isNonASCIIIdentPart(int c
)
351 return category(c
) & (Letter_Uppercase
| Letter_Lowercase
| Letter_Titlecase
| Letter_Modifier
| Letter_Other
352 | Mark_NonSpacing
| Mark_SpacingCombining
| Number_DecimalDigit
| Punctuation_Connector
);
355 static ALWAYS_INLINE
bool isIdentPart(int c
)
357 // Character types are divided into two groups depending on whether they can be part of an
358 // identifier or not. Those whose type value is less or equal than CharacterNumber can be
359 // part of an identifier. (See the CharacterType definition for more details.)
360 return isASCII(c
) ? typesOfASCIICharacters
[c
] <= CharacterNumber
: isNonASCIIIdentPart(c
);
363 static inline int singleEscape(int c
)
389 inline void Lexer::record8(int c
)
393 m_buffer8
.append(static_cast<char>(c
));
396 inline void Lexer::record16(UChar c
)
398 m_buffer16
.append(c
);
401 inline void Lexer::record16(int c
)
404 ASSERT(c
<= USHRT_MAX
);
405 record16(UChar(static_cast<unsigned short>(c
)));
408 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType
Lexer::parseIdentifier(JSTokenData
* tokenData
, unsigned lexType
)
410 const ptrdiff_t remaining
= m_codeEnd
- m_code
;
411 if ((remaining
>= maxTokenLength
) && !(lexType
& IgnoreReservedWords
)) {
412 JSTokenType keyword
= parseKeyword
<shouldCreateIdentifier
>(tokenData
);
413 if (keyword
!= IDENT
) {
414 ASSERT((!shouldCreateIdentifier
) || tokenData
->ident
);
418 const UChar
* identifierStart
= currentCharacter();
419 bool bufferRequired
= false;
422 if (LIKELY(isIdentPart(m_current
))) {
426 if (LIKELY(m_current
!= '\\'))
429 // \uXXXX unicode characters.
430 bufferRequired
= true;
431 if (identifierStart
!= currentCharacter())
432 m_buffer16
.append(identifierStart
, currentCharacter() - identifierStart
);
434 if (UNLIKELY(m_current
!= 'u'))
437 int character
= getUnicodeCharacter();
438 if (UNLIKELY(character
== -1))
440 if (UNLIKELY(m_buffer16
.size() ? !isIdentPart(character
) : !isIdentStart(character
)))
442 if (shouldCreateIdentifier
)
444 identifierStart
= currentCharacter();
447 int identifierLength
;
448 const Identifier
* ident
= 0;
449 if (shouldCreateIdentifier
) {
451 identifierLength
= currentCharacter() - identifierStart
;
453 if (identifierStart
!= currentCharacter())
454 m_buffer16
.append(identifierStart
, currentCharacter() - identifierStart
);
455 identifierStart
= m_buffer16
.data();
456 identifierLength
= m_buffer16
.size();
459 ident
= makeIdentifier(identifierStart
, identifierLength
);
460 tokenData
->ident
= ident
;
462 tokenData
->ident
= 0;
466 if (LIKELY(!bufferRequired
&& !(lexType
& IgnoreReservedWords
))) {
467 ASSERT(shouldCreateIdentifier
);
468 // Keywords must not be recognized if there was an \uXXXX in the identifier.
469 if (remaining
< maxTokenLength
) {
470 const HashEntry
* entry
= m_keywordTable
.entry(m_globalData
, *ident
);
471 ASSERT((remaining
< maxTokenLength
) || !entry
);
472 return entry
? static_cast<JSTokenType
>(entry
->lexerValue()) : IDENT
;
477 m_buffer16
.resize(0);
481 bool Lexer::isKeyword(const Identifier
& ident
)
483 return m_keywordTable
.entry(m_globalData
, ident
);
486 template <bool shouldBuildStrings
> ALWAYS_INLINE
bool Lexer::parseString(JSTokenData
* tokenData
, bool strictMode
)
488 int stringQuoteCharacter
= m_current
;
491 const UChar
* stringStart
= currentCharacter();
493 while (m_current
!= stringQuoteCharacter
) {
494 if (UNLIKELY(m_current
== '\\')) {
495 if (stringStart
!= currentCharacter() && shouldBuildStrings
)
496 m_buffer16
.append(stringStart
, currentCharacter() - stringStart
);
499 int escape
= singleEscape(m_current
);
501 // Most common escape sequences first
503 if (shouldBuildStrings
)
506 } else if (UNLIKELY(isLineTerminator(m_current
)))
507 shiftLineTerminator();
508 else if (m_current
== 'x') {
510 if (isASCIIHexDigit(m_current
) && isASCIIHexDigit(peek(1))) {
511 int prev
= m_current
;
513 if (shouldBuildStrings
)
514 record16(convertHex(prev
, m_current
));
516 } else if (shouldBuildStrings
)
518 } else if (m_current
== 'u') {
520 int character
= getUnicodeCharacter();
521 if (character
!= -1) {
522 if (shouldBuildStrings
)
524 } else if (m_current
== stringQuoteCharacter
) {
525 if (shouldBuildStrings
)
527 } else // Only stringQuoteCharacter allowed after \u
529 } else if (strictMode
&& isASCIIDigit(m_current
)) {
530 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
531 int character1
= m_current
;
533 if (character1
!= '0' || isASCIIDigit(m_current
))
535 if (shouldBuildStrings
)
537 } else if (!strictMode
&& isASCIIOctalDigit(m_current
)) {
538 // Octal character sequences
539 int character1
= m_current
;
541 if (isASCIIOctalDigit(m_current
)) {
542 // Two octal characters
543 int character2
= m_current
;
545 if (character1
>= '0' && character1
<= '3' && isASCIIOctalDigit(m_current
)) {
546 if (shouldBuildStrings
)
547 record16((character1
- '0') * 64 + (character2
- '0') * 8 + m_current
- '0');
550 if (shouldBuildStrings
)
551 record16((character1
- '0') * 8 + character2
- '0');
554 if (shouldBuildStrings
)
555 record16(character1
- '0');
557 } else if (m_current
!= -1) {
558 if (shouldBuildStrings
)
564 stringStart
= currentCharacter();
567 // Fast check for characters that require special handling.
568 // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
569 // as possible, and lets through all common ASCII characters.
570 if (UNLIKELY(((static_cast<unsigned>(m_current
) - 0xE) & 0x2000))) {
571 // New-line or end of input is not allowed
572 if (UNLIKELY(isLineTerminator(m_current
)) || UNLIKELY(m_current
== -1))
574 // Anything else is just a normal character
579 if (currentCharacter() != stringStart
&& shouldBuildStrings
)
580 m_buffer16
.append(stringStart
, currentCharacter() - stringStart
);
581 if (shouldBuildStrings
)
582 tokenData
->ident
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size());
584 tokenData
->ident
= 0;
586 m_buffer16
.resize(0);
590 ALWAYS_INLINE
void Lexer::parseHex(double& returnValue
)
592 // Optimization: most hexadecimal values fit into 4 bytes.
593 uint32_t hexValue
= 0;
594 int maximumDigits
= 7;
596 // Shift out the 'x' prefix.
600 hexValue
= (hexValue
<< 4) + toASCIIHexValue(m_current
);
603 } while (isASCIIHexDigit(m_current
) && maximumDigits
>= 0);
605 if (maximumDigits
>= 0) {
606 returnValue
= hexValue
;
610 // No more place in the hexValue buffer.
611 // The values are shifted out and placed into the m_buffer8 vector.
612 for (int i
= 0; i
< 8; ++i
) {
613 int digit
= hexValue
>> 28;
615 record8(digit
+ '0');
617 record8(digit
- 10 + 'a');
621 while (isASCIIHexDigit(m_current
)) {
626 returnValue
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 16);
629 ALWAYS_INLINE
bool Lexer::parseOctal(double& returnValue
)
631 // Optimization: most octal values fit into 4 bytes.
632 uint32_t octalValue
= 0;
633 int maximumDigits
= 9;
634 // Temporary buffer for the digits. Makes easier
635 // to reconstruct the input characters when needed.
639 octalValue
= octalValue
* 8 + (m_current
- '0');
640 digits
[maximumDigits
] = m_current
;
643 } while (isASCIIOctalDigit(m_current
) && maximumDigits
>= 0);
645 if (!isASCIIDigit(m_current
) && maximumDigits
>= 0) {
646 returnValue
= octalValue
;
650 for (int i
= 9; i
> maximumDigits
; --i
)
653 while (isASCIIOctalDigit(m_current
)) {
658 if (isASCIIDigit(m_current
))
661 returnValue
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 8);
665 ALWAYS_INLINE
bool Lexer::parseDecimal(double& returnValue
)
667 // Optimization: most decimal values fit into 4 bytes.
668 uint32_t decimalValue
= 0;
670 // Since parseOctal may be executed before parseDecimal,
671 // the m_buffer8 may hold ascii digits.
672 if (!m_buffer8
.size()) {
673 int maximumDigits
= 9;
674 // Temporary buffer for the digits. Makes easier
675 // to reconstruct the input characters when needed.
679 decimalValue
= decimalValue
* 10 + (m_current
- '0');
680 digits
[maximumDigits
] = m_current
;
683 } while (isASCIIDigit(m_current
) && maximumDigits
>= 0);
685 if (maximumDigits
>= 0 && m_current
!= '.' && (m_current
| 0x20) != 'e') {
686 returnValue
= decimalValue
;
690 for (int i
= 9; i
> maximumDigits
; --i
)
694 while (isASCIIDigit(m_current
)) {
702 ALWAYS_INLINE
void Lexer::parseNumberAfterDecimalPoint()
705 while (isASCIIDigit(m_current
)) {
711 ALWAYS_INLINE
bool Lexer::parseNumberAfterExponentIndicator()
715 if (m_current
== '+' || m_current
== '-') {
720 if (!isASCIIDigit(m_current
))
726 } while (isASCIIDigit(m_current
));
730 ALWAYS_INLINE
bool Lexer::parseMultilineComment()
733 while (UNLIKELY(m_current
== '*')) {
735 if (m_current
== '/') {
741 if (UNLIKELY(m_current
== -1))
744 if (isLineTerminator(m_current
))
745 shiftLineTerminator();
751 bool Lexer::nextTokenIsColon()
753 const UChar
* code
= m_code
;
754 while (code
< m_codeEnd
&& (isWhiteSpace(*code
) || isLineTerminator(*code
)))
757 return code
< m_codeEnd
&& *code
== ':';
760 JSTokenType
Lexer::lex(JSTokenData
* tokenData
, JSTokenInfo
* tokenInfo
, unsigned lexType
, bool strictMode
)
763 ASSERT(m_buffer8
.isEmpty());
764 ASSERT(m_buffer16
.isEmpty());
766 JSTokenType token
= ERRORTOK
;
767 m_terminator
= false;
770 while (isWhiteSpace(m_current
))
773 int startOffset
= currentOffset();
775 if (UNLIKELY(m_current
== -1))
781 if (LIKELY(isASCII(m_current
)))
782 type
= static_cast<CharacterType
>(typesOfASCIICharacters
[m_current
]);
783 else if (isNonASCIIIdentStart(m_current
))
784 type
= CharacterIdentifierStart
;
785 else if (isLineTerminator(m_current
))
786 type
= CharacterLineTerminator
;
788 type
= CharacterInvalid
;
791 case CharacterGreater
:
793 if (m_current
== '>') {
795 if (m_current
== '>') {
797 if (m_current
== '=') {
799 token
= URSHIFTEQUAL
;
805 if (m_current
== '=') {
813 if (m_current
== '=') {
822 if (m_current
== '=') {
824 if (m_current
== '=') {
836 if (m_current
== '!' && peek(1) == '-' && peek(2) == '-') {
837 // <!-- marks the beginning of a line comment (for www usage)
838 goto inSingleLineComment
;
840 if (m_current
== '<') {
842 if (m_current
== '=') {
850 if (m_current
== '=') {
857 case CharacterExclamationMark
:
859 if (m_current
== '=') {
861 if (m_current
== '=') {
873 if (m_current
== '+') {
875 token
= (!m_terminator
) ? PLUSPLUS
: AUTOPLUSPLUS
;
878 if (m_current
== '=') {
887 if (m_current
== '-') {
889 if (m_atLineStart
&& m_current
== '>') {
891 goto inSingleLineComment
;
893 token
= (!m_terminator
) ? MINUSMINUS
: AUTOMINUSMINUS
;
896 if (m_current
== '=') {
903 case CharacterMultiply
:
905 if (m_current
== '=') {
914 if (m_current
== '/') {
916 goto inSingleLineComment
;
918 if (m_current
== '*') {
920 if (parseMultilineComment())
924 if (m_current
== '=') {
933 if (m_current
== '&') {
938 if (m_current
== '=') {
947 if (m_current
== '=') {
954 case CharacterModulo
:
956 if (m_current
== '=') {
965 if (m_current
== '=') {
970 if (m_current
== '|') {
977 case CharacterOpenParen
:
981 case CharacterCloseParen
:
985 case CharacterOpenBracket
:
989 case CharacterCloseBracket
:
990 token
= CLOSEBRACKET
;
1001 case CharacterQuestion
:
1005 case CharacterTilde
:
1009 case CharacterSemicolon
:
1014 case CharacterOpenBrace
:
1015 tokenData
->intValue
= currentOffset();
1019 case CharacterCloseBrace
:
1020 tokenData
->intValue
= currentOffset();
1027 if (!isASCIIDigit(m_current
)) {
1031 goto inNumberAfterDecimalPoint
;
1034 if ((m_current
| 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
1035 parseHex(tokenData
->doubleValue
);
1039 if (isASCIIOctalDigit(m_current
)) {
1040 if (parseOctal(tokenData
->doubleValue
)) {
1047 // Fall through into CharacterNumber
1048 case CharacterNumber
:
1049 if (LIKELY(token
!= NUMBER
)) {
1050 if (!parseDecimal(tokenData
->doubleValue
)) {
1051 if (m_current
== '.') {
1053 inNumberAfterDecimalPoint
:
1054 parseNumberAfterDecimalPoint();
1056 if ((m_current
| 0x20) == 'e')
1057 if (!parseNumberAfterExponentIndicator())
1059 // Null-terminate string for strtod.
1060 m_buffer8
.append('\0');
1061 tokenData
->doubleValue
= WTF::strtod(m_buffer8
.data(), 0);
1066 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
1067 if (UNLIKELY(isIdentStart(m_current
)))
1069 m_buffer8
.resize(0);
1070 m_delimited
= false;
1072 case CharacterQuote
:
1073 if (lexType
& DontBuildStrings
) {
1074 if (UNLIKELY(!parseString
<false>(tokenData
, strictMode
)))
1077 if (UNLIKELY(!parseString
<true>(tokenData
, strictMode
)))
1081 m_delimited
= false;
1084 case CharacterIdentifierStart
:
1085 ASSERT(isIdentStart(m_current
));
1086 // Fall through into CharacterBackSlash.
1087 case CharacterBackSlash
:
1088 if (lexType
& DontBuildKeywords
)
1089 token
= parseIdentifier
<false>(tokenData
, lexType
);
1091 token
= parseIdentifier
<true>(tokenData
, lexType
);
1093 case CharacterLineTerminator
:
1094 ASSERT(isLineTerminator(m_current
));
1095 shiftLineTerminator();
1096 m_atLineStart
= true;
1097 m_terminator
= true;
1099 case CharacterInvalid
:
1102 ASSERT_NOT_REACHED();
1106 m_atLineStart
= false;
1109 inSingleLineComment
:
1110 while (!isLineTerminator(m_current
)) {
1111 if (UNLIKELY(m_current
== -1))
1115 shiftLineTerminator();
1116 m_atLineStart
= true;
1117 m_terminator
= true;
1118 if (!lastTokenWasRestrKeyword())
1123 // Fall through into returnToken.
1126 tokenInfo
->line
= m_lineNumber
;
1127 tokenInfo
->startOffset
= startOffset
;
1128 tokenInfo
->endOffset
= currentOffset();
1129 m_lastToken
= token
;
1137 bool Lexer::scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
)
1139 ASSERT(m_buffer16
.isEmpty());
1141 bool lastWasEscape
= false;
1142 bool inBrackets
= false;
1144 if (patternPrefix
) {
1145 ASSERT(!isLineTerminator(patternPrefix
));
1146 ASSERT(patternPrefix
!= '/');
1147 ASSERT(patternPrefix
!= '[');
1148 record16(patternPrefix
);
1152 int current
= m_current
;
1154 if (isLineTerminator(current
) || current
== -1) {
1155 m_buffer16
.resize(0);
1161 if (current
== '/' && !lastWasEscape
&& !inBrackets
)
1166 if (lastWasEscape
) {
1167 lastWasEscape
= false;
1179 lastWasEscape
= true;
1184 pattern
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size());
1185 m_buffer16
.resize(0);
1187 while (isIdentPart(m_current
)) {
1188 record16(m_current
);
1192 flags
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size());
1193 m_buffer16
.resize(0);
1198 bool Lexer::skipRegExp()
1200 bool lastWasEscape
= false;
1201 bool inBrackets
= false;
1204 int current
= m_current
;
1206 if (isLineTerminator(current
) || current
== -1)
1211 if (current
== '/' && !lastWasEscape
&& !inBrackets
)
1214 if (lastWasEscape
) {
1215 lastWasEscape
= false;
1227 lastWasEscape
= true;
1232 while (isIdentPart(m_current
))
1242 Vector
<char> newBuffer8
;
1243 m_buffer8
.swap(newBuffer8
);
1245 Vector
<UChar
> newBuffer16
;
1246 m_buffer16
.swap(newBuffer16
);
1248 m_isReparsing
= false;
1251 SourceCode
Lexer::sourceCode(int openBrace
, int closeBrace
, int firstLine
)
1253 ASSERT(m_source
->provider()->data()[openBrace
] == '{');
1254 ASSERT(m_source
->provider()->data()[closeBrace
] == '}');
1255 return SourceCode(m_source
->provider(), openBrace
, closeBrace
+ 1, firstLine
);