]>
Commit | Line | Data |
---|---|---|
9dae56ea A |
1 | /* |
2 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) | |
3 | * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved. | |
4 | * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) | |
14957cd0 | 5 | * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu) |
9dae56ea A |
6 | * |
7 | * This library is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Library General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2 of the License, or (at your option) any later version. | |
11 | * | |
12 | * This library is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Library General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Library General Public License | |
18 | * along with this library; see the file COPYING.LIB. If not, write to | |
19 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
20 | * Boston, MA 02110-1301, USA. | |
21 | * | |
22 | */ | |
23 | ||
24 | #include "config.h" | |
25 | #include "Lexer.h" | |
26 | ||
27 | #include "JSFunction.h" | |
14957cd0 | 28 | |
9dae56ea | 29 | #include "JSGlobalObjectFunctions.h" |
14957cd0 | 30 | #include "Identifier.h" |
9dae56ea A |
31 | #include "NodeInfo.h" |
32 | #include "Nodes.h" | |
33 | #include "dtoa.h" | |
34 | #include <ctype.h> | |
35 | #include <limits.h> | |
36 | #include <string.h> | |
9dae56ea | 37 | #include <wtf/Assertions.h> |
9dae56ea A |
38 | |
39 | using namespace WTF; | |
40 | using namespace Unicode; | |
41 | ||
14957cd0 A |
42 | #include "JSParser.h" |
43 | #include "KeywordLookup.h" | |
9dae56ea A |
44 | #include "Lookup.h" |
45 | #include "Lexer.lut.h" | |
46 | ||
9dae56ea A |
47 | namespace JSC { |
48 | ||
14957cd0 A |
49 | |
// Classification of a source character, used to drive the lexer's main switch.
enum CharacterType {
    // The order of these first three entries is fixed: isIdentStart and
    // isIdentPart compare against them to recognize ASCII alpha and
    // alphanumeric characters.
    CharacterIdentifierStart,
    CharacterZero,
    CharacterNumber,

    // Characters that cannot appear inside an identifier.
    CharacterInvalid,
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // Arithmetic, bitwise and comparison operator characters.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Remaining types (currently a single entry).
    CharacterWhiteSpace,
};
92 | ||
93 | // 128 ASCII codes | |
94 | static const unsigned short typesOfASCIICharacters[128] = { | |
95 | /* 0 - Null */ CharacterInvalid, | |
96 | /* 1 - Start of Heading */ CharacterInvalid, | |
97 | /* 2 - Start of Text */ CharacterInvalid, | |
98 | /* 3 - End of Text */ CharacterInvalid, | |
99 | /* 4 - End of Transm. */ CharacterInvalid, | |
100 | /* 5 - Enquiry */ CharacterInvalid, | |
101 | /* 6 - Acknowledgment */ CharacterInvalid, | |
102 | /* 7 - Bell */ CharacterInvalid, | |
103 | /* 8 - Back Space */ CharacterInvalid, | |
104 | /* 9 - Horizontal Tab */ CharacterWhiteSpace, | |
105 | /* 10 - Line Feed */ CharacterLineTerminator, | |
106 | /* 11 - Vertical Tab */ CharacterWhiteSpace, | |
107 | /* 12 - Form Feed */ CharacterWhiteSpace, | |
108 | /* 13 - Carriage Return */ CharacterLineTerminator, | |
109 | /* 14 - Shift Out */ CharacterInvalid, | |
110 | /* 15 - Shift In */ CharacterInvalid, | |
111 | /* 16 - Data Line Escape */ CharacterInvalid, | |
112 | /* 17 - Device Control 1 */ CharacterInvalid, | |
113 | /* 18 - Device Control 2 */ CharacterInvalid, | |
114 | /* 19 - Device Control 3 */ CharacterInvalid, | |
115 | /* 20 - Device Control 4 */ CharacterInvalid, | |
116 | /* 21 - Negative Ack. */ CharacterInvalid, | |
117 | /* 22 - Synchronous Idle */ CharacterInvalid, | |
118 | /* 23 - End of Transmit */ CharacterInvalid, | |
119 | /* 24 - Cancel */ CharacterInvalid, | |
120 | /* 25 - End of Medium */ CharacterInvalid, | |
121 | /* 26 - Substitute */ CharacterInvalid, | |
122 | /* 27 - Escape */ CharacterInvalid, | |
123 | /* 28 - File Separator */ CharacterInvalid, | |
124 | /* 29 - Group Separator */ CharacterInvalid, | |
125 | /* 30 - Record Separator */ CharacterInvalid, | |
126 | /* 31 - Unit Separator */ CharacterInvalid, | |
127 | /* 32 - Space */ CharacterWhiteSpace, | |
128 | /* 33 - ! */ CharacterExclamationMark, | |
129 | /* 34 - " */ CharacterQuote, | |
130 | /* 35 - # */ CharacterInvalid, | |
131 | /* 36 - $ */ CharacterIdentifierStart, | |
132 | /* 37 - % */ CharacterModulo, | |
133 | /* 38 - & */ CharacterAnd, | |
134 | /* 39 - ' */ CharacterQuote, | |
135 | /* 40 - ( */ CharacterOpenParen, | |
136 | /* 41 - ) */ CharacterCloseParen, | |
137 | /* 42 - * */ CharacterMultiply, | |
138 | /* 43 - + */ CharacterAdd, | |
139 | /* 44 - , */ CharacterComma, | |
140 | /* 45 - - */ CharacterSub, | |
141 | /* 46 - . */ CharacterDot, | |
142 | /* 47 - / */ CharacterSlash, | |
143 | /* 48 - 0 */ CharacterZero, | |
144 | /* 49 - 1 */ CharacterNumber, | |
145 | /* 50 - 2 */ CharacterNumber, | |
146 | /* 51 - 3 */ CharacterNumber, | |
147 | /* 52 - 4 */ CharacterNumber, | |
148 | /* 53 - 5 */ CharacterNumber, | |
149 | /* 54 - 6 */ CharacterNumber, | |
150 | /* 55 - 7 */ CharacterNumber, | |
151 | /* 56 - 8 */ CharacterNumber, | |
152 | /* 57 - 9 */ CharacterNumber, | |
153 | /* 58 - : */ CharacterColon, | |
154 | /* 59 - ; */ CharacterSemicolon, | |
155 | /* 60 - < */ CharacterLess, | |
156 | /* 61 - = */ CharacterEqual, | |
157 | /* 62 - > */ CharacterGreater, | |
158 | /* 63 - ? */ CharacterQuestion, | |
159 | /* 64 - @ */ CharacterInvalid, | |
160 | /* 65 - A */ CharacterIdentifierStart, | |
161 | /* 66 - B */ CharacterIdentifierStart, | |
162 | /* 67 - C */ CharacterIdentifierStart, | |
163 | /* 68 - D */ CharacterIdentifierStart, | |
164 | /* 69 - E */ CharacterIdentifierStart, | |
165 | /* 70 - F */ CharacterIdentifierStart, | |
166 | /* 71 - G */ CharacterIdentifierStart, | |
167 | /* 72 - H */ CharacterIdentifierStart, | |
168 | /* 73 - I */ CharacterIdentifierStart, | |
169 | /* 74 - J */ CharacterIdentifierStart, | |
170 | /* 75 - K */ CharacterIdentifierStart, | |
171 | /* 76 - L */ CharacterIdentifierStart, | |
172 | /* 77 - M */ CharacterIdentifierStart, | |
173 | /* 78 - N */ CharacterIdentifierStart, | |
174 | /* 79 - O */ CharacterIdentifierStart, | |
175 | /* 80 - P */ CharacterIdentifierStart, | |
176 | /* 81 - Q */ CharacterIdentifierStart, | |
177 | /* 82 - R */ CharacterIdentifierStart, | |
178 | /* 83 - S */ CharacterIdentifierStart, | |
179 | /* 84 - T */ CharacterIdentifierStart, | |
180 | /* 85 - U */ CharacterIdentifierStart, | |
181 | /* 86 - V */ CharacterIdentifierStart, | |
182 | /* 87 - W */ CharacterIdentifierStart, | |
183 | /* 88 - X */ CharacterIdentifierStart, | |
184 | /* 89 - Y */ CharacterIdentifierStart, | |
185 | /* 90 - Z */ CharacterIdentifierStart, | |
186 | /* 91 - [ */ CharacterOpenBracket, | |
187 | /* 92 - \ */ CharacterBackSlash, | |
188 | /* 93 - ] */ CharacterCloseBracket, | |
189 | /* 94 - ^ */ CharacterXor, | |
190 | /* 95 - _ */ CharacterIdentifierStart, | |
191 | /* 96 - ` */ CharacterInvalid, | |
192 | /* 97 - a */ CharacterIdentifierStart, | |
193 | /* 98 - b */ CharacterIdentifierStart, | |
194 | /* 99 - c */ CharacterIdentifierStart, | |
195 | /* 100 - d */ CharacterIdentifierStart, | |
196 | /* 101 - e */ CharacterIdentifierStart, | |
197 | /* 102 - f */ CharacterIdentifierStart, | |
198 | /* 103 - g */ CharacterIdentifierStart, | |
199 | /* 104 - h */ CharacterIdentifierStart, | |
200 | /* 105 - i */ CharacterIdentifierStart, | |
201 | /* 106 - j */ CharacterIdentifierStart, | |
202 | /* 107 - k */ CharacterIdentifierStart, | |
203 | /* 108 - l */ CharacterIdentifierStart, | |
204 | /* 109 - m */ CharacterIdentifierStart, | |
205 | /* 110 - n */ CharacterIdentifierStart, | |
206 | /* 111 - o */ CharacterIdentifierStart, | |
207 | /* 112 - p */ CharacterIdentifierStart, | |
208 | /* 113 - q */ CharacterIdentifierStart, | |
209 | /* 114 - r */ CharacterIdentifierStart, | |
210 | /* 115 - s */ CharacterIdentifierStart, | |
211 | /* 116 - t */ CharacterIdentifierStart, | |
212 | /* 117 - u */ CharacterIdentifierStart, | |
213 | /* 118 - v */ CharacterIdentifierStart, | |
214 | /* 119 - w */ CharacterIdentifierStart, | |
215 | /* 120 - x */ CharacterIdentifierStart, | |
216 | /* 121 - y */ CharacterIdentifierStart, | |
217 | /* 122 - z */ CharacterIdentifierStart, | |
218 | /* 123 - { */ CharacterOpenBrace, | |
219 | /* 124 - | */ CharacterOr, | |
220 | /* 125 - } */ CharacterCloseBrace, | |
221 | /* 126 - ~ */ CharacterTilde, | |
222 | /* 127 - Delete */ CharacterInvalid, | |
223 | }; | |
9dae56ea A |
224 | |
225 | Lexer::Lexer(JSGlobalData* globalData) | |
ba379fdc | 226 | : m_isReparsing(false) |
9dae56ea | 227 | , m_globalData(globalData) |
ba379fdc | 228 | , m_keywordTable(JSC::mainTable) |
9dae56ea | 229 | { |
9dae56ea A |
230 | } |
231 | ||
232 | Lexer::~Lexer() | |
233 | { | |
ba379fdc A |
234 | m_keywordTable.deleteTable(); |
235 | } | |
236 | ||
14957cd0 | 237 | ALWAYS_INLINE const UChar* Lexer::currentCharacter() const |
ba379fdc | 238 | { |
14957cd0 A |
239 | ASSERT(m_code <= m_codeEnd); |
240 | return m_code; | |
ba379fdc A |
241 | } |
242 | ||
14957cd0 | 243 | ALWAYS_INLINE int Lexer::currentOffset() const |
ba379fdc A |
244 | { |
245 | return currentCharacter() - m_codeStart; | |
246 | } | |
247 | ||
f9bf01c6 | 248 | void Lexer::setCode(const SourceCode& source, ParserArena& arena) |
9dae56ea | 249 | { |
f9bf01c6 A |
250 | m_arena = &arena.identifierArena(); |
251 | ||
ba379fdc | 252 | m_lineNumber = source.firstLine(); |
9dae56ea | 253 | m_delimited = false; |
9dae56ea A |
254 | m_lastToken = -1; |
255 | ||
ba379fdc A |
256 | const UChar* data = source.provider()->data(); |
257 | ||
9dae56ea | 258 | m_source = &source; |
ba379fdc A |
259 | m_codeStart = data; |
260 | m_code = data + source.startOffset(); | |
261 | m_codeEnd = data + source.endOffset(); | |
9dae56ea A |
262 | m_error = false; |
263 | m_atLineStart = true; | |
264 | ||
4e4e5a6f A |
265 | m_buffer8.reserveInitialCapacity(initialReadBufferCapacity); |
266 | m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2); | |
267 | ||
14957cd0 A |
268 | if (LIKELY(m_code < m_codeEnd)) |
269 | m_current = *m_code; | |
270 | else | |
271 | m_current = -1; | |
ba379fdc | 272 | ASSERT(currentOffset() == source.startOffset()); |
9dae56ea A |
273 | } |
274 | ||
14957cd0 | 275 | template <int shiftAmount, Lexer::ShiftType shouldBoundsCheck> ALWAYS_INLINE void Lexer::internalShift() |
9dae56ea | 276 | { |
14957cd0 A |
277 | if (shouldBoundsCheck == DoBoundsCheck) { |
278 | // Faster than an if-else sequence | |
279 | ASSERT(m_current != -1); | |
280 | m_current = -1; | |
281 | m_code += shiftAmount; | |
282 | if (LIKELY(m_code < m_codeEnd)) | |
283 | m_current = *m_code; | |
284 | } else { | |
285 | m_code += shiftAmount; | |
286 | m_current = *m_code; | |
ba379fdc | 287 | } |
14957cd0 A |
288 | } |
289 | ||
290 | ALWAYS_INLINE void Lexer::shift() | |
291 | { | |
292 | internalShift<1, DoBoundsCheck>(); | |
293 | } | |
294 | ||
295 | ALWAYS_INLINE int Lexer::peek(int offset) | |
296 | { | |
297 | // Only use if necessary | |
298 | ASSERT(offset > 0 && offset < 5); | |
299 | const UChar* code = m_code + offset; | |
300 | return (code < m_codeEnd) ? *code : -1; | |
301 | } | |
302 | ||
303 | int Lexer::getUnicodeCharacter() | |
304 | { | |
305 | int char1 = peek(1); | |
306 | int char2 = peek(2); | |
307 | int char3 = peek(3); | |
308 | ||
309 | if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3))) | |
310 | return -1; | |
311 | ||
312 | int result = convertUnicode(m_current, char1, char2, char3); | |
313 | shift(); | |
314 | shift(); | |
315 | shift(); | |
316 | shift(); | |
317 | return result; | |
ba379fdc A |
318 | } |
319 | ||
320 | void Lexer::shiftLineTerminator() | |
321 | { | |
322 | ASSERT(isLineTerminator(m_current)); | |
323 | ||
14957cd0 A |
324 | int m_prev = m_current; |
325 | shift(); | |
326 | ||
ba379fdc | 327 | // Allow both CRLF and LFCR. |
14957cd0 A |
328 | if (m_prev + m_current == '\n' + '\r') |
329 | shift(); | |
ba379fdc A |
330 | |
331 | ++m_lineNumber; | |
332 | } | |
333 | ||
14957cd0 | 334 | ALWAYS_INLINE bool Lexer::lastTokenWasRestrKeyword() const |
ba379fdc A |
335 | { |
336 | return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW; | |
337 | } | |
338 | ||
339 | static NEVER_INLINE bool isNonASCIIIdentStart(int c) | |
340 | { | |
341 | return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other); | |
342 | } | |
343 | ||
344 | static inline bool isIdentStart(int c) | |
345 | { | |
14957cd0 | 346 | return isASCII(c) ? typesOfASCIICharacters[c] == CharacterIdentifierStart : isNonASCIIIdentStart(c); |
ba379fdc A |
347 | } |
348 | ||
349 | static NEVER_INLINE bool isNonASCIIIdentPart(int c) | |
350 | { | |
351 | return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other | |
352 | | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector); | |
353 | } | |
354 | ||
14957cd0 | 355 | static ALWAYS_INLINE bool isIdentPart(int c) |
ba379fdc | 356 | { |
14957cd0 A |
357 | // Character types are divided into two groups depending on whether they can be part of an |
358 | // identifier or not. Those whose type value is less or equal than CharacterNumber can be | |
359 | // part of an identifier. (See the CharacterType definition for more details.) | |
360 | return isASCII(c) ? typesOfASCIICharacters[c] <= CharacterNumber : isNonASCIIIdentPart(c); | |
ba379fdc A |
361 | } |
362 | ||
// Maps the character following a backslash to the character it denotes for
// the single-character escapes (\b \t \n \v \f \r \\ \' \").
// Returns 0 when |c| is not one of those escapes.
static inline int singleEscape(int c)
{
    // Escapes that stand for themselves.
    if (c == '\\' || c == '\'' || c == '"')
        return c;

    // Escapes that stand for control characters.
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;

    return 0;
}
388 | ||
ba379fdc | 389 | inline void Lexer::record8(int c) |
9dae56ea | 390 | { |
ba379fdc A |
391 | ASSERT(c >= 0); |
392 | ASSERT(c <= 0xFF); | |
393 | m_buffer8.append(static_cast<char>(c)); | |
9dae56ea A |
394 | } |
395 | ||
ba379fdc | 396 | inline void Lexer::record16(UChar c) |
9dae56ea | 397 | { |
ba379fdc A |
398 | m_buffer16.append(c); |
399 | } | |
400 | ||
401 | inline void Lexer::record16(int c) | |
402 | { | |
403 | ASSERT(c >= 0); | |
404 | ASSERT(c <= USHRT_MAX); | |
405 | record16(UChar(static_cast<unsigned short>(c))); | |
9dae56ea A |
406 | } |
407 | ||
14957cd0 A |
408 | template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* tokenData, unsigned lexType) |
409 | { | |
410 | const ptrdiff_t remaining = m_codeEnd - m_code; | |
411 | if ((remaining >= maxTokenLength) && !(lexType & IgnoreReservedWords)) { | |
412 | JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData); | |
413 | if (keyword != IDENT) { | |
414 | ASSERT((!shouldCreateIdentifier) || tokenData->ident); | |
415 | return keyword; | |
416 | } | |
417 | } | |
418 | const UChar* identifierStart = currentCharacter(); | |
419 | bool bufferRequired = false; | |
420 | ||
421 | while (true) { | |
422 | if (LIKELY(isIdentPart(m_current))) { | |
423 | shift(); | |
424 | continue; | |
425 | } | |
426 | if (LIKELY(m_current != '\\')) | |
427 | break; | |
428 | ||
429 | // \uXXXX unicode characters. | |
430 | bufferRequired = true; | |
431 | if (identifierStart != currentCharacter()) | |
432 | m_buffer16.append(identifierStart, currentCharacter() - identifierStart); | |
433 | shift(); | |
434 | if (UNLIKELY(m_current != 'u')) | |
435 | return ERRORTOK; | |
436 | shift(); | |
437 | int character = getUnicodeCharacter(); | |
438 | if (UNLIKELY(character == -1)) | |
439 | return ERRORTOK; | |
440 | if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character) : !isIdentStart(character))) | |
441 | return ERRORTOK; | |
442 | if (shouldCreateIdentifier) | |
443 | record16(character); | |
444 | identifierStart = currentCharacter(); | |
445 | } | |
446 | ||
447 | int identifierLength; | |
448 | const Identifier* ident = 0; | |
449 | if (shouldCreateIdentifier) { | |
450 | if (!bufferRequired) | |
451 | identifierLength = currentCharacter() - identifierStart; | |
452 | else { | |
453 | if (identifierStart != currentCharacter()) | |
454 | m_buffer16.append(identifierStart, currentCharacter() - identifierStart); | |
455 | identifierStart = m_buffer16.data(); | |
456 | identifierLength = m_buffer16.size(); | |
457 | } | |
458 | ||
459 | ident = makeIdentifier(identifierStart, identifierLength); | |
460 | tokenData->ident = ident; | |
461 | } else | |
462 | tokenData->ident = 0; | |
463 | ||
464 | m_delimited = false; | |
465 | ||
466 | if (LIKELY(!bufferRequired && !(lexType & IgnoreReservedWords))) { | |
467 | ASSERT(shouldCreateIdentifier); | |
468 | // Keywords must not be recognized if there was an \uXXXX in the identifier. | |
469 | if (remaining < maxTokenLength) { | |
470 | const HashEntry* entry = m_keywordTable.entry(m_globalData, *ident); | |
471 | ASSERT((remaining < maxTokenLength) || !entry); | |
472 | return entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT; | |
473 | } | |
474 | return IDENT; | |
475 | } | |
476 | ||
477 | m_buffer16.resize(0); | |
478 | return IDENT; | |
479 | } | |
480 | ||
481 | bool Lexer::isKeyword(const Identifier& ident) | |
482 | { | |
483 | return m_keywordTable.entry(m_globalData, ident); | |
484 | } | |
485 | ||
486 | template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer::parseString(JSTokenData* tokenData, bool strictMode) | |
487 | { | |
488 | int stringQuoteCharacter = m_current; | |
489 | shift(); | |
490 | ||
491 | const UChar* stringStart = currentCharacter(); | |
492 | ||
493 | while (m_current != stringQuoteCharacter) { | |
494 | if (UNLIKELY(m_current == '\\')) { | |
495 | if (stringStart != currentCharacter() && shouldBuildStrings) | |
496 | m_buffer16.append(stringStart, currentCharacter() - stringStart); | |
497 | shift(); | |
498 | ||
499 | int escape = singleEscape(m_current); | |
500 | ||
501 | // Most common escape sequences first | |
502 | if (escape) { | |
503 | if (shouldBuildStrings) | |
504 | record16(escape); | |
505 | shift(); | |
506 | } else if (UNLIKELY(isLineTerminator(m_current))) | |
507 | shiftLineTerminator(); | |
508 | else if (m_current == 'x') { | |
509 | shift(); | |
510 | if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) { | |
511 | int prev = m_current; | |
512 | shift(); | |
513 | if (shouldBuildStrings) | |
514 | record16(convertHex(prev, m_current)); | |
515 | shift(); | |
516 | } else if (shouldBuildStrings) | |
517 | record16('x'); | |
518 | } else if (m_current == 'u') { | |
519 | shift(); | |
520 | int character = getUnicodeCharacter(); | |
521 | if (character != -1) { | |
522 | if (shouldBuildStrings) | |
523 | record16(character); | |
524 | } else if (m_current == stringQuoteCharacter) { | |
525 | if (shouldBuildStrings) | |
526 | record16('u'); | |
527 | } else // Only stringQuoteCharacter allowed after \u | |
528 | return false; | |
529 | } else if (strictMode && isASCIIDigit(m_current)) { | |
530 | // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit. | |
531 | int character1 = m_current; | |
532 | shift(); | |
533 | if (character1 != '0' || isASCIIDigit(m_current)) | |
534 | return false; | |
535 | if (shouldBuildStrings) | |
536 | record16(0); | |
537 | } else if (!strictMode && isASCIIOctalDigit(m_current)) { | |
538 | // Octal character sequences | |
539 | int character1 = m_current; | |
540 | shift(); | |
541 | if (isASCIIOctalDigit(m_current)) { | |
542 | // Two octal characters | |
543 | int character2 = m_current; | |
544 | shift(); | |
545 | if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) { | |
546 | if (shouldBuildStrings) | |
547 | record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0'); | |
548 | shift(); | |
549 | } else { | |
550 | if (shouldBuildStrings) | |
551 | record16((character1 - '0') * 8 + character2 - '0'); | |
552 | } | |
553 | } else { | |
554 | if (shouldBuildStrings) | |
555 | record16(character1 - '0'); | |
556 | } | |
557 | } else if (m_current != -1) { | |
558 | if (shouldBuildStrings) | |
559 | record16(m_current); | |
560 | shift(); | |
561 | } else | |
562 | return false; | |
563 | ||
564 | stringStart = currentCharacter(); | |
565 | continue; | |
566 | } | |
567 | // Fast check for characters that require special handling. | |
568 | // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently | |
569 | // as possible, and lets through all common ASCII characters. | |
570 | if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) { | |
571 | // New-line or end of input is not allowed | |
572 | if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1)) | |
573 | return false; | |
574 | // Anything else is just a normal character | |
575 | } | |
576 | shift(); | |
577 | } | |
578 | ||
579 | if (currentCharacter() != stringStart && shouldBuildStrings) | |
580 | m_buffer16.append(stringStart, currentCharacter() - stringStart); | |
581 | if (shouldBuildStrings) | |
582 | tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); | |
583 | else | |
584 | tokenData->ident = 0; | |
585 | ||
586 | m_buffer16.resize(0); | |
587 | return true; | |
588 | } | |
589 | ||
590 | ALWAYS_INLINE void Lexer::parseHex(double& returnValue) | |
591 | { | |
592 | // Optimization: most hexadecimal values fit into 4 bytes. | |
593 | uint32_t hexValue = 0; | |
594 | int maximumDigits = 7; | |
595 | ||
596 | // Shift out the 'x' prefix. | |
597 | shift(); | |
598 | ||
599 | do { | |
600 | hexValue = (hexValue << 4) + toASCIIHexValue(m_current); | |
601 | shift(); | |
602 | --maximumDigits; | |
603 | } while (isASCIIHexDigit(m_current) && maximumDigits >= 0); | |
604 | ||
605 | if (maximumDigits >= 0) { | |
606 | returnValue = hexValue; | |
607 | return; | |
608 | } | |
609 | ||
610 | // No more place in the hexValue buffer. | |
611 | // The values are shifted out and placed into the m_buffer8 vector. | |
612 | for (int i = 0; i < 8; ++i) { | |
613 | int digit = hexValue >> 28; | |
614 | if (digit < 10) | |
615 | record8(digit + '0'); | |
616 | else | |
617 | record8(digit - 10 + 'a'); | |
618 | hexValue <<= 4; | |
619 | } | |
620 | ||
621 | while (isASCIIHexDigit(m_current)) { | |
622 | record8(m_current); | |
623 | shift(); | |
624 | } | |
625 | ||
626 | returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16); | |
627 | } | |
628 | ||
629 | ALWAYS_INLINE bool Lexer::parseOctal(double& returnValue) | |
630 | { | |
631 | // Optimization: most octal values fit into 4 bytes. | |
632 | uint32_t octalValue = 0; | |
633 | int maximumDigits = 9; | |
634 | // Temporary buffer for the digits. Makes easier | |
635 | // to reconstruct the input characters when needed. | |
636 | char digits[10]; | |
637 | ||
638 | do { | |
639 | octalValue = octalValue * 8 + (m_current - '0'); | |
640 | digits[maximumDigits] = m_current; | |
641 | shift(); | |
642 | --maximumDigits; | |
643 | } while (isASCIIOctalDigit(m_current) && maximumDigits >= 0); | |
644 | ||
645 | if (!isASCIIDigit(m_current) && maximumDigits >= 0) { | |
646 | returnValue = octalValue; | |
647 | return true; | |
648 | } | |
649 | ||
650 | for (int i = 9; i > maximumDigits; --i) | |
651 | record8(digits[i]); | |
652 | ||
653 | while (isASCIIOctalDigit(m_current)) { | |
654 | record8(m_current); | |
655 | shift(); | |
656 | } | |
657 | ||
658 | if (isASCIIDigit(m_current)) | |
659 | return false; | |
660 | ||
661 | returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8); | |
662 | return true; | |
663 | } | |
664 | ||
665 | ALWAYS_INLINE bool Lexer::parseDecimal(double& returnValue) | |
666 | { | |
667 | // Optimization: most decimal values fit into 4 bytes. | |
668 | uint32_t decimalValue = 0; | |
669 | ||
670 | // Since parseOctal may be executed before parseDecimal, | |
671 | // the m_buffer8 may hold ascii digits. | |
672 | if (!m_buffer8.size()) { | |
673 | int maximumDigits = 9; | |
674 | // Temporary buffer for the digits. Makes easier | |
675 | // to reconstruct the input characters when needed. | |
676 | char digits[10]; | |
677 | ||
678 | do { | |
679 | decimalValue = decimalValue * 10 + (m_current - '0'); | |
680 | digits[maximumDigits] = m_current; | |
681 | shift(); | |
682 | --maximumDigits; | |
683 | } while (isASCIIDigit(m_current) && maximumDigits >= 0); | |
684 | ||
685 | if (maximumDigits >= 0 && m_current != '.' && (m_current | 0x20) != 'e') { | |
686 | returnValue = decimalValue; | |
687 | return true; | |
688 | } | |
689 | ||
690 | for (int i = 9; i > maximumDigits; --i) | |
691 | record8(digits[i]); | |
692 | } | |
693 | ||
694 | while (isASCIIDigit(m_current)) { | |
695 | record8(m_current); | |
696 | shift(); | |
697 | } | |
698 | ||
699 | return false; | |
700 | } | |
701 | ||
702 | ALWAYS_INLINE void Lexer::parseNumberAfterDecimalPoint() | |
703 | { | |
704 | record8('.'); | |
705 | while (isASCIIDigit(m_current)) { | |
706 | record8(m_current); | |
707 | shift(); | |
708 | } | |
709 | } | |
710 | ||
711 | ALWAYS_INLINE bool Lexer::parseNumberAfterExponentIndicator() | |
712 | { | |
713 | record8('e'); | |
714 | shift(); | |
715 | if (m_current == '+' || m_current == '-') { | |
716 | record8(m_current); | |
717 | shift(); | |
718 | } | |
719 | ||
720 | if (!isASCIIDigit(m_current)) | |
721 | return false; | |
722 | ||
723 | do { | |
724 | record8(m_current); | |
725 | shift(); | |
726 | } while (isASCIIDigit(m_current)); | |
727 | return true; | |
728 | } | |
729 | ||
730 | ALWAYS_INLINE bool Lexer::parseMultilineComment() | |
731 | { | |
732 | while (true) { | |
733 | while (UNLIKELY(m_current == '*')) { | |
734 | shift(); | |
735 | if (m_current == '/') { | |
736 | shift(); | |
737 | return true; | |
738 | } | |
739 | } | |
740 | ||
741 | if (UNLIKELY(m_current == -1)) | |
742 | return false; | |
743 | ||
744 | if (isLineTerminator(m_current)) | |
745 | shiftLineTerminator(); | |
746 | else | |
747 | shift(); | |
748 | } | |
749 | } | |
750 | ||
751 | bool Lexer::nextTokenIsColon() | |
752 | { | |
753 | const UChar* code = m_code; | |
754 | while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code))) | |
755 | code++; | |
756 | ||
757 | return code < m_codeEnd && *code == ':'; | |
758 | } | |
759 | ||
760 | JSTokenType Lexer::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexType, bool strictMode) | |
9dae56ea | 761 | { |
ba379fdc A |
762 | ASSERT(!m_error); |
763 | ASSERT(m_buffer8.isEmpty()); | |
764 | ASSERT(m_buffer16.isEmpty()); | |
765 | ||
14957cd0 | 766 | JSTokenType token = ERRORTOK; |
9dae56ea | 767 | m_terminator = false; |
ba379fdc A |
768 | |
769 | start: | |
770 | while (isWhiteSpace(m_current)) | |
14957cd0 | 771 | shift(); |
ba379fdc A |
772 | |
773 | int startOffset = currentOffset(); | |
774 | ||
14957cd0 A |
775 | if (UNLIKELY(m_current == -1)) |
776 | return EOFTOK; | |
ba379fdc A |
777 | |
778 | m_delimited = false; | |
14957cd0 A |
779 | |
780 | CharacterType type; | |
781 | if (LIKELY(isASCII(m_current))) | |
782 | type = static_cast<CharacterType>(typesOfASCIICharacters[m_current]); | |
783 | else if (isNonASCIIIdentStart(m_current)) | |
784 | type = CharacterIdentifierStart; | |
785 | else if (isLineTerminator(m_current)) | |
786 | type = CharacterLineTerminator; | |
787 | else | |
788 | type = CharacterInvalid; | |
789 | ||
790 | switch (type) { | |
791 | case CharacterGreater: | |
792 | shift(); | |
793 | if (m_current == '>') { | |
794 | shift(); | |
795 | if (m_current == '>') { | |
796 | shift(); | |
797 | if (m_current == '=') { | |
798 | shift(); | |
ba379fdc A |
799 | token = URSHIFTEQUAL; |
800 | break; | |
9dae56ea | 801 | } |
ba379fdc | 802 | token = URSHIFT; |
9dae56ea | 803 | break; |
ba379fdc | 804 | } |
14957cd0 A |
805 | if (m_current == '=') { |
806 | shift(); | |
807 | token = RSHIFTEQUAL; | |
ba379fdc A |
808 | break; |
809 | } | |
14957cd0 | 810 | token = RSHIFT; |
ba379fdc | 811 | break; |
14957cd0 A |
812 | } |
813 | if (m_current == '=') { | |
814 | shift(); | |
815 | token = GE; | |
ba379fdc | 816 | break; |
14957cd0 A |
817 | } |
818 | token = GT; | |
819 | break; | |
820 | case CharacterEqual: | |
821 | shift(); | |
822 | if (m_current == '=') { | |
823 | shift(); | |
824 | if (m_current == '=') { | |
825 | shift(); | |
826 | token = STREQ; | |
9dae56ea | 827 | break; |
ba379fdc | 828 | } |
14957cd0 | 829 | token = EQEQ; |
ba379fdc | 830 | break; |
14957cd0 A |
831 | } |
832 | token = EQUAL; | |
833 | break; | |
834 | case CharacterLess: | |
835 | shift(); | |
836 | if (m_current == '!' && peek(1) == '-' && peek(2) == '-') { | |
837 | // <!-- marks the beginning of a line comment (for www usage) | |
838 | goto inSingleLineComment; | |
839 | } | |
840 | if (m_current == '<') { | |
841 | shift(); | |
842 | if (m_current == '=') { | |
843 | shift(); | |
844 | token = LSHIFTEQUAL; | |
9dae56ea | 845 | break; |
ba379fdc | 846 | } |
14957cd0 | 847 | token = LSHIFT; |
ba379fdc | 848 | break; |
14957cd0 A |
849 | } |
850 | if (m_current == '=') { | |
851 | shift(); | |
852 | token = LE; | |
ba379fdc | 853 | break; |
14957cd0 A |
854 | } |
855 | token = LT; | |
856 | break; | |
857 | case CharacterExclamationMark: | |
858 | shift(); | |
859 | if (m_current == '=') { | |
860 | shift(); | |
861 | if (m_current == '=') { | |
862 | shift(); | |
863 | token = STRNEQ; | |
9dae56ea | 864 | break; |
ba379fdc | 865 | } |
14957cd0 | 866 | token = NE; |
ba379fdc | 867 | break; |
14957cd0 A |
868 | } |
869 | token = EXCLAMATION; | |
870 | break; | |
871 | case CharacterAdd: | |
872 | shift(); | |
873 | if (m_current == '+') { | |
874 | shift(); | |
875 | token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS; | |
876 | break; | |
877 | } | |
878 | if (m_current == '=') { | |
879 | shift(); | |
880 | token = PLUSEQUAL; | |
ba379fdc | 881 | break; |
14957cd0 A |
882 | } |
883 | token = PLUS; | |
884 | break; | |
885 | case CharacterSub: | |
886 | shift(); | |
887 | if (m_current == '-') { | |
888 | shift(); | |
889 | if (m_atLineStart && m_current == '>') { | |
890 | shift(); | |
ba379fdc A |
891 | goto inSingleLineComment; |
892 | } | |
14957cd0 | 893 | token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS; |
ba379fdc | 894 | break; |
14957cd0 A |
895 | } |
896 | if (m_current == '=') { | |
897 | shift(); | |
898 | token = MINUSEQUAL; | |
ba379fdc | 899 | break; |
14957cd0 A |
900 | } |
901 | token = MINUS; | |
902 | break; | |
903 | case CharacterMultiply: | |
904 | shift(); | |
905 | if (m_current == '=') { | |
906 | shift(); | |
907 | token = MULTEQUAL; | |
ba379fdc | 908 | break; |
14957cd0 A |
909 | } |
910 | token = TIMES; | |
911 | break; | |
912 | case CharacterSlash: | |
913 | shift(); | |
914 | if (m_current == '/') { | |
915 | shift(); | |
916 | goto inSingleLineComment; | |
917 | } | |
918 | if (m_current == '*') { | |
919 | shift(); | |
920 | if (parseMultilineComment()) | |
921 | goto start; | |
922 | goto returnError; | |
923 | } | |
924 | if (m_current == '=') { | |
925 | shift(); | |
926 | token = DIVEQUAL; | |
ba379fdc | 927 | break; |
14957cd0 A |
928 | } |
929 | token = DIVIDE; | |
930 | break; | |
931 | case CharacterAnd: | |
932 | shift(); | |
933 | if (m_current == '&') { | |
934 | shift(); | |
935 | token = AND; | |
ba379fdc | 936 | break; |
14957cd0 A |
937 | } |
938 | if (m_current == '=') { | |
939 | shift(); | |
940 | token = ANDEQUAL; | |
ba379fdc | 941 | break; |
14957cd0 A |
942 | } |
943 | token = BITAND; | |
944 | break; | |
945 | case CharacterXor: | |
946 | shift(); | |
947 | if (m_current == '=') { | |
948 | shift(); | |
949 | token = XOREQUAL; | |
ba379fdc | 950 | break; |
14957cd0 A |
951 | } |
952 | token = BITXOR; | |
953 | break; | |
954 | case CharacterModulo: | |
955 | shift(); | |
956 | if (m_current == '=') { | |
957 | shift(); | |
958 | token = MODEQUAL; | |
ba379fdc | 959 | break; |
14957cd0 A |
960 | } |
961 | token = MOD; | |
962 | break; | |
963 | case CharacterOr: | |
964 | shift(); | |
965 | if (m_current == '=') { | |
966 | shift(); | |
967 | token = OREQUAL; | |
ba379fdc | 968 | break; |
14957cd0 A |
969 | } |
970 | if (m_current == '|') { | |
971 | shift(); | |
972 | token = OR; | |
ba379fdc | 973 | break; |
14957cd0 A |
974 | } |
975 | token = BITOR; | |
976 | break; | |
977 | case CharacterOpenParen: | |
978 | token = OPENPAREN; | |
979 | shift(); | |
980 | break; | |
981 | case CharacterCloseParen: | |
982 | token = CLOSEPAREN; | |
983 | shift(); | |
984 | break; | |
985 | case CharacterOpenBracket: | |
986 | token = OPENBRACKET; | |
987 | shift(); | |
988 | break; | |
989 | case CharacterCloseBracket: | |
990 | token = CLOSEBRACKET; | |
991 | shift(); | |
992 | break; | |
993 | case CharacterComma: | |
994 | token = COMMA; | |
995 | shift(); | |
996 | break; | |
997 | case CharacterColon: | |
998 | token = COLON; | |
999 | shift(); | |
1000 | break; | |
1001 | case CharacterQuestion: | |
1002 | token = QUESTION; | |
1003 | shift(); | |
1004 | break; | |
1005 | case CharacterTilde: | |
1006 | token = TILDE; | |
1007 | shift(); | |
1008 | break; | |
1009 | case CharacterSemicolon: | |
1010 | m_delimited = true; | |
1011 | shift(); | |
1012 | token = SEMICOLON; | |
1013 | break; | |
1014 | case CharacterOpenBrace: | |
1015 | tokenData->intValue = currentOffset(); | |
1016 | shift(); | |
1017 | token = OPENBRACE; | |
1018 | break; | |
1019 | case CharacterCloseBrace: | |
1020 | tokenData->intValue = currentOffset(); | |
1021 | m_delimited = true; | |
1022 | shift(); | |
1023 | token = CLOSEBRACE; | |
1024 | break; | |
1025 | case CharacterDot: | |
1026 | shift(); | |
1027 | if (!isASCIIDigit(m_current)) { | |
1028 | token = DOT; | |
1029 | break; | |
1030 | } | |
1031 | goto inNumberAfterDecimalPoint; | |
1032 | case CharacterZero: | |
1033 | shift(); | |
1034 | if ((m_current | 0x20) == 'x' && isASCIIHexDigit(peek(1))) { | |
1035 | parseHex(tokenData->doubleValue); | |
1036 | token = NUMBER; | |
1037 | } else { | |
1038 | record8('0'); | |
1039 | if (isASCIIOctalDigit(m_current)) { | |
1040 | if (parseOctal(tokenData->doubleValue)) { | |
1041 | if (strictMode) | |
1042 | goto returnError; | |
1043 | token = NUMBER; | |
9dae56ea | 1044 | } |
ba379fdc | 1045 | } |
ba379fdc | 1046 | } |
14957cd0 A |
1047 | // Fall through into CharacterNumber |
1048 | case CharacterNumber: | |
1049 | if (LIKELY(token != NUMBER)) { | |
1050 | if (!parseDecimal(tokenData->doubleValue)) { | |
1051 | if (m_current == '.') { | |
1052 | shift(); | |
1053 | inNumberAfterDecimalPoint: | |
1054 | parseNumberAfterDecimalPoint(); | |
1055 | } | |
1056 | if ((m_current | 0x20) == 'e') | |
1057 | if (!parseNumberAfterExponentIndicator()) | |
1058 | goto returnError; | |
1059 | // Null-terminate string for strtod. | |
1060 | m_buffer8.append('\0'); | |
1061 | tokenData->doubleValue = WTF::strtod(m_buffer8.data(), 0); | |
1062 | } | |
1063 | token = NUMBER; | |
1064 | } | |
ba379fdc | 1065 | |
14957cd0 A |
1066 | // No identifiers allowed directly after numeric literal, e.g. "3in" is bad. |
1067 | if (UNLIKELY(isIdentStart(m_current))) | |
ba379fdc | 1068 | goto returnError; |
14957cd0 A |
1069 | m_buffer8.resize(0); |
1070 | m_delimited = false; | |
1071 | break; | |
1072 | case CharacterQuote: | |
1073 | if (lexType & DontBuildStrings) { | |
1074 | if (UNLIKELY(!parseString<false>(tokenData, strictMode))) | |
1075 | goto returnError; | |
1076 | } else { | |
1077 | if (UNLIKELY(!parseString<true>(tokenData, strictMode))) | |
1078 | goto returnError; | |
ba379fdc | 1079 | } |
14957cd0 A |
1080 | shift(); |
1081 | m_delimited = false; | |
1082 | token = STRING; | |
1083 | break; | |
1084 | case CharacterIdentifierStart: | |
1085 | ASSERT(isIdentStart(m_current)); | |
1086 | // Fall through into CharacterBackSlash. | |
1087 | case CharacterBackSlash: | |
1088 | if (lexType & DontBuildKeywords) | |
1089 | token = parseIdentifier<false>(tokenData, lexType); | |
1090 | else | |
1091 | token = parseIdentifier<true>(tokenData, lexType); | |
1092 | break; | |
1093 | case CharacterLineTerminator: | |
1094 | ASSERT(isLineTerminator(m_current)); | |
ba379fdc | 1095 | shiftLineTerminator(); |
14957cd0 A |
1096 | m_atLineStart = true; |
1097 | m_terminator = true; | |
1098 | goto start; | |
1099 | case CharacterInvalid: | |
ba379fdc | 1100 | goto returnError; |
14957cd0 A |
1101 | default: |
1102 | ASSERT_NOT_REACHED(); | |
ba379fdc | 1103 | goto returnError; |
ba379fdc | 1104 | } |
9dae56ea | 1105 | |
14957cd0 A |
1106 | m_atLineStart = false; |
1107 | goto returnToken; | |
9dae56ea | 1108 | |
ba379fdc A |
1109 | inSingleLineComment: |
1110 | while (!isLineTerminator(m_current)) { | |
1111 | if (UNLIKELY(m_current == -1)) | |
14957cd0 A |
1112 | return EOFTOK; |
1113 | shift(); | |
9dae56ea | 1114 | } |
ba379fdc A |
1115 | shiftLineTerminator(); |
1116 | m_atLineStart = true; | |
1117 | m_terminator = true; | |
14957cd0 A |
1118 | if (!lastTokenWasRestrKeyword()) |
1119 | goto start; | |
ba379fdc | 1120 | |
14957cd0 | 1121 | token = SEMICOLON; |
ba379fdc | 1122 | m_delimited = true; |
ba379fdc A |
1123 | // Fall through into returnToken. |
1124 | ||
14957cd0 A |
1125 | returnToken: |
1126 | tokenInfo->line = m_lineNumber; | |
1127 | tokenInfo->startOffset = startOffset; | |
1128 | tokenInfo->endOffset = currentOffset(); | |
ba379fdc A |
1129 | m_lastToken = token; |
1130 | return token; | |
9dae56ea | 1131 | |
ba379fdc A |
1132 | returnError: |
1133 | m_error = true; | |
14957cd0 | 1134 | return ERRORTOK; |
9dae56ea A |
1135 | } |
1136 | ||
f9bf01c6 | 1137 | bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix) |
9dae56ea | 1138 | { |
ba379fdc A |
1139 | ASSERT(m_buffer16.isEmpty()); |
1140 | ||
9dae56ea A |
1141 | bool lastWasEscape = false; |
1142 | bool inBrackets = false; | |
1143 | ||
f9bf01c6 A |
1144 | if (patternPrefix) { |
1145 | ASSERT(!isLineTerminator(patternPrefix)); | |
1146 | ASSERT(patternPrefix != '/'); | |
1147 | ASSERT(patternPrefix != '['); | |
1148 | record16(patternPrefix); | |
1149 | } | |
1150 | ||
ba379fdc | 1151 | while (true) { |
f9bf01c6 A |
1152 | int current = m_current; |
1153 | ||
1154 | if (isLineTerminator(current) || current == -1) { | |
ba379fdc | 1155 | m_buffer16.resize(0); |
f9bf01c6 | 1156 | return false; |
9dae56ea | 1157 | } |
f9bf01c6 | 1158 | |
14957cd0 | 1159 | shift(); |
f9bf01c6 A |
1160 | |
1161 | if (current == '/' && !lastWasEscape && !inBrackets) | |
1162 | break; | |
1163 | ||
1164 | record16(current); | |
1165 | ||
1166 | if (lastWasEscape) { | |
1167 | lastWasEscape = false; | |
1168 | continue; | |
1169 | } | |
1170 | ||
1171 | switch (current) { | |
1172 | case '[': | |
1173 | inBrackets = true; | |
1174 | break; | |
1175 | case ']': | |
1176 | inBrackets = false; | |
1177 | break; | |
1178 | case '\\': | |
1179 | lastWasEscape = true; | |
1180 | break; | |
1181 | } | |
9dae56ea A |
1182 | } |
1183 | ||
f9bf01c6 A |
1184 | pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size()); |
1185 | m_buffer16.resize(0); | |
1186 | ||
9dae56ea A |
1187 | while (isIdentPart(m_current)) { |
1188 | record16(m_current); | |
14957cd0 | 1189 | shift(); |
9dae56ea | 1190 | } |
f9bf01c6 A |
1191 | |
1192 | flags = makeIdentifier(m_buffer16.data(), m_buffer16.size()); | |
ba379fdc | 1193 | m_buffer16.resize(0); |
9dae56ea A |
1194 | |
1195 | return true; | |
1196 | } | |
1197 | ||
f9bf01c6 A |
1198 | bool Lexer::skipRegExp() |
1199 | { | |
1200 | bool lastWasEscape = false; | |
1201 | bool inBrackets = false; | |
1202 | ||
1203 | while (true) { | |
1204 | int current = m_current; | |
1205 | ||
1206 | if (isLineTerminator(current) || current == -1) | |
1207 | return false; | |
1208 | ||
14957cd0 | 1209 | shift(); |
f9bf01c6 A |
1210 | |
1211 | if (current == '/' && !lastWasEscape && !inBrackets) | |
1212 | break; | |
1213 | ||
1214 | if (lastWasEscape) { | |
1215 | lastWasEscape = false; | |
1216 | continue; | |
1217 | } | |
1218 | ||
1219 | switch (current) { | |
1220 | case '[': | |
1221 | inBrackets = true; | |
1222 | break; | |
1223 | case ']': | |
1224 | inBrackets = false; | |
1225 | break; | |
1226 | case '\\': | |
1227 | lastWasEscape = true; | |
1228 | break; | |
1229 | } | |
1230 | } | |
1231 | ||
1232 | while (isIdentPart(m_current)) | |
14957cd0 | 1233 | shift(); |
f9bf01c6 A |
1234 | |
1235 | return true; | |
1236 | } | |
1237 | ||
9dae56ea A |
1238 | void Lexer::clear() |
1239 | { | |
f9bf01c6 | 1240 | m_arena = 0; |
9dae56ea A |
1241 | |
1242 | Vector<char> newBuffer8; | |
9dae56ea A |
1243 | m_buffer8.swap(newBuffer8); |
1244 | ||
1245 | Vector<UChar> newBuffer16; | |
9dae56ea A |
1246 | m_buffer16.swap(newBuffer16); |
1247 | ||
1248 | m_isReparsing = false; | |
ba379fdc A |
1249 | } |
1250 | ||
1251 | SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine) | |
1252 | { | |
14957cd0 A |
1253 | ASSERT(m_source->provider()->data()[openBrace] == '{'); |
1254 | ASSERT(m_source->provider()->data()[closeBrace] == '}'); | |
fb8617cd | 1255 | return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); |
9dae56ea A |
1256 | } |
1257 | ||
1258 | } // namespace JSC |