]>
Commit | Line | Data |
---|---|---|
9dae56ea A |
1 | /* |
2 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) | |
3 | * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved. | |
4 | * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) | |
5 | * | |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Library General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Library General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Library General Public License | |
17 | * along with this library; see the file COPYING.LIB. If not, write to | |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
19 | * Boston, MA 02110-1301, USA. | |
20 | * | |
21 | */ | |
22 | ||
23 | #include "config.h" | |
24 | #include "Lexer.h" | |
25 | ||
26 | #include "JSFunction.h" | |
27 | #include "JSGlobalObjectFunctions.h" | |
28 | #include "NodeInfo.h" | |
29 | #include "Nodes.h" | |
30 | #include "dtoa.h" | |
31 | #include <ctype.h> | |
32 | #include <limits.h> | |
33 | #include <string.h> | |
9dae56ea | 34 | #include <wtf/Assertions.h> |
9dae56ea A |
35 | |
36 | using namespace WTF; | |
37 | using namespace Unicode; | |
38 | ||
ba379fdc | 39 | // We can't specify the namespace in yacc's C output, so do it here instead. |
9dae56ea A |
40 | using namespace JSC; |
41 | ||
42 | #ifndef KDE_USE_FINAL | |
43 | #include "Grammar.h" | |
44 | #endif | |
45 | ||
46 | #include "Lookup.h" | |
47 | #include "Lexer.lut.h" | |
48 | ||
ba379fdc | 49 | // A bridge for yacc from the C world to the C++ world. |
9dae56ea A |
50 | int jscyylex(void* lvalp, void* llocp, void* globalData) |
51 | { | |
52 | return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp); | |
53 | } | |
54 | ||
55 | namespace JSC { | |
56 | ||
// Code unit U+FEFF. setCode() looks for it in the source text and
// copyCodeWithoutBOMs() drops every occurrence before lexing.
static const UChar byteOrderMark = 0xFEFF;
9dae56ea A |
58 | |
59 | Lexer::Lexer(JSGlobalData* globalData) | |
ba379fdc | 60 | : m_isReparsing(false) |
9dae56ea | 61 | , m_globalData(globalData) |
ba379fdc | 62 | , m_keywordTable(JSC::mainTable) |
9dae56ea A |
63 | { |
64 | m_buffer8.reserveInitialCapacity(initialReadBufferCapacity); | |
65 | m_buffer16.reserveInitialCapacity(initialReadBufferCapacity); | |
66 | } | |
67 | ||
// Releases whatever deleteTable() frees for this lexer's keyword table.
Lexer::~Lexer()
{
    m_keywordTable.deleteTable();
}
72 | ||
// Returns the address of the character currently held in m_current.
// The shiftN() functions keep m_code four characters ahead of m_current
// (m_current, m_next1, m_next2, m_next3 form a 4-character window), hence the -4.
inline const UChar* Lexer::currentCharacter() const
{
    return m_code - 4;
}
77 | ||
78 | inline int Lexer::currentOffset() const | |
79 | { | |
80 | return currentCharacter() - m_codeStart; | |
81 | } | |
82 | ||
83 | ALWAYS_INLINE void Lexer::shift1() | |
84 | { | |
85 | m_current = m_next1; | |
86 | m_next1 = m_next2; | |
87 | m_next2 = m_next3; | |
88 | if (LIKELY(m_code < m_codeEnd)) | |
89 | m_next3 = m_code[0]; | |
90 | else | |
91 | m_next3 = -1; | |
92 | ||
93 | ++m_code; | |
94 | } | |
95 | ||
96 | ALWAYS_INLINE void Lexer::shift2() | |
97 | { | |
98 | m_current = m_next2; | |
99 | m_next1 = m_next3; | |
100 | if (LIKELY(m_code + 1 < m_codeEnd)) { | |
101 | m_next2 = m_code[0]; | |
102 | m_next3 = m_code[1]; | |
103 | } else { | |
104 | m_next2 = m_code < m_codeEnd ? m_code[0] : -1; | |
105 | m_next3 = -1; | |
106 | } | |
107 | ||
108 | m_code += 2; | |
109 | } | |
110 | ||
111 | ALWAYS_INLINE void Lexer::shift3() | |
112 | { | |
113 | m_current = m_next3; | |
114 | if (LIKELY(m_code + 2 < m_codeEnd)) { | |
115 | m_next1 = m_code[0]; | |
116 | m_next2 = m_code[1]; | |
117 | m_next3 = m_code[2]; | |
118 | } else { | |
119 | m_next1 = m_code < m_codeEnd ? m_code[0] : -1; | |
120 | m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1; | |
121 | m_next3 = -1; | |
122 | } | |
123 | ||
124 | m_code += 3; | |
125 | } | |
126 | ||
127 | ALWAYS_INLINE void Lexer::shift4() | |
128 | { | |
129 | if (LIKELY(m_code + 3 < m_codeEnd)) { | |
130 | m_current = m_code[0]; | |
131 | m_next1 = m_code[1]; | |
132 | m_next2 = m_code[2]; | |
133 | m_next3 = m_code[3]; | |
134 | } else { | |
135 | m_current = m_code < m_codeEnd ? m_code[0] : -1; | |
136 | m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1; | |
137 | m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1; | |
138 | m_next3 = -1; | |
139 | } | |
140 | ||
141 | m_code += 4; | |
9dae56ea A |
142 | } |
143 | ||
144 | void Lexer::setCode(const SourceCode& source) | |
145 | { | |
ba379fdc | 146 | m_lineNumber = source.firstLine(); |
9dae56ea | 147 | m_delimited = false; |
9dae56ea A |
148 | m_lastToken = -1; |
149 | ||
ba379fdc A |
150 | const UChar* data = source.provider()->data(); |
151 | ||
9dae56ea | 152 | m_source = &source; |
ba379fdc A |
153 | m_codeStart = data; |
154 | m_code = data + source.startOffset(); | |
155 | m_codeEnd = data + source.endOffset(); | |
9dae56ea A |
156 | m_error = false; |
157 | m_atLineStart = true; | |
158 | ||
ba379fdc A |
159 | // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters. |
160 | // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details. | |
161 | if (source.provider()->hasBOMs()) { | |
162 | for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) { | |
163 | if (UNLIKELY(*p == byteOrderMark)) { | |
164 | copyCodeWithoutBOMs(); | |
165 | break; | |
166 | } | |
167 | } | |
168 | } | |
169 | ||
170 | // Read the first characters into the 4-character buffer. | |
171 | shift4(); | |
172 | ASSERT(currentOffset() == source.startOffset()); | |
9dae56ea A |
173 | } |
174 | ||
ba379fdc | 175 | void Lexer::copyCodeWithoutBOMs() |
9dae56ea | 176 | { |
ba379fdc A |
177 | // Note: In this case, the character offset data for debugging will be incorrect. |
178 | // If it's important to correctly debug code with extraneous BOMs, then the caller | |
179 | // should strip the BOMs when creating the SourceProvider object and do its own | |
180 | // mapping of offsets within the stripped text to original text offset. | |
181 | ||
182 | m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code); | |
183 | for (const UChar* p = m_code; p < m_codeEnd; ++p) { | |
184 | UChar c = *p; | |
185 | if (c != byteOrderMark) | |
186 | m_codeWithoutBOMs.append(c); | |
187 | } | |
188 | ptrdiff_t startDelta = m_codeStart - m_code; | |
189 | m_code = m_codeWithoutBOMs.data(); | |
190 | m_codeStart = m_code + startDelta; | |
191 | m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size(); | |
192 | } | |
193 | ||
194 | void Lexer::shiftLineTerminator() | |
195 | { | |
196 | ASSERT(isLineTerminator(m_current)); | |
197 | ||
198 | // Allow both CRLF and LFCR. | |
199 | if (m_current + m_next1 == '\n' + '\r') | |
200 | shift2(); | |
201 | else | |
202 | shift1(); | |
203 | ||
204 | ++m_lineNumber; | |
205 | } | |
206 | ||
// Builds an Identifier from `length` characters starting at `characters`,
// appends it to m_identifiers and returns a pointer to the stored element.
// NOTE(review): the returned pointer is handed out in token values, so
// m_identifiers presumably keeps element addresses stable across appends - confirm.
ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
{
    m_identifiers.append(Identifier(m_globalData, characters, length));
    return &m_identifiers.last();
}
212 | ||
213 | inline bool Lexer::lastTokenWasRestrKeyword() const | |
214 | { | |
215 | return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW; | |
216 | } | |
217 | ||
218 | static NEVER_INLINE bool isNonASCIIIdentStart(int c) | |
219 | { | |
220 | return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other); | |
221 | } | |
222 | ||
223 | static inline bool isIdentStart(int c) | |
224 | { | |
225 | return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c); | |
226 | } | |
227 | ||
228 | static NEVER_INLINE bool isNonASCIIIdentPart(int c) | |
229 | { | |
230 | return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other | |
231 | | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector); | |
232 | } | |
233 | ||
234 | static inline bool isIdentPart(int c) | |
235 | { | |
236 | return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c); | |
237 | } | |
238 | ||
// Maps the character following a backslash to the character it denotes.
// b, t, n, v, f and r become the matching control characters; any other
// value is returned unchanged.
static inline int singleEscape(int c)
{
    int translated = c;
    if (c == 'b')
        translated = 0x08;
    else if (c == 't')
        translated = 0x09;
    else if (c == 'n')
        translated = 0x0A;
    else if (c == 'v')
        translated = 0x0B;
    else if (c == 'f')
        translated = 0x0C;
    else if (c == 'r')
        translated = 0x0D;
    return translated;
}
258 | ||
ba379fdc | 259 | inline void Lexer::record8(int c) |
9dae56ea | 260 | { |
ba379fdc A |
261 | ASSERT(c >= 0); |
262 | ASSERT(c <= 0xFF); | |
263 | m_buffer8.append(static_cast<char>(c)); | |
9dae56ea A |
264 | } |
265 | ||
// Appends one UTF-16 code unit to the 16-bit scratch buffer.
inline void Lexer::record16(UChar c)
{
    m_buffer16.append(c);
}
270 | ||
271 | inline void Lexer::record16(int c) | |
272 | { | |
273 | ASSERT(c >= 0); | |
274 | ASSERT(c <= USHRT_MAX); | |
275 | record16(UChar(static_cast<unsigned short>(c))); | |
9dae56ea A |
276 | } |
277 | ||
278 | int Lexer::lex(void* p1, void* p2) | |
279 | { | |
ba379fdc A |
280 | ASSERT(!m_error); |
281 | ASSERT(m_buffer8.isEmpty()); | |
282 | ASSERT(m_buffer16.isEmpty()); | |
283 | ||
9dae56ea A |
284 | YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1); |
285 | YYLTYPE* llocp = static_cast<YYLTYPE*>(p2); | |
286 | int token = 0; | |
9dae56ea | 287 | m_terminator = false; |
ba379fdc A |
288 | |
289 | start: | |
290 | while (isWhiteSpace(m_current)) | |
291 | shift1(); | |
292 | ||
293 | int startOffset = currentOffset(); | |
294 | ||
295 | if (m_current == -1) { | |
296 | if (!m_terminator && !m_delimited && !m_isReparsing) { | |
297 | // automatic semicolon insertion if program incomplete | |
298 | token = ';'; | |
299 | goto doneSemicolon; | |
9dae56ea | 300 | } |
ba379fdc A |
301 | return 0; |
302 | } | |
303 | ||
304 | m_delimited = false; | |
305 | switch (m_current) { | |
306 | case '>': | |
307 | if (m_next1 == '>' && m_next2 == '>') { | |
308 | if (m_next3 == '=') { | |
309 | shift4(); | |
310 | token = URSHIFTEQUAL; | |
311 | break; | |
9dae56ea | 312 | } |
ba379fdc A |
313 | shift3(); |
314 | token = URSHIFT; | |
9dae56ea | 315 | break; |
ba379fdc A |
316 | } |
317 | if (m_next1 == '>') { | |
318 | if (m_next2 == '=') { | |
319 | shift3(); | |
320 | token = RSHIFTEQUAL; | |
321 | break; | |
322 | } | |
323 | shift2(); | |
324 | token = RSHIFT; | |
9dae56ea | 325 | break; |
ba379fdc A |
326 | } |
327 | if (m_next1 == '=') { | |
328 | shift2(); | |
329 | token = GE; | |
330 | break; | |
331 | } | |
332 | shift1(); | |
333 | token = '>'; | |
334 | break; | |
335 | case '=': | |
336 | if (m_next1 == '=') { | |
337 | if (m_next2 == '=') { | |
338 | shift3(); | |
339 | token = STREQ; | |
340 | break; | |
9dae56ea | 341 | } |
ba379fdc A |
342 | shift2(); |
343 | token = EQEQ; | |
9dae56ea | 344 | break; |
ba379fdc A |
345 | } |
346 | shift1(); | |
347 | token = '='; | |
348 | break; | |
349 | case '!': | |
350 | if (m_next1 == '=') { | |
351 | if (m_next2 == '=') { | |
352 | shift3(); | |
353 | token = STRNEQ; | |
354 | break; | |
9dae56ea | 355 | } |
ba379fdc A |
356 | shift2(); |
357 | token = NE; | |
9dae56ea | 358 | break; |
ba379fdc A |
359 | } |
360 | shift1(); | |
361 | token = '!'; | |
362 | break; | |
363 | case '<': | |
364 | if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') { | |
365 | // <!-- marks the beginning of a line comment (for www usage) | |
366 | shift4(); | |
367 | goto inSingleLineComment; | |
368 | } | |
369 | if (m_next1 == '<') { | |
370 | if (m_next2 == '=') { | |
371 | shift3(); | |
372 | token = LSHIFTEQUAL; | |
373 | break; | |
374 | } | |
375 | shift2(); | |
376 | token = LSHIFT; | |
9dae56ea | 377 | break; |
ba379fdc A |
378 | } |
379 | if (m_next1 == '=') { | |
380 | shift2(); | |
381 | token = LE; | |
9dae56ea | 382 | break; |
ba379fdc A |
383 | } |
384 | shift1(); | |
385 | token = '<'; | |
386 | break; | |
387 | case '+': | |
388 | if (m_next1 == '+') { | |
389 | shift2(); | |
390 | if (m_terminator) { | |
391 | token = AUTOPLUSPLUS; | |
392 | break; | |
9dae56ea | 393 | } |
ba379fdc | 394 | token = PLUSPLUS; |
9dae56ea | 395 | break; |
ba379fdc A |
396 | } |
397 | if (m_next1 == '=') { | |
398 | shift2(); | |
399 | token = PLUSEQUAL; | |
9dae56ea | 400 | break; |
ba379fdc A |
401 | } |
402 | shift1(); | |
403 | token = '+'; | |
404 | break; | |
405 | case '-': | |
406 | if (m_next1 == '-') { | |
407 | if (m_atLineStart && m_next2 == '>') { | |
408 | shift3(); | |
409 | goto inSingleLineComment; | |
410 | } | |
411 | shift2(); | |
412 | if (m_terminator) { | |
413 | token = AUTOMINUSMINUS; | |
414 | break; | |
415 | } | |
416 | token = MINUSMINUS; | |
9dae56ea | 417 | break; |
ba379fdc A |
418 | } |
419 | if (m_next1 == '=') { | |
420 | shift2(); | |
421 | token = MINUSEQUAL; | |
9dae56ea | 422 | break; |
ba379fdc A |
423 | } |
424 | shift1(); | |
425 | token = '-'; | |
426 | break; | |
427 | case '*': | |
428 | if (m_next1 == '=') { | |
429 | shift2(); | |
430 | token = MULTEQUAL; | |
9dae56ea | 431 | break; |
ba379fdc A |
432 | } |
433 | shift1(); | |
434 | token = '*'; | |
435 | break; | |
436 | case '/': | |
437 | if (m_next1 == '/') { | |
438 | shift2(); | |
439 | goto inSingleLineComment; | |
440 | } | |
441 | if (m_next1 == '*') | |
442 | goto inMultiLineComment; | |
443 | if (m_next1 == '=') { | |
444 | shift2(); | |
445 | token = DIVEQUAL; | |
9dae56ea | 446 | break; |
ba379fdc A |
447 | } |
448 | shift1(); | |
449 | token = '/'; | |
450 | break; | |
451 | case '&': | |
452 | if (m_next1 == '&') { | |
453 | shift2(); | |
454 | token = AND; | |
9dae56ea | 455 | break; |
ba379fdc A |
456 | } |
457 | if (m_next1 == '=') { | |
458 | shift2(); | |
459 | token = ANDEQUAL; | |
9dae56ea | 460 | break; |
ba379fdc A |
461 | } |
462 | shift1(); | |
463 | token = '&'; | |
464 | break; | |
465 | case '^': | |
466 | if (m_next1 == '=') { | |
467 | shift2(); | |
468 | token = XOREQUAL; | |
9dae56ea | 469 | break; |
ba379fdc A |
470 | } |
471 | shift1(); | |
472 | token = '^'; | |
473 | break; | |
474 | case '%': | |
475 | if (m_next1 == '=') { | |
476 | shift2(); | |
477 | token = MODEQUAL; | |
9dae56ea | 478 | break; |
ba379fdc A |
479 | } |
480 | shift1(); | |
481 | token = '%'; | |
482 | break; | |
483 | case '|': | |
484 | if (m_next1 == '=') { | |
485 | shift2(); | |
486 | token = OREQUAL; | |
9dae56ea | 487 | break; |
ba379fdc A |
488 | } |
489 | if (m_next1 == '|') { | |
490 | shift2(); | |
491 | token = OR; | |
9dae56ea | 492 | break; |
ba379fdc A |
493 | } |
494 | shift1(); | |
495 | token = '|'; | |
496 | break; | |
497 | case '.': | |
498 | if (isASCIIDigit(m_next1)) { | |
499 | record8('.'); | |
500 | shift1(); | |
501 | goto inNumberAfterDecimalPoint; | |
502 | } | |
503 | token = '.'; | |
504 | shift1(); | |
505 | break; | |
506 | case ',': | |
507 | case '~': | |
508 | case '?': | |
509 | case ':': | |
510 | case '(': | |
511 | case ')': | |
512 | case '[': | |
513 | case ']': | |
514 | token = m_current; | |
515 | shift1(); | |
516 | break; | |
517 | case ';': | |
518 | shift1(); | |
519 | m_delimited = true; | |
520 | token = ';'; | |
521 | break; | |
522 | case '{': | |
523 | lvalp->intValue = currentOffset(); | |
524 | shift1(); | |
525 | token = OPENBRACE; | |
526 | break; | |
527 | case '}': | |
528 | lvalp->intValue = currentOffset(); | |
529 | shift1(); | |
530 | m_delimited = true; | |
531 | token = CLOSEBRACE; | |
532 | break; | |
533 | case '\\': | |
534 | goto startIdentifierWithBackslash; | |
535 | case '0': | |
536 | goto startNumberWithZeroDigit; | |
537 | case '1': | |
538 | case '2': | |
539 | case '3': | |
540 | case '4': | |
541 | case '5': | |
542 | case '6': | |
543 | case '7': | |
544 | case '8': | |
545 | case '9': | |
546 | goto startNumber; | |
547 | case '"': | |
548 | case '\'': | |
549 | goto startString; | |
550 | default: | |
551 | if (isIdentStart(m_current)) | |
552 | goto startIdentifierOrKeyword; | |
553 | if (isLineTerminator(m_current)) { | |
554 | shiftLineTerminator(); | |
555 | m_atLineStart = true; | |
556 | m_terminator = true; | |
557 | if (lastTokenWasRestrKeyword()) { | |
558 | token = ';'; | |
559 | goto doneSemicolon; | |
9dae56ea | 560 | } |
ba379fdc A |
561 | goto start; |
562 | } | |
563 | goto returnError; | |
9dae56ea A |
564 | } |
565 | ||
ba379fdc A |
566 | m_atLineStart = false; |
567 | goto returnToken; | |
9dae56ea | 568 | |
ba379fdc A |
569 | startString: { |
570 | int stringQuoteCharacter = m_current; | |
571 | shift1(); | |
9dae56ea | 572 | |
ba379fdc A |
573 | const UChar* stringStart = currentCharacter(); |
574 | while (m_current != stringQuoteCharacter) { | |
575 | // Fast check for characters that require special handling. | |
576 | // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently | |
577 | // as possible, and lets through all common ASCII characters. | |
578 | if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) { | |
579 | m_buffer16.append(stringStart, currentCharacter() - stringStart); | |
580 | goto inString; | |
581 | } | |
582 | shift1(); | |
583 | } | |
584 | lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart); | |
585 | shift1(); | |
586 | m_atLineStart = false; | |
587 | m_delimited = false; | |
588 | token = STRING; | |
589 | goto returnToken; | |
590 | ||
591 | inString: | |
592 | while (m_current != stringQuoteCharacter) { | |
593 | if (m_current == '\\') | |
594 | goto inStringEscapeSequence; | |
595 | if (UNLIKELY(isLineTerminator(m_current))) | |
596 | goto returnError; | |
597 | if (UNLIKELY(m_current == -1)) | |
598 | goto returnError; | |
599 | record16(m_current); | |
600 | shift1(); | |
601 | } | |
602 | goto doneString; | |
603 | ||
604 | inStringEscapeSequence: | |
605 | shift1(); | |
606 | if (m_current == 'x') { | |
607 | shift1(); | |
608 | if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) { | |
609 | record16(convertHex(m_current, m_next1)); | |
610 | shift2(); | |
611 | goto inString; | |
9dae56ea | 612 | } |
ba379fdc A |
613 | record16('x'); |
614 | if (m_current == stringQuoteCharacter) | |
615 | goto doneString; | |
616 | goto inString; | |
617 | } | |
618 | if (m_current == 'u') { | |
619 | shift1(); | |
620 | if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) { | |
621 | record16(convertUnicode(m_current, m_next1, m_next2, m_next3)); | |
622 | shift4(); | |
623 | goto inString; | |
624 | } | |
625 | if (m_current == stringQuoteCharacter) { | |
626 | record16('u'); | |
627 | goto doneString; | |
628 | } | |
629 | goto returnError; | |
630 | } | |
631 | if (isASCIIOctalDigit(m_current)) { | |
632 | if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) { | |
633 | record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0'); | |
634 | shift3(); | |
635 | goto inString; | |
636 | } | |
637 | if (isASCIIOctalDigit(m_next1)) { | |
638 | record16((m_current - '0') * 8 + m_next1 - '0'); | |
639 | shift2(); | |
640 | goto inString; | |
641 | } | |
642 | record16(m_current - '0'); | |
643 | shift1(); | |
644 | goto inString; | |
645 | } | |
646 | if (isLineTerminator(m_current)) { | |
647 | shiftLineTerminator(); | |
648 | goto inString; | |
649 | } | |
650 | record16(singleEscape(m_current)); | |
651 | shift1(); | |
652 | goto inString; | |
653 | } | |
9dae56ea | 654 | |
ba379fdc A |
655 | startIdentifierWithBackslash: |
656 | shift1(); | |
657 | if (UNLIKELY(m_current != 'u')) | |
658 | goto returnError; | |
659 | shift1(); | |
660 | if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) | |
661 | goto returnError; | |
662 | token = convertUnicode(m_current, m_next1, m_next2, m_next3); | |
663 | if (UNLIKELY(!isIdentStart(token))) | |
664 | goto returnError; | |
665 | goto inIdentifierAfterCharacterCheck; | |
666 | ||
667 | startIdentifierOrKeyword: { | |
668 | const UChar* identifierStart = currentCharacter(); | |
669 | shift1(); | |
670 | while (isIdentPart(m_current)) | |
671 | shift1(); | |
672 | if (LIKELY(m_current != '\\')) { | |
673 | lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart); | |
674 | goto doneIdentifierOrKeyword; | |
675 | } | |
676 | m_buffer16.append(identifierStart, currentCharacter() - identifierStart); | |
677 | } | |
9dae56ea | 678 | |
ba379fdc A |
679 | do { |
680 | shift1(); | |
681 | if (UNLIKELY(m_current != 'u')) | |
682 | goto returnError; | |
683 | shift1(); | |
684 | if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) | |
685 | goto returnError; | |
686 | token = convertUnicode(m_current, m_next1, m_next2, m_next3); | |
687 | if (UNLIKELY(!isIdentPart(token))) | |
688 | goto returnError; | |
689 | inIdentifierAfterCharacterCheck: | |
690 | record16(token); | |
691 | shift4(); | |
692 | ||
693 | while (isIdentPart(m_current)) { | |
694 | record16(m_current); | |
695 | shift1(); | |
9dae56ea | 696 | } |
ba379fdc A |
697 | } while (UNLIKELY(m_current == '\\')); |
698 | goto doneIdentifier; | |
9dae56ea | 699 | |
ba379fdc A |
700 | inSingleLineComment: |
701 | while (!isLineTerminator(m_current)) { | |
702 | if (UNLIKELY(m_current == -1)) | |
703 | return 0; | |
704 | shift1(); | |
9dae56ea | 705 | } |
ba379fdc A |
706 | shiftLineTerminator(); |
707 | m_atLineStart = true; | |
708 | m_terminator = true; | |
709 | if (lastTokenWasRestrKeyword()) | |
710 | goto doneSemicolon; | |
711 | goto start; | |
712 | ||
713 | inMultiLineComment: | |
714 | shift2(); | |
715 | while (m_current != '*' || m_next1 != '/') { | |
716 | if (isLineTerminator(m_current)) | |
717 | shiftLineTerminator(); | |
718 | else { | |
719 | shift1(); | |
720 | if (UNLIKELY(m_current == -1)) | |
721 | goto returnError; | |
722 | } | |
9dae56ea | 723 | } |
ba379fdc A |
724 | shift2(); |
725 | m_atLineStart = false; | |
726 | goto start; | |
727 | ||
728 | startNumberWithZeroDigit: | |
729 | shift1(); | |
730 | if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) { | |
731 | shift1(); | |
732 | goto inHex; | |
733 | } | |
734 | if (m_current == '.') { | |
735 | record8('0'); | |
736 | record8('.'); | |
737 | shift1(); | |
738 | goto inNumberAfterDecimalPoint; | |
739 | } | |
740 | if ((m_current | 0x20) == 'e') { | |
741 | record8('0'); | |
742 | record8('e'); | |
743 | shift1(); | |
744 | goto inExponentIndicator; | |
745 | } | |
746 | if (isASCIIOctalDigit(m_current)) | |
747 | goto inOctal; | |
748 | if (isASCIIDigit(m_current)) | |
749 | goto startNumber; | |
750 | lvalp->doubleValue = 0; | |
751 | goto doneNumeric; | |
752 | ||
753 | inNumberAfterDecimalPoint: | |
754 | while (isASCIIDigit(m_current)) { | |
755 | record8(m_current); | |
756 | shift1(); | |
757 | } | |
758 | if ((m_current | 0x20) == 'e') { | |
759 | record8('e'); | |
760 | shift1(); | |
761 | goto inExponentIndicator; | |
762 | } | |
763 | goto doneNumber; | |
764 | ||
765 | inExponentIndicator: | |
766 | if (m_current == '+' || m_current == '-') { | |
767 | record8(m_current); | |
768 | shift1(); | |
769 | } | |
770 | if (!isASCIIDigit(m_current)) | |
771 | goto returnError; | |
772 | do { | |
773 | record8(m_current); | |
774 | shift1(); | |
775 | } while (isASCIIDigit(m_current)); | |
776 | goto doneNumber; | |
777 | ||
778 | inOctal: { | |
779 | do { | |
780 | record8(m_current); | |
781 | shift1(); | |
782 | } while (isASCIIOctalDigit(m_current)); | |
783 | if (isASCIIDigit(m_current)) | |
784 | goto startNumber; | |
9dae56ea | 785 | |
ba379fdc | 786 | double dval = 0; |
9dae56ea | 787 | |
ba379fdc A |
788 | const char* end = m_buffer8.end(); |
789 | for (const char* p = m_buffer8.data(); p < end; ++p) { | |
790 | dval *= 8; | |
791 | dval += *p - '0'; | |
9dae56ea | 792 | } |
ba379fdc A |
793 | if (dval >= mantissaOverflowLowerBound) |
794 | dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8); | |
9dae56ea | 795 | |
ba379fdc | 796 | m_buffer8.resize(0); |
9dae56ea | 797 | |
ba379fdc A |
798 | lvalp->doubleValue = dval; |
799 | goto doneNumeric; | |
9dae56ea A |
800 | } |
801 | ||
ba379fdc A |
802 | inHex: { |
803 | do { | |
804 | record8(m_current); | |
805 | shift1(); | |
806 | } while (isASCIIHexDigit(m_current)); | |
9dae56ea | 807 | |
ba379fdc | 808 | double dval = 0; |
9dae56ea | 809 | |
ba379fdc A |
810 | const char* end = m_buffer8.end(); |
811 | for (const char* p = m_buffer8.data(); p < end; ++p) { | |
812 | dval *= 16; | |
813 | dval += toASCIIHexValue(*p); | |
814 | } | |
815 | if (dval >= mantissaOverflowLowerBound) | |
816 | dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16); | |
9dae56ea | 817 | |
ba379fdc | 818 | m_buffer8.resize(0); |
9dae56ea | 819 | |
ba379fdc A |
820 | lvalp->doubleValue = dval; |
821 | goto doneNumeric; | |
9dae56ea A |
822 | } |
823 | ||
ba379fdc A |
824 | startNumber: |
825 | record8(m_current); | |
826 | shift1(); | |
827 | while (isASCIIDigit(m_current)) { | |
828 | record8(m_current); | |
829 | shift1(); | |
9dae56ea | 830 | } |
ba379fdc A |
831 | if (m_current == '.') { |
832 | record8('.'); | |
833 | shift1(); | |
834 | goto inNumberAfterDecimalPoint; | |
9dae56ea | 835 | } |
ba379fdc A |
836 | if ((m_current | 0x20) == 'e') { |
837 | record8('e'); | |
838 | shift1(); | |
839 | goto inExponentIndicator; | |
9dae56ea A |
840 | } |
841 | ||
ba379fdc | 842 | // Fall through into doneNumber. |
9dae56ea | 843 | |
ba379fdc A |
844 | doneNumber: |
845 | // Null-terminate string for strtod. | |
846 | m_buffer8.append('\0'); | |
847 | lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0); | |
848 | m_buffer8.resize(0); | |
9dae56ea | 849 | |
ba379fdc | 850 | // Fall through into doneNumeric. |
9dae56ea | 851 | |
ba379fdc A |
852 | doneNumeric: |
853 | // No identifiers allowed directly after numeric literal, e.g. "3in" is bad. | |
854 | if (UNLIKELY(isIdentStart(m_current))) | |
855 | goto returnError; | |
9dae56ea | 856 | |
ba379fdc A |
857 | m_atLineStart = false; |
858 | m_delimited = false; | |
859 | token = NUMBER; | |
860 | goto returnToken; | |
9dae56ea | 861 | |
ba379fdc A |
862 | doneSemicolon: |
863 | token = ';'; | |
864 | m_delimited = true; | |
865 | goto returnToken; | |
9dae56ea | 866 | |
ba379fdc A |
867 | doneIdentifier: |
868 | m_atLineStart = false; | |
869 | m_delimited = false; | |
870 | lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); | |
871 | m_buffer16.resize(0); | |
872 | token = IDENT; | |
873 | goto returnToken; | |
874 | ||
875 | doneIdentifierOrKeyword: { | |
876 | m_atLineStart = false; | |
877 | m_delimited = false; | |
878 | m_buffer16.resize(0); | |
879 | const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident); | |
880 | token = entry ? entry->lexerValue() : IDENT; | |
881 | goto returnToken; | |
9dae56ea A |
882 | } |
883 | ||
ba379fdc A |
884 | doneString: |
885 | // Atomize constant strings in case they're later used in property lookup. | |
886 | shift1(); | |
887 | m_atLineStart = false; | |
888 | m_delimited = false; | |
889 | lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); | |
890 | m_buffer16.resize(0); | |
891 | token = STRING; | |
892 | ||
893 | // Fall through into returnToken. | |
894 | ||
895 | returnToken: { | |
896 | int lineNumber = m_lineNumber; | |
897 | llocp->first_line = lineNumber; | |
898 | llocp->last_line = lineNumber; | |
899 | llocp->first_column = startOffset; | |
900 | llocp->last_column = currentOffset(); | |
901 | ||
902 | m_lastToken = token; | |
903 | return token; | |
9dae56ea A |
904 | } |
905 | ||
ba379fdc A |
906 | returnError: |
907 | m_error = true; | |
908 | return -1; | |
9dae56ea A |
909 | } |
910 | ||
911 | bool Lexer::scanRegExp() | |
912 | { | |
ba379fdc A |
913 | ASSERT(m_buffer16.isEmpty()); |
914 | ||
9dae56ea A |
915 | bool lastWasEscape = false; |
916 | bool inBrackets = false; | |
917 | ||
ba379fdc A |
918 | while (true) { |
919 | if (isLineTerminator(m_current) || m_current == -1) | |
9dae56ea | 920 | return false; |
ba379fdc | 921 | if (m_current != '/' || lastWasEscape || inBrackets) { |
9dae56ea A |
922 | // keep track of '[' and ']' |
923 | if (!lastWasEscape) { | |
ba379fdc | 924 | if (m_current == '[' && !inBrackets) |
9dae56ea | 925 | inBrackets = true; |
ba379fdc | 926 | if (m_current == ']' && inBrackets) |
9dae56ea A |
927 | inBrackets = false; |
928 | } | |
929 | record16(m_current); | |
ba379fdc | 930 | lastWasEscape = !lastWasEscape && m_current == '\\'; |
9dae56ea A |
931 | } else { // end of regexp |
932 | m_pattern = UString(m_buffer16); | |
ba379fdc A |
933 | m_buffer16.resize(0); |
934 | shift1(); | |
9dae56ea A |
935 | break; |
936 | } | |
ba379fdc | 937 | shift1(); |
9dae56ea A |
938 | } |
939 | ||
940 | while (isIdentPart(m_current)) { | |
941 | record16(m_current); | |
ba379fdc | 942 | shift1(); |
9dae56ea A |
943 | } |
944 | m_flags = UString(m_buffer16); | |
ba379fdc | 945 | m_buffer16.resize(0); |
9dae56ea A |
946 | |
947 | return true; | |
948 | } | |
949 | ||
950 | void Lexer::clear() | |
951 | { | |
952 | m_identifiers.clear(); | |
ba379fdc | 953 | m_codeWithoutBOMs.clear(); |
9dae56ea A |
954 | |
955 | Vector<char> newBuffer8; | |
956 | newBuffer8.reserveInitialCapacity(initialReadBufferCapacity); | |
957 | m_buffer8.swap(newBuffer8); | |
958 | ||
959 | Vector<UChar> newBuffer16; | |
960 | newBuffer16.reserveInitialCapacity(initialReadBufferCapacity); | |
961 | m_buffer16.swap(newBuffer16); | |
962 | ||
963 | m_isReparsing = false; | |
964 | ||
ba379fdc A |
965 | m_pattern = UString(); |
966 | m_flags = UString(); | |
967 | } | |
968 | ||
969 | SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine) | |
970 | { | |
971 | if (m_codeWithoutBOMs.isEmpty()) | |
972 | return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); | |
973 | ||
974 | const UChar* data = m_source->provider()->data(); | |
975 | ||
976 | ASSERT(openBrace < closeBrace); | |
977 | ||
978 | int numBOMsBeforeOpenBrace = 0; | |
979 | int numBOMsBetweenBraces = 0; | |
980 | ||
981 | int i; | |
982 | for (i = m_source->startOffset(); i < openBrace; ++i) | |
983 | numBOMsBeforeOpenBrace += data[i] == byteOrderMark; | |
984 | for (; i < closeBrace; ++i) | |
985 | numBOMsBetweenBraces += data[i] == byteOrderMark; | |
986 | ||
987 | return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace, | |
988 | closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine); | |
9dae56ea A |
989 | } |
990 | ||
991 | } // namespace JSC |