]>
Commit | Line | Data |
---|---|---|
b37bf2e1 A |
1 | // -*- c-basic-offset: 2 -*- |
2 | /* | |
3 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) | |
8537cb5c | 4 | * Copyright (C) 2006, 2007, 2008 Apple Inc. All Rights Reserved. |
b37bf2e1 A |
5 | * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) |
6 | * | |
7 | * This library is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Library General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2 of the License, or (at your option) any later version. | |
11 | * | |
12 | * This library is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Library General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Library General Public License | |
18 | * along with this library; see the file COPYING.LIB. If not, write to | |
19 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
20 | * Boston, MA 02110-1301, USA. | |
21 | * | |
22 | */ | |
23 | ||
24 | #include "config.h" | |
25 | #include "lexer.h" | |
26 | ||
27 | #include "dtoa.h" | |
28 | #include "function.h" | |
29 | #include "nodes.h" | |
30 | #include "NodeInfo.h" | |
31 | #include <ctype.h> | |
32 | #include <limits.h> | |
33 | #include <string.h> | |
34 | #include <wtf/Assertions.h> | |
35 | #include <wtf/unicode/Unicode.h> | |
36 | ||
37 | using namespace WTF; | |
38 | using namespace Unicode; | |
39 | ||
40 | // we can't specify the namespace in yacc's C output, so do it here | |
41 | using namespace KJS; | |
42 | ||
43 | #ifndef KDE_USE_FINAL | |
44 | #include "grammar.h" | |
45 | #endif | |
46 | ||
47 | #include "lookup.h" | |
48 | #include "lexer.lut.h" | |
49 | ||
50 | extern YYLTYPE kjsyylloc; // global bison variable holding token info | |
51 | ||
52 | // a bridge for yacc from the C world to C++ | |
53 | int kjsyylex() | |
54 | { | |
55 | return lexer().lex(); | |
56 | } | |
57 | ||
58 | namespace KJS { | |
59 | ||
// Capacity reserved up front for the 8-bit and 16-bit token read buffers.
static const size_t initialReadBufferCapacity = 32;
// Capacity reserved up front for the tables of strings and identifiers
// handed out by the lexer.
static const size_t initialStringTableCapacity = 64;

// Defined further down in this file; declared here because lex() uses it first.
static bool isDecimalDigit(int);
64 | ||
65 | Lexer& lexer() | |
66 | { | |
67 | ASSERT(JSLock::currentThreadIsHoldingLock()); | |
68 | ||
69 | // FIXME: We'd like to avoid calling new here, but we don't currently | |
70 | // support tearing down the Lexer at app quit time, since that would involve | |
71 | // tearing down its UString data members without holding the JSLock. | |
72 | static Lexer* staticLexer = new Lexer; | |
73 | return *staticLexer; | |
74 | } | |
75 | ||
76 | Lexer::Lexer() | |
77 | : yylineno(1) | |
78 | , restrKeyword(false) | |
79 | , eatNextIdentifier(false) | |
80 | , stackToken(-1) | |
81 | , lastToken(-1) | |
82 | , pos(0) | |
83 | , code(0) | |
84 | , length(0) | |
85 | , atLineStart(true) | |
86 | , current(0) | |
87 | , next1(0) | |
88 | , next2(0) | |
89 | , next3(0) | |
90 | { | |
91 | m_buffer8.reserveCapacity(initialReadBufferCapacity); | |
92 | m_buffer16.reserveCapacity(initialReadBufferCapacity); | |
93 | m_strings.reserveCapacity(initialStringTableCapacity); | |
94 | m_identifiers.reserveCapacity(initialStringTableCapacity); | |
95 | } | |
96 | ||
97 | void Lexer::setCode(int startingLineNumber, const KJS::UChar *c, unsigned int len) | |
98 | { | |
8537cb5c A |
99 | yylineno = 1 + startingLineNumber; |
100 | restrKeyword = false; | |
101 | delimited = false; | |
102 | eatNextIdentifier = false; | |
103 | stackToken = -1; | |
104 | lastToken = -1; | |
105 | pos = 0; | |
106 | code = c; | |
107 | length = len; | |
108 | skipLF = false; | |
109 | skipCR = false; | |
110 | error = false; | |
111 | atLineStart = true; | |
112 | ||
113 | // read first characters | |
114 | shift(4); | |
b37bf2e1 A |
115 | } |
116 | ||
8537cb5c | 117 | void Lexer::shift(unsigned p) |
b37bf2e1 | 118 | { |
8537cb5c A |
119 | // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM, |
120 | // see <https://bugs.webkit.org/show_bug.cgi?id=4931>. | |
121 | ||
122 | while (p--) { | |
123 | current = next1; | |
124 | next1 = next2; | |
125 | next2 = next3; | |
126 | do { | |
127 | if (pos >= length) { | |
128 | next3 = -1; | |
129 | break; | |
130 | } | |
131 | next3 = code[pos++].uc; | |
132 | } while (next3 == 0xFEFF); | |
133 | } | |
b37bf2e1 A |
134 | } |
135 | ||
136 | // called on each new line | |
137 | void Lexer::nextLine() | |
138 | { | |
139 | yylineno++; | |
140 | atLineStart = true; | |
141 | } | |
142 | ||
143 | void Lexer::setDone(State s) | |
144 | { | |
145 | state = s; | |
146 | done = true; | |
147 | } | |
148 | ||
149 | int Lexer::lex() | |
150 | { | |
151 | int token = 0; | |
152 | state = Start; | |
153 | unsigned short stringType = 0; // either single or double quotes | |
154 | m_buffer8.clear(); | |
155 | m_buffer16.clear(); | |
156 | done = false; | |
157 | terminator = false; | |
158 | skipLF = false; | |
159 | skipCR = false; | |
160 | ||
161 | // did we push a token on the stack previously ? | |
162 | // (after an automatic semicolon insertion) | |
163 | if (stackToken >= 0) { | |
164 | setDone(Other); | |
165 | token = stackToken; | |
166 | stackToken = 0; | |
167 | } | |
168 | ||
169 | while (!done) { | |
170 | if (skipLF && current != '\n') // found \r but not \n afterwards | |
171 | skipLF = false; | |
172 | if (skipCR && current != '\r') // found \n but not \r afterwards | |
173 | skipCR = false; | |
174 | if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one | |
175 | { | |
176 | skipLF = false; | |
177 | skipCR = false; | |
178 | shift(1); | |
179 | } | |
180 | switch (state) { | |
181 | case Start: | |
182 | if (isWhiteSpace()) { | |
183 | // do nothing | |
184 | } else if (current == '/' && next1 == '/') { | |
185 | shift(1); | |
186 | state = InSingleLineComment; | |
187 | } else if (current == '/' && next1 == '*') { | |
188 | shift(1); | |
189 | state = InMultiLineComment; | |
190 | } else if (current == -1) { | |
191 | if (!terminator && !delimited) { | |
192 | // automatic semicolon insertion if program incomplete | |
193 | token = ';'; | |
194 | stackToken = 0; | |
195 | setDone(Other); | |
196 | } else | |
197 | setDone(Eof); | |
198 | } else if (isLineTerminator()) { | |
199 | nextLine(); | |
200 | terminator = true; | |
201 | if (restrKeyword) { | |
202 | token = ';'; | |
203 | setDone(Other); | |
204 | } | |
205 | } else if (current == '"' || current == '\'') { | |
206 | state = InString; | |
207 | stringType = static_cast<unsigned short>(current); | |
208 | } else if (isIdentStart(current)) { | |
209 | record16(current); | |
210 | state = InIdentifierOrKeyword; | |
211 | } else if (current == '\\') { | |
212 | state = InIdentifierStartUnicodeEscapeStart; | |
213 | } else if (current == '0') { | |
214 | record8(current); | |
215 | state = InNum0; | |
216 | } else if (isDecimalDigit(current)) { | |
217 | record8(current); | |
218 | state = InNum; | |
219 | } else if (current == '.' && isDecimalDigit(next1)) { | |
220 | record8(current); | |
221 | state = InDecimal; | |
222 | // <!-- marks the beginning of a line comment (for www usage) | |
223 | } else if (current == '<' && next1 == '!' && | |
224 | next2 == '-' && next3 == '-') { | |
225 | shift(3); | |
226 | state = InSingleLineComment; | |
227 | // same for --> | |
228 | } else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') { | |
229 | shift(2); | |
230 | state = InSingleLineComment; | |
231 | } else { | |
232 | token = matchPunctuator(current, next1, next2, next3); | |
233 | if (token != -1) { | |
234 | setDone(Other); | |
235 | } else { | |
236 | // cerr << "encountered unknown character" << endl; | |
237 | setDone(Bad); | |
238 | } | |
239 | } | |
240 | break; | |
241 | case InString: | |
242 | if (current == stringType) { | |
243 | shift(1); | |
244 | setDone(String); | |
245 | } else if (isLineTerminator() || current == -1) { | |
246 | setDone(Bad); | |
247 | } else if (current == '\\') { | |
248 | state = InEscapeSequence; | |
249 | } else { | |
250 | record16(current); | |
251 | } | |
252 | break; | |
253 | // Escape Sequences inside of strings | |
254 | case InEscapeSequence: | |
255 | if (isOctalDigit(current)) { | |
256 | if (current >= '0' && current <= '3' && | |
257 | isOctalDigit(next1) && isOctalDigit(next2)) { | |
258 | record16(convertOctal(current, next1, next2)); | |
259 | shift(2); | |
260 | state = InString; | |
261 | } else if (isOctalDigit(current) && isOctalDigit(next1)) { | |
262 | record16(convertOctal('0', current, next1)); | |
263 | shift(1); | |
264 | state = InString; | |
265 | } else if (isOctalDigit(current)) { | |
266 | record16(convertOctal('0', '0', current)); | |
267 | state = InString; | |
268 | } else { | |
269 | setDone(Bad); | |
270 | } | |
271 | } else if (current == 'x') | |
272 | state = InHexEscape; | |
273 | else if (current == 'u') | |
274 | state = InUnicodeEscape; | |
275 | else if (isLineTerminator()) { | |
276 | nextLine(); | |
277 | state = InString; | |
278 | } else { | |
279 | record16(singleEscape(static_cast<unsigned short>(current))); | |
280 | state = InString; | |
281 | } | |
282 | break; | |
283 | case InHexEscape: | |
284 | if (isHexDigit(current) && isHexDigit(next1)) { | |
285 | state = InString; | |
286 | record16(convertHex(current, next1)); | |
287 | shift(1); | |
288 | } else if (current == stringType) { | |
289 | record16('x'); | |
290 | shift(1); | |
291 | setDone(String); | |
292 | } else { | |
293 | record16('x'); | |
294 | record16(current); | |
295 | state = InString; | |
296 | } | |
297 | break; | |
298 | case InUnicodeEscape: | |
299 | if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) { | |
300 | record16(convertUnicode(current, next1, next2, next3)); | |
301 | shift(3); | |
302 | state = InString; | |
303 | } else if (current == stringType) { | |
304 | record16('u'); | |
305 | shift(1); | |
306 | setDone(String); | |
307 | } else { | |
308 | setDone(Bad); | |
309 | } | |
310 | break; | |
311 | case InSingleLineComment: | |
312 | if (isLineTerminator()) { | |
313 | nextLine(); | |
314 | terminator = true; | |
315 | if (restrKeyword) { | |
316 | token = ';'; | |
317 | setDone(Other); | |
318 | } else | |
319 | state = Start; | |
320 | } else if (current == -1) { | |
321 | setDone(Eof); | |
322 | } | |
323 | break; | |
324 | case InMultiLineComment: | |
325 | if (current == -1) { | |
326 | setDone(Bad); | |
327 | } else if (isLineTerminator()) { | |
328 | nextLine(); | |
329 | } else if (current == '*' && next1 == '/') { | |
330 | state = Start; | |
331 | shift(1); | |
332 | } | |
333 | break; | |
334 | case InIdentifierOrKeyword: | |
335 | case InIdentifier: | |
336 | if (isIdentPart(current)) | |
337 | record16(current); | |
338 | else if (current == '\\') | |
339 | state = InIdentifierPartUnicodeEscapeStart; | |
340 | else | |
341 | setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier); | |
342 | break; | |
343 | case InNum0: | |
344 | if (current == 'x' || current == 'X') { | |
345 | record8(current); | |
346 | state = InHex; | |
347 | } else if (current == '.') { | |
348 | record8(current); | |
349 | state = InDecimal; | |
350 | } else if (current == 'e' || current == 'E') { | |
351 | record8(current); | |
352 | state = InExponentIndicator; | |
353 | } else if (isOctalDigit(current)) { | |
354 | record8(current); | |
355 | state = InOctal; | |
356 | } else if (isDecimalDigit(current)) { | |
357 | record8(current); | |
358 | state = InDecimal; | |
359 | } else { | |
360 | setDone(Number); | |
361 | } | |
362 | break; | |
363 | case InHex: | |
364 | if (isHexDigit(current)) { | |
365 | record8(current); | |
366 | } else { | |
367 | setDone(Hex); | |
368 | } | |
369 | break; | |
370 | case InOctal: | |
371 | if (isOctalDigit(current)) { | |
372 | record8(current); | |
373 | } | |
374 | else if (isDecimalDigit(current)) { | |
375 | record8(current); | |
376 | state = InDecimal; | |
377 | } else | |
378 | setDone(Octal); | |
379 | break; | |
380 | case InNum: | |
381 | if (isDecimalDigit(current)) { | |
382 | record8(current); | |
383 | } else if (current == '.') { | |
384 | record8(current); | |
385 | state = InDecimal; | |
386 | } else if (current == 'e' || current == 'E') { | |
387 | record8(current); | |
388 | state = InExponentIndicator; | |
389 | } else | |
390 | setDone(Number); | |
391 | break; | |
392 | case InDecimal: | |
393 | if (isDecimalDigit(current)) { | |
394 | record8(current); | |
395 | } else if (current == 'e' || current == 'E') { | |
396 | record8(current); | |
397 | state = InExponentIndicator; | |
398 | } else | |
399 | setDone(Number); | |
400 | break; | |
401 | case InExponentIndicator: | |
402 | if (current == '+' || current == '-') { | |
403 | record8(current); | |
404 | } else if (isDecimalDigit(current)) { | |
405 | record8(current); | |
406 | state = InExponent; | |
407 | } else | |
408 | setDone(Bad); | |
409 | break; | |
410 | case InExponent: | |
411 | if (isDecimalDigit(current)) { | |
412 | record8(current); | |
413 | } else | |
414 | setDone(Number); | |
415 | break; | |
416 | case InIdentifierStartUnicodeEscapeStart: | |
417 | if (current == 'u') | |
418 | state = InIdentifierStartUnicodeEscape; | |
419 | else | |
420 | setDone(Bad); | |
421 | break; | |
422 | case InIdentifierPartUnicodeEscapeStart: | |
423 | if (current == 'u') | |
424 | state = InIdentifierPartUnicodeEscape; | |
425 | else | |
426 | setDone(Bad); | |
427 | break; | |
428 | case InIdentifierStartUnicodeEscape: | |
429 | if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) { | |
430 | setDone(Bad); | |
431 | break; | |
432 | } | |
433 | token = convertUnicode(current, next1, next2, next3).uc; | |
434 | shift(3); | |
435 | if (!isIdentStart(token)) { | |
436 | setDone(Bad); | |
437 | break; | |
438 | } | |
439 | record16(token); | |
440 | state = InIdentifier; | |
441 | break; | |
442 | case InIdentifierPartUnicodeEscape: | |
443 | if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) { | |
444 | setDone(Bad); | |
445 | break; | |
446 | } | |
447 | token = convertUnicode(current, next1, next2, next3).uc; | |
448 | shift(3); | |
449 | if (!isIdentPart(token)) { | |
450 | setDone(Bad); | |
451 | break; | |
452 | } | |
453 | record16(token); | |
454 | state = InIdentifier; | |
455 | break; | |
456 | default: | |
457 | ASSERT(!"Unhandled state in switch statement"); | |
458 | } | |
459 | ||
460 | // move on to the next character | |
461 | if (!done) | |
462 | shift(1); | |
463 | if (state != Start && state != InSingleLineComment) | |
464 | atLineStart = false; | |
465 | } | |
466 | ||
467 | // no identifiers allowed directly after numeric literal, e.g. "3in" is bad | |
468 | if ((state == Number || state == Octal || state == Hex) && isIdentStart(current)) | |
469 | state = Bad; | |
470 | ||
471 | // terminate string | |
472 | m_buffer8.append('\0'); | |
473 | ||
474 | #ifdef KJS_DEBUG_LEX | |
475 | fprintf(stderr, "line: %d ", lineNo()); | |
476 | fprintf(stderr, "yytext (%x): ", m_buffer8[0]); | |
477 | fprintf(stderr, "%s ", buffer8.data()); | |
478 | #endif | |
479 | ||
480 | double dval = 0; | |
481 | if (state == Number) { | |
482 | dval = kjs_strtod(m_buffer8.data(), 0L); | |
483 | } else if (state == Hex) { // scan hex numbers | |
484 | const char* p = m_buffer8.data() + 2; | |
485 | while (char c = *p++) { | |
486 | dval *= 16; | |
487 | dval += convertHex(c); | |
488 | } | |
489 | ||
490 | if (dval >= mantissaOverflowLowerBound) | |
491 | dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16); | |
492 | ||
493 | state = Number; | |
494 | } else if (state == Octal) { // scan octal number | |
495 | const char* p = m_buffer8.data() + 1; | |
496 | while (char c = *p++) { | |
497 | dval *= 8; | |
498 | dval += c - '0'; | |
499 | } | |
500 | ||
501 | if (dval >= mantissaOverflowLowerBound) | |
502 | dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8); | |
503 | ||
504 | state = Number; | |
505 | } | |
506 | ||
507 | #ifdef KJS_DEBUG_LEX | |
508 | switch (state) { | |
509 | case Eof: | |
510 | printf("(EOF)\n"); | |
511 | break; | |
512 | case Other: | |
513 | printf("(Other)\n"); | |
514 | break; | |
515 | case Identifier: | |
516 | printf("(Identifier)/(Keyword)\n"); | |
517 | break; | |
518 | case String: | |
519 | printf("(String)\n"); | |
520 | break; | |
521 | case Number: | |
522 | printf("(Number)\n"); | |
523 | break; | |
524 | default: | |
525 | printf("(unknown)"); | |
526 | } | |
527 | #endif | |
528 | ||
529 | if (state != Identifier && eatNextIdentifier) | |
530 | eatNextIdentifier = false; | |
531 | ||
532 | restrKeyword = false; | |
533 | delimited = false; | |
534 | kjsyylloc.first_line = yylineno; // ??? | |
535 | kjsyylloc.last_line = yylineno; | |
536 | ||
537 | switch (state) { | |
538 | case Eof: | |
539 | token = 0; | |
540 | break; | |
541 | case Other: | |
542 | if(token == '}' || token == ';') { | |
543 | delimited = true; | |
544 | } | |
545 | break; | |
546 | case IdentifierOrKeyword: | |
547 | if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) { | |
548 | case Identifier: | |
549 | // Lookup for keyword failed, means this is an identifier | |
550 | // Apply anonymous-function hack below (eat the identifier) | |
551 | if (eatNextIdentifier) { | |
552 | eatNextIdentifier = false; | |
553 | token = lex(); | |
554 | break; | |
555 | } | |
556 | kjsyylval.ident = makeIdentifier(m_buffer16); | |
557 | token = IDENT; | |
558 | break; | |
559 | } | |
560 | ||
561 | eatNextIdentifier = false; | |
562 | // Hack for "f = function somename() { ... }", too hard to get into the grammar | |
563 | if (token == FUNCTION && lastToken == '=' ) | |
564 | eatNextIdentifier = true; | |
565 | ||
566 | if (token == CONTINUE || token == BREAK || | |
567 | token == RETURN || token == THROW) | |
568 | restrKeyword = true; | |
569 | break; | |
570 | case String: | |
571 | kjsyylval.string = makeUString(m_buffer16); | |
572 | token = STRING; | |
573 | break; | |
574 | case Number: | |
575 | kjsyylval.doubleValue = dval; | |
576 | token = NUMBER; | |
577 | break; | |
578 | case Bad: | |
579 | #ifdef KJS_DEBUG_LEX | |
580 | fprintf(stderr, "yylex: ERROR.\n"); | |
581 | #endif | |
582 | error = true; | |
583 | return -1; | |
584 | default: | |
585 | ASSERT(!"unhandled numeration value in switch"); | |
586 | error = true; | |
587 | return -1; | |
588 | } | |
589 | lastToken = token; | |
590 | return token; | |
591 | } | |
592 | ||
593 | bool Lexer::isWhiteSpace() const | |
594 | { | |
595 | return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current); | |
596 | } | |
597 | ||
598 | bool Lexer::isLineTerminator() | |
599 | { | |
600 | bool cr = (current == '\r'); | |
601 | bool lf = (current == '\n'); | |
602 | if (cr) | |
603 | skipLF = true; | |
604 | else if (lf) | |
605 | skipCR = true; | |
606 | return cr || lf || current == 0x2028 || current == 0x2029; | |
607 | } | |
608 | ||
609 | bool Lexer::isIdentStart(int c) | |
610 | { | |
611 | return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)) | |
612 | || c == '$' || c == '_'; | |
613 | } | |
614 | ||
615 | bool Lexer::isIdentPart(int c) | |
616 | { | |
617 | return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other | |
618 | | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)) | |
619 | || c == '$' || c == '_'; | |
620 | } | |
621 | ||
// True when c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
626 | ||
627 | bool Lexer::isHexDigit(int c) | |
628 | { | |
629 | return (c >= '0' && c <= '9' || | |
630 | c >= 'a' && c <= 'f' || | |
631 | c >= 'A' && c <= 'F'); | |
632 | } | |
633 | ||
634 | bool Lexer::isOctalDigit(int c) | |
635 | { | |
636 | return (c >= '0' && c <= '7'); | |
637 | } | |
638 | ||
639 | int Lexer::matchPunctuator(int c1, int c2, int c3, int c4) | |
640 | { | |
641 | if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { | |
642 | shift(4); | |
643 | return URSHIFTEQUAL; | |
644 | } else if (c1 == '=' && c2 == '=' && c3 == '=') { | |
645 | shift(3); | |
646 | return STREQ; | |
647 | } else if (c1 == '!' && c2 == '=' && c3 == '=') { | |
648 | shift(3); | |
649 | return STRNEQ; | |
650 | } else if (c1 == '>' && c2 == '>' && c3 == '>') { | |
651 | shift(3); | |
652 | return URSHIFT; | |
653 | } else if (c1 == '<' && c2 == '<' && c3 == '=') { | |
654 | shift(3); | |
655 | return LSHIFTEQUAL; | |
656 | } else if (c1 == '>' && c2 == '>' && c3 == '=') { | |
657 | shift(3); | |
658 | return RSHIFTEQUAL; | |
659 | } else if (c1 == '<' && c2 == '=') { | |
660 | shift(2); | |
661 | return LE; | |
662 | } else if (c1 == '>' && c2 == '=') { | |
663 | shift(2); | |
664 | return GE; | |
665 | } else if (c1 == '!' && c2 == '=') { | |
666 | shift(2); | |
667 | return NE; | |
668 | } else if (c1 == '+' && c2 == '+') { | |
669 | shift(2); | |
670 | if (terminator) | |
671 | return AUTOPLUSPLUS; | |
672 | else | |
673 | return PLUSPLUS; | |
674 | } else if (c1 == '-' && c2 == '-') { | |
675 | shift(2); | |
676 | if (terminator) | |
677 | return AUTOMINUSMINUS; | |
678 | else | |
679 | return MINUSMINUS; | |
680 | } else if (c1 == '=' && c2 == '=') { | |
681 | shift(2); | |
682 | return EQEQ; | |
683 | } else if (c1 == '+' && c2 == '=') { | |
684 | shift(2); | |
685 | return PLUSEQUAL; | |
686 | } else if (c1 == '-' && c2 == '=') { | |
687 | shift(2); | |
688 | return MINUSEQUAL; | |
689 | } else if (c1 == '*' && c2 == '=') { | |
690 | shift(2); | |
691 | return MULTEQUAL; | |
692 | } else if (c1 == '/' && c2 == '=') { | |
693 | shift(2); | |
694 | return DIVEQUAL; | |
695 | } else if (c1 == '&' && c2 == '=') { | |
696 | shift(2); | |
697 | return ANDEQUAL; | |
698 | } else if (c1 == '^' && c2 == '=') { | |
699 | shift(2); | |
700 | return XOREQUAL; | |
701 | } else if (c1 == '%' && c2 == '=') { | |
702 | shift(2); | |
703 | return MODEQUAL; | |
704 | } else if (c1 == '|' && c2 == '=') { | |
705 | shift(2); | |
706 | return OREQUAL; | |
707 | } else if (c1 == '<' && c2 == '<') { | |
708 | shift(2); | |
709 | return LSHIFT; | |
710 | } else if (c1 == '>' && c2 == '>') { | |
711 | shift(2); | |
712 | return RSHIFT; | |
713 | } else if (c1 == '&' && c2 == '&') { | |
714 | shift(2); | |
715 | return AND; | |
716 | } else if (c1 == '|' && c2 == '|') { | |
717 | shift(2); | |
718 | return OR; | |
719 | } | |
720 | ||
721 | switch(c1) { | |
722 | case '=': | |
723 | case '>': | |
724 | case '<': | |
725 | case ',': | |
726 | case '!': | |
727 | case '~': | |
728 | case '?': | |
729 | case ':': | |
730 | case '.': | |
731 | case '+': | |
732 | case '-': | |
733 | case '*': | |
734 | case '/': | |
735 | case '&': | |
736 | case '|': | |
737 | case '^': | |
738 | case '%': | |
739 | case '(': | |
740 | case ')': | |
741 | case '{': | |
742 | case '}': | |
743 | case '[': | |
744 | case ']': | |
745 | case ';': | |
746 | shift(1); | |
747 | return static_cast<int>(c1); | |
748 | default: | |
749 | return -1; | |
750 | } | |
751 | } | |
752 | ||
753 | unsigned short Lexer::singleEscape(unsigned short c) | |
754 | { | |
755 | switch(c) { | |
756 | case 'b': | |
757 | return 0x08; | |
758 | case 't': | |
759 | return 0x09; | |
760 | case 'n': | |
761 | return 0x0A; | |
762 | case 'v': | |
763 | return 0x0B; | |
764 | case 'f': | |
765 | return 0x0C; | |
766 | case 'r': | |
767 | return 0x0D; | |
768 | case '"': | |
769 | return 0x22; | |
770 | case '\'': | |
771 | return 0x27; | |
772 | case '\\': | |
773 | return 0x5C; | |
774 | default: | |
775 | return c; | |
776 | } | |
777 | } | |
778 | ||
779 | unsigned short Lexer::convertOctal(int c1, int c2, int c3) | |
780 | { | |
781 | return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0'); | |
782 | } | |
783 | ||
784 | unsigned char Lexer::convertHex(int c) | |
785 | { | |
786 | if (c >= '0' && c <= '9') | |
787 | return static_cast<unsigned char>(c - '0'); | |
788 | if (c >= 'a' && c <= 'f') | |
789 | return static_cast<unsigned char>(c - 'a' + 10); | |
790 | return static_cast<unsigned char>(c - 'A' + 10); | |
791 | } | |
792 | ||
793 | unsigned char Lexer::convertHex(int c1, int c2) | |
794 | { | |
795 | return ((convertHex(c1) << 4) + convertHex(c2)); | |
796 | } | |
797 | ||
798 | KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) | |
799 | { | |
800 | return KJS::UChar((convertHex(c1) << 4) + convertHex(c2), | |
801 | (convertHex(c3) << 4) + convertHex(c4)); | |
802 | } | |
803 | ||
804 | void Lexer::record8(int c) | |
805 | { | |
806 | ASSERT(c >= 0); | |
807 | ASSERT(c <= 0xff); | |
808 | m_buffer8.append(static_cast<char>(c)); | |
809 | } | |
810 | ||
811 | void Lexer::record16(int c) | |
812 | { | |
813 | ASSERT(c >= 0); | |
814 | ASSERT(c <= USHRT_MAX); | |
815 | record16(UChar(static_cast<unsigned short>(c))); | |
816 | } | |
817 | ||
818 | void Lexer::record16(KJS::UChar c) | |
819 | { | |
820 | m_buffer16.append(c); | |
821 | } | |
822 | ||
823 | bool Lexer::scanRegExp() | |
824 | { | |
825 | m_buffer16.clear(); | |
826 | bool lastWasEscape = false; | |
827 | bool inBrackets = false; | |
828 | ||
829 | while (1) { | |
830 | if (isLineTerminator() || current == -1) | |
831 | return false; | |
832 | else if (current != '/' || lastWasEscape == true || inBrackets == true) | |
833 | { | |
834 | // keep track of '[' and ']' | |
835 | if (!lastWasEscape) { | |
836 | if ( current == '[' && !inBrackets ) | |
837 | inBrackets = true; | |
838 | if ( current == ']' && inBrackets ) | |
839 | inBrackets = false; | |
840 | } | |
841 | record16(current); | |
842 | lastWasEscape = | |
843 | !lastWasEscape && (current == '\\'); | |
844 | } else { // end of regexp | |
845 | m_pattern = UString(m_buffer16); | |
846 | m_buffer16.clear(); | |
847 | shift(1); | |
848 | break; | |
849 | } | |
850 | shift(1); | |
851 | } | |
852 | ||
853 | while (isIdentPart(current)) { | |
854 | record16(current); | |
855 | shift(1); | |
856 | } | |
857 | m_flags = UString(m_buffer16); | |
858 | ||
859 | return true; | |
860 | } | |
861 | ||
862 | void Lexer::clear() | |
863 | { | |
864 | deleteAllValues(m_strings); | |
865 | Vector<UString*> newStrings; | |
866 | newStrings.reserveCapacity(initialStringTableCapacity); | |
867 | m_strings.swap(newStrings); | |
868 | ||
869 | deleteAllValues(m_identifiers); | |
870 | Vector<KJS::Identifier*> newIdentifiers; | |
871 | newIdentifiers.reserveCapacity(initialStringTableCapacity); | |
872 | m_identifiers.swap(newIdentifiers); | |
873 | ||
874 | Vector<char> newBuffer8; | |
875 | newBuffer8.reserveCapacity(initialReadBufferCapacity); | |
876 | m_buffer8.swap(newBuffer8); | |
877 | ||
878 | Vector<UChar> newBuffer16; | |
879 | newBuffer16.reserveCapacity(initialReadBufferCapacity); | |
880 | m_buffer16.swap(newBuffer16); | |
881 | ||
882 | m_pattern = 0; | |
883 | m_flags = 0; | |
884 | } | |
885 | ||
886 | Identifier* Lexer::makeIdentifier(const Vector<KJS::UChar>& buffer) | |
887 | { | |
888 | KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size()); | |
889 | m_identifiers.append(identifier); | |
890 | return identifier; | |
891 | } | |
892 | ||
893 | UString* Lexer::makeUString(const Vector<KJS::UChar>& buffer) | |
894 | { | |
895 | UString* string = new UString(buffer); | |
896 | m_strings.append(string); | |
897 | return string; | |
898 | } | |
899 | ||
900 | } // namespace KJS |