]>
Commit | Line | Data |
---|---|---|
b37bf2e1 A |
1 | // -*- c-basic-offset: 2 -*- |
2 | /* | |
3 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) | |
4 | * Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved. | |
5 | * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) | |
6 | * | |
7 | * This library is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Library General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2 of the License, or (at your option) any later version. | |
11 | * | |
12 | * This library is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Library General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Library General Public License | |
18 | * along with this library; see the file COPYING.LIB. If not, write to | |
19 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
20 | * Boston, MA 02110-1301, USA. | |
21 | * | |
22 | */ | |
23 | ||
24 | #include "config.h" | |
25 | #include "lexer.h" | |
26 | ||
27 | #include "dtoa.h" | |
28 | #include "function.h" | |
29 | #include "nodes.h" | |
30 | #include "NodeInfo.h" | |
31 | #include <ctype.h> | |
32 | #include <limits.h> | |
33 | #include <string.h> | |
34 | #include <wtf/Assertions.h> | |
35 | #include <wtf/unicode/Unicode.h> | |
36 | ||
37 | using namespace WTF; | |
38 | using namespace Unicode; | |
39 | ||
40 | // we can't specify the namespace in yacc's C output, so do it here | |
41 | using namespace KJS; | |
42 | ||
43 | #ifndef KDE_USE_FINAL | |
44 | #include "grammar.h" | |
45 | #endif | |
46 | ||
47 | #include "lookup.h" | |
48 | #include "lexer.lut.h" | |
49 | ||
50 | extern YYLTYPE kjsyylloc; // global bison variable holding token info | |
51 | ||
52 | // a bridge for yacc from the C world to C++ | |
53 | int kjsyylex() | |
54 | { | |
55 | return lexer().lex(); | |
56 | } | |
57 | ||
58 | namespace KJS { | |
59 | ||
// Forward declaration; the definition appears further down in this file.
static bool isDecimalDigit(int);

// Capacities reserved up front for the lexer's read buffers
// (m_buffer8 / m_buffer16) and for its string and identifier tables
// (m_strings / m_identifiers). The same values are reused by Lexer::clear().
static const size_t initialReadBufferCapacity = 32;
static const size_t initialStringTableCapacity = 64;
64 | ||
65 | Lexer& lexer() | |
66 | { | |
67 | ASSERT(JSLock::currentThreadIsHoldingLock()); | |
68 | ||
69 | // FIXME: We'd like to avoid calling new here, but we don't currently | |
70 | // support tearing down the Lexer at app quit time, since that would involve | |
71 | // tearing down its UString data members without holding the JSLock. | |
72 | static Lexer* staticLexer = new Lexer; | |
73 | return *staticLexer; | |
74 | } | |
75 | ||
76 | Lexer::Lexer() | |
77 | : yylineno(1) | |
78 | , restrKeyword(false) | |
79 | , eatNextIdentifier(false) | |
80 | , stackToken(-1) | |
81 | , lastToken(-1) | |
82 | , pos(0) | |
83 | , code(0) | |
84 | , length(0) | |
85 | , atLineStart(true) | |
86 | , current(0) | |
87 | , next1(0) | |
88 | , next2(0) | |
89 | , next3(0) | |
90 | { | |
91 | m_buffer8.reserveCapacity(initialReadBufferCapacity); | |
92 | m_buffer16.reserveCapacity(initialReadBufferCapacity); | |
93 | m_strings.reserveCapacity(initialStringTableCapacity); | |
94 | m_identifiers.reserveCapacity(initialStringTableCapacity); | |
95 | } | |
96 | ||
97 | void Lexer::setCode(int startingLineNumber, const KJS::UChar *c, unsigned int len) | |
98 | { | |
99 | yylineno = 1 + startingLineNumber; | |
100 | restrKeyword = false; | |
101 | delimited = false; | |
102 | eatNextIdentifier = false; | |
103 | stackToken = -1; | |
104 | lastToken = -1; | |
105 | pos = 0; | |
106 | code = c; | |
107 | length = len; | |
108 | skipLF = false; | |
109 | skipCR = false; | |
110 | error = false; | |
111 | atLineStart = true; | |
112 | ||
113 | // read first characters | |
114 | current = (length > 0) ? code[0].uc : -1; | |
115 | next1 = (length > 1) ? code[1].uc : -1; | |
116 | next2 = (length > 2) ? code[2].uc : -1; | |
117 | next3 = (length > 3) ? code[3].uc : -1; | |
118 | } | |
119 | ||
120 | void Lexer::shift(unsigned int p) | |
121 | { | |
122 | // Here would be a good place to strip Cf characters, but that has caused compatibility problems: | |
123 | // <http://bugs.webkit.org/show_bug.cgi?id=10183>. | |
124 | while (p--) { | |
125 | pos++; | |
126 | current = next1; | |
127 | next1 = next2; | |
128 | next2 = next3; | |
129 | next3 = (pos + 3 < length) ? code[pos + 3].uc : -1; | |
130 | } | |
131 | } | |
132 | ||
133 | // called on each new line | |
134 | void Lexer::nextLine() | |
135 | { | |
136 | yylineno++; | |
137 | atLineStart = true; | |
138 | } | |
139 | ||
140 | void Lexer::setDone(State s) | |
141 | { | |
142 | state = s; | |
143 | done = true; | |
144 | } | |
145 | ||
146 | int Lexer::lex() | |
147 | { | |
148 | int token = 0; | |
149 | state = Start; | |
150 | unsigned short stringType = 0; // either single or double quotes | |
151 | m_buffer8.clear(); | |
152 | m_buffer16.clear(); | |
153 | done = false; | |
154 | terminator = false; | |
155 | skipLF = false; | |
156 | skipCR = false; | |
157 | ||
158 | // did we push a token on the stack previously ? | |
159 | // (after an automatic semicolon insertion) | |
160 | if (stackToken >= 0) { | |
161 | setDone(Other); | |
162 | token = stackToken; | |
163 | stackToken = 0; | |
164 | } | |
165 | ||
166 | while (!done) { | |
167 | if (skipLF && current != '\n') // found \r but not \n afterwards | |
168 | skipLF = false; | |
169 | if (skipCR && current != '\r') // found \n but not \r afterwards | |
170 | skipCR = false; | |
171 | if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one | |
172 | { | |
173 | skipLF = false; | |
174 | skipCR = false; | |
175 | shift(1); | |
176 | } | |
177 | switch (state) { | |
178 | case Start: | |
179 | if (isWhiteSpace()) { | |
180 | // do nothing | |
181 | } else if (current == '/' && next1 == '/') { | |
182 | shift(1); | |
183 | state = InSingleLineComment; | |
184 | } else if (current == '/' && next1 == '*') { | |
185 | shift(1); | |
186 | state = InMultiLineComment; | |
187 | } else if (current == -1) { | |
188 | if (!terminator && !delimited) { | |
189 | // automatic semicolon insertion if program incomplete | |
190 | token = ';'; | |
191 | stackToken = 0; | |
192 | setDone(Other); | |
193 | } else | |
194 | setDone(Eof); | |
195 | } else if (isLineTerminator()) { | |
196 | nextLine(); | |
197 | terminator = true; | |
198 | if (restrKeyword) { | |
199 | token = ';'; | |
200 | setDone(Other); | |
201 | } | |
202 | } else if (current == '"' || current == '\'') { | |
203 | state = InString; | |
204 | stringType = static_cast<unsigned short>(current); | |
205 | } else if (isIdentStart(current)) { | |
206 | record16(current); | |
207 | state = InIdentifierOrKeyword; | |
208 | } else if (current == '\\') { | |
209 | state = InIdentifierStartUnicodeEscapeStart; | |
210 | } else if (current == '0') { | |
211 | record8(current); | |
212 | state = InNum0; | |
213 | } else if (isDecimalDigit(current)) { | |
214 | record8(current); | |
215 | state = InNum; | |
216 | } else if (current == '.' && isDecimalDigit(next1)) { | |
217 | record8(current); | |
218 | state = InDecimal; | |
219 | // <!-- marks the beginning of a line comment (for www usage) | |
220 | } else if (current == '<' && next1 == '!' && | |
221 | next2 == '-' && next3 == '-') { | |
222 | shift(3); | |
223 | state = InSingleLineComment; | |
224 | // same for --> | |
225 | } else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') { | |
226 | shift(2); | |
227 | state = InSingleLineComment; | |
228 | } else { | |
229 | token = matchPunctuator(current, next1, next2, next3); | |
230 | if (token != -1) { | |
231 | setDone(Other); | |
232 | } else { | |
233 | // cerr << "encountered unknown character" << endl; | |
234 | setDone(Bad); | |
235 | } | |
236 | } | |
237 | break; | |
238 | case InString: | |
239 | if (current == stringType) { | |
240 | shift(1); | |
241 | setDone(String); | |
242 | } else if (isLineTerminator() || current == -1) { | |
243 | setDone(Bad); | |
244 | } else if (current == '\\') { | |
245 | state = InEscapeSequence; | |
246 | } else { | |
247 | record16(current); | |
248 | } | |
249 | break; | |
250 | // Escape Sequences inside of strings | |
251 | case InEscapeSequence: | |
252 | if (isOctalDigit(current)) { | |
253 | if (current >= '0' && current <= '3' && | |
254 | isOctalDigit(next1) && isOctalDigit(next2)) { | |
255 | record16(convertOctal(current, next1, next2)); | |
256 | shift(2); | |
257 | state = InString; | |
258 | } else if (isOctalDigit(current) && isOctalDigit(next1)) { | |
259 | record16(convertOctal('0', current, next1)); | |
260 | shift(1); | |
261 | state = InString; | |
262 | } else if (isOctalDigit(current)) { | |
263 | record16(convertOctal('0', '0', current)); | |
264 | state = InString; | |
265 | } else { | |
266 | setDone(Bad); | |
267 | } | |
268 | } else if (current == 'x') | |
269 | state = InHexEscape; | |
270 | else if (current == 'u') | |
271 | state = InUnicodeEscape; | |
272 | else if (isLineTerminator()) { | |
273 | nextLine(); | |
274 | state = InString; | |
275 | } else { | |
276 | record16(singleEscape(static_cast<unsigned short>(current))); | |
277 | state = InString; | |
278 | } | |
279 | break; | |
280 | case InHexEscape: | |
281 | if (isHexDigit(current) && isHexDigit(next1)) { | |
282 | state = InString; | |
283 | record16(convertHex(current, next1)); | |
284 | shift(1); | |
285 | } else if (current == stringType) { | |
286 | record16('x'); | |
287 | shift(1); | |
288 | setDone(String); | |
289 | } else { | |
290 | record16('x'); | |
291 | record16(current); | |
292 | state = InString; | |
293 | } | |
294 | break; | |
295 | case InUnicodeEscape: | |
296 | if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) { | |
297 | record16(convertUnicode(current, next1, next2, next3)); | |
298 | shift(3); | |
299 | state = InString; | |
300 | } else if (current == stringType) { | |
301 | record16('u'); | |
302 | shift(1); | |
303 | setDone(String); | |
304 | } else { | |
305 | setDone(Bad); | |
306 | } | |
307 | break; | |
308 | case InSingleLineComment: | |
309 | if (isLineTerminator()) { | |
310 | nextLine(); | |
311 | terminator = true; | |
312 | if (restrKeyword) { | |
313 | token = ';'; | |
314 | setDone(Other); | |
315 | } else | |
316 | state = Start; | |
317 | } else if (current == -1) { | |
318 | setDone(Eof); | |
319 | } | |
320 | break; | |
321 | case InMultiLineComment: | |
322 | if (current == -1) { | |
323 | setDone(Bad); | |
324 | } else if (isLineTerminator()) { | |
325 | nextLine(); | |
326 | } else if (current == '*' && next1 == '/') { | |
327 | state = Start; | |
328 | shift(1); | |
329 | } | |
330 | break; | |
331 | case InIdentifierOrKeyword: | |
332 | case InIdentifier: | |
333 | if (isIdentPart(current)) | |
334 | record16(current); | |
335 | else if (current == '\\') | |
336 | state = InIdentifierPartUnicodeEscapeStart; | |
337 | else | |
338 | setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier); | |
339 | break; | |
340 | case InNum0: | |
341 | if (current == 'x' || current == 'X') { | |
342 | record8(current); | |
343 | state = InHex; | |
344 | } else if (current == '.') { | |
345 | record8(current); | |
346 | state = InDecimal; | |
347 | } else if (current == 'e' || current == 'E') { | |
348 | record8(current); | |
349 | state = InExponentIndicator; | |
350 | } else if (isOctalDigit(current)) { | |
351 | record8(current); | |
352 | state = InOctal; | |
353 | } else if (isDecimalDigit(current)) { | |
354 | record8(current); | |
355 | state = InDecimal; | |
356 | } else { | |
357 | setDone(Number); | |
358 | } | |
359 | break; | |
360 | case InHex: | |
361 | if (isHexDigit(current)) { | |
362 | record8(current); | |
363 | } else { | |
364 | setDone(Hex); | |
365 | } | |
366 | break; | |
367 | case InOctal: | |
368 | if (isOctalDigit(current)) { | |
369 | record8(current); | |
370 | } | |
371 | else if (isDecimalDigit(current)) { | |
372 | record8(current); | |
373 | state = InDecimal; | |
374 | } else | |
375 | setDone(Octal); | |
376 | break; | |
377 | case InNum: | |
378 | if (isDecimalDigit(current)) { | |
379 | record8(current); | |
380 | } else if (current == '.') { | |
381 | record8(current); | |
382 | state = InDecimal; | |
383 | } else if (current == 'e' || current == 'E') { | |
384 | record8(current); | |
385 | state = InExponentIndicator; | |
386 | } else | |
387 | setDone(Number); | |
388 | break; | |
389 | case InDecimal: | |
390 | if (isDecimalDigit(current)) { | |
391 | record8(current); | |
392 | } else if (current == 'e' || current == 'E') { | |
393 | record8(current); | |
394 | state = InExponentIndicator; | |
395 | } else | |
396 | setDone(Number); | |
397 | break; | |
398 | case InExponentIndicator: | |
399 | if (current == '+' || current == '-') { | |
400 | record8(current); | |
401 | } else if (isDecimalDigit(current)) { | |
402 | record8(current); | |
403 | state = InExponent; | |
404 | } else | |
405 | setDone(Bad); | |
406 | break; | |
407 | case InExponent: | |
408 | if (isDecimalDigit(current)) { | |
409 | record8(current); | |
410 | } else | |
411 | setDone(Number); | |
412 | break; | |
413 | case InIdentifierStartUnicodeEscapeStart: | |
414 | if (current == 'u') | |
415 | state = InIdentifierStartUnicodeEscape; | |
416 | else | |
417 | setDone(Bad); | |
418 | break; | |
419 | case InIdentifierPartUnicodeEscapeStart: | |
420 | if (current == 'u') | |
421 | state = InIdentifierPartUnicodeEscape; | |
422 | else | |
423 | setDone(Bad); | |
424 | break; | |
425 | case InIdentifierStartUnicodeEscape: | |
426 | if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) { | |
427 | setDone(Bad); | |
428 | break; | |
429 | } | |
430 | token = convertUnicode(current, next1, next2, next3).uc; | |
431 | shift(3); | |
432 | if (!isIdentStart(token)) { | |
433 | setDone(Bad); | |
434 | break; | |
435 | } | |
436 | record16(token); | |
437 | state = InIdentifier; | |
438 | break; | |
439 | case InIdentifierPartUnicodeEscape: | |
440 | if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) { | |
441 | setDone(Bad); | |
442 | break; | |
443 | } | |
444 | token = convertUnicode(current, next1, next2, next3).uc; | |
445 | shift(3); | |
446 | if (!isIdentPart(token)) { | |
447 | setDone(Bad); | |
448 | break; | |
449 | } | |
450 | record16(token); | |
451 | state = InIdentifier; | |
452 | break; | |
453 | default: | |
454 | ASSERT(!"Unhandled state in switch statement"); | |
455 | } | |
456 | ||
457 | // move on to the next character | |
458 | if (!done) | |
459 | shift(1); | |
460 | if (state != Start && state != InSingleLineComment) | |
461 | atLineStart = false; | |
462 | } | |
463 | ||
464 | // no identifiers allowed directly after numeric literal, e.g. "3in" is bad | |
465 | if ((state == Number || state == Octal || state == Hex) && isIdentStart(current)) | |
466 | state = Bad; | |
467 | ||
468 | // terminate string | |
469 | m_buffer8.append('\0'); | |
470 | ||
471 | #ifdef KJS_DEBUG_LEX | |
472 | fprintf(stderr, "line: %d ", lineNo()); | |
473 | fprintf(stderr, "yytext (%x): ", m_buffer8[0]); | |
474 | fprintf(stderr, "%s ", buffer8.data()); | |
475 | #endif | |
476 | ||
477 | double dval = 0; | |
478 | if (state == Number) { | |
479 | dval = kjs_strtod(m_buffer8.data(), 0L); | |
480 | } else if (state == Hex) { // scan hex numbers | |
481 | const char* p = m_buffer8.data() + 2; | |
482 | while (char c = *p++) { | |
483 | dval *= 16; | |
484 | dval += convertHex(c); | |
485 | } | |
486 | ||
487 | if (dval >= mantissaOverflowLowerBound) | |
488 | dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16); | |
489 | ||
490 | state = Number; | |
491 | } else if (state == Octal) { // scan octal number | |
492 | const char* p = m_buffer8.data() + 1; | |
493 | while (char c = *p++) { | |
494 | dval *= 8; | |
495 | dval += c - '0'; | |
496 | } | |
497 | ||
498 | if (dval >= mantissaOverflowLowerBound) | |
499 | dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8); | |
500 | ||
501 | state = Number; | |
502 | } | |
503 | ||
504 | #ifdef KJS_DEBUG_LEX | |
505 | switch (state) { | |
506 | case Eof: | |
507 | printf("(EOF)\n"); | |
508 | break; | |
509 | case Other: | |
510 | printf("(Other)\n"); | |
511 | break; | |
512 | case Identifier: | |
513 | printf("(Identifier)/(Keyword)\n"); | |
514 | break; | |
515 | case String: | |
516 | printf("(String)\n"); | |
517 | break; | |
518 | case Number: | |
519 | printf("(Number)\n"); | |
520 | break; | |
521 | default: | |
522 | printf("(unknown)"); | |
523 | } | |
524 | #endif | |
525 | ||
526 | if (state != Identifier && eatNextIdentifier) | |
527 | eatNextIdentifier = false; | |
528 | ||
529 | restrKeyword = false; | |
530 | delimited = false; | |
531 | kjsyylloc.first_line = yylineno; // ??? | |
532 | kjsyylloc.last_line = yylineno; | |
533 | ||
534 | switch (state) { | |
535 | case Eof: | |
536 | token = 0; | |
537 | break; | |
538 | case Other: | |
539 | if(token == '}' || token == ';') { | |
540 | delimited = true; | |
541 | } | |
542 | break; | |
543 | case IdentifierOrKeyword: | |
544 | if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) { | |
545 | case Identifier: | |
546 | // Lookup for keyword failed, means this is an identifier | |
547 | // Apply anonymous-function hack below (eat the identifier) | |
548 | if (eatNextIdentifier) { | |
549 | eatNextIdentifier = false; | |
550 | token = lex(); | |
551 | break; | |
552 | } | |
553 | kjsyylval.ident = makeIdentifier(m_buffer16); | |
554 | token = IDENT; | |
555 | break; | |
556 | } | |
557 | ||
558 | eatNextIdentifier = false; | |
559 | // Hack for "f = function somename() { ... }", too hard to get into the grammar | |
560 | if (token == FUNCTION && lastToken == '=' ) | |
561 | eatNextIdentifier = true; | |
562 | ||
563 | if (token == CONTINUE || token == BREAK || | |
564 | token == RETURN || token == THROW) | |
565 | restrKeyword = true; | |
566 | break; | |
567 | case String: | |
568 | kjsyylval.string = makeUString(m_buffer16); | |
569 | token = STRING; | |
570 | break; | |
571 | case Number: | |
572 | kjsyylval.doubleValue = dval; | |
573 | token = NUMBER; | |
574 | break; | |
575 | case Bad: | |
576 | #ifdef KJS_DEBUG_LEX | |
577 | fprintf(stderr, "yylex: ERROR.\n"); | |
578 | #endif | |
579 | error = true; | |
580 | return -1; | |
581 | default: | |
582 | ASSERT(!"unhandled numeration value in switch"); | |
583 | error = true; | |
584 | return -1; | |
585 | } | |
586 | lastToken = token; | |
587 | return token; | |
588 | } | |
589 | ||
590 | bool Lexer::isWhiteSpace() const | |
591 | { | |
592 | return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current); | |
593 | } | |
594 | ||
595 | bool Lexer::isLineTerminator() | |
596 | { | |
597 | bool cr = (current == '\r'); | |
598 | bool lf = (current == '\n'); | |
599 | if (cr) | |
600 | skipLF = true; | |
601 | else if (lf) | |
602 | skipCR = true; | |
603 | return cr || lf || current == 0x2028 || current == 0x2029; | |
604 | } | |
605 | ||
606 | bool Lexer::isIdentStart(int c) | |
607 | { | |
608 | return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)) | |
609 | || c == '$' || c == '_'; | |
610 | } | |
611 | ||
612 | bool Lexer::isIdentPart(int c) | |
613 | { | |
614 | return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other | |
615 | | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)) | |
616 | || c == '$' || c == '_'; | |
617 | } | |
618 | ||
// True when c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
623 | ||
624 | bool Lexer::isHexDigit(int c) | |
625 | { | |
626 | return (c >= '0' && c <= '9' || | |
627 | c >= 'a' && c <= 'f' || | |
628 | c >= 'A' && c <= 'F'); | |
629 | } | |
630 | ||
631 | bool Lexer::isOctalDigit(int c) | |
632 | { | |
633 | return (c >= '0' && c <= '7'); | |
634 | } | |
635 | ||
636 | int Lexer::matchPunctuator(int c1, int c2, int c3, int c4) | |
637 | { | |
638 | if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { | |
639 | shift(4); | |
640 | return URSHIFTEQUAL; | |
641 | } else if (c1 == '=' && c2 == '=' && c3 == '=') { | |
642 | shift(3); | |
643 | return STREQ; | |
644 | } else if (c1 == '!' && c2 == '=' && c3 == '=') { | |
645 | shift(3); | |
646 | return STRNEQ; | |
647 | } else if (c1 == '>' && c2 == '>' && c3 == '>') { | |
648 | shift(3); | |
649 | return URSHIFT; | |
650 | } else if (c1 == '<' && c2 == '<' && c3 == '=') { | |
651 | shift(3); | |
652 | return LSHIFTEQUAL; | |
653 | } else if (c1 == '>' && c2 == '>' && c3 == '=') { | |
654 | shift(3); | |
655 | return RSHIFTEQUAL; | |
656 | } else if (c1 == '<' && c2 == '=') { | |
657 | shift(2); | |
658 | return LE; | |
659 | } else if (c1 == '>' && c2 == '=') { | |
660 | shift(2); | |
661 | return GE; | |
662 | } else if (c1 == '!' && c2 == '=') { | |
663 | shift(2); | |
664 | return NE; | |
665 | } else if (c1 == '+' && c2 == '+') { | |
666 | shift(2); | |
667 | if (terminator) | |
668 | return AUTOPLUSPLUS; | |
669 | else | |
670 | return PLUSPLUS; | |
671 | } else if (c1 == '-' && c2 == '-') { | |
672 | shift(2); | |
673 | if (terminator) | |
674 | return AUTOMINUSMINUS; | |
675 | else | |
676 | return MINUSMINUS; | |
677 | } else if (c1 == '=' && c2 == '=') { | |
678 | shift(2); | |
679 | return EQEQ; | |
680 | } else if (c1 == '+' && c2 == '=') { | |
681 | shift(2); | |
682 | return PLUSEQUAL; | |
683 | } else if (c1 == '-' && c2 == '=') { | |
684 | shift(2); | |
685 | return MINUSEQUAL; | |
686 | } else if (c1 == '*' && c2 == '=') { | |
687 | shift(2); | |
688 | return MULTEQUAL; | |
689 | } else if (c1 == '/' && c2 == '=') { | |
690 | shift(2); | |
691 | return DIVEQUAL; | |
692 | } else if (c1 == '&' && c2 == '=') { | |
693 | shift(2); | |
694 | return ANDEQUAL; | |
695 | } else if (c1 == '^' && c2 == '=') { | |
696 | shift(2); | |
697 | return XOREQUAL; | |
698 | } else if (c1 == '%' && c2 == '=') { | |
699 | shift(2); | |
700 | return MODEQUAL; | |
701 | } else if (c1 == '|' && c2 == '=') { | |
702 | shift(2); | |
703 | return OREQUAL; | |
704 | } else if (c1 == '<' && c2 == '<') { | |
705 | shift(2); | |
706 | return LSHIFT; | |
707 | } else if (c1 == '>' && c2 == '>') { | |
708 | shift(2); | |
709 | return RSHIFT; | |
710 | } else if (c1 == '&' && c2 == '&') { | |
711 | shift(2); | |
712 | return AND; | |
713 | } else if (c1 == '|' && c2 == '|') { | |
714 | shift(2); | |
715 | return OR; | |
716 | } | |
717 | ||
718 | switch(c1) { | |
719 | case '=': | |
720 | case '>': | |
721 | case '<': | |
722 | case ',': | |
723 | case '!': | |
724 | case '~': | |
725 | case '?': | |
726 | case ':': | |
727 | case '.': | |
728 | case '+': | |
729 | case '-': | |
730 | case '*': | |
731 | case '/': | |
732 | case '&': | |
733 | case '|': | |
734 | case '^': | |
735 | case '%': | |
736 | case '(': | |
737 | case ')': | |
738 | case '{': | |
739 | case '}': | |
740 | case '[': | |
741 | case ']': | |
742 | case ';': | |
743 | shift(1); | |
744 | return static_cast<int>(c1); | |
745 | default: | |
746 | return -1; | |
747 | } | |
748 | } | |
749 | ||
750 | unsigned short Lexer::singleEscape(unsigned short c) | |
751 | { | |
752 | switch(c) { | |
753 | case 'b': | |
754 | return 0x08; | |
755 | case 't': | |
756 | return 0x09; | |
757 | case 'n': | |
758 | return 0x0A; | |
759 | case 'v': | |
760 | return 0x0B; | |
761 | case 'f': | |
762 | return 0x0C; | |
763 | case 'r': | |
764 | return 0x0D; | |
765 | case '"': | |
766 | return 0x22; | |
767 | case '\'': | |
768 | return 0x27; | |
769 | case '\\': | |
770 | return 0x5C; | |
771 | default: | |
772 | return c; | |
773 | } | |
774 | } | |
775 | ||
776 | unsigned short Lexer::convertOctal(int c1, int c2, int c3) | |
777 | { | |
778 | return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0'); | |
779 | } | |
780 | ||
781 | unsigned char Lexer::convertHex(int c) | |
782 | { | |
783 | if (c >= '0' && c <= '9') | |
784 | return static_cast<unsigned char>(c - '0'); | |
785 | if (c >= 'a' && c <= 'f') | |
786 | return static_cast<unsigned char>(c - 'a' + 10); | |
787 | return static_cast<unsigned char>(c - 'A' + 10); | |
788 | } | |
789 | ||
790 | unsigned char Lexer::convertHex(int c1, int c2) | |
791 | { | |
792 | return ((convertHex(c1) << 4) + convertHex(c2)); | |
793 | } | |
794 | ||
795 | KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) | |
796 | { | |
797 | return KJS::UChar((convertHex(c1) << 4) + convertHex(c2), | |
798 | (convertHex(c3) << 4) + convertHex(c4)); | |
799 | } | |
800 | ||
801 | void Lexer::record8(int c) | |
802 | { | |
803 | ASSERT(c >= 0); | |
804 | ASSERT(c <= 0xff); | |
805 | m_buffer8.append(static_cast<char>(c)); | |
806 | } | |
807 | ||
808 | void Lexer::record16(int c) | |
809 | { | |
810 | ASSERT(c >= 0); | |
811 | ASSERT(c <= USHRT_MAX); | |
812 | record16(UChar(static_cast<unsigned short>(c))); | |
813 | } | |
814 | ||
815 | void Lexer::record16(KJS::UChar c) | |
816 | { | |
817 | m_buffer16.append(c); | |
818 | } | |
819 | ||
820 | bool Lexer::scanRegExp() | |
821 | { | |
822 | m_buffer16.clear(); | |
823 | bool lastWasEscape = false; | |
824 | bool inBrackets = false; | |
825 | ||
826 | while (1) { | |
827 | if (isLineTerminator() || current == -1) | |
828 | return false; | |
829 | else if (current != '/' || lastWasEscape == true || inBrackets == true) | |
830 | { | |
831 | // keep track of '[' and ']' | |
832 | if (!lastWasEscape) { | |
833 | if ( current == '[' && !inBrackets ) | |
834 | inBrackets = true; | |
835 | if ( current == ']' && inBrackets ) | |
836 | inBrackets = false; | |
837 | } | |
838 | record16(current); | |
839 | lastWasEscape = | |
840 | !lastWasEscape && (current == '\\'); | |
841 | } else { // end of regexp | |
842 | m_pattern = UString(m_buffer16); | |
843 | m_buffer16.clear(); | |
844 | shift(1); | |
845 | break; | |
846 | } | |
847 | shift(1); | |
848 | } | |
849 | ||
850 | while (isIdentPart(current)) { | |
851 | record16(current); | |
852 | shift(1); | |
853 | } | |
854 | m_flags = UString(m_buffer16); | |
855 | ||
856 | return true; | |
857 | } | |
858 | ||
859 | void Lexer::clear() | |
860 | { | |
861 | deleteAllValues(m_strings); | |
862 | Vector<UString*> newStrings; | |
863 | newStrings.reserveCapacity(initialStringTableCapacity); | |
864 | m_strings.swap(newStrings); | |
865 | ||
866 | deleteAllValues(m_identifiers); | |
867 | Vector<KJS::Identifier*> newIdentifiers; | |
868 | newIdentifiers.reserveCapacity(initialStringTableCapacity); | |
869 | m_identifiers.swap(newIdentifiers); | |
870 | ||
871 | Vector<char> newBuffer8; | |
872 | newBuffer8.reserveCapacity(initialReadBufferCapacity); | |
873 | m_buffer8.swap(newBuffer8); | |
874 | ||
875 | Vector<UChar> newBuffer16; | |
876 | newBuffer16.reserveCapacity(initialReadBufferCapacity); | |
877 | m_buffer16.swap(newBuffer16); | |
878 | ||
879 | m_pattern = 0; | |
880 | m_flags = 0; | |
881 | } | |
882 | ||
883 | Identifier* Lexer::makeIdentifier(const Vector<KJS::UChar>& buffer) | |
884 | { | |
885 | KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size()); | |
886 | m_identifiers.append(identifier); | |
887 | return identifier; | |
888 | } | |
889 | ||
890 | UString* Lexer::makeUString(const Vector<KJS::UChar>& buffer) | |
891 | { | |
892 | UString* string = new UString(buffer); | |
893 | m_strings.append(string); | |
894 | return string; | |
895 | } | |
896 | ||
897 | } // namespace KJS |