]> git.saurik.com Git - apple/javascriptcore.git/blob - kjs/lexer.cpp
18a117ff9236fa64bdb533dc37fe2ae948f01ed4
[apple/javascriptcore.git] / kjs / lexer.cpp
1 // -*- c-basic-offset: 2 -*-
2 /*
3 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
4 * Copyright (C) 2006, 2007, 2008 Apple Inc. All Rights Reserved.
5 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24 #include "config.h"
25 #include "lexer.h"
26
27 #include "dtoa.h"
28 #include "function.h"
29 #include "nodes.h"
30 #include "NodeInfo.h"
31 #include <ctype.h>
32 #include <limits.h>
33 #include <string.h>
34 #include <wtf/Assertions.h>
35 #include <wtf/unicode/Unicode.h>
36
37 using namespace WTF;
38 using namespace Unicode;
39
40 // we can't specify the namespace in yacc's C output, so do it here
41 using namespace KJS;
42
43 #ifndef KDE_USE_FINAL
44 #include "grammar.h"
45 #endif
46
47 #include "lookup.h"
48 #include "lexer.lut.h"
49
50 extern YYLTYPE kjsyylloc; // global bison variable holding token info
51
52 // a bridge for yacc from the C world to C++
53 int kjsyylex()
54 {
55 return lexer().lex();
56 }
57
58 namespace KJS {
59
60 static bool isDecimalDigit(int);
61
62 static const size_t initialReadBufferCapacity = 32;
63 static const size_t initialStringTableCapacity = 64;
64
65 Lexer& lexer()
66 {
67 ASSERT(JSLock::currentThreadIsHoldingLock());
68
69 // FIXME: We'd like to avoid calling new here, but we don't currently
70 // support tearing down the Lexer at app quit time, since that would involve
71 // tearing down its UString data members without holding the JSLock.
72 static Lexer* staticLexer = new Lexer;
73 return *staticLexer;
74 }
75
76 Lexer::Lexer()
77 : yylineno(1)
78 , restrKeyword(false)
79 , eatNextIdentifier(false)
80 , stackToken(-1)
81 , lastToken(-1)
82 , pos(0)
83 , code(0)
84 , length(0)
85 , atLineStart(true)
86 , current(0)
87 , next1(0)
88 , next2(0)
89 , next3(0)
90 , m_currentOffset(0)
91 , m_nextOffset1(0)
92 , m_nextOffset2(0)
93 , m_nextOffset3(0)
94 {
95 m_buffer8.reserveCapacity(initialReadBufferCapacity);
96 m_buffer16.reserveCapacity(initialReadBufferCapacity);
97 m_strings.reserveCapacity(initialStringTableCapacity);
98 m_identifiers.reserveCapacity(initialStringTableCapacity);
99 }
100
101 void Lexer::setCode(const SourceCode& source)
102 {
103 yylineno = source.firstLine();
104 restrKeyword = false;
105 delimited = false;
106 eatNextIdentifier = false;
107 stackToken = -1;
108 lastToken = -1;
109 pos = 0;
110 m_source = &source;
111 code = source.provider()->data() + source.startOffset();
112 length = source.length();
113 skipLF = false;
114 skipCR = false;
115 error = false;
116 atLineStart = true;
117
118 // read first characters
119 shift(4);
120 }
121
122 void Lexer::shift(unsigned p)
123 {
124 // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
125 // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
126
127 while (p--) {
128 current = next1;
129 next1 = next2;
130 next2 = next3;
131 m_currentOffset = m_nextOffset1;
132 m_nextOffset1 = m_nextOffset2;
133 m_nextOffset2 = m_nextOffset3;
134 do {
135 if (pos >= length) {
136 m_nextOffset3 = pos;
137 pos++;
138 next3 = -1;
139 break;
140 }
141 m_nextOffset3 = pos;
142 next3 = code[pos++].uc;
143 } while (next3 == 0xFEFF);
144 }
145 }
146
147 // called on each new line
148 void Lexer::nextLine()
149 {
150 yylineno++;
151 atLineStart = true;
152 }
153
154 void Lexer::setDone(State s)
155 {
156 state = s;
157 done = true;
158 }
159
160 int Lexer::lex()
161 {
162 int token = 0;
163 state = Start;
164 unsigned short stringType = 0; // either single or double quotes
165 m_buffer8.clear();
166 m_buffer16.clear();
167 done = false;
168 terminator = false;
169 skipLF = false;
170 skipCR = false;
171
172 // did we push a token on the stack previously ?
173 // (after an automatic semicolon insertion)
174 if (stackToken >= 0) {
175 setDone(Other);
176 token = stackToken;
177 stackToken = 0;
178 }
179
180 int startOffset = m_currentOffset;
181 while (!done) {
182 if (skipLF && current != '\n') // found \r but not \n afterwards
183 skipLF = false;
184 if (skipCR && current != '\r') // found \n but not \r afterwards
185 skipCR = false;
186 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
187 {
188 skipLF = false;
189 skipCR = false;
190 shift(1);
191 }
192 switch (state) {
193 case Start:
194 startOffset = m_currentOffset;
195 if (isWhiteSpace()) {
196 // do nothing
197 } else if (current == '/' && next1 == '/') {
198 shift(1);
199 state = InSingleLineComment;
200 } else if (current == '/' && next1 == '*') {
201 shift(1);
202 state = InMultiLineComment;
203 } else if (current == -1) {
204 if (!terminator && !delimited) {
205 // automatic semicolon insertion if program incomplete
206 token = ';';
207 stackToken = 0;
208 setDone(Other);
209 } else
210 setDone(Eof);
211 } else if (isLineTerminator()) {
212 nextLine();
213 terminator = true;
214 if (restrKeyword) {
215 token = ';';
216 setDone(Other);
217 }
218 } else if (current == '"' || current == '\'') {
219 state = InString;
220 stringType = static_cast<unsigned short>(current);
221 } else if (isIdentStart(current)) {
222 record16(current);
223 state = InIdentifierOrKeyword;
224 } else if (current == '\\') {
225 state = InIdentifierStartUnicodeEscapeStart;
226 } else if (current == '0') {
227 record8(current);
228 state = InNum0;
229 } else if (isDecimalDigit(current)) {
230 record8(current);
231 state = InNum;
232 } else if (current == '.' && isDecimalDigit(next1)) {
233 record8(current);
234 state = InDecimal;
235 // <!-- marks the beginning of a line comment (for www usage)
236 } else if (current == '<' && next1 == '!' &&
237 next2 == '-' && next3 == '-') {
238 shift(3);
239 state = InSingleLineComment;
240 // same for -->
241 } else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') {
242 shift(2);
243 state = InSingleLineComment;
244 } else {
245 token = matchPunctuator(kjsyylval.intValue, current, next1, next2, next3);
246 if (token != -1) {
247 setDone(Other);
248 } else {
249 // cerr << "encountered unknown character" << endl;
250 setDone(Bad);
251 }
252 }
253 break;
254 case InString:
255 if (current == stringType) {
256 shift(1);
257 setDone(String);
258 } else if (isLineTerminator() || current == -1) {
259 setDone(Bad);
260 } else if (current == '\\') {
261 state = InEscapeSequence;
262 } else {
263 record16(current);
264 }
265 break;
266 // Escape Sequences inside of strings
267 case InEscapeSequence:
268 if (isOctalDigit(current)) {
269 if (current >= '0' && current <= '3' &&
270 isOctalDigit(next1) && isOctalDigit(next2)) {
271 record16(convertOctal(current, next1, next2));
272 shift(2);
273 state = InString;
274 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
275 record16(convertOctal('0', current, next1));
276 shift(1);
277 state = InString;
278 } else if (isOctalDigit(current)) {
279 record16(convertOctal('0', '0', current));
280 state = InString;
281 } else {
282 setDone(Bad);
283 }
284 } else if (current == 'x')
285 state = InHexEscape;
286 else if (current == 'u')
287 state = InUnicodeEscape;
288 else if (isLineTerminator()) {
289 nextLine();
290 state = InString;
291 } else {
292 record16(singleEscape(static_cast<unsigned short>(current)));
293 state = InString;
294 }
295 break;
296 case InHexEscape:
297 if (isHexDigit(current) && isHexDigit(next1)) {
298 state = InString;
299 record16(convertHex(current, next1));
300 shift(1);
301 } else if (current == stringType) {
302 record16('x');
303 shift(1);
304 setDone(String);
305 } else {
306 record16('x');
307 record16(current);
308 state = InString;
309 }
310 break;
311 case InUnicodeEscape:
312 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
313 record16(convertUnicode(current, next1, next2, next3));
314 shift(3);
315 state = InString;
316 } else if (current == stringType) {
317 record16('u');
318 shift(1);
319 setDone(String);
320 } else {
321 setDone(Bad);
322 }
323 break;
324 case InSingleLineComment:
325 if (isLineTerminator()) {
326 nextLine();
327 terminator = true;
328 if (restrKeyword) {
329 token = ';';
330 setDone(Other);
331 } else
332 state = Start;
333 } else if (current == -1) {
334 setDone(Eof);
335 }
336 break;
337 case InMultiLineComment:
338 if (current == -1) {
339 setDone(Bad);
340 } else if (isLineTerminator()) {
341 nextLine();
342 } else if (current == '*' && next1 == '/') {
343 state = Start;
344 shift(1);
345 }
346 break;
347 case InIdentifierOrKeyword:
348 case InIdentifier:
349 if (isIdentPart(current))
350 record16(current);
351 else if (current == '\\')
352 state = InIdentifierPartUnicodeEscapeStart;
353 else
354 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
355 break;
356 case InNum0:
357 if (current == 'x' || current == 'X') {
358 record8(current);
359 state = InHex;
360 } else if (current == '.') {
361 record8(current);
362 state = InDecimal;
363 } else if (current == 'e' || current == 'E') {
364 record8(current);
365 state = InExponentIndicator;
366 } else if (isOctalDigit(current)) {
367 record8(current);
368 state = InOctal;
369 } else if (isDecimalDigit(current)) {
370 record8(current);
371 state = InDecimal;
372 } else {
373 setDone(Number);
374 }
375 break;
376 case InHex:
377 if (isHexDigit(current)) {
378 record8(current);
379 } else {
380 setDone(Hex);
381 }
382 break;
383 case InOctal:
384 if (isOctalDigit(current)) {
385 record8(current);
386 }
387 else if (isDecimalDigit(current)) {
388 record8(current);
389 state = InDecimal;
390 } else
391 setDone(Octal);
392 break;
393 case InNum:
394 if (isDecimalDigit(current)) {
395 record8(current);
396 } else if (current == '.') {
397 record8(current);
398 state = InDecimal;
399 } else if (current == 'e' || current == 'E') {
400 record8(current);
401 state = InExponentIndicator;
402 } else
403 setDone(Number);
404 break;
405 case InDecimal:
406 if (isDecimalDigit(current)) {
407 record8(current);
408 } else if (current == 'e' || current == 'E') {
409 record8(current);
410 state = InExponentIndicator;
411 } else
412 setDone(Number);
413 break;
414 case InExponentIndicator:
415 if (current == '+' || current == '-') {
416 record8(current);
417 } else if (isDecimalDigit(current)) {
418 record8(current);
419 state = InExponent;
420 } else
421 setDone(Bad);
422 break;
423 case InExponent:
424 if (isDecimalDigit(current)) {
425 record8(current);
426 } else
427 setDone(Number);
428 break;
429 case InIdentifierStartUnicodeEscapeStart:
430 if (current == 'u')
431 state = InIdentifierStartUnicodeEscape;
432 else
433 setDone(Bad);
434 break;
435 case InIdentifierPartUnicodeEscapeStart:
436 if (current == 'u')
437 state = InIdentifierPartUnicodeEscape;
438 else
439 setDone(Bad);
440 break;
441 case InIdentifierStartUnicodeEscape:
442 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
443 setDone(Bad);
444 break;
445 }
446 token = convertUnicode(current, next1, next2, next3).uc;
447 shift(3);
448 if (!isIdentStart(token)) {
449 setDone(Bad);
450 break;
451 }
452 record16(token);
453 state = InIdentifier;
454 break;
455 case InIdentifierPartUnicodeEscape:
456 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
457 setDone(Bad);
458 break;
459 }
460 token = convertUnicode(current, next1, next2, next3).uc;
461 shift(3);
462 if (!isIdentPart(token)) {
463 setDone(Bad);
464 break;
465 }
466 record16(token);
467 state = InIdentifier;
468 break;
469 default:
470 ASSERT(!"Unhandled state in switch statement");
471 }
472
473 // move on to the next character
474 if (!done)
475 shift(1);
476 if (state != Start && state != InSingleLineComment)
477 atLineStart = false;
478 }
479
480 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
481 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
482 state = Bad;
483
484 // terminate string
485 m_buffer8.append('\0');
486
487 #ifdef KJS_DEBUG_LEX
488 fprintf(stderr, "line: %d ", lineNo());
489 fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
490 fprintf(stderr, "%s ", buffer8.data());
491 #endif
492
493 double dval = 0;
494 if (state == Number) {
495 dval = kjs_strtod(m_buffer8.data(), 0L);
496 } else if (state == Hex) { // scan hex numbers
497 const char* p = m_buffer8.data() + 2;
498 while (char c = *p++) {
499 dval *= 16;
500 dval += convertHex(c);
501 }
502
503 if (dval >= mantissaOverflowLowerBound)
504 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
505
506 state = Number;
507 } else if (state == Octal) { // scan octal number
508 const char* p = m_buffer8.data() + 1;
509 while (char c = *p++) {
510 dval *= 8;
511 dval += c - '0';
512 }
513
514 if (dval >= mantissaOverflowLowerBound)
515 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
516
517 state = Number;
518 }
519
520 #ifdef KJS_DEBUG_LEX
521 switch (state) {
522 case Eof:
523 printf("(EOF)\n");
524 break;
525 case Other:
526 printf("(Other)\n");
527 break;
528 case Identifier:
529 printf("(Identifier)/(Keyword)\n");
530 break;
531 case String:
532 printf("(String)\n");
533 break;
534 case Number:
535 printf("(Number)\n");
536 break;
537 default:
538 printf("(unknown)");
539 }
540 #endif
541
542 if (state != Identifier && eatNextIdentifier)
543 eatNextIdentifier = false;
544
545 restrKeyword = false;
546 delimited = false;
547 kjsyylloc.first_line = yylineno; // ???
548 kjsyylloc.last_line = yylineno;
549
550 switch (state) {
551 case Eof:
552 token = 0;
553 break;
554 case Other:
555 if(token == '}' || token == ';') {
556 delimited = true;
557 }
558 break;
559 case IdentifierOrKeyword:
560 if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) {
561 case Identifier:
562 // Lookup for keyword failed, means this is an identifier
563 // Apply anonymous-function hack below (eat the identifier)
564 if (eatNextIdentifier) {
565 eatNextIdentifier = false;
566 token = lex();
567 break;
568 }
569 kjsyylval.ident = makeIdentifier(m_buffer16);
570 token = IDENT;
571 break;
572 }
573
574 eatNextIdentifier = false;
575 // Hack for "f = function somename() { ... }", too hard to get into the grammar
576 if (token == FUNCTION && lastToken == '=' )
577 eatNextIdentifier = true;
578
579 if (token == CONTINUE || token == BREAK ||
580 token == RETURN || token == THROW)
581 restrKeyword = true;
582 break;
583 case String:
584 kjsyylval.string = makeUString(m_buffer16);
585 token = STRING;
586 break;
587 case Number:
588 kjsyylval.doubleValue = dval;
589 token = NUMBER;
590 break;
591 case Bad:
592 #ifdef KJS_DEBUG_LEX
593 fprintf(stderr, "yylex: ERROR.\n");
594 #endif
595 error = true;
596 return -1;
597 default:
598 ASSERT(!"unhandled numeration value in switch");
599 error = true;
600 return -1;
601 }
602 lastToken = token;
603 return token;
604 }
605
606 bool Lexer::isWhiteSpace() const
607 {
608 return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current);
609 }
610
611 bool Lexer::isLineTerminator()
612 {
613 bool cr = (current == '\r');
614 bool lf = (current == '\n');
615 if (cr)
616 skipLF = true;
617 else if (lf)
618 skipCR = true;
619 return cr || lf || current == 0x2028 || current == 0x2029;
620 }
621
622 bool Lexer::isIdentStart(int c)
623 {
624 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other))
625 || c == '$' || c == '_';
626 }
627
628 bool Lexer::isIdentPart(int c)
629 {
630 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
631 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector))
632 || c == '$' || c == '_';
633 }
634
// True if c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
639
640 bool Lexer::isHexDigit(int c)
641 {
642 return (c >= '0' && c <= '9' ||
643 c >= 'a' && c <= 'f' ||
644 c >= 'A' && c <= 'F');
645 }
646
647 bool Lexer::isOctalDigit(int c)
648 {
649 return (c >= '0' && c <= '7');
650 }
651
652 int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
653 {
654 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
655 shift(4);
656 return URSHIFTEQUAL;
657 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
658 shift(3);
659 return STREQ;
660 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
661 shift(3);
662 return STRNEQ;
663 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
664 shift(3);
665 return URSHIFT;
666 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
667 shift(3);
668 return LSHIFTEQUAL;
669 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
670 shift(3);
671 return RSHIFTEQUAL;
672 } else if (c1 == '<' && c2 == '=') {
673 shift(2);
674 return LE;
675 } else if (c1 == '>' && c2 == '=') {
676 shift(2);
677 return GE;
678 } else if (c1 == '!' && c2 == '=') {
679 shift(2);
680 return NE;
681 } else if (c1 == '+' && c2 == '+') {
682 shift(2);
683 if (terminator)
684 return AUTOPLUSPLUS;
685 else
686 return PLUSPLUS;
687 } else if (c1 == '-' && c2 == '-') {
688 shift(2);
689 if (terminator)
690 return AUTOMINUSMINUS;
691 else
692 return MINUSMINUS;
693 } else if (c1 == '=' && c2 == '=') {
694 shift(2);
695 return EQEQ;
696 } else if (c1 == '+' && c2 == '=') {
697 shift(2);
698 return PLUSEQUAL;
699 } else if (c1 == '-' && c2 == '=') {
700 shift(2);
701 return MINUSEQUAL;
702 } else if (c1 == '*' && c2 == '=') {
703 shift(2);
704 return MULTEQUAL;
705 } else if (c1 == '/' && c2 == '=') {
706 shift(2);
707 return DIVEQUAL;
708 } else if (c1 == '&' && c2 == '=') {
709 shift(2);
710 return ANDEQUAL;
711 } else if (c1 == '^' && c2 == '=') {
712 shift(2);
713 return XOREQUAL;
714 } else if (c1 == '%' && c2 == '=') {
715 shift(2);
716 return MODEQUAL;
717 } else if (c1 == '|' && c2 == '=') {
718 shift(2);
719 return OREQUAL;
720 } else if (c1 == '<' && c2 == '<') {
721 shift(2);
722 return LSHIFT;
723 } else if (c1 == '>' && c2 == '>') {
724 shift(2);
725 return RSHIFT;
726 } else if (c1 == '&' && c2 == '&') {
727 shift(2);
728 return AND;
729 } else if (c1 == '|' && c2 == '|') {
730 shift(2);
731 return OR;
732 }
733
734 switch(c1) {
735 case '=':
736 case '>':
737 case '<':
738 case ',':
739 case '!':
740 case '~':
741 case '?':
742 case ':':
743 case '.':
744 case '+':
745 case '-':
746 case '*':
747 case '/':
748 case '&':
749 case '|':
750 case '^':
751 case '%':
752 case '(':
753 case ')':
754 case '[':
755 case ']':
756 case ';':
757 shift(1);
758 return static_cast<int>(c1);
759 case '{':
760 charPos = pos - 4;
761 shift(1);
762 return OPENBRACE;
763 case '}':
764 charPos = pos - 4;
765 shift(1);
766 return CLOSEBRACE;
767 default:
768 return -1;
769 }
770 }
771
772 unsigned short Lexer::singleEscape(unsigned short c)
773 {
774 switch(c) {
775 case 'b':
776 return 0x08;
777 case 't':
778 return 0x09;
779 case 'n':
780 return 0x0A;
781 case 'v':
782 return 0x0B;
783 case 'f':
784 return 0x0C;
785 case 'r':
786 return 0x0D;
787 case '"':
788 return 0x22;
789 case '\'':
790 return 0x27;
791 case '\\':
792 return 0x5C;
793 default:
794 return c;
795 }
796 }
797
798 unsigned short Lexer::convertOctal(int c1, int c2, int c3)
799 {
800 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
801 }
802
803 unsigned char Lexer::convertHex(int c)
804 {
805 if (c >= '0' && c <= '9')
806 return static_cast<unsigned char>(c - '0');
807 if (c >= 'a' && c <= 'f')
808 return static_cast<unsigned char>(c - 'a' + 10);
809 return static_cast<unsigned char>(c - 'A' + 10);
810 }
811
812 unsigned char Lexer::convertHex(int c1, int c2)
813 {
814 return ((convertHex(c1) << 4) + convertHex(c2));
815 }
816
817 KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
818 {
819 return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
820 (convertHex(c3) << 4) + convertHex(c4));
821 }
822
823 void Lexer::record8(int c)
824 {
825 ASSERT(c >= 0);
826 ASSERT(c <= 0xff);
827 m_buffer8.append(static_cast<char>(c));
828 }
829
830 void Lexer::record16(int c)
831 {
832 ASSERT(c >= 0);
833 ASSERT(c <= USHRT_MAX);
834 record16(UChar(static_cast<unsigned short>(c)));
835 }
836
837 void Lexer::record16(KJS::UChar c)
838 {
839 m_buffer16.append(c);
840 }
841
842 bool Lexer::scanRegExp()
843 {
844 m_buffer16.clear();
845 bool lastWasEscape = false;
846 bool inBrackets = false;
847
848 while (1) {
849 if (isLineTerminator() || current == -1)
850 return false;
851 else if (current != '/' || lastWasEscape == true || inBrackets == true)
852 {
853 // keep track of '[' and ']'
854 if (!lastWasEscape) {
855 if ( current == '[' && !inBrackets )
856 inBrackets = true;
857 if ( current == ']' && inBrackets )
858 inBrackets = false;
859 }
860 record16(current);
861 lastWasEscape =
862 !lastWasEscape && (current == '\\');
863 } else { // end of regexp
864 m_pattern = UString(m_buffer16);
865 m_buffer16.clear();
866 shift(1);
867 break;
868 }
869 shift(1);
870 }
871
872 while (isIdentPart(current)) {
873 record16(current);
874 shift(1);
875 }
876 m_flags = UString(m_buffer16);
877
878 return true;
879 }
880
881 void Lexer::clear()
882 {
883 deleteAllValues(m_strings);
884 Vector<UString*> newStrings;
885 newStrings.reserveCapacity(initialStringTableCapacity);
886 m_strings.swap(newStrings);
887
888 deleteAllValues(m_identifiers);
889 Vector<KJS::Identifier*> newIdentifiers;
890 newIdentifiers.reserveCapacity(initialStringTableCapacity);
891 m_identifiers.swap(newIdentifiers);
892
893 Vector<char> newBuffer8;
894 newBuffer8.reserveCapacity(initialReadBufferCapacity);
895 m_buffer8.swap(newBuffer8);
896
897 Vector<UChar> newBuffer16;
898 newBuffer16.reserveCapacity(initialReadBufferCapacity);
899 m_buffer16.swap(newBuffer16);
900
901 m_pattern = 0;
902 m_flags = 0;
903 }
904
905 Identifier* Lexer::makeIdentifier(const Vector<KJS::UChar>& buffer)
906 {
907 KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size());
908 m_identifiers.append(identifier);
909 return identifier;
910 }
911
912 UString* Lexer::makeUString(const Vector<KJS::UChar>& buffer)
913 {
914 UString* string = new UString(buffer);
915 m_strings.append(string);
916 return string;
917 }
918
919 } // namespace KJS