]> git.saurik.com Git - apple/javascriptcore.git/blame - kjs/lexer.cpp
JavaScriptCore-466.1.tar.gz
[apple/javascriptcore.git] / kjs / lexer.cpp
CommitLineData
b37bf2e1
A
1// -*- c-basic-offset: 2 -*-
2/*
3 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
4 * Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved.
5 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "lexer.h"
26
27#include "dtoa.h"
28#include "function.h"
29#include "nodes.h"
30#include "NodeInfo.h"
31#include <ctype.h>
32#include <limits.h>
33#include <string.h>
34#include <wtf/Assertions.h>
35#include <wtf/unicode/Unicode.h>
36
37using namespace WTF;
38using namespace Unicode;
39
40// we can't specify the namespace in yacc's C output, so do it here
41using namespace KJS;
42
43#ifndef KDE_USE_FINAL
44#include "grammar.h"
45#endif
46
47#include "lookup.h"
48#include "lexer.lut.h"
49
50extern YYLTYPE kjsyylloc; // global bison variable holding token info
51
52// a bridge for yacc from the C world to C++
53int kjsyylex()
54{
55 return lexer().lex();
56}
57
58namespace KJS {
59
60static bool isDecimalDigit(int);
61
// Capacity reserved up front for the 8-bit and 16-bit token read buffers.
static const size_t initialReadBufferCapacity = 32;
// Capacity reserved up front for the tables that own the strings and
// identifiers handed to the parser.
static const size_t initialStringTableCapacity = 64;
64
65Lexer& lexer()
66{
67 ASSERT(JSLock::currentThreadIsHoldingLock());
68
69 // FIXME: We'd like to avoid calling new here, but we don't currently
70 // support tearing down the Lexer at app quit time, since that would involve
71 // tearing down its UString data members without holding the JSLock.
72 static Lexer* staticLexer = new Lexer;
73 return *staticLexer;
74}
75
76Lexer::Lexer()
77 : yylineno(1)
78 , restrKeyword(false)
79 , eatNextIdentifier(false)
80 , stackToken(-1)
81 , lastToken(-1)
82 , pos(0)
83 , code(0)
84 , length(0)
85 , atLineStart(true)
86 , current(0)
87 , next1(0)
88 , next2(0)
89 , next3(0)
90{
91 m_buffer8.reserveCapacity(initialReadBufferCapacity);
92 m_buffer16.reserveCapacity(initialReadBufferCapacity);
93 m_strings.reserveCapacity(initialStringTableCapacity);
94 m_identifiers.reserveCapacity(initialStringTableCapacity);
95}
96
97void Lexer::setCode(int startingLineNumber, const KJS::UChar *c, unsigned int len)
98{
99 yylineno = 1 + startingLineNumber;
100 restrKeyword = false;
101 delimited = false;
102 eatNextIdentifier = false;
103 stackToken = -1;
104 lastToken = -1;
105 pos = 0;
106 code = c;
107 length = len;
108 skipLF = false;
109 skipCR = false;
110 error = false;
111 atLineStart = true;
112
113 // read first characters
114 current = (length > 0) ? code[0].uc : -1;
115 next1 = (length > 1) ? code[1].uc : -1;
116 next2 = (length > 2) ? code[2].uc : -1;
117 next3 = (length > 3) ? code[3].uc : -1;
118}
119
120void Lexer::shift(unsigned int p)
121{
122 // Here would be a good place to strip Cf characters, but that has caused compatibility problems:
123 // <http://bugs.webkit.org/show_bug.cgi?id=10183>.
124 while (p--) {
125 pos++;
126 current = next1;
127 next1 = next2;
128 next2 = next3;
129 next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
130 }
131}
132
133// called on each new line
134void Lexer::nextLine()
135{
136 yylineno++;
137 atLineStart = true;
138}
139
140void Lexer::setDone(State s)
141{
142 state = s;
143 done = true;
144}
145
146int Lexer::lex()
147{
148 int token = 0;
149 state = Start;
150 unsigned short stringType = 0; // either single or double quotes
151 m_buffer8.clear();
152 m_buffer16.clear();
153 done = false;
154 terminator = false;
155 skipLF = false;
156 skipCR = false;
157
158 // did we push a token on the stack previously ?
159 // (after an automatic semicolon insertion)
160 if (stackToken >= 0) {
161 setDone(Other);
162 token = stackToken;
163 stackToken = 0;
164 }
165
166 while (!done) {
167 if (skipLF && current != '\n') // found \r but not \n afterwards
168 skipLF = false;
169 if (skipCR && current != '\r') // found \n but not \r afterwards
170 skipCR = false;
171 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
172 {
173 skipLF = false;
174 skipCR = false;
175 shift(1);
176 }
177 switch (state) {
178 case Start:
179 if (isWhiteSpace()) {
180 // do nothing
181 } else if (current == '/' && next1 == '/') {
182 shift(1);
183 state = InSingleLineComment;
184 } else if (current == '/' && next1 == '*') {
185 shift(1);
186 state = InMultiLineComment;
187 } else if (current == -1) {
188 if (!terminator && !delimited) {
189 // automatic semicolon insertion if program incomplete
190 token = ';';
191 stackToken = 0;
192 setDone(Other);
193 } else
194 setDone(Eof);
195 } else if (isLineTerminator()) {
196 nextLine();
197 terminator = true;
198 if (restrKeyword) {
199 token = ';';
200 setDone(Other);
201 }
202 } else if (current == '"' || current == '\'') {
203 state = InString;
204 stringType = static_cast<unsigned short>(current);
205 } else if (isIdentStart(current)) {
206 record16(current);
207 state = InIdentifierOrKeyword;
208 } else if (current == '\\') {
209 state = InIdentifierStartUnicodeEscapeStart;
210 } else if (current == '0') {
211 record8(current);
212 state = InNum0;
213 } else if (isDecimalDigit(current)) {
214 record8(current);
215 state = InNum;
216 } else if (current == '.' && isDecimalDigit(next1)) {
217 record8(current);
218 state = InDecimal;
219 // <!-- marks the beginning of a line comment (for www usage)
220 } else if (current == '<' && next1 == '!' &&
221 next2 == '-' && next3 == '-') {
222 shift(3);
223 state = InSingleLineComment;
224 // same for -->
225 } else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') {
226 shift(2);
227 state = InSingleLineComment;
228 } else {
229 token = matchPunctuator(current, next1, next2, next3);
230 if (token != -1) {
231 setDone(Other);
232 } else {
233 // cerr << "encountered unknown character" << endl;
234 setDone(Bad);
235 }
236 }
237 break;
238 case InString:
239 if (current == stringType) {
240 shift(1);
241 setDone(String);
242 } else if (isLineTerminator() || current == -1) {
243 setDone(Bad);
244 } else if (current == '\\') {
245 state = InEscapeSequence;
246 } else {
247 record16(current);
248 }
249 break;
250 // Escape Sequences inside of strings
251 case InEscapeSequence:
252 if (isOctalDigit(current)) {
253 if (current >= '0' && current <= '3' &&
254 isOctalDigit(next1) && isOctalDigit(next2)) {
255 record16(convertOctal(current, next1, next2));
256 shift(2);
257 state = InString;
258 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
259 record16(convertOctal('0', current, next1));
260 shift(1);
261 state = InString;
262 } else if (isOctalDigit(current)) {
263 record16(convertOctal('0', '0', current));
264 state = InString;
265 } else {
266 setDone(Bad);
267 }
268 } else if (current == 'x')
269 state = InHexEscape;
270 else if (current == 'u')
271 state = InUnicodeEscape;
272 else if (isLineTerminator()) {
273 nextLine();
274 state = InString;
275 } else {
276 record16(singleEscape(static_cast<unsigned short>(current)));
277 state = InString;
278 }
279 break;
280 case InHexEscape:
281 if (isHexDigit(current) && isHexDigit(next1)) {
282 state = InString;
283 record16(convertHex(current, next1));
284 shift(1);
285 } else if (current == stringType) {
286 record16('x');
287 shift(1);
288 setDone(String);
289 } else {
290 record16('x');
291 record16(current);
292 state = InString;
293 }
294 break;
295 case InUnicodeEscape:
296 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
297 record16(convertUnicode(current, next1, next2, next3));
298 shift(3);
299 state = InString;
300 } else if (current == stringType) {
301 record16('u');
302 shift(1);
303 setDone(String);
304 } else {
305 setDone(Bad);
306 }
307 break;
308 case InSingleLineComment:
309 if (isLineTerminator()) {
310 nextLine();
311 terminator = true;
312 if (restrKeyword) {
313 token = ';';
314 setDone(Other);
315 } else
316 state = Start;
317 } else if (current == -1) {
318 setDone(Eof);
319 }
320 break;
321 case InMultiLineComment:
322 if (current == -1) {
323 setDone(Bad);
324 } else if (isLineTerminator()) {
325 nextLine();
326 } else if (current == '*' && next1 == '/') {
327 state = Start;
328 shift(1);
329 }
330 break;
331 case InIdentifierOrKeyword:
332 case InIdentifier:
333 if (isIdentPart(current))
334 record16(current);
335 else if (current == '\\')
336 state = InIdentifierPartUnicodeEscapeStart;
337 else
338 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
339 break;
340 case InNum0:
341 if (current == 'x' || current == 'X') {
342 record8(current);
343 state = InHex;
344 } else if (current == '.') {
345 record8(current);
346 state = InDecimal;
347 } else if (current == 'e' || current == 'E') {
348 record8(current);
349 state = InExponentIndicator;
350 } else if (isOctalDigit(current)) {
351 record8(current);
352 state = InOctal;
353 } else if (isDecimalDigit(current)) {
354 record8(current);
355 state = InDecimal;
356 } else {
357 setDone(Number);
358 }
359 break;
360 case InHex:
361 if (isHexDigit(current)) {
362 record8(current);
363 } else {
364 setDone(Hex);
365 }
366 break;
367 case InOctal:
368 if (isOctalDigit(current)) {
369 record8(current);
370 }
371 else if (isDecimalDigit(current)) {
372 record8(current);
373 state = InDecimal;
374 } else
375 setDone(Octal);
376 break;
377 case InNum:
378 if (isDecimalDigit(current)) {
379 record8(current);
380 } else if (current == '.') {
381 record8(current);
382 state = InDecimal;
383 } else if (current == 'e' || current == 'E') {
384 record8(current);
385 state = InExponentIndicator;
386 } else
387 setDone(Number);
388 break;
389 case InDecimal:
390 if (isDecimalDigit(current)) {
391 record8(current);
392 } else if (current == 'e' || current == 'E') {
393 record8(current);
394 state = InExponentIndicator;
395 } else
396 setDone(Number);
397 break;
398 case InExponentIndicator:
399 if (current == '+' || current == '-') {
400 record8(current);
401 } else if (isDecimalDigit(current)) {
402 record8(current);
403 state = InExponent;
404 } else
405 setDone(Bad);
406 break;
407 case InExponent:
408 if (isDecimalDigit(current)) {
409 record8(current);
410 } else
411 setDone(Number);
412 break;
413 case InIdentifierStartUnicodeEscapeStart:
414 if (current == 'u')
415 state = InIdentifierStartUnicodeEscape;
416 else
417 setDone(Bad);
418 break;
419 case InIdentifierPartUnicodeEscapeStart:
420 if (current == 'u')
421 state = InIdentifierPartUnicodeEscape;
422 else
423 setDone(Bad);
424 break;
425 case InIdentifierStartUnicodeEscape:
426 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
427 setDone(Bad);
428 break;
429 }
430 token = convertUnicode(current, next1, next2, next3).uc;
431 shift(3);
432 if (!isIdentStart(token)) {
433 setDone(Bad);
434 break;
435 }
436 record16(token);
437 state = InIdentifier;
438 break;
439 case InIdentifierPartUnicodeEscape:
440 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
441 setDone(Bad);
442 break;
443 }
444 token = convertUnicode(current, next1, next2, next3).uc;
445 shift(3);
446 if (!isIdentPart(token)) {
447 setDone(Bad);
448 break;
449 }
450 record16(token);
451 state = InIdentifier;
452 break;
453 default:
454 ASSERT(!"Unhandled state in switch statement");
455 }
456
457 // move on to the next character
458 if (!done)
459 shift(1);
460 if (state != Start && state != InSingleLineComment)
461 atLineStart = false;
462 }
463
464 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
465 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
466 state = Bad;
467
468 // terminate string
469 m_buffer8.append('\0');
470
471#ifdef KJS_DEBUG_LEX
472 fprintf(stderr, "line: %d ", lineNo());
473 fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
474 fprintf(stderr, "%s ", buffer8.data());
475#endif
476
477 double dval = 0;
478 if (state == Number) {
479 dval = kjs_strtod(m_buffer8.data(), 0L);
480 } else if (state == Hex) { // scan hex numbers
481 const char* p = m_buffer8.data() + 2;
482 while (char c = *p++) {
483 dval *= 16;
484 dval += convertHex(c);
485 }
486
487 if (dval >= mantissaOverflowLowerBound)
488 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
489
490 state = Number;
491 } else if (state == Octal) { // scan octal number
492 const char* p = m_buffer8.data() + 1;
493 while (char c = *p++) {
494 dval *= 8;
495 dval += c - '0';
496 }
497
498 if (dval >= mantissaOverflowLowerBound)
499 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
500
501 state = Number;
502 }
503
504#ifdef KJS_DEBUG_LEX
505 switch (state) {
506 case Eof:
507 printf("(EOF)\n");
508 break;
509 case Other:
510 printf("(Other)\n");
511 break;
512 case Identifier:
513 printf("(Identifier)/(Keyword)\n");
514 break;
515 case String:
516 printf("(String)\n");
517 break;
518 case Number:
519 printf("(Number)\n");
520 break;
521 default:
522 printf("(unknown)");
523 }
524#endif
525
526 if (state != Identifier && eatNextIdentifier)
527 eatNextIdentifier = false;
528
529 restrKeyword = false;
530 delimited = false;
531 kjsyylloc.first_line = yylineno; // ???
532 kjsyylloc.last_line = yylineno;
533
534 switch (state) {
535 case Eof:
536 token = 0;
537 break;
538 case Other:
539 if(token == '}' || token == ';') {
540 delimited = true;
541 }
542 break;
543 case IdentifierOrKeyword:
544 if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) {
545 case Identifier:
546 // Lookup for keyword failed, means this is an identifier
547 // Apply anonymous-function hack below (eat the identifier)
548 if (eatNextIdentifier) {
549 eatNextIdentifier = false;
550 token = lex();
551 break;
552 }
553 kjsyylval.ident = makeIdentifier(m_buffer16);
554 token = IDENT;
555 break;
556 }
557
558 eatNextIdentifier = false;
559 // Hack for "f = function somename() { ... }", too hard to get into the grammar
560 if (token == FUNCTION && lastToken == '=' )
561 eatNextIdentifier = true;
562
563 if (token == CONTINUE || token == BREAK ||
564 token == RETURN || token == THROW)
565 restrKeyword = true;
566 break;
567 case String:
568 kjsyylval.string = makeUString(m_buffer16);
569 token = STRING;
570 break;
571 case Number:
572 kjsyylval.doubleValue = dval;
573 token = NUMBER;
574 break;
575 case Bad:
576#ifdef KJS_DEBUG_LEX
577 fprintf(stderr, "yylex: ERROR.\n");
578#endif
579 error = true;
580 return -1;
581 default:
582 ASSERT(!"unhandled numeration value in switch");
583 error = true;
584 return -1;
585 }
586 lastToken = token;
587 return token;
588}
589
590bool Lexer::isWhiteSpace() const
591{
592 return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current);
593}
594
595bool Lexer::isLineTerminator()
596{
597 bool cr = (current == '\r');
598 bool lf = (current == '\n');
599 if (cr)
600 skipLF = true;
601 else if (lf)
602 skipCR = true;
603 return cr || lf || current == 0x2028 || current == 0x2029;
604}
605
606bool Lexer::isIdentStart(int c)
607{
608 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other))
609 || c == '$' || c == '_';
610}
611
612bool Lexer::isIdentPart(int c)
613{
614 return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
615 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector))
616 || c == '$' || c == '_';
617}
618
// True when c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
623
624bool Lexer::isHexDigit(int c)
625{
626 return (c >= '0' && c <= '9' ||
627 c >= 'a' && c <= 'f' ||
628 c >= 'A' && c <= 'F');
629}
630
631bool Lexer::isOctalDigit(int c)
632{
633 return (c >= '0' && c <= '7');
634}
635
636int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
637{
638 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
639 shift(4);
640 return URSHIFTEQUAL;
641 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
642 shift(3);
643 return STREQ;
644 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
645 shift(3);
646 return STRNEQ;
647 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
648 shift(3);
649 return URSHIFT;
650 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
651 shift(3);
652 return LSHIFTEQUAL;
653 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
654 shift(3);
655 return RSHIFTEQUAL;
656 } else if (c1 == '<' && c2 == '=') {
657 shift(2);
658 return LE;
659 } else if (c1 == '>' && c2 == '=') {
660 shift(2);
661 return GE;
662 } else if (c1 == '!' && c2 == '=') {
663 shift(2);
664 return NE;
665 } else if (c1 == '+' && c2 == '+') {
666 shift(2);
667 if (terminator)
668 return AUTOPLUSPLUS;
669 else
670 return PLUSPLUS;
671 } else if (c1 == '-' && c2 == '-') {
672 shift(2);
673 if (terminator)
674 return AUTOMINUSMINUS;
675 else
676 return MINUSMINUS;
677 } else if (c1 == '=' && c2 == '=') {
678 shift(2);
679 return EQEQ;
680 } else if (c1 == '+' && c2 == '=') {
681 shift(2);
682 return PLUSEQUAL;
683 } else if (c1 == '-' && c2 == '=') {
684 shift(2);
685 return MINUSEQUAL;
686 } else if (c1 == '*' && c2 == '=') {
687 shift(2);
688 return MULTEQUAL;
689 } else if (c1 == '/' && c2 == '=') {
690 shift(2);
691 return DIVEQUAL;
692 } else if (c1 == '&' && c2 == '=') {
693 shift(2);
694 return ANDEQUAL;
695 } else if (c1 == '^' && c2 == '=') {
696 shift(2);
697 return XOREQUAL;
698 } else if (c1 == '%' && c2 == '=') {
699 shift(2);
700 return MODEQUAL;
701 } else if (c1 == '|' && c2 == '=') {
702 shift(2);
703 return OREQUAL;
704 } else if (c1 == '<' && c2 == '<') {
705 shift(2);
706 return LSHIFT;
707 } else if (c1 == '>' && c2 == '>') {
708 shift(2);
709 return RSHIFT;
710 } else if (c1 == '&' && c2 == '&') {
711 shift(2);
712 return AND;
713 } else if (c1 == '|' && c2 == '|') {
714 shift(2);
715 return OR;
716 }
717
718 switch(c1) {
719 case '=':
720 case '>':
721 case '<':
722 case ',':
723 case '!':
724 case '~':
725 case '?':
726 case ':':
727 case '.':
728 case '+':
729 case '-':
730 case '*':
731 case '/':
732 case '&':
733 case '|':
734 case '^':
735 case '%':
736 case '(':
737 case ')':
738 case '{':
739 case '}':
740 case '[':
741 case ']':
742 case ';':
743 shift(1);
744 return static_cast<int>(c1);
745 default:
746 return -1;
747 }
748}
749
750unsigned short Lexer::singleEscape(unsigned short c)
751{
752 switch(c) {
753 case 'b':
754 return 0x08;
755 case 't':
756 return 0x09;
757 case 'n':
758 return 0x0A;
759 case 'v':
760 return 0x0B;
761 case 'f':
762 return 0x0C;
763 case 'r':
764 return 0x0D;
765 case '"':
766 return 0x22;
767 case '\'':
768 return 0x27;
769 case '\\':
770 return 0x5C;
771 default:
772 return c;
773 }
774}
775
776unsigned short Lexer::convertOctal(int c1, int c2, int c3)
777{
778 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
779}
780
781unsigned char Lexer::convertHex(int c)
782{
783 if (c >= '0' && c <= '9')
784 return static_cast<unsigned char>(c - '0');
785 if (c >= 'a' && c <= 'f')
786 return static_cast<unsigned char>(c - 'a' + 10);
787 return static_cast<unsigned char>(c - 'A' + 10);
788}
789
790unsigned char Lexer::convertHex(int c1, int c2)
791{
792 return ((convertHex(c1) << 4) + convertHex(c2));
793}
794
795KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
796{
797 return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
798 (convertHex(c3) << 4) + convertHex(c4));
799}
800
801void Lexer::record8(int c)
802{
803 ASSERT(c >= 0);
804 ASSERT(c <= 0xff);
805 m_buffer8.append(static_cast<char>(c));
806}
807
808void Lexer::record16(int c)
809{
810 ASSERT(c >= 0);
811 ASSERT(c <= USHRT_MAX);
812 record16(UChar(static_cast<unsigned short>(c)));
813}
814
815void Lexer::record16(KJS::UChar c)
816{
817 m_buffer16.append(c);
818}
819
820bool Lexer::scanRegExp()
821{
822 m_buffer16.clear();
823 bool lastWasEscape = false;
824 bool inBrackets = false;
825
826 while (1) {
827 if (isLineTerminator() || current == -1)
828 return false;
829 else if (current != '/' || lastWasEscape == true || inBrackets == true)
830 {
831 // keep track of '[' and ']'
832 if (!lastWasEscape) {
833 if ( current == '[' && !inBrackets )
834 inBrackets = true;
835 if ( current == ']' && inBrackets )
836 inBrackets = false;
837 }
838 record16(current);
839 lastWasEscape =
840 !lastWasEscape && (current == '\\');
841 } else { // end of regexp
842 m_pattern = UString(m_buffer16);
843 m_buffer16.clear();
844 shift(1);
845 break;
846 }
847 shift(1);
848 }
849
850 while (isIdentPart(current)) {
851 record16(current);
852 shift(1);
853 }
854 m_flags = UString(m_buffer16);
855
856 return true;
857}
858
859void Lexer::clear()
860{
861 deleteAllValues(m_strings);
862 Vector<UString*> newStrings;
863 newStrings.reserveCapacity(initialStringTableCapacity);
864 m_strings.swap(newStrings);
865
866 deleteAllValues(m_identifiers);
867 Vector<KJS::Identifier*> newIdentifiers;
868 newIdentifiers.reserveCapacity(initialStringTableCapacity);
869 m_identifiers.swap(newIdentifiers);
870
871 Vector<char> newBuffer8;
872 newBuffer8.reserveCapacity(initialReadBufferCapacity);
873 m_buffer8.swap(newBuffer8);
874
875 Vector<UChar> newBuffer16;
876 newBuffer16.reserveCapacity(initialReadBufferCapacity);
877 m_buffer16.swap(newBuffer16);
878
879 m_pattern = 0;
880 m_flags = 0;
881}
882
883Identifier* Lexer::makeIdentifier(const Vector<KJS::UChar>& buffer)
884{
885 KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size());
886 m_identifiers.append(identifier);
887 return identifier;
888}
889
890UString* Lexer::makeUString(const Vector<KJS::UChar>& buffer)
891{
892 UString* string = new UString(buffer);
893 m_strings.append(string);
894 return string;
895}
896
897} // namespace KJS