]> git.saurik.com Git - apple/javascriptcore.git/blob - kjs/lexer.h
JavaScriptCore-466.1.tar.gz
[apple/javascriptcore.git] / kjs / lexer.h
1 // -*- c-basic-offset: 2 -*-
2 /*
3 * This file is part of the KDE libraries
4 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
5 * Copyright (C) 2007 Apple Inc.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24 #ifndef Lexer_h
25 #define Lexer_h
26
27 #include "ustring.h"
28 #include <wtf/Vector.h>
29
30 namespace KJS {
31
32 class Identifier;
33 class RegExp;
34
35 class Lexer : Noncopyable {
36 public:
37 void setCode(int startingLineNumber, const UChar *c, unsigned int len);
38 int lex();
39
40 int lineNo() const { return yylineno; }
41
42 bool prevTerminator() const { return terminator; }
43
44 enum State { Start,
45 IdentifierOrKeyword,
46 Identifier,
47 InIdentifierOrKeyword,
48 InIdentifier,
49 InIdentifierStartUnicodeEscapeStart,
50 InIdentifierStartUnicodeEscape,
51 InIdentifierPartUnicodeEscapeStart,
52 InIdentifierPartUnicodeEscape,
53 InSingleLineComment,
54 InMultiLineComment,
55 InNum,
56 InNum0,
57 InHex,
58 InOctal,
59 InDecimal,
60 InExponentIndicator,
61 InExponent,
62 Hex,
63 Octal,
64 Number,
65 String,
66 Eof,
67 InString,
68 InEscapeSequence,
69 InHexEscape,
70 InUnicodeEscape,
71 Other,
72 Bad };
73
74 bool scanRegExp();
75 const UString& pattern() const { return m_pattern; }
76 const UString& flags() const { return m_flags; }
77
78 static unsigned char convertHex(int);
79 static unsigned char convertHex(int c1, int c2);
80 static UChar convertUnicode(int c1, int c2, int c3, int c4);
81 static bool isIdentStart(int);
82 static bool isIdentPart(int);
83 static bool isHexDigit(int);
84
85 bool sawError() const { return error; }
86
87 void clear();
88
89 private:
90 friend Lexer& lexer();
91 Lexer();
92
93 int yylineno;
94 bool done;
95 Vector<char> m_buffer8;
96 Vector<UChar> m_buffer16;
97 bool terminator;
98 bool restrKeyword;
99 // encountered delimiter like "'" and "}" on last run
100 bool delimited;
101 bool skipLF;
102 bool skipCR;
103 bool eatNextIdentifier;
104 int stackToken;
105 int lastToken;
106
107 State state;
108 void setDone(State);
109 unsigned int pos;
110 void shift(unsigned int p);
111 void nextLine();
112 int lookupKeyword(const char *);
113
114 bool isWhiteSpace() const;
115 bool isLineTerminator();
116 static bool isOctalDigit(int);
117
118 int matchPunctuator(int c1, int c2, int c3, int c4);
119 static unsigned short singleEscape(unsigned short);
120 static unsigned short convertOctal(int c1, int c2, int c3);
121
122 void record8(int);
123 void record16(int);
124 void record16(UChar);
125
126 KJS::Identifier* makeIdentifier(const Vector<UChar>& buffer);
127 UString* makeUString(const Vector<UChar>& buffer);
128
129 const UChar* code;
130 unsigned int length;
131 int yycolumn;
132 int atLineStart;
133 bool error;
134
135 // current and following unicode characters (int to allow for -1 for end-of-file marker)
136 int current, next1, next2, next3;
137
138 Vector<UString*> m_strings;
139 Vector<KJS::Identifier*> m_identifiers;
140
141 UString m_pattern;
142 UString m_flags;
143 };
144
145 Lexer& lexer(); // Returns the singletone JavaScript lexer.
146
147 } // namespace KJS
148
149 #endif // Lexer_h