]> git.saurik.com Git - apple/javascriptcore.git/blob - kjs/lexer.h
69b68b8461f42d1a93d57cc665fd4b5864639db0
[apple/javascriptcore.git] / kjs / lexer.h
1 // -*- c-basic-offset: 2 -*-
2 /*
3 * This file is part of the KDE libraries
4 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
5 * Copyright (C) 2007 Apple Inc.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24 #ifndef Lexer_h
25 #define Lexer_h
26
27 #include "SourceCode.h"
28 #include "ustring.h"
29 #include <wtf/Vector.h>
30
31 namespace KJS {
32
33 class Identifier;
34 class RegExp;
35
36 class Lexer : Noncopyable {
37 public:
38 void setCode(const SourceCode&);
39 int lex();
40
41 int lineNo() const { return yylineno; }
42
43 bool prevTerminator() const { return terminator; }
44
45 enum State { Start,
46 IdentifierOrKeyword,
47 Identifier,
48 InIdentifierOrKeyword,
49 InIdentifier,
50 InIdentifierStartUnicodeEscapeStart,
51 InIdentifierStartUnicodeEscape,
52 InIdentifierPartUnicodeEscapeStart,
53 InIdentifierPartUnicodeEscape,
54 InSingleLineComment,
55 InMultiLineComment,
56 InNum,
57 InNum0,
58 InHex,
59 InOctal,
60 InDecimal,
61 InExponentIndicator,
62 InExponent,
63 Hex,
64 Octal,
65 Number,
66 String,
67 Eof,
68 InString,
69 InEscapeSequence,
70 InHexEscape,
71 InUnicodeEscape,
72 Other,
73 Bad };
74
75 bool scanRegExp();
76 const UString& pattern() const { return m_pattern; }
77 const UString& flags() const { return m_flags; }
78
79 static unsigned char convertHex(int);
80 static unsigned char convertHex(int c1, int c2);
81 static UChar convertUnicode(int c1, int c2, int c3, int c4);
82 static bool isIdentStart(int);
83 static bool isIdentPart(int);
84 static bool isHexDigit(int);
85
86 bool sawError() const { return error; }
87
88 void clear();
89 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine)
90 {
91 // The SourceCode constructor adds 1 to the line number to account for
92 // all of the callers in WebCore that use zero-based line numbers, so
93 // we regrettably subtract 1 here to deal with that.
94 return SourceCode(m_source->provider(), m_source->startOffset() + openBrace + 1, m_source->startOffset() + closeBrace, firstLine - 1);
95 }
96
97 private:
98 friend Lexer& lexer();
99 Lexer();
100
101 int yylineno;
102 bool done;
103 Vector<char> m_buffer8;
104 Vector<UChar> m_buffer16;
105 bool terminator;
106 bool restrKeyword;
107 // encountered delimiter like "'" and "}" on last run
108 bool delimited;
109 bool skipLF;
110 bool skipCR;
111 bool eatNextIdentifier;
112 int stackToken;
113 int lastToken;
114
115 State state;
116 void setDone(State);
117 unsigned int pos;
118 void shift(unsigned int p);
119 void nextLine();
120 int lookupKeyword(const char *);
121
122 bool isWhiteSpace() const;
123 bool isLineTerminator();
124 static bool isOctalDigit(int);
125
126 int matchPunctuator(int& charPos, int c1, int c2, int c3, int c4);
127 static unsigned short singleEscape(unsigned short);
128 static unsigned short convertOctal(int c1, int c2, int c3);
129
130 void record8(int);
131 void record16(int);
132 void record16(UChar);
133
134 KJS::Identifier* makeIdentifier(const Vector<UChar>& buffer);
135 UString* makeUString(const Vector<UChar>& buffer);
136
137 const SourceCode* m_source;
138 const UChar* code;
139 unsigned int length;
140 int yycolumn;
141 int atLineStart;
142 bool error;
143
144 // current and following unicode characters (int to allow for -1 for end-of-file marker)
145 int current, next1, next2, next3;
146
147 int m_currentOffset;
148 int m_nextOffset1;
149 int m_nextOffset2;
150 int m_nextOffset3;
151
152 Vector<UString*> m_strings;
153 Vector<KJS::Identifier*> m_identifiers;
154
155 UString m_pattern;
156 UString m_flags;
157 };
158
// Lexer's constructor is private and this function is declared a friend of
// the class, so callers obtain the lexer by calling it.
159 Lexer& lexer(); // Returns the singleton JavaScript lexer.
160
161 } // namespace KJS
162
163 #endif // Lexer_h