]> git.saurik.com Git - apple/javascriptcore.git/blob - parser/Lexer.h
63d38927e0a93a27a319aa795bea017963d3b79e
[apple/javascriptcore.git] / parser / Lexer.h
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22 #ifndef Lexer_h
23 #define Lexer_h
24
25 #include "Identifier.h"
26 #include "Lookup.h"
27 #include "SegmentedVector.h"
28 #include "SourceCode.h"
29 #include <wtf/Vector.h>
30
31 namespace JSC {
32
33 class RegExp;
34
35 class Lexer : Noncopyable {
36 public:
37 void setCode(const SourceCode&);
38 void setIsReparsing() { m_isReparsing = true; }
39 int lex(void* lvalp, void* llocp);
40
41 int lineNo() const { return yylineno; }
42
43 bool prevTerminator() const { return m_terminator; }
44
45 enum State {
46 Start,
47 IdentifierOrKeyword,
48 Identifier,
49 InIdentifierOrKeyword,
50 InIdentifier,
51 InIdentifierStartUnicodeEscapeStart,
52 InIdentifierStartUnicodeEscape,
53 InIdentifierPartUnicodeEscapeStart,
54 InIdentifierPartUnicodeEscape,
55 InSingleLineComment,
56 InMultiLineComment,
57 InNum,
58 InNum0,
59 InHex,
60 InOctal,
61 InDecimal,
62 InExponentIndicator,
63 InExponent,
64 Hex,
65 Octal,
66 Number,
67 String,
68 Eof,
69 InString,
70 InEscapeSequence,
71 InHexEscape,
72 InUnicodeEscape,
73 Other,
74 Bad
75 };
76
77 bool scanRegExp();
78 const UString& pattern() const { return m_pattern; }
79 const UString& flags() const { return m_flags; }
80
81 static unsigned char convertHex(int);
82 static unsigned char convertHex(int c1, int c2);
83 static UChar convertUnicode(int c1, int c2, int c3, int c4);
84 static bool isIdentStart(int);
85 static bool isIdentPart(int);
86 static bool isHexDigit(int);
87
88 bool sawError() const { return m_error; }
89
90 void clear();
91 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine) { return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); }
92
93 private:
94 friend class JSGlobalData;
95 Lexer(JSGlobalData*);
96 ~Lexer();
97
98 void setDone(State);
99 void shift(unsigned int p);
100 void nextLine();
101 int lookupKeyword(const char *);
102
103 bool isWhiteSpace() const;
104 bool isLineTerminator();
105 static bool isOctalDigit(int);
106
107 ALWAYS_INLINE int matchPunctuator(int& charPos, int c1, int c2, int c3, int c4);
108 static unsigned short singleEscape(unsigned short);
109 static unsigned short convertOctal(int c1, int c2, int c3);
110
111 void record8(int);
112 void record16(int);
113 void record16(UChar);
114
115 JSC::Identifier* makeIdentifier(const Vector<UChar>& buffer)
116 {
117 m_identifiers.append(JSC::Identifier(m_globalData, buffer.data(), buffer.size()));
118 return &m_identifiers.last();
119 }
120
121 static const size_t initialReadBufferCapacity = 32;
122 static const size_t initialIdentifierTableCapacity = 64;
123
124 int yylineno;
125 int yycolumn;
126
127 bool m_done;
128 Vector<char> m_buffer8;
129 Vector<UChar> m_buffer16;
130 bool m_terminator;
131 bool m_restrKeyword;
132 bool m_delimited; // encountered delimiter like "'" and "}" on last run
133 bool m_skipLF;
134 bool m_skipCR;
135 bool m_eatNextIdentifier;
136 int m_stackToken;
137 int m_lastToken;
138
139 State m_state;
140 unsigned int m_position;
141 const SourceCode* m_source;
142 const UChar* m_code;
143 unsigned int m_length;
144 bool m_isReparsing;
145 int m_atLineStart;
146 bool m_error;
147
148 // current and following unicode characters (int to allow for -1 for end-of-file marker)
149 int m_current;
150 int m_next1;
151 int m_next2;
152 int m_next3;
153
154 int m_currentOffset;
155 int m_nextOffset1;
156 int m_nextOffset2;
157 int m_nextOffset3;
158
159 SegmentedVector<JSC::Identifier, initialIdentifierTableCapacity> m_identifiers;
160
161 JSGlobalData* m_globalData;
162
163 UString m_pattern;
164 UString m_flags;
165
166 const HashTable m_mainTable;
167 };
168
169 } // namespace JSC
170
171 #endif // Lexer_h