// Source: git.saurik.com Git - apple/javascriptcore.git/blob - parser/Lexer.h
// Snapshot: JavaScriptCore-621.1.tar.gz
// Path: [apple/javascriptcore.git] / parser / Lexer.h
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22 #ifndef Lexer_h
23 #define Lexer_h
24
25 #include "Lookup.h"
26 #include "ParserArena.h"
27 #include "SourceCode.h"
28 #include <wtf/ASCIICType.h>
29 #include <wtf/SegmentedVector.h>
30 #include <wtf/Vector.h>
31 #include <wtf/unicode/Unicode.h>
32
33 namespace JSC {
34
35 class RegExp;
36
37 class Lexer : public Noncopyable {
38 public:
39 // Character manipulation functions.
40 static bool isWhiteSpace(int character);
41 static bool isLineTerminator(int character);
42 static unsigned char convertHex(int c1, int c2);
43 static UChar convertUnicode(int c1, int c2, int c3, int c4);
44
45 // Functions to set up parsing.
46 void setCode(const SourceCode&, ParserArena&);
47 void setIsReparsing() { m_isReparsing = true; }
48
49 // Functions for the parser itself.
50 int lex(void* lvalp, void* llocp);
51 int lineNumber() const { return m_lineNumber; }
52 bool prevTerminator() const { return m_terminator; }
53 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
54 bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
55 bool skipRegExp();
56
57 // Functions for use after parsing.
58 bool sawError() const { return m_error; }
59 void clear();
60
61 private:
62 friend class JSGlobalData;
63
64 Lexer(JSGlobalData*);
65 ~Lexer();
66
67 void shift1();
68 void shift2();
69 void shift3();
70 void shift4();
71 void shiftLineTerminator();
72
73 void record8(int);
74 void record16(int);
75 void record16(UChar);
76
77 void copyCodeWithoutBOMs();
78
79 int currentOffset() const;
80 const UChar* currentCharacter() const;
81
82 const Identifier* makeIdentifier(const UChar* characters, size_t length);
83
84 bool lastTokenWasRestrKeyword() const;
85
86 static const size_t initialReadBufferCapacity = 32;
87
88 int m_lineNumber;
89
90 Vector<char> m_buffer8;
91 Vector<UChar> m_buffer16;
92 bool m_terminator;
93 bool m_delimited; // encountered delimiter like "'" and "}" on last run
94 int m_lastToken;
95
96 const SourceCode* m_source;
97 const UChar* m_code;
98 const UChar* m_codeStart;
99 const UChar* m_codeEnd;
100 bool m_isReparsing;
101 bool m_atLineStart;
102 bool m_error;
103
104 // current and following unicode characters (int to allow for -1 for end-of-file marker)
105 int m_current;
106 int m_next1;
107 int m_next2;
108 int m_next3;
109
110 IdentifierArena* m_arena;
111
112 JSGlobalData* m_globalData;
113
114 const HashTable m_keywordTable;
115
116 Vector<UChar> m_codeWithoutBOMs;
117 };
118
119 inline bool Lexer::isWhiteSpace(int ch)
120 {
121 return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch);
122 }
123
124 inline bool Lexer::isLineTerminator(int ch)
125 {
126 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
127 }
128
129 inline unsigned char Lexer::convertHex(int c1, int c2)
130 {
131 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
132 }
133
134 inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
135 {
136 return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
137 }
138
139 // A bridge for yacc from the C world to the C++ world.
140 inline int jscyylex(void* lvalp, void* llocp, void* globalData)
141 {
142 return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
143 }
144
145 } // namespace JSC
146
147 #endif // Lexer_h