]> git.saurik.com Git - apple/javascriptcore.git/blame - parser/Lexer.h
JavaScriptCore-621.1.tar.gz
[apple/javascriptcore.git] / parser / Lexer.h
CommitLineData
9dae56ea
A
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
ba379fdc 3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
9dae56ea
A
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#ifndef Lexer_h
23#define Lexer_h
24
9dae56ea 25#include "Lookup.h"
f9bf01c6 26#include "ParserArena.h"
9dae56ea 27#include "SourceCode.h"
ba379fdc
A
28#include <wtf/ASCIICType.h>
29#include <wtf/SegmentedVector.h>
9dae56ea 30#include <wtf/Vector.h>
ba379fdc 31#include <wtf/unicode/Unicode.h>
9dae56ea
A
32
33namespace JSC {
34
35 class RegExp;
36
f9bf01c6 37 class Lexer : public Noncopyable {
9dae56ea 38 public:
ba379fdc
A
39 // Character manipulation functions.
40 static bool isWhiteSpace(int character);
41 static bool isLineTerminator(int character);
42 static unsigned char convertHex(int c1, int c2);
43 static UChar convertUnicode(int c1, int c2, int c3, int c4);
44
45 // Functions to set up parsing.
f9bf01c6 46 void setCode(const SourceCode&, ParserArena&);
9dae56ea 47 void setIsReparsing() { m_isReparsing = true; }
9dae56ea 48
ba379fdc
A
49 // Functions for the parser itself.
50 int lex(void* lvalp, void* llocp);
51 int lineNumber() const { return m_lineNumber; }
9dae56ea 52 bool prevTerminator() const { return m_terminator; }
ba379fdc 53 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
f9bf01c6
A
54 bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
55 bool skipRegExp();
9dae56ea 56
ba379fdc 57 // Functions for use after parsing.
9dae56ea 58 bool sawError() const { return m_error; }
9dae56ea 59 void clear();
9dae56ea
A
60
61 private:
62 friend class JSGlobalData;
ba379fdc 63
9dae56ea
A
64 Lexer(JSGlobalData*);
65 ~Lexer();
66
ba379fdc
A
67 void shift1();
68 void shift2();
69 void shift3();
70 void shift4();
71 void shiftLineTerminator();
9dae56ea
A
72
73 void record8(int);
74 void record16(int);
75 void record16(UChar);
76
ba379fdc
A
77 void copyCodeWithoutBOMs();
78
79 int currentOffset() const;
80 const UChar* currentCharacter() const;
81
f9bf01c6 82 const Identifier* makeIdentifier(const UChar* characters, size_t length);
ba379fdc
A
83
84 bool lastTokenWasRestrKeyword() const;
9dae56ea
A
85
86 static const size_t initialReadBufferCapacity = 32;
9dae56ea 87
ba379fdc 88 int m_lineNumber;
9dae56ea 89
9dae56ea
A
90 Vector<char> m_buffer8;
91 Vector<UChar> m_buffer16;
92 bool m_terminator;
9dae56ea 93 bool m_delimited; // encountered delimiter like "'" and "}" on last run
9dae56ea
A
94 int m_lastToken;
95
9dae56ea
A
96 const SourceCode* m_source;
97 const UChar* m_code;
ba379fdc
A
98 const UChar* m_codeStart;
99 const UChar* m_codeEnd;
9dae56ea 100 bool m_isReparsing;
ba379fdc 101 bool m_atLineStart;
9dae56ea
A
102 bool m_error;
103
104 // current and following unicode characters (int to allow for -1 for end-of-file marker)
105 int m_current;
106 int m_next1;
107 int m_next2;
108 int m_next3;
109
f9bf01c6 110 IdentifierArena* m_arena;
9dae56ea
A
111
112 JSGlobalData* m_globalData;
113
ba379fdc
A
114 const HashTable m_keywordTable;
115
116 Vector<UChar> m_codeWithoutBOMs;
9dae56ea
A
117 };
118
ba379fdc
A
119 inline bool Lexer::isWhiteSpace(int ch)
120 {
121 return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch);
122 }
123
124 inline bool Lexer::isLineTerminator(int ch)
125 {
126 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
127 }
128
129 inline unsigned char Lexer::convertHex(int c1, int c2)
130 {
131 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
132 }
133
134 inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
135 {
136 return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
137 }
138
f9bf01c6
A
139 // A bridge for yacc from the C world to the C++ world.
140 inline int jscyylex(void* lvalp, void* llocp, void* globalData)
141 {
142 return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
143 }
144
9dae56ea
A
145} // namespace JSC
146
147#endif // Lexer_h