]> git.saurik.com Git - apple/javascriptcore.git/blob - wrec/WRECParser.h
a3e151b7e0c651d8b9d27490181289576a33a558
[apple/javascriptcore.git] / wrec / WRECParser.h
1 /*
2 * Copyright (C) 2008 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef Parser_h
27 #define Parser_h
28
29 #include <wtf/Platform.h>
30
31 #if ENABLE(WREC)
32
33 #include "Escapes.h"
34 #include "Quantifier.h"
35 #include "UString.h"
36 #include "WRECGenerator.h"
37 #include <wtf/ASCIICType.h>
38
39 namespace JSC { namespace WREC {
40
41 struct CharacterClass;
42
43 class Parser {
44 typedef Generator::JumpList JumpList;
45 typedef Generator::ParenthesesType ParenthesesType;
46
47 friend class SavedState;
48
49 public:
50 Parser(const UString& pattern, bool ignoreCase, bool multiline)
51 : m_generator(*this)
52 , m_data(pattern.data())
53 , m_size(pattern.size())
54 , m_ignoreCase(ignoreCase)
55 , m_multiline(multiline)
56 {
57 reset();
58 }
59
60 Generator& generator() { return m_generator; }
61
62 bool ignoreCase() const { return m_ignoreCase; }
63 bool multiline() const { return m_multiline; }
64
65 void recordSubpattern() { ++m_numSubpatterns; }
66 unsigned numSubpatterns() const { return m_numSubpatterns; }
67
68 const char* error() const { return m_error; }
69 const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; }
70
71 void parsePattern(JumpList& failures)
72 {
73 reset();
74
75 parseDisjunction(failures);
76
77 if (peek() != EndOfPattern)
78 setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it.
79 }
80
81 void parseDisjunction(JumpList& failures);
82 void parseAlternative(JumpList& failures);
83 bool parseTerm(JumpList& failures);
84 bool parseNonCharacterEscape(JumpList& failures, const Escape&);
85 bool parseParentheses(JumpList& failures);
86 bool parseCharacterClass(JumpList& failures);
87 bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert);
88 bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId);
89
90 private:
91 class SavedState {
92 public:
93 SavedState(Parser& parser)
94 : m_parser(parser)
95 , m_index(parser.m_index)
96 {
97 }
98
99 void restore()
100 {
101 m_parser.m_index = m_index;
102 }
103
104 private:
105 Parser& m_parser;
106 unsigned m_index;
107 };
108
109 void reset()
110 {
111 m_index = 0;
112 m_numSubpatterns = 0;
113 m_error = 0;
114 }
115
116 void setError(const char* error)
117 {
118 if (m_error)
119 return;
120 m_error = error;
121 }
122
123 int peek()
124 {
125 if (m_index >= m_size)
126 return EndOfPattern;
127 return m_data[m_index];
128 }
129
130 int consume()
131 {
132 if (m_index >= m_size)
133 return EndOfPattern;
134 return m_data[m_index++];
135 }
136
137 bool peekIsDigit()
138 {
139 return WTF::isASCIIDigit(peek());
140 }
141
142 unsigned peekDigit()
143 {
144 ASSERT(peekIsDigit());
145 return peek() - '0';
146 }
147
148 unsigned consumeDigit()
149 {
150 ASSERT(peekIsDigit());
151 return consume() - '0';
152 }
153
154 unsigned consumeNumber()
155 {
156 int n = consumeDigit();
157 while (peekIsDigit()) {
158 n *= 10;
159 n += consumeDigit();
160 }
161 return n;
162 }
163
164 int consumeHex(int count)
165 {
166 int n = 0;
167 while (count--) {
168 if (!WTF::isASCIIHexDigit(peek()))
169 return -1;
170 n = (n << 4) | WTF::toASCIIHexValue(consume());
171 }
172 return n;
173 }
174
175 unsigned consumeOctal()
176 {
177 unsigned n = 0;
178 while (n < 32 && WTF::isASCIIOctalDigit(peek()))
179 n = n * 8 + consumeDigit();
180 return n;
181 }
182
183 ALWAYS_INLINE Quantifier consumeGreedyQuantifier();
184 Quantifier consumeQuantifier();
185 Escape consumeEscape(bool inCharacterClass);
186 ParenthesesType consumeParenthesesType();
187
188 static const int EndOfPattern = -1;
189
190 // Error messages.
191 static const char* QuantifierOutOfOrder;
192 static const char* QuantifierWithoutAtom;
193 static const char* ParenthesesUnmatched;
194 static const char* ParenthesesTypeInvalid;
195 static const char* ParenthesesNotSupported;
196 static const char* CharacterClassUnmatched;
197 static const char* CharacterClassOutOfOrder;
198 static const char* EscapeUnterminated;
199
200 Generator m_generator;
201 const UChar* m_data;
202 unsigned m_size;
203 unsigned m_index;
204 bool m_ignoreCase;
205 bool m_multiline;
206 unsigned m_numSubpatterns;
207 const char* m_error;
208 };
209
210 } } // namespace JSC::WREC
211
212 #endif // ENABLE(WREC)
213
214 #endif // Parser_h