]>
Commit | Line | Data |
---|---|---|
9dae56ea A |
1 | /* |
2 | * Copyright (C) 2008 Apple Inc. All rights reserved. | |
3 | * | |
4 | * Redistribution and use in source and binary forms, with or without | |
5 | * modification, are permitted provided that the following conditions | |
6 | * are met: | |
7 | * 1. Redistributions of source code must retain the above copyright | |
8 | * notice, this list of conditions and the following disclaimer. | |
9 | * 2. Redistributions in binary form must reproduce the above copyright | |
10 | * notice, this list of conditions and the following disclaimer in the | |
11 | * documentation and/or other materials provided with the distribution. | |
12 | * | |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | */ | |
25 | ||
26 | #ifndef Parser_h | |
27 | #define Parser_h | |
28 | ||
29 | #include <wtf/Platform.h> | |
30 | ||
31 | #if ENABLE(WREC) | |
32 | ||
33 | #include "Escapes.h" | |
34 | #include "Quantifier.h" | |
35 | #include "UString.h" | |
36 | #include "WRECGenerator.h" | |
37 | #include <wtf/ASCIICType.h> | |
38 | ||
39 | namespace JSC { namespace WREC { | |
40 | ||
41 | struct CharacterClass; | |
42 | ||
43 | class Parser { | |
44 | typedef Generator::JumpList JumpList; | |
45 | typedef Generator::ParenthesesType ParenthesesType; | |
46 | ||
47 | friend class SavedState; | |
48 | ||
49 | public: | |
50 | Parser(const UString& pattern, bool ignoreCase, bool multiline) | |
51 | : m_generator(*this) | |
52 | , m_data(pattern.data()) | |
53 | , m_size(pattern.size()) | |
54 | , m_ignoreCase(ignoreCase) | |
55 | , m_multiline(multiline) | |
56 | { | |
57 | reset(); | |
58 | } | |
59 | ||
60 | Generator& generator() { return m_generator; } | |
61 | ||
62 | bool ignoreCase() const { return m_ignoreCase; } | |
63 | bool multiline() const { return m_multiline; } | |
64 | ||
65 | void recordSubpattern() { ++m_numSubpatterns; } | |
66 | unsigned numSubpatterns() const { return m_numSubpatterns; } | |
67 | ||
68 | const char* error() const { return m_error; } | |
69 | const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; } | |
70 | ||
71 | void parsePattern(JumpList& failures) | |
72 | { | |
73 | reset(); | |
74 | ||
75 | parseDisjunction(failures); | |
76 | ||
77 | if (peek() != EndOfPattern) | |
78 | setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it. | |
79 | } | |
80 | ||
81 | void parseDisjunction(JumpList& failures); | |
82 | void parseAlternative(JumpList& failures); | |
83 | bool parseTerm(JumpList& failures); | |
84 | bool parseNonCharacterEscape(JumpList& failures, const Escape&); | |
85 | bool parseParentheses(JumpList& failures); | |
86 | bool parseCharacterClass(JumpList& failures); | |
87 | bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert); | |
88 | bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId); | |
89 | ||
90 | private: | |
91 | class SavedState { | |
92 | public: | |
93 | SavedState(Parser& parser) | |
94 | : m_parser(parser) | |
95 | , m_index(parser.m_index) | |
96 | { | |
97 | } | |
98 | ||
99 | void restore() | |
100 | { | |
101 | m_parser.m_index = m_index; | |
102 | } | |
103 | ||
104 | private: | |
105 | Parser& m_parser; | |
106 | unsigned m_index; | |
107 | }; | |
108 | ||
109 | void reset() | |
110 | { | |
111 | m_index = 0; | |
112 | m_numSubpatterns = 0; | |
113 | m_error = 0; | |
114 | } | |
115 | ||
116 | void setError(const char* error) | |
117 | { | |
118 | if (m_error) | |
119 | return; | |
120 | m_error = error; | |
121 | } | |
122 | ||
123 | int peek() | |
124 | { | |
125 | if (m_index >= m_size) | |
126 | return EndOfPattern; | |
127 | return m_data[m_index]; | |
128 | } | |
129 | ||
130 | int consume() | |
131 | { | |
132 | if (m_index >= m_size) | |
133 | return EndOfPattern; | |
134 | return m_data[m_index++]; | |
135 | } | |
136 | ||
137 | bool peekIsDigit() | |
138 | { | |
139 | return WTF::isASCIIDigit(peek()); | |
140 | } | |
141 | ||
142 | unsigned peekDigit() | |
143 | { | |
144 | ASSERT(peekIsDigit()); | |
145 | return peek() - '0'; | |
146 | } | |
147 | ||
148 | unsigned consumeDigit() | |
149 | { | |
150 | ASSERT(peekIsDigit()); | |
151 | return consume() - '0'; | |
152 | } | |
153 | ||
154 | unsigned consumeNumber() | |
155 | { | |
156 | int n = consumeDigit(); | |
157 | while (peekIsDigit()) { | |
158 | n *= 10; | |
159 | n += consumeDigit(); | |
160 | } | |
161 | return n; | |
162 | } | |
163 | ||
164 | int consumeHex(int count) | |
165 | { | |
166 | int n = 0; | |
167 | while (count--) { | |
168 | if (!WTF::isASCIIHexDigit(peek())) | |
169 | return -1; | |
170 | n = (n << 4) | WTF::toASCIIHexValue(consume()); | |
171 | } | |
172 | return n; | |
173 | } | |
174 | ||
175 | unsigned consumeOctal() | |
176 | { | |
177 | unsigned n = 0; | |
178 | while (n < 32 && WTF::isASCIIOctalDigit(peek())) | |
179 | n = n * 8 + consumeDigit(); | |
180 | return n; | |
181 | } | |
182 | ||
183 | ALWAYS_INLINE Quantifier consumeGreedyQuantifier(); | |
184 | Quantifier consumeQuantifier(); | |
185 | Escape consumeEscape(bool inCharacterClass); | |
186 | ParenthesesType consumeParenthesesType(); | |
187 | ||
188 | static const int EndOfPattern = -1; | |
189 | ||
190 | // Error messages. | |
191 | static const char* QuantifierOutOfOrder; | |
192 | static const char* QuantifierWithoutAtom; | |
193 | static const char* ParenthesesUnmatched; | |
194 | static const char* ParenthesesTypeInvalid; | |
195 | static const char* ParenthesesNotSupported; | |
196 | static const char* CharacterClassUnmatched; | |
197 | static const char* CharacterClassOutOfOrder; | |
198 | static const char* EscapeUnterminated; | |
199 | ||
200 | Generator m_generator; | |
201 | const UChar* m_data; | |
202 | unsigned m_size; | |
203 | unsigned m_index; | |
204 | bool m_ignoreCase; | |
205 | bool m_multiline; | |
206 | unsigned m_numSubpatterns; | |
207 | const char* m_error; | |
208 | }; | |
209 | ||
210 | } } // namespace JSC::WREC | |
211 | ||
212 | #endif // ENABLE(WREC) | |
213 | ||
214 | #endif // Parser_h |